@@ -1,22 +1,105 @@
#' Create a start event column.
#'
#' Create a new column which marks the beginning of each time series in a tibble.
#' The effect is the same as \code{start_event} from \code{itsadug}, but it works
#' with tibbles.
#' Create a new column which marks the beginning of each series in a tibble (for example, time series).
#'
#' @param tibble A tibble arranged according to the time series.
#' @param event_col A string with the name of the column that defines the time series.
#' @param tibble A tibble arranged according to the series.
#' @param series_col The name of the column that defines the group of series, as an unquoted expression.
#'
#' @return A tibble with an extra column that marks the begninning of the series.
#'
#' @examples
#' library(dplyr)
#' series_tbl <- tibble(
#' time_series = rep(1:5, 3),
#' group = rep(c("a", "b", "c"), each = 5)
#' ) %>%
#' create_start_event(group)
#'
#' @export
create_event_start <- function(tibble, event_col) {
dplyr::mutate(
tibble,
start.event = ifelse(
as.character(tibble[[event_col]]) == dplyr::lag(as.character(tibble[[event_col]]), default = FALSE),
FALSE,
TRUE
)
create_start_event <- function(tibble, series_col) {
series_col_q <- dplyr::enquo(series_col)
series_col_name <- rlang::quo_name(series_col_q)

dplyr::mutate(
tibble,
start_event = ifelse(
as.character(tibble[[series_col_name]]) == dplyr::lag(as.character(tibble[[series_col_name]]), default = FALSE),
FALSE,
TRUE
)
)
}

#' Get all predictions from a GAM model.
#'
#' It returns a tibble with the predictions from all the terms in a \link[mgcv]{gam} or \link[mgcv]{bam} model.
#'
#' @param model A \code{gam} or \code{bam} model object.
#' @param exclude_terms Terms to be excluded from the prediction. Term names should be given as they appear in the model summary (for example, \code{"s(x0,x1)"}).
#' @param length_out An integer indicating how many values along the numeric predictors to use for predicting the outcome term (the default is \code{50}).
#' @param values User supplied values for numeric terms as a named list.
#'
#' @return A tibble with predictions from a a \link[mgcv]{gam} or \link[mgcv]{bam} model.
#'
#' @examples
#' library(mgcv)
#' set.seed(10)
#' data <- gamSim(4)
#' model <- gam(y ~ fac + s(x2) + s(x2, by = fac) + s(x0), data = data)
#'
#' # get predictions
#' p <- predict_gam(model)
#'
#' # get predictions excluding x0 (the coefficient of x0 is set to 0)
#' p_2 <- predict_gam(model, exclude_terms = "s(x0)")
#'
#' # get predictions with chosen values of x0
#'
#' p_3 <- predict_gam(model, values = list(x0 = c(0.250599, 0.503313, 0.756028)))
#'
#' @export
predict_gam <- function(model, exclude_terms = NULL, length_out = 50, values = NULL) {
n_terms <- length(model[["var.summary"]])

term_list <- list()

for (term in 1:n_terms) {
term_summary <- model[["var.summary"]][[term]]
term_name <- names(model[["var.summary"]])[term]

if (term_name %in% names(values)) {
new_term <- values[[which(names(values) == term_name)]]
} else {
if (is.numeric(term_summary)) {

min_value <- min(term_summary)
max_value <- max(term_summary)

new_term <- seq(min_value, max_value, length.out = length_out)

} else if (is.factor(term_summary)) {

new_term <- levels(term_summary)

} else {
stop("The terms are not numeric or factor.\n")
}
}

term_list <- append(term_list, list(new_term))

names(term_list)[term] <- term_name
}

new_data <- expand.grid(term_list)

predicted <- as.data.frame(mgcv::predict.gam(model, new_data, exclude = exclude_terms, se.fit = TRUE))

predictions <- cbind(new_data, predicted)

predictions <- tibble::as_tibble(predictions)

return(predictions)
}

#' Get predictions from a GAM model.
@@ -200,6 +283,12 @@ get_gam_predictions <- function(model, time_series, series_length = 25, conditio
#' @param facet_terms An unquoted formula with the terms used for faceting.
#' @param conditions A list of quosures with \link[rlang]{quos} specifying the levels to plot from the model terms not among \code{time_series}, \code{comparison}, or \code{facet_terms}.
#'
#' @examples
#' # see vignette
#' \dontrun{
#' vignette("plot-smooths", package = "tidymv")
#' }
#'
#' @importFrom magrittr "%>%"
#' @importFrom rlang ":="
#' @importFrom rlang "quo_name"
@@ -264,141 +353,17 @@ plot_smooths <- function(model, time_series, comparison = NULL, facet_terms = NU
return(smooths_plot)
}


#' Plot GAM estimate smooths and difference curve.
#'
#' It plots comparison smooths from the estimates of a \link[mgcv]{gam} or \link[mgcv]{bam}
#' and the difference curve. Significant differences are marked with red areas.
#'
#' @param model A \code{gam} or \code{bam} model object.
#' @param view The predictor determining the time series.
#' @param comparison The levels for the comparison as a named list.
#' @param conditions The values to use for other predictors as a named list.
#' @param rm_re Whether to remove random effects (the default is \code{FALSE}).
#' @param bw Whether to plot in black and white (the default is \code{FALSE}).
#' @param ylim Limits of the y-axis of the smooths panel.
#'
#' @importFrom magrittr "%>%"
#' @export
plot_gamsd <- function(model, view, comparison, conditions = NULL, rm_re = FALSE, bw = FALSE, ylim = NULL) {
.Deprecated("plot_smooth", msg = "'plot_gamsd' is deprecated and will be removed, use 'plot_smooths()' and 'plot_difference()'.\n")

diff.df <- itsadug::plot_diff(
model,
view = view,
comp = comparison,
cond = conditions,
rm.ranef = rm_re,
plot = FALSE,
print.summary = FALSE)

main.condition <- list(
seq(min(diff.df[[view]]), max(diff.df[[view]]), length = 100)
)
names(main.condition) <- view

condition = c(main.condition, conditions)

smooth.df <- itsadug::get_predictions(
model,
cond = c(comparison, condition),
rm.ranef = rm_re
)

sig.diff <- itsadug::find_difference(
diff.df$est, diff.df$CI, diff.df[[view]]
)

ymin.sm <- smooth.df$fit - smooth.df$CI
ymax.sm <- smooth.df$fit + smooth.df$CI

fit <- "fit"
comp.column <- names(comparison)

annotate <- ggplot2::annotate(
"rect",
xmin = sig.diff$start, xmax = sig.diff$end,
ymin = -Inf, ymax = Inf, alpha = 0.1,
fill = ifelse(bw == FALSE, "red", "black")
)

is.sig <- is.null(sig.diff) == FALSE

smooth.plot <- smooth.df %>%
ggplot2::ggplot(
ggplot2::aes_string(view, fit)
) +
{if (is.sig) {annotate}} +
ggplot2::geom_ribbon(
ggplot2::aes(ymin = ymin.sm,
ymax = ymax.sm,
group = smooth.df[[comp.column]]
),
alpha = 0.2,
colour = "NA"
) +
{if (bw == FALSE) {
ggplot2::aes(fill = smooth.df[[comp.column]])}
} +
{if (bw == FALSE) {
ggplot2::geom_line(
ggplot2::aes(colour = smooth.df[[comp.column]])
)
}
else {
ggplot2::geom_line(
ggplot2::aes(linetype = smooth.df[[comp.column]])
)
}
} +
ggplot2::theme_bw() +
ggplot2::theme(
axis.title.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank(),
legend.position = "top"
) +
ggplot2::xlim(min(diff.df[[view]]), max(diff.df[[view]])) +
{if (bw == FALSE) {
ggplot2::scale_colour_discrete(name = names(comparison))
}
else {
ggplot2::scale_linetype_discrete(name = names(comparison))
}
} +
{if (bw == FALSE) {ggplot2::scale_fill_discrete(name = names(comparison))}} +
{if (!is.null(ylim)) {ggplot2::ylim(ylim[1], ylim[2])}}

ymin.di <- diff.df$est - diff.df$CI
ymax.di <- diff.df$est + diff.df$CI

est <- "est"

diff.plot <- diff.df %>%
ggplot2::ggplot(
ggplot2::aes_string(view, est)
) +
ggplot2::geom_hline(yintercept = 0, size = 0.3) +
{if (is.sig) {annotate}} +
ggplot2::geom_ribbon(
ggplot2::aes(
ymin = ymin.di,
ymax = ymax.di
),
alpha = 0.2
) +
ggplot2::geom_line() +
ggplot2::xlim(min(diff.df[[view]]), max(diff.df[[view]])) +
ggplot2::theme_bw()

cowplot::plot_grid(smooth.plot, diff.plot, align = "v", nrow = 2, rel_heights = c(2/3, 1/3))
}

#' Plot difference smooth from a GAM.
#'
#' It plots the difference smooth from a \link[mgcv]{gam} or \link[mgcv]{bam}.
#' Significant differences are marked with red areas.
#'
#' @examples
#' # see vignette
#' \dontrun{
#' vignette("plot-smooths", package = "tidymv")
#' }
#'
#' @inheritParams get_gam_predictions
#' @param time_series An unquoted expression indicating the model term that defines the time series.
#' @param difference A named list with the levels to compute the difference of.
@@ -455,3 +420,106 @@ plot_difference <- function(model, time_series, difference, conditions = NULL, s

return(diff_plot)
}

#' Smooths and confidence intervals.
#'
#' It provides a `geom` for plotting GAM smooths with confidence intervals from the output of \link[tidymv]{predict_gam}. It inherits the following `aes` from a call to `ggplot`:
#' \itemize{
#' \item The term defining the x-axis.
#' \item The fitted values (the \code{fit} column in the tibble returned by \link[tidymv]{predict_gam}).
#' \item The standard error of the fit (the \code{se.fit} column in the tibble returned by \link[tidymv]{predict_gam}).
#' }
#'
#' @param group The optional grouping factor.
#' @param ci_z The z-value for calculating the CIs (the default is \code{1.96} for 95 percent CI).
#' @param ci_alpha Transparency value of CIs (the default is \code{0.1}).
#' @param data The data to be displayed in this layer. If \code{NULL}, it is inherited.
#' @param ... Arguments passed to \code{geom_path()}.
#'
#' @examples
#' library(mgcv)
#' library(ggplot2)
#' set.seed(10)
#' data <- gamSim(4)
#' model <- gam(y ~ fac + s(x2) + s(x2, by = fac), data = data)
#'
#' # get predictions
#' p <- predict_gam(model)
#'
#' # plot smooths and confidence intervals
#' ggplot(p, aes(x2, fit)) + geom_smooth_ci(fac)
#'
#' @export
geom_smooth_ci <- function(group = NULL, ci_z = 1.96, ci_alpha = 0.1, data = NULL, ...) {
group_q <- rlang::enquo(group)

if (rlang::quo_is_null(group_q)) {
group_q <- NULL
}

if (is.null(group_q)) {
list(
ggplot2::geom_ribbon(
ggplot2::aes(
ymin = fit - (se.fit * ci_z),
ymax = fit + (se.fit * ci_z)
),
alpha = ci_alpha,
data = data
),
ggplot2::geom_path(
data = data,
...
)
)
} else {
list(
ggplot2::geom_ribbon(
ggplot2::aes(
ymin = fit - (se.fit * ci_z),
ymax = fit + (se.fit * ci_z),
group = !!group_q
),
alpha = ci_alpha,
data = data
),
ggplot2::geom_path(
ggplot2::aes(
colour = as.factor(!!group_q), linetype = as.factor(!!group_q)
),
data = data,
...
),
ggplot2::scale_colour_discrete(name = rlang::quo_name(group_q)),
ggplot2::scale_linetype_discrete(name = rlang::quo_name(group_q))
)
}

}

#' Plot GAM estimate smooths and difference curve.
#'
#' It plots comparison smooths from the estimates of a \link[mgcv]{gam} or \link[mgcv]{bam}
#' and the difference curve. Significant differences are marked with red areas.
#'
#' @param model A \code{gam} or \code{bam} model object.
#' @param view The predictor determining the time series.
#' @param comparison The levels for the comparison as a named list.
#' @param conditions The values to use for other predictors as a named list.
#' @param rm_re Whether to remove random effects (the default is \code{FALSE}).
#' @param bw Whether to plot in black and white (the default is \code{FALSE}).
#' @param ylim Limits of the y-axis of the smooths panel.
#'
#' @importFrom magrittr "%>%"
#' @name plot_gamsd-defunct
#' @seealso \code{\link{tidymv-defunct}}
#' @keywords internal
NULL

#' @rdname tidymv-defunct
#' @section This function is deprecated and has been removed. Please, use \link[tidymv]{plot_smooths} and \link[tidymv]{plot_difference}.
#'
#' @export
plot_gamsd <- function(...) {
.Defunct("plot_smooth", msg = "'plot_gamsd' was deprecated and has been removed, use 'plot_smooths()' and 'plot_difference()'.\n")
}
@@ -0,0 +1,8 @@
#' @title Deprecated functions in package \pkg{tidymv}.
#' @description The functions listed below are defunct in
#' the near future. When possible, alternative functions with similar
#' functionality are also mentioned. Help pages for defunct functions are
#' available at \code{help("<function>-defunct")}.
#' @name tidymv-defunct
#' @keywords internal
NULL
@@ -1,7 +1,8 @@
#' tidymv: Tidy Model Visualisation.
#' tidymv: Plotting for generalised additive models.
#'
#' This package provides functions for model visualisation using tidy tools from
#' the tidyverse.
#' This package provides functions for visualising generalised additive models
#' and get predicted values using tidy tools from the tidyverse. The name stands
#' for TIDY Model Visualisation.
#'
#' @docType package
#' @name tidymv
@@ -1,9 +1,9 @@
# `tidymv`: Tidy Model Visualisation
# `tidymv`: Plotting for generalised additive models

This is the repository of the `R` package `tidymv`. This package provides functions for the visualisation of GAM(M)s using tidy tools from the `tidyverse`. `tidymv` is based on the `itsadug` package, and indeed it uses some of its functions under the hood.
This is the repository of the `R` package `tidymv`. This package provides functions for the visualisation of GAM(M)s and the generation of model-based predicted values using tidy tools from the `tidyverse`. `tidymv` uses some functions from the `itsadug` package.

## Installation

To install the package, use `devtools::install_github("stefanocoretta/tidymv@v1.5.4", build_opts = c("--no-resave-data", "--no-manual"))`. To learn how to use the package, do `vignette("plot-smooths", package = "tidymv")` after the installation.
To install the package, use `devtools::install_github("stefanocoretta/tidymv@v2.0.0", build_opts = c("--no-resave-data", "--no-manual"))`. To learn how to use the package, check out the vignettes (for example, `vignette("predict-gam", package = "tidymv")`).

If you wish to install the development version, use `devtools::install_github("stefanocoretta/tidymv", build_opts = c("--no-resave-data", "--no-manual"))`.

This file was deleted.

This file was deleted.

@@ -1,10 +1,10 @@
---
title: "Plotting GAMs"
title: "Plot smooths from GAMs"
author: "Stefano Coretta"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Plotting GAMs}
%\VignetteIndexEntry{Plotting smooths}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
@@ -0,0 +1,153 @@
---
title: "Get model predictions and plot them with `ggplot2`"
author: "Stefano Coretta"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Get and plot model predictions}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
out.width = "300px", fig.align = "center", dpi = 300
)
library(ggplot2)
theme_set(theme_bw())
library(dplyr)
library(mgcv)
library(tidymv)
```

While `plot_smooths()` offers a streamlined way of plotting predicted smooths from a GAM model (see `vignette("plot-smooths", package = "tidymv")`), it is too constrained for other more complex cases.

The most general solution is to get the predicted values of the outcome variable according to all the combinations of terms in the model and use this dataframe for plotting.
This method grants the user maximum control over what can be plotted and how to transform the data (if necessary).

I will illustrate how to use the function `predict_gam()` to create a prediction dataframe and how this dataframe can be used for plotting different cases.

## Smooths

First of all let's generate some simulated data and create a GAM model with a factor `by` variable.

```{r model}
library(mgcv)
set.seed(10)
data <- gamSim(4, 400)
model <- gam(
y ~
fac +
s(x2, by = fac),
data = data
)
summary(model)
```

We can extract the predicted values with `predict_gam()`.
The predicted values of the outcome variable are in the column `fit`, while `fit.se` reports the standard error of the predicted values.

```{r model-p}
model_p <- predict_gam(model)
model_p
```

Now plotting can be done with `ggplot2`.
The convenience function `geom_smooth_ci()` can be used to plot the predicted smooths with confidence intervals.

```{r model-plot}
model_p %>%
ggplot(aes(x2, fit)) +
geom_smooth_ci(fac)
```

## Surface smooths

Now let's plot a model that has a tensor product interaction term (`ti()`).

```{r model-2}
model_2 <- gam(
y ~
s(x2) +
s(f1) +
ti(x2, f1),
data = data
)
summary(model_2)
```

Let's get the prediction dataframe and produce a contour plot.
We can adjust labels and aesthetics using the usual `ggplot2` methods.

```{r model-2-p}
model_2_p <- predict_gam(model_2)
model_2_p
```

```{r model-2-plot}
model_2_p %>%
ggplot(aes(x2, f1, z = fit)) +
geom_raster(aes(fill = fit)) +
geom_contour(colour = "white") +
scale_fill_continuous(name = "y") +
theme_minimal() +
theme(legend.position = "top")
```

## Smooths at specified values of a continuous predictor

To plot the smooths across a few values of a continuous predictor, we can use the `values` argument in `predict_gam()`.

```{r model-2-values}
predict_gam(model_2, values = list(f1 = c(0.5, 1, 1.5))) %>%
ggplot(aes(x2, fit)) +
geom_smooth_ci(f1)
```

## Exclude terms (like random effects)

It is possible to exclude terms when predicting values by means of the `exclude_terms` argument.
This can be useful when there are random effects, like in the following model.

```{r model-3}
data_re <- data %>%
mutate(rand = rep(letters[1:4], each = 100), rand = as.factor(rand))
model_3 <- gam(
y ~
s(x2) +
s(x2, rand, bs = "fs", m = 1),
data = data_re
)
summary(model_3)
```

`exclude_terms` takes a character vector of term names, as they appear in the output of `summary()` (rather than as they are specified in the model formula).
For example, to remove the term `s(x2, fac, bs = "fs", m = 1)`, `"s(x2,fac)"` should be used since this is how the summary output reports this term.
The output still contains the excluded columns.
The predicted values of the outcome variable are not affected by the value the excluded terms (the predicted values are repeated for each value of the excluded terms).
In other words, the coefficients for the excluded terms are set to 0 when predicting.
We can filter the predicted dataset to get unique predicted values by choosing any value or level of the excluded terms.\footnote{Alternatively, we can use `splice()`: `group_by(a) %>% splice(1)`. See `?splice`.}

```{r model-3-plot}
predict_gam(model_3, exclude_terms = "s(x2,rand)") %>%
filter(rand == "a") %>%
ggplot(aes(x2, fit)) +
geom_smooth_ci()
```

Of course, it is possible to plot the predicted values of random effects if we wishes so.
In the following example, the random effect `rand` is not excluded when predicting, and it is used to facet the plot.

```{r model-3-rand}
predict_gam(model_3) %>%
ggplot(aes(x2, fit)) +
geom_smooth_ci() +
facet_wrap(~rand)
```