diff --git a/DESCRIPTION b/DESCRIPTION index 7e35b3fae..525ff9cb5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: sdmTMB Title: Spatial and Spatiotemporal SPDE-Based GLMMs with 'TMB' -Version: 0.5.0.9000 +Version: 0.5.0.9001 Authors@R: c( person(c("Sean", "C."), "Anderson", , "sean@seananderson.ca", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 5f4935a9f..4275e9cb5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # sdmTMB (development version) +* Simplify the internal treatment of extra time slices (`extra_time`). #329 + This is less bug prone and also fixes a recently created bug. #335 + # sdmTMB 0.5.0 * Overhaul residuals vignette ('article') diff --git a/R/fit.R b/R/fit.R index 311f62516..145de3070 100644 --- a/R/fit.R +++ b/R/fit.R @@ -241,6 +241,9 @@ NULL #' `extra_time` can also be used to fill in missing time steps for the purposes #' of a random walk or AR(1) process if the gaps between time steps are uneven. #' +#' `extra_time` can include only extra time steps or all time steps including +#' those found in the fitted data. This latter option may be simpler. +#' #' **Regularization and priors** #' #' You can achieve regularization via penalties (priors) on the fixed effect @@ -775,19 +778,7 @@ sdmTMB <- function( offset <- data[[offset]] } - if (!is.null(extra_time)) { # for forecasting or interpolating - data <- expand_time(df = data, time_slices = extra_time, time_column = time, - weights = weights, offset = offset, upr = upr) - offset <- data[["__sdmTMB_offset__"]] # expanded - weights <- data[["__weight_sdmTMB__"]] # expanded - upr <- data[["__dcens_upr__"]] # expanded - spde$loc_xy <- as.matrix(data[,spde$xy_cols,drop=FALSE]) - spde$A_st <- fmesher::fm_basis(spde$mesh, loc = spde$loc_xy) - spde$sdm_spatial_id <- seq(1, nrow(data)) # FIXME? - } else { - data[["__fake_data__"]] <- FALSE - } - check_irregalar_time(data, time, spatiotemporal, time_varying) + check_irregalar_time(data, time, spatiotemporal, time_varying, extra_time = extra_time) spatial_varying_formula <- spatial_varying # save it if (!is.null(spatial_varying)) { @@ -804,7 +795,6 @@ sdmTMB <- function( if (length(attr(z_i, "contrasts")) && !.int && !omit_spatial_intercept) { # factors with ~ 0 or ~ -1 msg <- c("Detected predictors with factor levels in `spatial_varying` with the intercept omitted from the `spatial_varying` formula.", "You likely want to set `spatial = 'off'` since the constant spatial field (`omega_s`) also represents a spatial intercept.`") - # "As of version 0.3.1, sdmTMB turns off the constant spatial field `omega_s` when `spatial_varying` is specified so that the intercept or factor-level means are fully described by the spatially varying random fields `zeta_s`.") cli_inform(paste(msg, collapse = " ")) } .int <- grep("(Intercept)", colnames(z_i)) @@ -1052,7 +1042,8 @@ sdmTMB <- function( X_ij_list <- list() for (i in seq_len(n_m)) X_ij_list[[i]] <- X_ij[[i]] - n_t <- length(unique(data[[time]])) + time_df <- make_time_lu(data[[time]], full_time_vec = union(data[[time]], extra_time)) + n_t <- nrow(time_df) random_walk <- if (!is.null(time_varying)) switch(time_varying_type, rw = 1L, rw0 = 2L, ar1 = 0L) else 0L tmb_data <- list( @@ -1064,7 +1055,7 @@ sdmTMB <- function( A_st = spde$A_st, sim_re = if ("sim_re" %in% names(experimental)) as.integer(experimental$sim_re) else rep(0L, 6), A_spatial_index = spde$sdm_spatial_id - 1L, - year_i = make_year_i(data[[time]]), + year_i = time_df$year_i[match(data[[time]], time_df$time_from_data)], ar1_fields = ar1_fields, simulate_t = rep(1L, n_t), rw_fields = rw_fields, @@ -1375,16 +1366,9 @@ sdmTMB <- function( prof <- c("b_j") if (delta) prof <- c(prof, "b_j2") - lu <- make_year_lu(data[[time]]) - fd <- data[['__fake_data__']] - tmp <- data[!fd,,drop=FALSE] - # strip fake data from A matrix: - if (sum(fd) > 0L) spde <- make_mesh(tmp, spde$xy_cols, mesh = spde$mesh) - tmp[['__fake_data__']] <- tmp[['__weight_sdmTMB__']] <- - tmp[['__sdmTMB_offset__']] <- tmp[['__dcens_upr__']] <- NULL out_structure <- structure(list( - data = tmp, - offset = offset[!fd], + data = data, + offset = offset, spde = spde, formula = original_formula, split_formula = split_formula, @@ -1393,10 +1377,10 @@ sdmTMB <- function( threshold_function = thresh[[1]]$threshold_func, epsilon_predictor = epsilon_predictor, time = time, - time_lu = lu, + time_lu = time_df, family = family, smoothers = sm, - response = y_i[!fd,,drop=FALSE], + response = y_i, tmb_data = tmb_data, tmb_params = tmb_params, tmb_map = tmb_map, @@ -1593,12 +1577,12 @@ parse_spatial_arg <- function(spatial) { spatial } -check_irregalar_time <- function(data, time, spatiotemporal, time_varying) { +check_irregalar_time <- function(data, time, spatiotemporal, time_varying, extra_time) { if (any(spatiotemporal %in% c("ar1", "rw")) || !is.null(time_varying)) { if (!is.numeric(data[[time]])) { cli_abort("Time column should be integer or numeric if using AR(1) or random walk processes.") } - ti <- sort(unique(data[[time]])) + ti <- sort(union(unique(data[[time]]), extra_time)) if (length(unique(diff(ti))) > 1L) { missed <- find_missing_time(data[[time]]) msg <- c( diff --git a/R/index.R b/R/index.R index d67d79d78..3f5e5b469 100644 --- a/R/index.R +++ b/R/index.R @@ -128,17 +128,6 @@ get_generic <- function(obj, value_name, bias_correct = FALSE, level = 0.95, obj$fit_obj$control$parallel <- 1L } - # FIXME parallel setup here? - if (!"fake_nd" %in% names(obj)) { # old sdmTMB versions... - predicted_time <- sort(unique(obj$data[[obj$fit_obj$time]])) - fitted_time <- get_fitted_time(obj$fit_obj) - if (!all(fitted_time %in% predicted_time)) { - cli_abort(paste0("Some of the fitted time elements were not predicted ", - "on with `predict.sdmTMB()`. Either supply all time elements to ", - "predict() or update sdmTMB and re-fit your object.")) - } - } - assert_that(!is.null(area)) if (length(area) > 1L) { n_fakend <- if (!is.null(obj$fake_nd)) nrow(obj$fake_nd) else 0L @@ -244,7 +233,15 @@ get_generic <- function(obj, value_name, bias_correct = FALSE, level = 0.95, d$lwr <- as.numeric(trans(d$trans_est + stats::qnorm((1-level)/2) * d$se)) d$upr <- as.numeric(trans(d$trans_est + stats::qnorm(1-(1-level)/2) * d$se)) - d[[time_name]] <- get_fitted_time(obj$fit_obj) + if ("pred_tmb_data" %in% names(obj)) { # standard case + ii <- sort(unique(obj$pred_tmb_data$proj_year)) + } else { # fit with do_index = TRUE + ii <- sort(unique(obj$fit_obj$tmb_data$proj_year)) + } + d <- d[d$est != 0, ,drop=FALSE] # these were not predicted on + lu <- obj$fit_obj$time_lu + tt <- lu$time_from_data[match(ii, lu$year_i)] + d[[time_name]] <- tt # d$max_gradient <- max(conv$final_grads) # d$bad_eig <- conv$bad_eig diff --git a/R/predict.R b/R/predict.R index 877744df4..2dc46cb29 100644 --- a/R/predict.R +++ b/R/predict.R @@ -277,6 +277,10 @@ predict.sdmTMB <- function(object, newdata = NULL, xy_cols <- object$spde$xy_cols } + if (object$version < numeric_version("0.5.0.9001")) { + cli_abort("This model was fit with an older version of sdmTMB before internal handling of `extra_time` was simplified. Please refit your model before predicting on it (or install version 0.5.0 or 0.5.0.9000).") + } + if (is_present(tmbstan_model)) { deprecate_stop("0.2.2", "predict.sdmTMB(tmbstan_model)", "predict.sdmTMB(mcmc_samples)") } @@ -315,9 +319,6 @@ predict.sdmTMB <- function(object, newdata = NULL, if (is.null(newdata)) { if (is_delta(object) || nsim > 0 || type == "response" || !is.null(mcmc_samples) || se_fit || !is.null(re_form) || !is.null(re_form_iid) || !is.null(offset) || isTRUE(object$family$delta)) { newdata <- object$data - if (!is.null(object$extra_time)) { # issue #273 - newdata <- newdata[!newdata[[object$time]] %in% object$extra_time,] - } nd_arg_was_null <- TRUE # will be used to carry over the offset } } @@ -345,7 +346,6 @@ predict.sdmTMB <- function(object, newdata = NULL, tmb_data <- object$tmb_data tmb_data$do_predict <- 1L no_spatial <- as.logical(object$tmb_data$no_spatial) - fake_nd <- NULL if (!is.null(newdata)) { if (any(!xy_cols %in% names(newdata)) && isFALSE(pop_pred) && !no_spatial) @@ -375,31 +375,15 @@ predict.sdmTMB <- function(object, newdata = NULL, } check_time_class(object, newdata) - original_time <- as.integer(get_fitted_time(object)) - new_data_time <- as.integer(sort(unique(newdata[[object$time]]))) + original_time <- object$time_lu$time_from_data + new_data_time <- unique(newdata[[object$time]]) if (!all(new_data_time %in% original_time)) cli_abort(c("Some new time elements were found in `newdata`. ", - "For now, make sure only time elements from the original dataset are present.", "If you would like to predict on new time elements,", "see the `extra_time` argument in `?sdmTMB`.") ) - if (!identical(new_data_time, original_time) & isFALSE(pop_pred)) { - missing_time <- original_time[!original_time %in% new_data_time] - fake_nd_list <- list() - fake_nd <- newdata[1L,,drop=FALSE] - for (.t in seq_along(missing_time)) { - fake_nd[[object$time]] <- missing_time[.t] - fake_nd_list[[.t]] <- fake_nd - } - fake_nd <- do.call("rbind", fake_nd_list) - newdata[["_sdmTMB_fake_nd_"]] <- FALSE - fake_nd[["_sdmTMB_fake_nd_"]] <- TRUE - newdata <- rbind(newdata, fake_nd) - if (!is.null(offset)) offset <- c(offset, rep(0, nrow(fake_nd))) # issue 270 - } - # If making population predictions (with standard errors), we don't need # to worry about space, so fill in dummy values if the user hasn't made any: fake_spatial_added <- FALSE @@ -519,7 +503,8 @@ predict.sdmTMB <- function(object, newdata = NULL, tmb_data$proj_X_ij <- proj_X_ij tmb_data$proj_X_rw_ik <- proj_X_rw_ik tmb_data$proj_RE_indexes <- proj_RE_indexes - tmb_data$proj_year <- make_year_i(nd[[object$time]]) + time_lu <- object$time_lu + tmb_data$proj_year <- time_lu$year_i[match(nd[[object$time]], time_lu$time_from_data)] # was make_year_i(nd[[object$time]]) tmb_data$proj_lon <- newdata[[xy_cols[[1]]]] tmb_data$proj_lat <- newdata[[xy_cols[[2]]]] tmb_data$calc_se <- as.integer(se_fit) @@ -690,9 +675,6 @@ predict.sdmTMB <- function(object, newdata = NULL, } } - if (!is.null(fake_nd)) { - out <- out[-seq(nrow(out) - nrow(fake_nd) + 1, nrow(out)), ,drop=FALSE] # issue #273 - } return(out) } @@ -853,7 +835,6 @@ predict.sdmTMB <- function(object, newdata = NULL, nd[[paste0("zeta_s_", object$spatial_varying[z])]] <- r$zeta_s_A[,z,1] } nd$epsilon_st <- r$epsilon_st_A_vec[,1]# DELTA FIXME - nd <- nd[!nd[[object$time]] %in% object$extra_time, , drop = FALSE] # issue 270 obj <- object } @@ -886,14 +867,10 @@ predict.sdmTMB <- function(object, newdata = NULL, nd[["_sdmTMB_time"]] <- NULL if (no_spatial) nd[["est_rf"]] <- NULL if (no_spatial) nd[["est_non_rf"]] <- NULL - if ("_sdmTMB_fake_nd_" %in% names(nd)) { - nd <- nd[!nd[["_sdmTMB_fake_nd_"]],,drop=FALSE] - } - nd[["_sdmTMB_fake_nd_"]] <- NULL row.names(nd) <- NULL if (return_tmb_object) { - return(list(data = nd, report = r, obj = obj, fit_obj = object, pred_tmb_data = tmb_data, fake_nd = fake_nd)) + return(list(data = nd, report = r, obj = obj, fit_obj = object, pred_tmb_data = tmb_data)) } else { if (visreg_df) { # for visreg & related, return consistent objects with lm(), gam() etc. diff --git a/R/print.R b/R/print.R index c3c3a0af3..9043152a9 100644 --- a/R/print.R +++ b/R/print.R @@ -167,7 +167,7 @@ print_time_varying <- function(x, m = 1) { tv_names <- colnames(model.matrix(x$time_varying, x$data)) mm_tv <- cbind(round(as.numeric(b_rw_t_est), 2L), round(as.numeric(b_rw_t_se), 2L)) colnames(mm_tv) <- c("coef.est", "coef.se") - time_slices <- get_fitted_time(x) + time_slices <- x$time_lu$time_from_data row.names(mm_tv) <- paste(rep(tv_names, each = length(time_slices)), time_slices, sep = "-") } else { mm_tv <- NULL diff --git a/R/tmb-sim.R b/R/tmb-sim.R index 903ba9623..1427a2edd 100644 --- a/R/tmb-sim.R +++ b/R/tmb-sim.R @@ -416,9 +416,6 @@ simulate.sdmTMB <- function(object, nsim = 1L, seed = sample.int(1e6, 1L), } ret <- do.call(cbind, ret) - if (!is.null(object$extra_time)) { - ret <- ret[seq(1, nrow(object$data)),,drop=FALSE] # drop extra time rows - } attr(ret, "type") <- type ret } diff --git a/R/utils.R b/R/utils.R index ea12a7062..83193957c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -166,11 +166,21 @@ make_year_i <- function(x) { x - min(x) } -make_year_lu <- function(x) { - ret <- unique(data.frame(year_i = make_year_i(x), time_from_data = x, stringsAsFactors = FALSE)) - ret <- ret[order(ret$year_i),,drop=FALSE] - row.names(ret) <- NULL - ret +make_time_lu <- function(time_vec_from_data, full_time_vec = sort(unique(time_vec_from_data))) { + if (!all(time_vec_from_data %in% full_time_vec)) { + stop("All time elements not in full time vector.") + } + lu <- unique( + data.frame( + year_i = make_year_i(full_time_vec), + time_from_data = full_time_vec, + stringsAsFactors = FALSE + ) + ) + lu$extra_time <- !lu$time_from_data %in% time_vec_from_data + lu <- lu[order(lu$time_from_data),] + row.names(lu) <- NULL + lu } check_offset <- function(formula) { diff --git a/man/sdmTMB.Rd b/man/sdmTMB.Rd index a3a2bcde9..00d1f624b 100644 --- a/man/sdmTMB.Rd +++ b/man/sdmTMB.Rd @@ -274,6 +274,9 @@ time slices with process error. \code{extra_time} can also be used to fill in missing time steps for the purposes of a random walk or AR(1) process if the gaps between time steps are uneven. +\code{extra_time} can include only extra time steps or all time steps including +those found in the fitted data. This latter option may be simpler. + \strong{Regularization and priors} You can achieve regularization via penalties (priors) on the fixed effect diff --git a/src/sdmTMB.cpp b/src/sdmTMB.cpp index 268a65d87..f63fdcf33 100644 --- a/src/sdmTMB.cpp +++ b/src/sdmTMB.cpp @@ -1225,7 +1225,7 @@ Type objective_function::operator()() // Total biomass etc.: vector total(n_t); - total.setZero(); + total.setZero(); // important; 0s are filtered out after as not predicted on vector mu_combined(n_p); mu_combined.setZero(); @@ -1235,8 +1235,8 @@ Type objective_function::operator()() Type t2; int link_tmp; - if (n_m > 1) { // delta model - for (int i = 0; i < n_p; i++) { + for (int i = 0; i < n_p; i++) { + if (n_m > 1) { // delta model if (poisson_link_delta) { // Type R1 = Type(1.) - exp(-exp(proj_eta(i,0))); // Type R2 = exp(proj_eta(i,0)) / R1 * exp(proj_eta(i,1)) @@ -1247,9 +1247,7 @@ Type objective_function::operator()() mu_combined(i) = t1 * t2; } total(proj_year(i)) += mu_combined(i) * area_i(i); - } - } else { // non-delta model - for (int i = 0; i < n_p; i++) { + } else { // non-delta model mu_combined(i) = InverseLink(proj_eta(i,0), link(0)); total(proj_year(i)) += mu_combined(i) * area_i(i); } diff --git a/tests/testthat/test-extra-time.R b/tests/testthat/test-extra-time.R index 6195142cd..800684bc4 100644 --- a/tests/testthat/test-extra-time.R +++ b/tests/testthat/test-extra-time.R @@ -83,8 +83,8 @@ test_that("extra_time, newdata, get_index() work", { expect_equal(ind2[ind2$year %in% pcod$year, "est"], ind3[ind3$year %in% pcod$year, "est"]) expect_identical(as.numeric(sort(unique(ind3$year))), as.numeric(sort(unique(pcod$year)))) - p$fake_nd <- NULL # mimic old sdmTMB - expect_error(ind4 <- get_index(p)) + # p$fake_nd <- NULL # mimic old sdmTMB + # expect_error(ind4 <- get_index(p)) # missing some original time: nd <- replicate_df(pcod, "year", unique(pcod$year)) @@ -232,3 +232,156 @@ test_that("update() works on an extra_time model", { m2 <- update(m, time_varying_type = "ar1", extra_time = m$extra_time, mesh = m$spde) expect_s3_class(m2, "sdmTMB") }) + +test_that("prediction with newdata = NULL for non-delta models with extra_time works #335", { + m <- sdmTMB( + density ~ 1, + data = pcod, + spatial = "off", + spatiotemporal = "off", + time = "year", + family = tweedie(), + extra_time = 2018:2020 + ) + p1 <- predict(m) # was broken + p2 <- predict(m, newdata = pcod) + expect_equal(p1, p2, tolerance = 0.0001) + + # what if extra_time includes some fitted years? + m <- sdmTMB( + density ~ 1, + data = pcod, + spatial = "off", + spatiotemporal = "off", + time = "year", + family = tweedie(), + extra_time = 2017:2020 + ) + p1 <- predict(m) + p2 <- predict(m, newdata = pcod) + expect_equal(p1, p2, tolerance = 0.0001) + + m <- update(m, family = delta_gamma()) + p1 <- predict(m) + p2 <- predict(m, newdata = pcod) + expect_equal(p1, p2, tolerance = 0.0001) +}) + +test_that("make_time_lu works", { + # extra time on end + x <- make_time_lu(c(1, 2, 3), c(1, 2, 3, 4)) + expect_equal(x$year_i, 0:3) + expect_equal(x$time_from_data, 1:4) + expect_equal(x$extra_time, c(FALSE, FALSE, FALSE, TRUE)) + + # no extra time + x <- make_time_lu(c(1, 2, 3), c(1, 2, 3)) + expect_equal(x$year_i, 0:2) + expect_equal(x$time_from_data, 1:3) + expect_equal(x$extra_time, c(FALSE, FALSE, FALSE)) + + # missing element in full vector + expect_error(x <- make_time_lu(c(1, 2, 3, 4), c(1, 2, 3)), regexp = "time") + + # extra time on end with gap + x <- make_time_lu(c(1, 2, 3), c(1, 2, 3, 5)) + expect_equal(x$year_i, 0:3) + expect_equal(x$time_from_data, c(1, 2, 3, 5)) + expect_equal(x$extra_time, c(FALSE, FALSE, FALSE, TRUE)) + + # gap in middle + x <- make_time_lu(c(1, 2, 4), c(1, 2, 3, 4)) + expect_equal(x$year_i, 0:3) + expect_equal(x$time_from_data, c(1, 2, 3, 4)) + expect_equal(x$extra_time, c(FALSE, FALSE, TRUE, FALSE)) + + # extra time at beginning + x <- make_time_lu(c(1, 2, 3), c(0, 1, 2, 3)) + expect_equal(x$year_i, 0:3) + expect_equal(x$time_from_data, 0:3) + expect_equal(x$extra_time, c(TRUE, FALSE, FALSE, FALSE)) + + # order scrambled + x <- make_time_lu(c(1, 3, 2), c(0, 1, 2, 3)) + expect_equal(x$year_i, 0:3) + expect_equal(x$time_from_data, 0:3) + expect_equal(x$extra_time, c(TRUE, FALSE, FALSE, FALSE)) + + # order scrambled in full vector + x <- make_time_lu(c(1, 3, 2), c(0, 2, 3, 1)) + expect_equal(x$year_i, 0:3) + expect_equal(x$time_from_data, 0:3) + expect_equal(x$extra_time, c(TRUE, FALSE, FALSE, FALSE)) + + # do it in sdmTMB() + m <- sdmTMB( + density ~ 1, + data = pcod, + spatial = "off", + spatiotemporal = "off", + time = "year", + family = tweedie(), + extra_time = 2018:2020, + do_fit = FALSE + ) + x <- m$time_lu + expect_equal(x$year_i, 0:11) + expect_equal(x$time_from_data, + c(sort(unique(pcod$year)), 2018:2020)) + expect_equal(sum(x$extra_time), 3) + + # do it with estimation + m <- sdmTMB( + density ~ 1, + data = pcod, + spatial = "off", + spatiotemporal = "off", + time = "year", + family = tweedie(), + extra_time = 2018:2020 + ) + + # do it with estimation and a random walk + m <- sdmTMB( + density ~ 1, + data = pcod, + spatial = "off", + spatiotemporal = "rw", + mesh = make_mesh(pcod, c("X", "Y"), cutoff = 40), + time = "year", + family = tweedie(), + extra_time = 2018:2020 + ) + s <- as.list(m$sd_report, "Estimate") + expect_equal(max(m$time_lu$year_i) + 1, dim(s$epsilon_st)[2]) # extra slices there? + + # prediction? + p1 <- predict(m, newdata = pcod) + p2 <- predict(m) + expect_equal(p1, p2) + + nd <- replicate_df(qcs_grid, "year", c(unique(pcod$year), 2018:2020)) + p3 <- predict(m, newdata = nd) + expect_equal(unique(p3$year), c(2003L, 2004L, 2005L, 2007L, 2009L, 2011L, 2013L, 2015L, 2017L, + 2018L, 2019L, 2020L)) + + if (FALSE) { + library(ggplot2) + ggplot(p3, aes(X, Y, fill = est)) + + geom_raster() + + facet_wrap(~year) + + scale_fill_viridis_c() + } +}) + + +test_that("old versions get flagged pre 0.5.0.9001", { + m <- sdmTMB( + density ~ 0, + data = pcod, + spatial = "off", + family = tweedie() + ) + m$version <- numeric_version('0.5.0') + expect_error(p <- predict(m), "extra") +})