-
Notifications
You must be signed in to change notification settings - Fork 31
/
generate.R
141 lines (130 loc) · 5.46 KB
/
generate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#' Generate responses from a mable
#'
#' Use a model's fitted distribution to simulate additional data with similar
#' behaviour to the response. This is a tidy implementation of
#' [stats::simulate()].
#'
#' Innovations are sampled by the model's assumed error distribution.
#' If `bootstrap` is `TRUE`, innovations will be sampled from the model's
#' residuals. If `new_data` contains the `.innov` column, those values will be
#' treated as innovations for the simulated paths.
#'
#' @param x A mable.
#' @param new_data The data to be generated (time index and exogenous regressors)
#' @param h The simulation horizon (can be used instead of `new_data` for regular
#' time series with no exogenous regressors).
#' @param times The number of replications.
#' @param seed The seed for the random generation from distributions.
#' @param ... Additional arguments for individual simulation methods.
#'
#' @examplesIf requireNamespace("fable", quietly = TRUE)
#' library(fable)
#' library(dplyr)
#' UKLungDeaths <- as_tsibble(cbind(mdeaths, fdeaths), pivot_longer = FALSE)
#' UKLungDeaths %>%
#' model(lm = TSLM(mdeaths ~ fourier("year", K = 4) + fdeaths)) %>%
#' generate(UKLungDeaths, times = 5)
#' @export
generate.mdl_df <- function(x, new_data = NULL, h = NULL, times = 1, seed = NULL, ...){
mdls <- mable_vars(x)
if(!is.null(new_data)){
x <- bind_new_data(x, new_data)
}
kv <- c(key_vars(x), ".model")
x <- tidyr::pivot_longer(as_tibble(x), all_of(mdls),
names_to = ".model", values_to = ".sim")
# Evaluate simulations
x[[".sim"]] <- map2(x[[".sim"]],
x[["new_data"]] %||% rep(list(NULL), length.out = NROW(x)),
generate, h = h, times = times, seed = seed, ...)
x[["new_data"]] <- NULL
unnest_tsbl(x, ".sim", parent_key = kv)
}
#' @rdname generate.mdl_df
#'
#' @param bootstrap If TRUE, then forecast distributions are computed using simulation with resampled errors.
#' @param bootstrap_block_size The bootstrap block size specifies the number of contiguous residuals to be taken in each bootstrap sample.
#'
#' @export
generate.mdl_ts <- function(x, new_data = NULL, h = NULL, times = 1, seed = NULL,
bootstrap = FALSE, bootstrap_block_size = 1, ...){
if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
stats::runif(1)
if (is.null(seed))
RNGstate <- get(".Random.seed", envir = .GlobalEnv)
else {
R.seed <- get(".Random.seed", envir = .GlobalEnv)
set.seed(seed)
RNGstate <- structure(seed, kind = as.list(RNGkind()))
on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
}
if(is.null(new_data)){
new_data <- make_future_data(x$data, h)
}
if(is.null(new_data[[".rep"]])){
kv <- c(".rep", key_vars(new_data))
idx <- index_var(new_data)
intvl <- tsibble::interval(new_data)
new_data <- vctrs::vec_rbind(
!!!set_names(rep(list(as_tibble(new_data)), times), seq_len(times)),
.names_to = ".rep"
)
new_data <- build_tsibble(new_data, index = !!idx, key = !!kv, interval = intvl)
}
if(bootstrap) {
res <- residuals(x$fit)
f_mean <- if(length(x$response) == 1) mean else colMeans
res <- stats::na.omit(res) - f_mean(res, na.rm = TRUE)
i <- if(bootstrap_block_size == 1) {
sample.int(NROW(res), nrow(new_data), replace = TRUE)
} else {
if(any(has_gaps(x$data)$.gaps)) abort("Residuals must be regularly spaced without gaps to use a block bootstrap method.")
kr <- tsibble::key_rows(new_data)
ki <- lapply(lengths(kr), function(n) block_bootstrap(NROW(res), bootstrap_block_size, size = n))
vec_c(!!!ki)
}
new_data$.innov <- if(length(x$response) == 1) res[i] else res[i,]
}
# Compute specials with new_data
x$model$stage <- "generate"
x$model$add_data(new_data)
specials <- tryCatch(parse_model_rhs(x$model),
error = function(e){
abort(sprintf(
"%s
Unable to compute required variables from provided `new_data`.
Does your model require extra variables to produce simulations?", e$message))
}, interrupt = function(e) {
stop("Terminated by user", call. = FALSE)
})
x$model$remove_data()
x$model$stage <- NULL
.sim <- generate(x[["fit"]], new_data = new_data, specials = specials, ...)
.sim_cols <- setdiff(names(.sim), names(new_data))
# TODO: Breaking change, .sim should be response variable name
# For now, only do this with multivariate models but this change will be made for v1.0.0
resp_vars <- vapply(x$response, expr_name, character(1L), USE.NAMES = FALSE)
if (length(resp_vars) > 1) {
.sim_cols <- resp_vars
.sim[resp_vars] <- split(.sim$.sim, col(.sim$.sim))
.sim$.sim <- NULL
}
# Back-transform forecast distributions
bt <- map(x$transformation, function(x){
bt <- invert_transformation(x)
env <- new_environment(new_data, get_env(bt))
set_env(bt, env)
})
.sim[.sim_cols] <- .mapply(function(f, x) f(x), list(bt, .sim[.sim_cols]), NULL)
.sim
}
block_bootstrap <- function (n, window_size, size = n) {
n_blocks <- size%/%window_size + 2
bx <- numeric(n_blocks * window_size)
for (i in seq_len(n_blocks)) {
block_pos <- sample(seq_len(n - window_size + 1), 1)
bx[((i - 1) * window_size + 1):(i * window_size)] <- block_pos:(block_pos + window_size - 1)
}
start_from <- sample.int(window_size, 1)
bx[seq(start_from, length.out = size)]
}