Skip to content

Commit bb4d3dd

Browse files
committed
update to allow for specific values of first-order and seasonal differencing outside of forecast::auto.arima
1 parent f66e771 commit bb4d3dd

15 files changed

+251
-164
lines changed

Diff for: NAMESPACE

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# Generated by roxygen2: do not edit by hand
22

33
S3method(simulate,sarimaTD)
4+
export(do_difference)
45
export(do_initial_transform)
5-
export(do_seasonal_difference)
66
export(fit_sarima)
77
export(interpolate_and_clean_missing)
88
export(invert_bc_transform)
9+
export(invert_difference)
910
export(invert_initial_transform)
10-
export(invert_seasonal_difference)
1111
export(sarimaTD_FF)

Diff for: R/estimation.R

+12-13
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
#' in case of observations of 0, and also ensures that the de-transformed
1414
#' values will always be at least -0.5, so that they round up to non-negative
1515
#' values.
16-
#' @param seasonal_difference boolean; take a seasonal difference before passing
17-
#' to auto.arima?
16+
#' @param sarimaTD_d integer order of first differencing done before passing to
17+
#' auto.arima
18+
#' @param sarimaTD_D integer order of seasonal differencing done before passing
19+
#' to auto.arima
1820
#' @param d order of first differencing argument to auto.arima.
1921
#' @param D order of seasonal differencing argument to auto.arima.
2022
#' @param ... arguments passed on to forecast::auto.arima
@@ -37,7 +39,8 @@ fit_sarima <- function(
3739
ts_frequency,
3840
transformation = "box-cox",
3941
bc_gamma = 0.5,
40-
seasonal_difference = TRUE,
42+
sarimaTD_d = 0,
43+
sarimaTD_D = 1,
4144
d = NA,
4245
D = NA,
4346
...) {
@@ -66,15 +69,10 @@ fit_sarima <- function(
6669
transformation = transformation,
6770
bc_params = est_bc_params)
6871

69-
## Initial seasonal differencing, if necessary
70-
if(seasonal_difference) {
71-
differenced_y <- do_seasonal_difference(
72-
y = transformed_y,
73-
ts_frequency = ts_frequency)
74-
} else {
75-
differenced_y <- ts(transformed_y, frequency = ts_frequency)
76-
}
77-
72+
## Initial differencing, if necessary
73+
differenced_y <- do_difference(transformed_y, d = sarimaTD_d, D = sarimaTD_D,
74+
frequency = ts_frequency)
75+
7876
## Get SARIMA fit
7977
if(identical(transformation, "forecast-box-cox")) {
8078
## box-cox transformation done by auto.arima
@@ -98,7 +96,8 @@ fit_sarima <- function(
9896
}
9997

10098
sarima_fit$sarimaTD_call <- match.call()
101-
for(param_name in c("y", "ts_frequency", "transformation", "seasonal_difference", "d", "D")) {
99+
for(param_name in c("y", "ts_frequency", "transformation", "sarimaTD_d",
100+
"sarimaTD_D", "d", "D")) {
102101
sarima_fit[[paste0("sarimaTD_used_", param_name)]] <- get(param_name)
103102
}
104103
if(identical(transformation, "box-cox")) {

Diff for: R/prediction.R

+22-34
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
#' Simulate predictive trajectories from an ARIMA model
44
#'
55
#' This is directly taken from the forecast.Arima function from the forecast
6-
#' package, but it returns the simulated trajectories which were not returned in
7-
#' the original function definition. This function calls
6+
#' package, but it returns the simulated trajectories which were not returned
7+
#' in the original function definition. This function calls
88
#' forecast:::simulate.Arima, which is NOT exported from the forecast package!!
99
#'
1010
#' @param object an Arima fit object (with class "Arima")
@@ -63,12 +63,12 @@ simulate.sarimaTD <- function(
6363

6464
if(is.null(object$sarimaTD_call)) {
6565
transformation <- "none"
66-
seasonal_difference <- FALSE
66+
sarimaTD_d <- sarimaTD_D <- 0
6767
ts_frequency <- object$arma[5]
6868
} else {
6969
transformation <- object$sarimaTD_used_transformation
70-
seasonal_difference <-
71-
object$sarimaTD_used_seasonal_difference
70+
sarimaTD_d <- object$sarimaTD_used_sarimaTD_d
71+
sarimaTD_D <- object$sarimaTD_used_sarimaTD_D
7272
ts_frequency <- object$arma[5]
7373
}
7474

@@ -85,14 +85,9 @@ simulate.sarimaTD <- function(
8585
bc_params = est_bc_params)
8686

8787
## Initial seasonal differencing, if necessary
88-
if(seasonal_difference) {
89-
differenced_y <- do_seasonal_difference(
90-
y = transformed_y,
91-
ts_frequency = ts_frequency)
92-
} else {
93-
differenced_y <- ts(transformed_y, frequency = ts_frequency)
94-
}
95-
88+
differenced_y <- do_difference(transformed_y, d = sarimaTD_d, D = sarimaTD_D,
89+
frequency = ts_frequency)
90+
9691
## Drop leading missing values, fill in internal missing values via linear
9792
## interpolation. This is necessary to ensure non-missing predictions if the
9893
## sarima model has a moving average component.
@@ -120,27 +115,20 @@ simulate.sarimaTD <- function(
120115
## Get to trajectories for originally observed time series ("orig") by
121116
## adding seasonal lag of incidence and inverting the transformation
122117
orig_trajectory_samples <- raw_trajectory_samples
123-
if(seasonal_difference) {
124-
for(i in seq_len(nsim)) {
125-
orig_trajectory_samples[i, ] <-
126-
invert_seasonal_difference(
127-
dy = raw_trajectory_samples[i, ],
128-
y = transformed_y,
129-
ts_frequency = ts_frequency)
130-
orig_trajectory_samples[i, ] <-
131-
invert_initial_transform(
132-
y = orig_trajectory_samples[i, ],
133-
transformation = transformation,
134-
bc_params = est_bc_params)
135-
}
136-
} else {
137-
for(i in seq_len(nsim)) {
138-
orig_trajectory_samples[i, ] <-
139-
invert_initial_transform(
140-
y = raw_trajectory_samples[i, ],
141-
transformation = transformation,
142-
bc_params = est_bc_params)
143-
}
118+
for(i in seq_len(nsim)) {
119+
orig_trajectory_samples[i, ] <-
120+
invert_difference(
121+
dy = raw_trajectory_samples[i, ],
122+
y = transformed_y,
123+
d = sarimaTD_d,
124+
D = sarimaTD_D,
125+
frequency = ts_frequency)
126+
127+
orig_trajectory_samples[i, ] <-
128+
invert_initial_transform(
129+
y = orig_trajectory_samples[i, ],
130+
transformation = transformation,
131+
bc_params = est_bc_params)
144132
}
145133

146134
attr(orig_trajectory_samples, "seed") <- seed

Diff for: R/utils.R

+65-22
Original file line numberDiff line numberDiff line change
@@ -89,45 +89,88 @@ invert_bc_transform <- function(b, lambda, gamma) {
8989
return(z - gamma)
9090
}
9191

92-
#' Do first-order seasonal differencing (go from original time series values to
93-
#' seasonally differenced time series values).
92+
#' Do first-order and seasonal differencing (go from original time series
93+
#' to differenced time series).
9494
#'
9595
#' @param y a univariate time series or numeric vector.
96-
#' @param ts_frequency frequency of time series. Must be provided if y is not
97-
#' of class "ts". See the help for stats::ts for more.
96+
#' @param d order of first differencing
97+
#' @param D order of seasonal differencing
98+
#' @param frequency frequency of time series. Must be provided if y is not
99+
#' of class "ts" and D > 0. See the help for stats::ts for more.
98100
#'
99-
#' @return a seasonally differenced time series object (of class 'ts'),
101+
#' @return a differenced time series object (of class 'ts'),
100102
#' padded with leading NAs.
101103
#'
102104
#' @export
103-
do_seasonal_difference <- function(y, ts_frequency) {
104-
differenced_y <- ts(c(rep(NA, ts_frequency),
105-
y[seq(from = ts_frequency + 1, to = length(y))] -
106-
y[seq(from = 1, to = length(y) - ts_frequency)]),
107-
frequency = ts_frequency)
108-
return(differenced_y)
105+
do_difference <- function(y, d = 0, D = 0, frequency = 1) {
106+
# first differencing
107+
for(i in seq_len(d)) {
108+
y <- ts(
109+
c(NA,
110+
y[seq(from = 1 + 1, to = length(y))] -
111+
y[seq(from = 1, to = length(y) - 1)]),
112+
frequency = frequency)
113+
}
114+
115+
# seasonal differencing
116+
if(D > 0 && frequency < 2) {
117+
stop("It doesn't make sense to do seasonal differencing with a time series frequency of 1.")
118+
}
119+
for(i in seq_len(D)) {
120+
y <- ts(
121+
c(rep(NA, frequency),
122+
y[seq(from = frequency + 1, to = length(y))] -
123+
y[seq(from = 1, to = length(y) - frequency)]),
124+
frequency = frequency)
125+
}
126+
127+
return(y)
109128
}
110129

111-
#' Invert first-order seasonal differencing (go from seasonally differenced time
112-
#' series values to original time series values).
130+
#' Invert first-order and seasonal differencing (go from seasonally differenced
131+
#' time series to original time series).
113132
#'
114-
#' @param dy a first-order seasonally differenced univariate time series with
115-
#' values like y_{t} - y_{t - ts_frequency}
133+
#' @param dy a first-order and/or seasonally differenced univariate time series
134+
#' with values like y_{t} - y_{t - ts_frequency}
116135
#' @param y a univariate time series or numeric vector with values like
117136
#' y_{t - ts_frequency}.
118-
#' @param ts_frequency frequency of time series. Must be provided if y is not
119-
#' of class "ts". See the help for stats::ts for more.
137+
#' @param d order of first differencing
138+
#' @param D order of seasonal differencing
139+
#' @param frequency frequency of time series. Must be provided if y is not
140+
#' of class "ts" and D > 0. See the help for stats::ts for more.
120141
#'
121142
#' @details y may have longer length than dy. It is assumed that dy "starts"
122-
#' one time index after y "ends": that is, if y is of length T then
123-
#' dy[1] = y[T + 1] - y[T + 1 - ts_frequency]
143+
#' one time index after y "ends": that is, if y is of length T, d = 0, and
144+
#' D = 1 then dy[1] = y[T + 1] - y[T + 1 - ts_frequency]
124145
#'
125146
#' @return a time series object (of class 'ts')
126147
#'
127148
#' @export
128-
invert_seasonal_difference <- function(dy, y, ts_frequency) {
129-
return(ts(dy + y[length(y) + seq_along(dy) - ts_frequency],
130-
freq = ts_frequency))
149+
invert_difference <- function(dy, y, d, D, frequency) {
150+
for(i in seq_len(d)) {
151+
y_dm1 <- do_difference(y, d = d-i, D = D, frequency = frequency)
152+
dy_full <- c(y_dm1, dy)
153+
for(t in seq_len(length(dy))) {
154+
dy_full[length(y_dm1) + t] <- dy_full[length(y_dm1) + t - 1] + dy_full[length(y_dm1) + t]
155+
}
156+
dy <- dy_full[length(y_dm1) + seq_along(dy)]
157+
}
158+
159+
for(i in seq_len(D)) {
160+
y_dm1 <- do_difference(y, d = 0, D = D-i, frequency = frequency)
161+
dy_full <- c(y_dm1, dy)
162+
for(t in seq_len(length(dy))) {
163+
dy_full[length(y_dm1) + t] <- dy_full[length(y_dm1) + t - frequency] + dy_full[length(y_dm1) + t]
164+
}
165+
dy <- dy_full[length(y_dm1) + seq_along(dy)]
166+
}
167+
168+
# for(i in seq_len(D)) {
169+
# y_dm1 <- do_difference(y, d = 0, D = D-i, frequency = frequency)
170+
# dy <- dy + y_dm1[length(y_dm1) + seq_along(dy) - frequency]
171+
# }
172+
173+
return(ts(dy, frequency = frequency))
131174
}
132175

133176
#' Remove leading values that are infinite or missing, and replace all internal

Diff for: man/do_difference.Rd

+27
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: man/do_seasonal_difference.Rd

-23
This file was deleted.

Diff for: man/fit_sarima.Rd

+7-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: man/invert_difference.Rd

+35
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)