-
Notifications
You must be signed in to change notification settings - Fork 1
/
shortstacking.R
168 lines (157 loc) · 7.21 KB
/
shortstacking.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#' Predictions using Short-Stacking.
#'
#' @family utilities
#'
#' @description Predictions using short-stacking.
#'
#' @inheritParams crosspred
#' @param shortstack_y Optional vector of the outcome variable to form
#' short-stacking predictions for. Base learners are always trained on
#' \code{y}.
#'
#' @return \code{shortstack} returns a list containing the following components:
#' \describe{
#' \item{\code{oos_fitted}}{A matrix of out-of-sample predictions,
#' each column corresponding to an ensemble type (in chronological
#' order).}
#' \item{\code{weights}}{An array, providing the weight
#' assigned to each base learner (in chronological order) by the
#' ensemble procedures.}
#' \item{\code{mspe}}{The mean squared out-of-sample prediction error
#' of each ensemble type.}
#' \item{\code{is_fitted}}{When \code{compute_insample_predictions = TRUE},
#' a list of matrices with in-sample predictions by sample fold.}
#' \item{\code{auxilliary_fitted}}{When \code{auxilliary_X} is not
#' \code{NULL}, a list of matrices with additional predictions.}
#' \item{\code{oos_fitted_bylearner}}{A matrix of
#' out-of-sample predictions, each column corresponding to a base
#' learner (in chronological order).}
#' \item{\code{is_fitted_bylearner}}{When
#' \code{compute_insample_predictions = TRUE}, a list of matrices with
#' in-sample predictions by sample fold.}
#' \item{\code{auxilliary_fitted_bylearner}}{When \code{auxilliary_X} is
#' not \code{NULL}, a
#' list of matrices with additional predictions for each learner.}
#' }
#' Note that unlike \code{crosspred}, \code{shortstack} always computes
#' out-of-sample predictions for each base learner (at no additional
#' computational cost).
#' @export
#'
#' @references
#' Ahrens A, Hansen C B, Schaffer M E, Wiemann T (2023). "ddml: Double/debiased
#' machine learning in Stata." \url{https://arxiv.org/abs/2301.09397}
#'
#' Wolpert D H (1992). "Stacked generalization." Neural Networks, 5(2), 241-259.
#'
#' @examples
#' # Construct variables from the included Angrist & Evans (1998) data
#' y = AE98[, "worked"]
#' X = AE98[, c("morekids", "age","agefst","black","hisp","othrace","educ")]
#'
#' # Compute predictions using shortstacking with base learners ols and lasso.
#' # Two stacking approaches are simultaneously computed: Equally
#' # weighted (ensemble_type = "average") and MSPE-minimizing with weights
#' # in the unit simplex (ensemble_type = "nnls1"). Predictions for each
#' # learner are also calculated.
#' shortstack_res <- shortstacking(y, X,
#' learners = list(list(fun = ols),
#' list(fun = mdl_glmnet)),
#' ensemble_type = c("average",
#' "nnls1",
#' "singlebest"),
#' sample_folds = 2,
#' silent = TRUE)
#' dim(shortstack_res$oos_fitted) # = length(y) by length(ensemble_type)
#' dim(shortstack_res$oos_fitted_bylearner) # = length(y) by length(learners)
shortstacking <- function(y, X, Z = NULL,
                          learners,
                          sample_folds = 2,
                          ensemble_type = "average",
                          custom_ensemble_weights = NULL,
                          compute_insample_predictions = FALSE,
                          subsamples = NULL,
                          silent = FALSE,
                          progress = NULL,
                          auxilliary_X = NULL,
                          shortstack_y = y) {
  # Data parameters
  nobs <- nrow(X)
  nlearners <- length(learners)
  # A single learner is specified as a named list containing "what";
  # short-stacking requires multiple base learners, so throw an error then.
  calc_ensemble <- !("what" %in% names(learners))
  if (!calc_ensemble) {
    stop("shortstacking cannot be estimated with a single learner.")
  }#IF
  # Create sample fold tuple (unless supplied by the caller)
  if (is.null(subsamples)) {
    subsamples <- generate_subsamples(nobs, sample_folds)
  }#IF
  sample_folds <- length(subsamples)
  # Total number of ensembles: named ensemble types plus any user-supplied
  #     custom weight columns. ncol() returns NULL when no custom weights
  #     are passed, hence the explicit NULL check.
  ncustom <- ncol(custom_ensemble_weights)
  if (is.null(ncustom)) ncustom <- 0
  nensb <- length(ensemble_type) + ncustom
  # Compute out-of-sample predictions for each learner. ensemble_type is
  #     fixed to "average" here only as a placeholder: short-stacking
  #     consumes the by-learner predictions, not this ensemble.
  res <- crosspred(y, X, Z,
                   learners = learners,
                   ensemble_type = "average",
                   compute_insample_predictions =
                     compute_insample_predictions,
                   compute_predictions_bylearner = TRUE,
                   subsamples = subsamples,
                   silent = silent, progress = progress,
                   auxilliary_X = auxilliary_X)
  # Compute ensemble weights via subsample cross-fitted residuals. fakecv
  #     mimics the cv_results structure expected by ensemble_weights; the
  #     kronecker product replicates shortstack_y across learner columns.
  fakecv <- list()
  fakecv$oos_resid <- kronecker(shortstack_y, t(rep(1, nlearners))) -
    res$oos_fitted_bylearner
  weights <- ensemble_weights(shortstack_y, X, learners = learners,
                              type = ensemble_type,
                              custom_weights = custom_ensemble_weights,
                              cv_results = fakecv)$weights
  # Combine base-learner predictions into one column per ensemble
  oos_fitted <- res$oos_fitted_bylearner %*% weights
  # Compute auxilliary predictions (optional)
  auxilliary_fitted <- rep(list(NULL), sample_folds)
  if (!is.null(auxilliary_X)) {
    for (k in seq_len(sample_folds)) {
      auxilliary_fitted[[k]] <-
        res$auxilliary_fitted_bylearner[[k]] %*% weights
    }#FOR
  }#IF
  # Compute in-sample predictions (optional)
  is_fitted <- rep(list(NULL), sample_folds)
  fakecv_k <- list()
  if (compute_insample_predictions) {
    for (k in seq_len(sample_folds)) {
      # Compute shortstacking weights in-sample, using only observations
      #     outside fold k. drop = FALSE keeps the subset of X a matrix
      #     when X has a single column.
      fakecv_k$oos_resid <- kronecker(y[-subsamples[[k]]],
                                      t(rep(1, nlearners))) -
        res$is_fitted_bylearner[[k]]
      weights_k <- ensemble_weights(y[-subsamples[[k]]],
                                    X[-subsamples[[k]], , drop = FALSE],
                                    learners = learners,
                                    type = ensemble_type,
                                    custom_weights = custom_ensemble_weights,
                                    cv_results = fakecv_k)$weights
      # Combine base learners
      is_fitted[[k]] <- res$is_fitted_bylearner[[k]] %*% weights_k
    }#FOR
    # When multiple ensembles are computed, need to reorganize is_fitted
    #     into one list of fold-level matrices per ensemble type.
    if (nensb > 1) {
      # Loop over each ensemble type to create list of is_fitted's
      new_is_fitted <- rep(list(rep(list(1), sample_folds)), nensb)
      for (i in seq_len(nensb)) {
        for (k in seq_len(sample_folds)) {
          new_is_fitted[[i]][[k]] <- is_fitted[[k]][, i, drop = FALSE]
        }#FOR
      }#FOR
      is_fitted <- new_is_fitted
    }#IF
  }#IF
  # Compute mean squared out-of-sample prediction error per ensemble
  mspe <- colMeans((kronecker(shortstack_y, t(rep(1, nensb))) -
                      oos_fitted)^2)
  # return shortstacking output
  output <- list(oos_fitted = oos_fitted,
                 weights = weights, mspe = mspe,
                 is_fitted = is_fitted,
                 auxilliary_fitted = auxilliary_fitted,
                 oos_fitted_bylearner = res$oos_fitted_bylearner,
                 is_fitted_bylearner = res$is_fitted_bylearner,
                 auxilliary_fitted_bylearner =
                   res$auxilliary_fitted_bylearner)
  return(output)
}#SHORTSTACKING