-
Notifications
You must be signed in to change notification settings - Fork 1
/
optic-simulation-helper.R
299 lines (265 loc) · 13.1 KB
/
optic-simulation-helper.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
#------------------------------------------------------------------------------#
# OPTIC R Package Code Repository
# Copyright (C) 2023 by The RAND Corporation
# See README.md for information on usage and licensing
#------------------------------------------------------------------------------#
#' Create a configuration object used to run simulations
#'
#' @description Performs validation on inputs and produces a configuration object
#' that contains all required parameters to dispatch simulation runs for the empirical data
#' provided.
#'
#' @details The resulting configuration object is used to pass simulation scenarios to the 'simulate' function. Provided as a convenience function to the user
#' so they can investigate simulation arguments prior to running models.
#'
#' @param x Empirical data used to simulate synthetic datasets with specified treatment effect.
#' @param models List of `optic_model` objects that should be run for each iteration and simulation scenario.
#' The elements must be created using the `optic_model` function.
#' @param iters A numeric, specifying number of iterations for each simulation scenario.
#' @param unit_var A string variable, used to determine clusters for clustered standard errors.
#' @param time_var A string variable, specifying time units (e.g. "year", "time to treat", etc). Must be specified in terms of years (fractional years are accepted).
#' @param treat_var A string variable, referring to the unit-of-analysis for treatment (which may not be the same as the unit var argument, e.g. treated classrooms within clustered schools)
#' @param conf_var An unobserved confounding variable. Only used for the 'confound-method'.
#' @param prior_control Only used for confounding method. Adds an additional set of variables which control for the outcome in previous periods (either a moving average of previous time periods or an autoregressive term)
#' @param effect_magnitude A vector of numerics, specifying 'true' effect sizes for treatment scenarios. See vignette for more details. Synthetic datasets will be generated for each entry in the vector.
#' @param n_units A numeric, determining number of units to simulate treatment effects. Synthetic datasets will be generated for each entry in the vector.
#' @param effect_direction A vector containing either 'neg', 'null', or 'pos'. Determines the direction of the simulated effect. Synthetic datasets will be generated for each entry in the vector.
#' @param policy_speed A vector of strings, containing either 'instant' or 'slow' entries, determining how quickly treated units obtain the simulated effect. Synthetic datasets will be generated for each entry in the vector. Can either be 'instant" (so treatment effect applies fully in the first treated time period) or 'slow' (treatment effect ramps up linearly to the desired effect size, based on `n_implementation_periods`.
#' @param bias_type A string, either linear" or "nonlinear". Specifies type of bias for 'confounding' method
#' @param bias_size A string, either "small" "medium" or "large". Specifies relative size of bias for 'confounding' method.
#' @param n_implementation_periods A vector of numerics, determining number of periods after implementation until treated units reach the desired simulated treatment effect. Synthetic datasets will be generated for each entry in the vector.
#' @param rhos A vector of values between 0-1, indicating the correlation between the primary policy and a concurrent policy. Only applies when 'method' == 'concurrent'. Synthetic datasets will be generated for each entry in the vector.
#' @param years_apart A numeric, for number of years between the primary policy being implemented and the concurrent policy. Only applies when 'method' == 'concurrent'.
#' @param ordered A boolean, determines if the primary policy always occurs before the concurrent policy (`TRUE`) or if the policies are randomly ordered (`FALSE`).
#' @param method A string, determing the simulation method. Can be either 'no_confounding', 'confounding' or 'concurrent'
#' @param method_sample Underlying function for the sampling method to determine treatment status. Provided here for convenience so that the user does not need to modify the actual underlying function's script.
#' @param method_pre_model Similar to method_sample argument, this variable is provided as a convenience for the user. This function transforms the treatment effect, after it's simulated within the synthetic data.
#' @param method_model Another convenience function, which can be modified to control the model call.
#' @param method_post_model Another convenience function, which can be modified to control transformations to the simulated effect, after modeling.
#' @param method_results Another convenience function, which can be modified to control the simulation results that are returned.
#' @param verbose Boolean, default True. If TRUE, provides summary details on simulation runs across iterations
#' @param globals Additional globals to pass to the simulate function, such as parallelization packages or additional R packages used by method calls (e.g. modeling packages, like "FEOLS").
#' @returns An OpticSim object, which contains simulation and model parameters for simulation runs, which is used as an input for dispatch_simulations.
#' @examples
#'
#' # Load data for simulation and set up a hypothetical policy effect:
#'
#' data(overdoses)
#' eff <- 0.1*mean(overdoses$crude.rate, na.rm = TRUE)
#'
#' # Set up a simple linear model
#' form <- formula(crude.rate ~ state + year + population + treatment_level)
#' mod <- optic_model(name = 'lin',
#' type = 'reg',
#' call = 'lm',
#' formula = form,
#' se_adjust = 'none')
#'
#' # Create simulation object, with desired parameters for simulations:
#' sim <- optic_simulation(x = overdoses,
#' models = list(mod),
#' method = 'no_confounding',
#' unit_var = 'state',
#' treat_var = 'state',
#' time_var = 'year',
#' effect_magnitude = list(eff),
#' n_units = 10,
#' effect_direction = 'pos',
#' iters = 10,
#' policy_speed = 'instant',
#' n_implementation_periods = 1)
#'
#' @export
#'
#' @importFrom purrr cross
#' @importFrom purrr transpose
#'
optic_simulation <- function(x, models, iters,
unit_var, time_var,
conf_var,
effect_magnitude,
n_units,
effect_direction,
policy_speed,
prior_control = "level", # level should be levels
bias_size = NULL,
bias_type = NULL,
treat_var = NULL,
n_implementation_periods,
rhos = NULL, years_apart = NULL, ordered = NULL,
method,
method_sample, method_model, method_results,
method_pre_model, method_post_model,
globals=NULL, verbose=TRUE) {
###
# VALIDATION
###
# all model list elements must contain at least model_call and model_formula args,
# optional model_args - nothing else
# also should be named, but if not apply names: model1, model2, model3, ...
for (m in models) {
if (!"name" %in% names(m)) {
stop("model must contain `name` named element")
}
if (!"model_call" %in% names(m)) {
stop("model must contain `model_call` named element")
}
if (!"model_formula" %in% names(m)) {
stop("model must contain `model_formula` named element")
}
}
for (i in 1:length(models)) {
names(models)[i] <- models[[i]][["name"]]
}
# iters should be integer
iters <- as.integer(iters)
# Start setting the default methods:
if(method == "concurrent") {
d_method_sample= concurrent_sample
d_method_pre_model= concurrent_premodel
d_method_model= concurrent_model
d_method_post_model= concurrent_postmodel
d_method_results= concurrent_results
# Check method-specific inputs:
stopifnot(is.numeric(years_apart))
stopifnot(is.numeric(rhos))
stopifnot(is.logical(ordered))
} else if (method == "no_confounding") {
#stopifnot(is.character(treat_var))
d_method_sample = noconf_sample
d_method_pre_model = noconf_premodel
d_method_model = noconf_model
d_method_post_model = noconf_postmodel
d_method_results = noconf_results
} else if (method == "confounding") {
# Note: Add these new parameters when incorporating the confounding runs.
stopifnot(bias_type %in% c("linear","nonlinear"))
stopifnot(is.character(conf_var))
stopifnot(length(conf_var) == 1)
stopifnot(conf_var %in% names(x))
stopifnot(bias_size %in% c("small","medium","large"))
#stop("Confounding is currently not implemented.")
# Here, assign confounding methods once they are implemented
# Uncomment once we bring the confounding code to the package.
d_method_sample = selbias_sample
d_method_pre_model = selbias_premodel
d_method_model = selbias_model
d_method_post_model = selbias_postmodel
d_method_results = selbias_results
} else {
stop("method argument must be either no_confounding, confounding, or concurrent")
}
# Choose among default methods or user-provided methods.
if(missing(method_sample)) {
method_sample <- d_method_sample
}
if(missing(method_pre_model)) {
method_pre_model <- d_method_pre_model
}
if(missing(method_model)) {
method_model <- d_method_model
}
if(missing(method_post_model)) {
method_post_model <- d_method_post_model
}
if(missing(method_results)) {
method_results <- d_method_results
}
# confirm functions with arg of single_simulation
if (!is.function(method_sample)) {
stop("`method_sample` must be of class 'function'")
}
if (!is.function(method_model)) {
stop("`method_model` must be of class 'function'")
}
if (!is.function(method_results)) {
stop("`method_results` must be of class 'function'")
}
if (!is.function(method_sample)) {
stop("`method_sample` must be of class 'function'")
}
if (!is.null(method_pre_model)) {
if (!is.function(method_pre_model)) {
stop("`method_pre_model` must be of class 'function'")
}
}
if (!is.null(method_post_model)) {
if (!is.function(method_post_model)) {
stop("`method_post_model` must be of class 'function'")
}
}
# check parameters
# these checks are likely very stringent and could be relaxed for some
# combinations of inputs
stopifnot(prior_control %in% c("level", "trend"))
stopifnot(is.character(unit_var), length(unit_var) == 1)
stopifnot(is.character(time_var), length(time_var) == 1)
stopifnot(is.list(effect_magnitude))
stopifnot(is.numeric(n_units))
stopifnot(all(effect_direction %in% c("null", "neg", "pos")))
stopifnot(all(policy_speed %in% c("instant", "slow")))
stopifnot(is.numeric(n_implementation_periods))
# Crete list with mandatory parameters
params <- list(unit_var = unit_var,
time_var = time_var,
effect_magnitude = effect_magnitude,
n_units = n_units,
effect_direction = effect_direction,
policy_speed = policy_speed,
n_implementation_periods = n_implementation_periods,
prior_control = prior_control)
# Add method-specific parameters if they are provided:
if(!missing(rhos)) {
params$rhos <- rhos
}
if(!missing(years_apart)) {
params$years_apart <- years_apart
}
if(!missing(ordered)) {
params$ordered <- ordered
}
if(!missing(treat_var)) {
params$treat_var <- treat_var
}
if(!missing(conf_var)) {
params$conf_var <- conf_var
}
if(!missing(bias_size)) {
params$bias_size <- bias_size
}
if(!missing(bias_type)) {
params$bias_type <- bias_type
}
# Compute prior_control variables:
first_model_outcome <- get_model_outcome(models[[1]])
x <- compute_prior_controls(data = x,
unit_var = params$unit_var,
time_var = params$time_var,
outcome_var = first_model_outcome)
###
# create a OpticSim object
# It is difficult to
###
conf <- OpticSim$new(
data=x,
models=models,
iters=iters,
params=params,
globals=globals,
method_sample=method_sample,
method_pre_model=method_pre_model,
method_model=method_model,
method_post_model=method_post_model,
method_results=method_results
)
if (verbose) {
# let user know combinations and total individual models
combs <- length(models) * length(purrr::cross(params))
runs <- combs * iters
print(conf)
if (runs > 40000) {
cat("hey, that's a lot of iterations! we recommend using the parallel options when dispatching this job.\n")
}
}
return(conf)
}