-
Notifications
You must be signed in to change notification settings - Fork 1
/
param-estimates-batch.R
207 lines (183 loc) · 7.58 KB
/
param-estimates-batch.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# Batch Processing of Parameter Estimates in R
#' Batch Processing of Parameter Estimates
#'
#' Calls out to `bbi nonmem params` and returns a tibble of parameter estimates.
#' The primary reason for this function is for things like bootstraps or
#' simulations where parameter estimates for a large number of models (hundreds
#' or thousands) need to be pulled in together. In those cases, this function is
#' _much_ faster than using [model_summaries()] and [param_estimates()],
#' because it does _not_ parse all of the other information contained in a
#' `bbi_nonmem_summary` object.
#'
#' @details
#'
#' The tibble will always have columns `absolute_model_path`, `run`,
#' `error_msg`, and `termination_code`. All other column names are those of the
#' parameter estimates found in the `.ext` files detected in the directory
#' passed. Note: the passed directory is _not_ searched recursively. Passed
#' directory will be searched for model files and output directories for found
#' models will be searched for `.ext` files.
#'
#' `error_msg`: column will be `NA` if error is not detected. Column will contain message if known error is detected. The known errors
#' are
#' - `"no ext file contents"`: `.ext` file detected, but contains nothing
#' - `"no estimation output detected"`: `.ext` file detected, has contents, but is missing line `-1000000000` which are the final parameter estimates
#'
#' `termination_code`: parsed from line `-1000000007` of the `.ext`
#'
#' **parameter columns:** naming will be the same as names found in `.ext` file.
#' If `.ext` files within the directory contain differing parameter names, the
#' output tibble will show all parameter names across files and NA values for
#' the files where a given parameter is missing.
#'
#' @importFrom dplyr rename select mutate everything across
#' @importFrom data.table fread
#' @importFrom tibble as_tibble
#'
#' @param .path a path to a directory containing model sudirectories for batch parameter processing.
#' @param ... args passed through to [bbi_exec()]
#' @param .dry_run show what the command would be without actually running it
#' @export
param_estimates_batch <- function(.path,
...,
.dry_run = FALSE) {
assert_bbi_version(.min_version = "3.1.0", .function = "param_estimates_batch")
.path <- fs::path_abs(.path)
if (!dir_exists(.path)) {
err_msg <-
glue("param_estimates_batch('{.path}') failed; unable to locate {.path}")
stop(err_msg, call. = FALSE)
}
# path containing .ext file
ext_file_dir <- basename(.path)
# for bbi nonmem params, need to pass the directory containing .ext
# cd to the root directory containing directory with .ext files
.path <- dirname(.path)
cmd_args <- c("nonmem", "params", "--dir", ext_file_dir)
# if .dry_run return output call
if (.dry_run) {
res <- bbi_dry_run(cmd_args, .path)
return(res)
}
res <- tryCatch(
bbi_exec(cmd_args, .dir = .path, ..., .wait = TRUE),
error = function(e) {
err_msg <-
glue(
"param_estimates_batch('{.path}') failed with the following error. This may be because the modeling run has not finished successfully.\n\nERROR: \n{e}"
)
stop(err_msg, call. = FALSE)
}
)
df <- as_tibble(fread(text = res$stdout, header = TRUE))
# throw out extra column that gets created if no models succeeded
if ("V4" %in% names(df)) df <- select(df, -"V4")
# reformat parameter names to conform with `param_estimates()` output
names(df) <- gsub("\\(([0-9]+)_([0-9]+)\\)", "(\\1,\\2)", names(df))
# format and return
df %>%
rename(
absolute_model_path = "dir",
error_msg = "error",
termination_code = "termination"
) %>%
mutate(
across(everything(), ~ ifelse(. == "", NA, .)),
absolute_model_path = file.path(.path, .data$absolute_model_path),
run = basename(.data$absolute_model_path)
) %>%
select(
"absolute_model_path",
"run",
"error_msg",
"termination_code",
everything()
)
}
#' Compare parameter estimates
#'
#' Summarizes the parameter estimates from tibble like what's returned from
#' [param_estimates_batch()], showing you the quantiles passed to `probs`,
#' likely interpreted as confidence intervals for your parameter estimates.
#' Optionally takes an "original model" to compare these quantiles against.
#' Originally conceived for comparing a model to multiple runs of the "same"
#' model, for example in a bootstrap or simulation.
#'
#' @return A tibble with the first column containing the parameter names (that
#' were column names in `.param_df`), the second column optionally containing
#' original parameter estimates (if `.orig_mod` is passed), and subsequent
#' columns containing the quantiles specified in `probs`. You can think of
#' this as essentially `.param_df`, summarized by quantiles across all rows,
#' and then pivoted.
#'
#'
#' @param .param_df A tibble with columns for each parameter and rows for each
#' model (like what's returned from [param_estimates_batch()])
#' @param .orig_mod `bbi_model` object to compare `.param_df` against. If
#' `NULL`, the default, only returns quantiles from `.param_df`. If passed,
#' will display an additional `original_estimate` column.
#' @param .compare_cols An expression that can be passed to [dplyr::select()] to
#' select which columns in `.param_df` will be pivoted and summarized.
#' Defaults to `dplyr::starts_with(c("THETA", "SIGMA", "OMEGA"))` (designed
#' for NONMEM runs), but consider using [dplyr::matches()] to select columns
#' using regex. Also see
#' [?tidyselect::language](https://tidyselect.r-lib.org/reference/language.html)
#' for more details and options.
#' @param probs Numeric vector with values between 0 and 1 to be passed through to
#' [stats::quantile()]. Represents the quantiles to calculate for parameter
#' estimates in `.param_df`.
#' @param na.rm Logical scalar, passed through to [stats::quantile()].
#'
#' @importFrom tibble rownames_to_column
#' @importFrom dplyr summarise across starts_with select rename left_join
#' @importFrom stats quantile
#' @export
param_estimates_compare <- function(
.param_df,
.orig_mod = NULL,
.compare_cols = starts_with(c("THETA", "SIGMA", "OMEGA")),
probs = c(.5, 0.025, 0.975),
na.rm = FALSE
) {
comp_df <- .param_df %>%
select({{ .compare_cols }})
if (!is.null(.orig_mod)) {
if (!inherits(.orig_mod, NM_SUM_CLASS)) {
.orig_mod <- model_summary(.orig_mod)
}
mod_df <- param_estimates(.orig_mod)
# check that param names match
n1 <- sort(mod_df$parameter_names)
n2 <- sort(names(comp_df))
.same <- suppressSpecificWarning(
all(n1 == n2),
"is not a multiple"
)
if (!.same) {
stop(paste(
glue("The models passed to `.param_df` and `.orig_mod` do not have the same parameters."),
glue(" `.param_df` parameter names: {paste(n2, collapse = ', ')}"),
glue(" `.orig_mod` parameter names: {paste(n1, collapse = ', ')}"),
sep = "\n"
))
}
}
# summarize comp to quantiles
quantile_fn <- function(x) {
quantile(x, probs = probs, na.rm = na.rm)
}
comp_df <- comp_df %>%
reframe(across(.cols = everything(), .fns = quantile_fn)) %>%
t() %>%
as.data.frame() %>%
rownames_to_column()
colnames(comp_df) <- c("parameter_names", paste0(probs * 100, "%"))
# join quantiles to original
if (!is.null(.orig_mod)) {
comp_df <- mod_df %>%
select("parameter_names", "estimate") %>%
rename(original_estimate = "estimate") %>%
left_join(comp_df, by = "parameter_names")
}
return(comp_df)
}