-
-
Notifications
You must be signed in to change notification settings - Fork 82
/
tidy-params.R
231 lines (225 loc) · 8.71 KB
/
tidy-params.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#' Tidy parameter selection
#'
#' Parameter selection in the style of **dplyr** and other tidyverse packages.
#'
#' @name tidy-params
#'
#' @details
#' As of version `1.7.0`, **bayesplot** allows the `pars` argument for [MCMC
#' plots][bayesplot::MCMC-overview] to use "tidy" variable selection (in the
#' style of the **dplyr** package). The [`vars()`][dplyr::vars] function is
#' re-exported from **dplyr** for this purpose.
#'
#' Features of tidy selection includes direct selection (`vars(alpha, sigma)`),
#' everything-but selection (`vars(-alpha)`), ranged selection
#' (``vars(`beta[1]`:`beta[3]`)``), support for selection functions
#' (`vars(starts_with("beta"))`), and combinations of these features. See the
#' **Examples** section, below.
#'
#' When using `pars` for tidy parameter selection, the `regex_pars` argument is
#' ignored because **bayesplot** supports using [tidyselect helper
#' functions][tidyselect::select_helpers] (`starts_with()`, `contains()`,
#' `num_range()`, etc.) for the same purpose. **bayesplot** also exports some
#' additional helper functions to help with parameter selection:
#'
#' * `param_range()`: like [`num_range()`][tidyselect::num_range] but used
#' when parameter indexes are in brackets (e.g. `beta[2]`).
#'
#' * `param_glue()`: for more complicated parameter names with multiple
#' indexes (including variable names) inside the brackets
#' (e.g., `beta[(Intercept) age_group:3]`).
#'
#' These functions can be used inside of `vars()`, `dplyr::select()`,
#' and similar functions, just like the
#' [tidyselect helper functions][tidyselect::select_helpers].
#'
#' @section Extra Advice:
#'
#' Parameter names in `vars()` are not quoted. When the names contain special
#' characters like brackets, they should be wrapped in backticks, as in
#' ``vars(`beta[1]`)``.
#'
#' To exclude a range of variables, wrap the sequence in parentheses and then
#' negate it. For example, (``vars(-(`beta[1]`:`beta[3]`))``) would exclude
#' `beta[1]`, `beta[2]`, and `beta[3]`.
#'
#' `vars()` is a helper function. It holds onto the names and expressions used
#' to select columns. When selecting variables inside a **bayesplot**
#' function, use `vars(...)`: `mcmc_hist(data, pars = vars(alpha))`. When
#' using `select()` to prepare a dataframe for a **bayesplot** function, do
#' not use `vars()`: `data %>% select(alpha) %>% mcmc_hist()`.
#'
#' Internally, tidy selection works by converting names and expressions
#' into position numbers. As a result, integers will select parameters;
#' `vars(1, 3)` selects the first and third ones. We do not endorse this
#' approach because positions might change as variables are added and
#' removed from models. To select a parameter that happens to be called `1`,
#' use backticks to escape it ``vars(`1`)``.
#'
#' @seealso [glue::glue()]
#'
#' @examples
#' x <- example_mcmc_draws(params = 6)
#' dimnames(x)
#' mcmc_hex(x, pars = vars(alpha, `beta[2]`))
#' mcmc_dens(x, pars = vars(sigma, contains("beta")))
#' mcmc_hist(x, pars = vars(-contains("beta")))
#'
#' # using the param_range() helper
#' mcmc_hist(x, pars = vars(param_range("beta", c(1, 3, 4))))
#'
#' \donttest{
#' #############################
#' ## Examples using rstanarm ##
#' #############################
#' if (requireNamespace("rstanarm", quietly = TRUE)) {
#' # see ?rstanarm::example_model
#' fit <- example("example_model", package = "rstanarm", local=TRUE)$value
#' print(fit)
#' posterior <- as.data.frame(fit)
#' str(posterior)
#'
#' color_scheme_set("brightblue")
#' mcmc_hist(posterior, pars = vars(size, contains("period")))
#'
#' # same as previous but using dplyr::select() and piping
#' library("dplyr")
#' posterior %>%
#' select(size, contains("period")) %>%
#' mcmc_hist()
#'
#' mcmc_intervals(posterior, pars = vars(contains("herd")))
#' mcmc_intervals(posterior, pars = vars(contains("herd"), -contains("Sigma")))
#'
#' bayesplot_theme_set(ggplot2::theme_dark())
#' color_scheme_set("viridisC")
#' mcmc_areas_ridges(posterior, pars = vars(starts_with("b[")))
#'
#' bayesplot_theme_set()
#' color_scheme_set("purple")
#' not_789 <- vars(starts_with("b["), -matches("[7-9]"))
#' mcmc_intervals(posterior, pars = not_789)
#'
#' # using the param_glue() helper
#' just_149 <- vars(param_glue("b[(Intercept) herd:{level}]", level = c(1,4,9)))
#' mcmc_intervals(posterior, pars = just_149)
#'
#' # same but using param_glue() with dplyr::select()
#' # before passing to bayesplot
#' posterior %>%
#' select(param_glue("b[(Intercept) herd:{level}]",
#' level = c(1, 4, 9))) %>%
#' mcmc_intervals()
#' }
#'}
NULL
# re-export vars for tidy parameter selection
#' @importFrom dplyr vars
#' @export
dplyr::vars
#' @rdname tidy-params
#' @export
#' @param vars `NULL` or a character vector of parameter names to choose from.
#' This is only needed for the atypical use case of calling the function as a
#' standalone function outside of `vars()`, `select()`, etc. Typically this is
#' left as `NULL` and will be set automatically for the user.
#' @param prefix,range For `param_range()` only, `prefix` is a string naming a
#' parameter and `range` is an integer vector providing the indices of a
#' subset of elements to select. For example, using
#'
#' param_range("beta", c(1,2,8))
#'
#' would select parameters named `beta[1]`, `beta[2]`, and `beta[8]`.
#' `param_range()` is only designed for the case that the indices are integers
#' surrounded by brackets. If there are no brackets use
#' [num_range()][tidyselect::select_helpers].
#'
param_range <- function(prefix, range, vars = NULL) {
if (!is.null(vars) && !is.character(vars)) {
abort("'vars' must be NULL or a character vector.")
}
nms <- paste0(prefix, "[", range, "]")
param_matches <- match(nms, vars %||% tidyselect::peek_vars())
param_matches[!is.na(param_matches)]
}
#' @rdname tidy-params
#' @export
#' @param pattern,... For `param_glue()` only, `pattern` is a string containing
#' expressions enclosed in braces and `...` should be named arguments
#' providing one character vector per expression in braces in `pattern`. It is
#' easiest to describe how to use these arguments with an example:
#'
#' param_glue("beta_{var}[{level}]",
#' var = c("age", "income"),
#' level = c(3,8))
#'
#' would select parameters with names
#' `"beta_age[3]"`, `"beta_income[3]"`, `"beta_age[8]"`, `"beta_income[8]"`.
#'
#' @examples
#' \dontrun{
#' ###################################
#' ## More examples of param_glue() ##
#' ###################################
#' library(dplyr)
#' posterior <- tibble(
#' b_Intercept = rnorm(1000),
#' sd_condition__Intercept = rexp(1000),
#' sigma = rexp(1000),
#' `r_condition[A,Intercept]` = rnorm(1000),
#' `r_condition[B,Intercept]` = rnorm(1000),
#' `r_condition[C,Intercept]` = rnorm(1000),
#' `r_condition[A,Slope]` = rnorm(1000),
#' `r_condition[B,Slope]` = rnorm(1000)
#' )
#' posterior
#'
#' # using one expression in braces
#' posterior %>%
#' select(
#' param_glue("r_condition[{level},Intercept]", level = c("A", "B"))
#' ) %>%
#' mcmc_hist()
#'
#' # using multiple expressions in braces
#' posterior %>%
#' select(
#' param_glue(
#' "r_condition[{level},{type}]",
#' level = c("A", "B"),
#' type = c("Intercept", "Slope"))
#' ) %>%
#' mcmc_hist()
#'}
param_glue <- function(pattern, ..., vars = NULL) {
if (!is.null(vars) && !is.character(vars)) {
abort("'vars' must be NULL or a character vector.")
}
dots <- as.list(expand.grid(...))
nms <- as.character(glue::glue_data(dots, pattern))
param_matches <- match(nms, vars %||% tidyselect::peek_vars())
param_matches[!is.na(param_matches)]
}
# internal ----------------------------------------------------------------
#' Internal function for tidy parameter selection
#'
#' This function is called internally by `prepare_mcmc_array()` if the user's
#' `pars` argument is a quosure.
#'
#' @noRd
#' @param complete_pars A character vector of *all* parameter names.
#' @param pars_list A list of columns generated by `vars()`.
#' @return Character vector of selected parameter names.
tidyselect_parameters <- function(complete_pars, pars_list) {
# We use the list of helpers so that we don't have to keep track of any
# changes to tidyselect. We use `env_bury()`` so that the definitions of
# selection helpers are available. This pattern is taken from the example code
# in `vars_select_helpers`.
helpers <- tidyselect::vars_select_helpers
pars_list <- lapply(pars_list, rlang::env_bury, !!! helpers)
selected <- tidyselect::vars_select(.vars = complete_pars, !!! pars_list)
if (!length(selected)) {
abort("No parameters were found matching those names.")
}
unname(selected)
}