/
fitted_flocker.R
319 lines (297 loc) · 11.2 KB
/
fitted_flocker.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
#' Posterior predictive distributions for modeled probabilities
#'
#' Get expected values of the posterior predictive distribution for the modeled
#' probabilities (occupancy, detection, colonization, extinction, autologistic).
#'
#' The probabilities returned are conditional probabilities (e.g. detection
#' conditional on occupancy, colonization conditional on previous
#' non-occupancy, etc). These probabilities are not conditioned on the
#' observed histories (e.g. the occupancy probability is not fixed to one
#' at sites with a detection; it is estimated only based on the covariates).
#' @param flocker_fit A flocker_fit object.
#' @param components a character vector specifying one or more of "occ",
#' "det", "col", "ex", "auto", and "Omega" for which to obtain fitted values.
#' @param new_data Optional new data at which to evaluate occupancy predictions.
#' New data can be passed as a flocker_data object produced by
#' \code{make_flocker_data} or as a dataframe with one row per desired
#' prediction. If `NULL` (the default) expected values are generated for the
#' original data as formatted by make_flocker_data.
#' @param summarise if TRUE, return the expected value and upper and lower bound
#' of the credible interval, otherwise return posterior draws.
#' @param CI A vector of length 2 specifying the upper and lower bounds of the
#' credible interval.
#' @param draw_ids Vector of indices of the posterior draws to be
#' used. If `NULL` (the default) all draws are used in their native order.
#' @param response Should results be returned on the response or logit scale?
#' Defaults to `TRUE`, i.e. response scale. However, the autologistic
#' parameter is not interpretable as a probability and is always returned
#' on the logit scale regardless of the value of `response`
#' @param re_formula formula containing group-level effects to be considered in
#' the prediction. If `NULL` (default), include all group-level effects; if
#' NA, include no group-level effects.
#' @param allow_new_levels allow new levels for random effect terms in `new_data`?
#' Will error if set to `FALSE` and new levels are provided in `new_data`.
#' @param sample_new_levels If new_data is provided and contains random effect
#' levels not present in the original data, how should predictions be
#' handled? Passed directly to `brms::prepare_predictions`, which see.
#' @param unit_level Logical; defaults to FALSE. Relevant only when `new_data`
#' is not a dataframe (i.e. it is `NULL` or a flocker_data object), and useful
#' only for multiseason models with missing seasons. If FALSE, returns in the
#' shape of the observation matrix/array with NAs for missing visits. If
#' TRUE, returns in the shape of the first visit, and returns values for all
#' units that are not part of a trailing block of never-visited units,
#' including never-visited units that are part of series with subsequent
#' visits.
#' @return A list of sets of expected values (one per component). If `new_data`
#' is a dataframe, each element contains one row per row of `new_data`.
#' Otherwise, returns in the shape of the observation matrix/array used
#' to format the flocker_data (but see `unit_level` parameter for further
#' details).
#' @export
#' @examples
#' \donttest{
#' fitted_flocker(
#' example_flocker_model_single,
#' summarise = TRUE
#' )
#' }
fitted_flocker <- function(
flocker_fit,
components = c("occ", "det", "col", "ex", "auto", "Omega"),
new_data = NULL, unit_level = FALSE,
summarise = FALSE, CI = c(.05, .95), draw_ids = NULL,
response=TRUE, re_formula = NULL, allow_new_levels = FALSE,
sample_new_levels = "uncertainty") {
assertthat::assert_that(
is_flocker_fit(flocker_fit),
msg = "flocker_fit is corrupt or is not a flocker fit."
)
assertthat::assert_that(
is_one_logical(unit_level),
msg = "unit_level must be a single logical value"
)
assertthat::assert_that(
is_one_logical(summarise),
msg = "summarise must be a single logical value"
)
if(summarise){
assertthat::assert_that(
is.numeric(CI),
msg = "CI must be numeric"
)
assertthat::assert_that(
length(CI) == 2,
msg = "CI must be of length 2"
)
assertthat::assert_that(
all(CI >= 0) & all(CI <= 1),
msg = "CI must be between zero and one inclusive"
)
}
ndraws_total <- brms::ndraws(flocker_fit)
if(!is.null(draw_ids)) {
assertthat::assert_that(
max(draw_ids) <= ndraws_total,
msg = "A requested draw id is greater than the number of draws contained in flocker_fit"
)
} else {
draw_ids <- seq_len(brms::ndraws(flocker_fit))
}
if(!is.null(new_data)) {
assertthat::assert_that(
isTRUE(class(new_data) == "data.frame") | isTRUE(is_flocker_data(new_data)),
msg = "new_data must be NULL, a dataframe, or a flocker_data object"
)
if(is_flocker_data(new_data)) {
assertthat::assert_that(
new_data$type == attributes(flocker_fit)$data_type,
msg = "new_data is formatted for a different model type than flocker_fit"
)
}
}
model_type <- type_flocker_fit(flocker_fit)
relevant_components <- params_by_type[[model_type]]
if("col" %in% components){components[components == "col"] <- "colo"}
use_components <- components[components %in% relevant_components]
assertthat::assert_that(
length(use_components) > 0,
msg = "none of the requested components is relevant to the supplied model type"
)
old_data <- flocker_fit$data
if(is.null(new_data)) { # new data not provided
new_data_fmtd <- old_data
} else if(is_flocker_data(new_data)) {
new_data_fmtd <- new_data$data
} else {
new_data_fmtd <- new_data
}
# Handle models fitted via 0 + Intercept syntax with newdata
if(
("Intercept" %in% names(flocker_fit$data)) &
!("Intercept" %in% names(new_data_fmtd))){
new_data_fmtd$Intercept <- 1
}
f_form <- flocker_fit$formula
component_list <- list()
if("occ" %in% use_components) {
occ_covs <- all.vars(f_form[[2]]$occ[[3]])
linpred_occ <- t(
brms::posterior_linpred(
flocker_fit, dpar = "occ", draw_ids = draw_ids, newdata = new_data_fmtd,
re_formula = re_formula, allow_new_levels = allow_new_levels,
sample_new_levels = sample_new_levels,
req_vars = occ_covs
)
)
if(response){
linpred_occ <- boot::inv.logit(linpred_occ)
}
component_list$linpred_occ <- linpred_occ
}
if("colo" %in% use_components) {
colo_covs <- all.vars(f_form[[2]]$colo[[3]])
linpred_colo <- t(
brms::posterior_linpred(
flocker_fit, dpar = "colo", draw_ids = draw_ids, newdata = new_data_fmtd,
re_formula = re_formula, allow_new_levels = allow_new_levels,
sample_new_levels = sample_new_levels,
req_vars = colo_covs
)
)
if(response){
linpred_colo <- boot::inv.logit(linpred_colo)
}
component_list$linpred_col <- linpred_colo
}
if("ex" %in% use_components) {
ex_covs <- all.vars(f_form[[2]]$ex[[3]])
linpred_ex <- t(
brms::posterior_linpred(
flocker_fit, dpar = "ex", draw_ids = draw_ids, newdata = new_data_fmtd,
re_formula = re_formula, allow_new_levels = allow_new_levels,
sample_new_levels = sample_new_levels,
req_vars = ex_covs
)
)
if(response){
linpred_ex <- boot::inv.logit(linpred_ex)
}
component_list$linpred_ex <- linpred_ex
}
if("auto" %in% use_components) {
auto_covs <- all.vars(f_form[[2]]$autologistic[[3]])
linpred_auto <- t(
brms::posterior_linpred(
flocker_fit, dpar = "autologistic", draw_ids = draw_ids, newdata = new_data_fmtd,
re_formula = re_formula, allow_new_levels = allow_new_levels,
sample_new_levels = sample_new_levels,
req_vars = auto_covs
)
)
component_list$linpred_auto <- linpred_auto
}
if("det" %in% use_components) {
det_covs <- all.vars(f_form[[1]][[3]])
linpred_det <- t(
brms::posterior_linpred(
flocker_fit, dpar = "mu", draw_ids = draw_ids, newdata = new_data_fmtd,
re_formula = re_formula, allow_new_levels = allow_new_levels,
sample_new_levels = sample_new_levels,
req_vars = det_covs
)
)
if(response){
linpred_det <- boot::inv.logit(linpred_det)
}
component_list$linpred_det <- linpred_det
}
if("Omega" %in% use_components) {
Omega_covs <- all.vars(f_form[[2]]$Omega[[3]])
linpred_Omega <- t(
brms::posterior_linpred(
flocker_fit, dpar = "Omega", draw_ids = draw_ids, newdata = new_data_fmtd,
re_formula = re_formula, allow_new_levels = allow_new_levels,
sample_new_levels = sample_new_levels,
req_vars = Omega_covs
)
)
if(response){
linpred_Omega <- boot::inv.logit(linpred_Omega)
}
component_list$linpred_Omega <- linpred_Omega
}
if(summarise) {
cl2 <- lapply(component_list, summarise_fun, CI = CI)
} else {
cl2 <- component_list
}
if(is.null(new_data)) {
gp <- get_positions(flocker_fit, unit_level = unit_level)
out <- lapply(cl2, reshape_fun, gp = gp)
} else if (is_flocker_data(new_data)) {
gp <- get_positions(new_data, unit_level = unit_level)
out <- lapply(cl2, reshape_fun, gp = gp)
} else {
out <- cl2
}
dt <- attributes(flocker_fit)$data_type
if(is.null(new_data) | is_flocker_data(new_data)) {
dn <- list(
paste0("site_", seq_len(dim(out[[1]])[1]))
)
if(!unit_level) {
dn <- append(dn, list(paste0("visit_", seq_len(dim(out[[1]])[2]))))
multi_ind <- 3
} else {
multi_ind <- 2
}
if(dt == "augmented"){
dn <- append(dn, list(paste0("species_", seq_len(dim(out[[1]])[multi_ind]))))
}
if(dt == "multi"){
dn <- append(dn, list(paste0("timestep_", seq_len(dim(out[[1]])[multi_ind]))))
}
} else {
dn <- list(
paste0("row_", seq_len(dim(out[[1]])[1]))
)
}
if(summarise){
dn <- append(dn, list(c("mean", paste0("Q", c(min(CI)*100, paste0(max(CI)*100))))))
} else {
dn <- append(dn, list(paste0("draw_", draw_ids)))
}
for(i in seq_along(out)){
dimnames(out[[i]]) <- dn
}
out
}
#' summarize matrix of linear predictors to its mean and CI
#' @param x linpreds
#' @param CI fractional bounds of credible interval between 0 and 1 inclusive
#' @return summary
#' @noRd
summarise_fun <- function(x, CI) {
out <- data.frame(estimate = matrixStats::rowMeans2(x),
lwr = matrixStats::rowQuantiles(x, probs = min(CI)),
upr = matrixStats::rowQuantiles(x, probs = max(CI)))
names(out)[2:3] <- paste0("Q", c(min(CI)*100, paste0(max(CI)*100)))
out
}
#' reshape a matrix of values into a stack of arrays via get_positions
#' @param x input_matrix
#' @param gp output of get_positions
#' @noRd
reshape_fun <- function(x, gp) {
assertthat::assert_that(is.matrix(x) | is.data.frame(x))
ai <- list()
for(i in seq_len(ncol(x))) {
ai[[i]] <- array(x[,i][gp], dim = dim(gp))
}
if(ncol(x) > 1) {
arr_dim <- length(dim(ai[[1]]))
return(abind::abind(ai, along = arr_dim + 1))
} else {
return(ai[[1]])
}
}