-
Notifications
You must be signed in to change notification settings - Fork 13
/
utils.R
147 lines (128 loc) Β· 4.21 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#' Wald-Style Confidence Intervals
#'
#' @importFrom stats qnorm
#'
#' @keywords internal
#
wald_ci <- function(est, se, level = 0.95, q = NULL) {
if (is.null(q)) {
q <- abs(stats::qnorm(p = (1 - level) / 2))
}
ci_low <- est - q * se
ci_high <- est + q * se
return(cbind(ci_low, ci_high))
}
#' Summarize Estimates
#'
#' Generates a \code{data.table} summarizing results with inference
#'
#' @importFrom stats var cov
#' @importFrom mvtnorm qmvnorm
#'
#' @param task \code{tmle3_Task} containing the observed data of interest; the
#' same as that passed to ..
#' @param estimates \code{list}, TMLE estimates of parameter and ICs from
#' \code{\link{tmle3_Fit}$estimates}
#' @param param_types the types of the parameters being estimated
#' @param param_names the names of the parameters being estimated
#' @param init_psi the names of the parameters being estimated
#' @param simultaneous_ci if TRUE, calculate simulatenous confidence intervals
#'
#' @return \code{data.table} summarizing results
#'
#' @export
#
summary_from_estimates <- function(task, estimates, param_types = NULL,
param_names = NULL, init_psi = NULL,
simultaneous_ci = FALSE) {
psi <- unlist(lapply(estimates, `[[`, "psi"))
IC <- lapply(estimates, `[[`, "IC")
IC <- do.call(cbind, IC)
# for repeated measures, average IC values to get subject-level IC values
if (length(unique(task$id)) < length(task$id)) {
combined <- (by(IC, as.numeric(task$id), colMeans, simplify = FALSE))
IC <- do.call(rbind, combined)
}
var_D <- cov(IC)
n <- nrow(IC)
se <- sqrt(diag(var_D) / n)
level <- 0.95
if (simultaneous_ci && (ncol(IC) > 1)) {
rho_D <- var_D / sqrt(tcrossprod(diag(var_D)))
q <- qmvnorm(level, tail = "both", corr = rho_D)$quantile
} else {
q <- abs(stats::qnorm(p = (1 - level) / 2))
}
ci <- wald_ci(psi, se, q = q)
if (is.null(param_types)) {
param_types <- rep(as.character(NA), length(estimates))
}
if (is.null(param_names)) {
param_names <- rep(as.character(NA), length(estimates))
}
if (is.null(init_psi)) {
init_psi <- rep(as.double(NA), length(estimates))
}
transforms <- sapply(estimates, function(estimate) {
if (!is.null(estimate$transform)) {
return(estimate$transform)
} else {
return(identity)
}
})
apply_transform <- function(x, transform) {
transform(x)
}
psi_lengths <- sapply(lapply(estimates, `[[`, "psi"), length)
index_vec <- rep(seq_along(psi_lengths), psi_lengths)
psi_transformed <- mapply(apply_transform, psi, transforms[index_vec])
ci_transformed <- mapply(apply_transform, ci, transforms[index_vec])
ci_transformed <- matrix(ci_transformed, nrow = nrow(ci), ncol = ncol(ci))
summary_dt <- as.data.table(list(
param_types[index_vec],
param_names, init_psi, psi, se, ci,
psi_transformed, ci_transformed
))
setnames(summary_dt, c(
"type", "param", "init_est", "tmle_est", "se", "lower",
"upper", "psi_transformed", "lower_transformed",
"upper_transformed"
))
return(summary_dt)
}
#' Get Empirical Mean of EIFs from Estimates
#' @param estimates a list of estimates objects
ED_from_estimates <- function(estimates) {
IC <- lapply(estimates, `[[`, "IC")
IC <- do.call(cbind, IC)
ED <- colMeans(IC)
return(ED)
}
#' Bound (Truncate) Likelihoods
#' @param x the likelihood values to bound
#' @param bounds Either a length two vector of c(lower,upper) or a lower bound, where the upper is then 1 - lower
#' @export
bound <- function(x, bounds) {
lower <- bounds[[1]]
if (length(bounds) > 1) {
upper <- bounds[[2]]
} else {
upper <- 1 - lower
}
pmin(pmax(x, lower), upper)
}
#' Manually Train Likelihood Factor
#' The internal training process for likelihood factors is somewhat obtuse, so this function
#' does the steps to manually train one, which is helpful if you want to use a likelihood factor
#' independently of a likelihood object
#' @param lf the likelihood factor to train
#' @param tmle_task the task to use for training
#' @export
train_lf <- function(lf, tmle_task) {
lf_fit <- lf$delayed_train(tmle_task)
if (inherits(lf_fit, "Delayed")) {
lf_fit <- lf_fit$compute()
}
lf$train(tmle_task, lf_fit)
return(lf)
}