-
Notifications
You must be signed in to change notification settings - Fork 0
/
rmtl.R
369 lines (360 loc) · 15.1 KB
/
rmtl.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
#' Estimate Restricted Mean Time Lost (RMTL) and its Difference
#'
#' The method of Connor and Trinquart (Stat Med 2021) estimates the RMTL in the
#' presence of competing risks. This function is a modification of their R code.
#' Estimation functions are identical; input handling and output have been
#' adapted.
#'
#' @param data Data frame (tibble).
#' @param exposure Optional. Name of exposure variable within the \code{data}.
#' If omitted, will return unstratified RMTL.
#' @param time Name of time variable within the \code{data}. By default, this
#' is the event time. If \code{time2} is also given, this is the entry time,
#' as per \code{\link[survival]{Surv}} standard.
#' @param time2 Optional. Name of the variable within the \code{data}
#' indicating the exit (event) time. If omitted, \code{time} is the event
#' time, as per \code{\link[survival]{Surv}} standard.
#' @param event Name of event variable within the \code{data}. Can be integer,
#' factor, character, or logical. The reference level indicates censoring and
#' is taken as the first level of the factor, the lowest numeric value
#' (usually \code{0}), or \code{FALSE} for a logical variables. Other levels
#' are events of different types. If no competing risks, the alternative
#' level, e.g. \code{1}, indicates the event (e.g., death).
#' @param event_of_interest Optional. Indicator for which of the non-censoring
#' events is of main interest. Others are treated as competing. Defaults to
#' \code{1}, i.e., the first non-censoring event.
#' @param weight Optional. Weights, such as inverse-probability weights or
#' survey weights. The default (\code{NULL}) uses equal weights for all
#' observations.
#' @param tau Optional. Time horizon to restrict event times. By default, the
#' latest time in the group with the shortest follow-up. Prefer a
#' user-defined, interpretable time horizon.
#' @param reach_tau Optional. How to handle provided
#' \code{tau} values that are not reached in one of the exposure categories.
#'
#' * \code{"warn"} Default. Display a warning and estimate RMTL and its
#' difference in those exposure categories where \code{tau} is reached.
#' * \code{"stop"} Stop with an error if \code{tau} not reached in one or more
#' of the exposure categories.
#' * \code{"ignore"} Ignore exposure categories where \code{tau} is not
#' reached; estimate RMTL and its difference in those exposure categories
#' where \code{tau} is reached.
#'
#' If \code{tau} is not reached in any of the exposure categories, the
#' function always stops with an error.
#' @param conf.level Optional. Confidence level. Defaults to \code{0.95}.
#'
#' @details
#' Differences to the original function rmtl::rmtl():
#' * Convert \code{\link{print}} for errors to \code{\link{stop}} or
#' \code{\link{warning}}.
#' * Pass data as a data frame/tibble and select variables from it.
#' * Allow for different variable types in the \code{event} variable.
#' * Use \code{\link[survival]{Surv}} conventions for entry/exit times
#' (\code{time}, \code{time2}).
#' * Allow for exposure groups where \code{tau} is not reached.
#' * Return contrasts in restricted mean time lost as comparisons to the
#' reference level instead of all pairwise comparisons.
#' * Add origin (time 0, cumulative incidence 0) to the returned cumulative
#' incidence function for proper plotting.
#' * Return results as a list of tibbles. Do not print or plot.
#'
#' @return List:
#' * \code{tau} Time horizon.
#' * \code{rmtl} Tibble with absolute estimates of RMTL, per exposure group if
#' given.
#' * \code{rmtdiff} Tibble with contrasts of RMTL between exposure groups,
#' compared to a common reference (the first level). Empty tibble if no
#' exposure variable given.
#' * \code{cif} Tibble with cumulative incidence function for the event of
#' interest:
#' * \code{exposure} Exposure group.
#' * \code{time} Event time.
#' * \code{estimate} Aalen-Johansen estimate of cumulative incidence function
#' with \code{se}, \code{conf.low}, and \code{conf.high}.
#'
#' @references Conner SC, Trinquart L. Estimation and modeling of the restricted
#' mean time lost in the presence of competing risks. Stat Med 2021;40:2177–96.
#' [https://doi.org/10.1002/sim.8896](https://doi.org/10.1002/sim.8896).
#' @export
#'
#' @examples
#' data(cancer, package = "survival")
#' cancer <- cancer %>% dplyr::mutate(
#' status = status - 1, # make 0/1
#' sex = factor(sex, levels = 1:2, labels = c("Men", "Women")))
#'
#' result <- estimate_rmtl(
#' data = cancer,
#' exposure = sex,
#' time = time,
#' event = status,
#' tau = 365.25) # time horizon: one year
#' result
#'
#' # Make simple plot
#' library(ggplot2)
#' library(dplyr)
#' library(tidyr)
#'
#' result$cif %>%
#' ggplot(mapping = aes(x = time, y = estimate, color = exposure)) +
#' geom_step() +
#' scale_x_continuous(breaks = seq(from = 0, to = 365, by = 60)) +
#' labs(x = "Time since start of follow-up in days",
#' y = "Cumulative incidence")
#'
#' # Make fancier plot with a shaded area for the RMTL difference
#' df_ribbon <- result$cif %>%
#' select(exposure, time, estimate) %>%
#' pivot_wider(names_from = exposure,
#' values_from = estimate,
#' names_repair = ~c("time", "surv", "surv2")) %>%
#' filter(time < 365.25) %>% # tau for RMST
#' arrange(time) %>% # carry forward survival values per stratum
#' fill(surv) %>%
#' fill(surv2)
#'
#' result$cif %>%
#' ggplot() +
#' geom_step(mapping = aes(x = time, y = estimate, color = exposure)) +
#' scale_x_continuous(breaks = seq(from = 0, to = 365, by = 60)) +
#' scale_y_continuous(expand = expansion()) +
#' labs(x = "Time since start of follow-up in days",
#' y = "Cumulative incidence") +
#' cowplot::theme_minimal_hgrid() +
#' geom_stepribbon(data = df_ribbon,
#' mapping = aes(x = time, ymin = surv, ymax = surv2),
#' fill = "gray80", alpha = 0.5)
estimate_rmtl <- function(data,
exposure = NULL,
time,
time2 = NULL,
event,
event_of_interest = 1,
weight = NULL,
tau = NULL,
reach_tau = c("warn", "stop", "ignore"),
conf.level = 0.95) {
alldat <- data %>%
tibble::as_tibble() %>%
dplyr::select(time = {{ time }},
time2 = {{ time2 }},
event = {{ event }},
group = {{ exposure }},
weight = {{ weight }})
# set to default values if missing in data
added_cols <- tibble(time2 = NA_real_, group = "Overall", weight = 1)
alldat <- alldat %>%
tibble::add_column(added_cols %>%
select(-dplyr::any_of(names(alldat)))) %>%
filter(!is.na(.data$group) & !is.na(.data$time)) %>%
arrange(.data$group) %>%
dplyr::mutate(
group = factor(.data$group),
entry = dplyr::if_else(is.na(.data$time2),
true = 0, false = as.double(.data$time)),
times = dplyr::if_else(is.na(.data$time2),
true = .data$time, false = .data$time2),
event = as.numeric(factor(as.numeric(.data$event))) - 1) %>%
dplyr::select(-.data$time, -.data$time2)
if (any(alldat$times < 0)) {
stop("Event/censoring times must be positive.")
}
z <- stats::qnorm(1 - (1 - conf.level) / 2)
gg <- length(levels(alldat$group))
if (is.null(tau)) {
tau <- alldat %>%
dplyr::group_by(.data$group) %>%
dplyr::summarize(maxfu = max(.data$times), .groups = "drop") %>%
dplyr::summarize(tau = min(.data$maxfu)) %>%
pull(.data$tau)
} else {
tau.error <- rep(0, gg)
for (i in (1:gg)) {
groupval <- (levels(alldat$group)[i])
dat_group <- alldat[which(alldat$group == (groupval)), ]
tau.error[i] <- ifelse(max(dat_group$times) < tau, 1, 0)
}
if(all(tau.error == 1)) {
stop("Observed event/censoring times do not reach tau in ANY ",
"exposure group. Choose a different tau or leave ",
"NULL for automatic selection of latest possible time.")
}
if (sum(tau.error) > 0) {
tau_message <- paste0("Observed event/censoring times do not reach tau ",
"in these exposure groups: ",
paste(levels(alldat$group)[tau.error == 1],
sep = ", ", collapse = ", "),
". Choose a different tau or leave NULL for ",
"automatic selection of latest possible time.")
if(reach_tau[1] == "stop")
stop(tau_message)
if(reach_tau[1] == "warn")
warning(tau_message)
}
}
alldat$event[alldat$times > tau] <- 0
alldat$times[alldat$times > tau] <- tau
rmtl <- rep(NA, length(1:gg))
groupval <- rep(NA, length(1:gg))
rmtl.se <- rep(NA, length(1:gg))
res.cif <- list()
for (g in 1:gg) {
groupval1 <- (levels(alldat$group)[g])
dat_group1 <- alldat[which(alldat$group == (groupval1)), ]
if(max(dat_group1$times) >= tau) {
groupval[g] <- levels(alldat$group)[g]
data <- alldat[which(alldat$group == (groupval[g])), ]
tj <- data$times[data$event != 0]
tj <- unique(tj[order(tj)])
num.tj <- length(tj)
num.atrisk <-
sapply(tj, function(x)
sum(data$weight[data$entry < x & data$times >= x]))
num.ev1 <-
sapply(tj, function(x)
sum(data$weight[data$event == event_of_interest &
data$times == x]))
num.ev2 <-
sapply(tj, function(x)
sum(data$weight[data$event != event_of_interest &
data$event != 0 & data$times == x]))
num.ev <- num.ev1 + num.ev2
m <- sapply(tj, function(x) {
sum((data$weight[data$entry < x & data$times >= x]) ^ 2)
})
mg <- ((num.atrisk ^ 2) / m)
h1 <- num.ev1 / num.atrisk
h <- num.ev / num.atrisk
s <- cumprod(c(1, 1 - h))
s <- s[1:length(s) - 1]
theta <- s * h1
cif1 <- cumsum(theta)
a <- c(0, cumsum(num.ev / (mg * (num.atrisk - num.ev))))
a <- a[1:num.tj]
var.theta <- ((theta) ^ 2) *
(((num.atrisk - num.ev1) / (mg * num.ev1)) + a)
var.theta[is.nan(var.theta)] <- 0
cov.theta <- matrix(NA, nrow = num.tj, ncol = num.tj)
b <- c(0, cumsum(num.ev / (mg * (num.atrisk - num.ev))))
for (j in 1:(num.tj - 1)) {
for (k in (j + 1):num.tj) {
cov.theta[k, j] <- cov.theta[j, k] <- (theta[j]) *
(theta[k]) * (-1 / mg[j] + b[j])
}
}
diag(cov.theta) <- var.theta
cov.f10 <- apply(cov.theta, 2, function(x) {
x[is.na(x)] <- 0
cumsum(x)
})
cov.f1 <- apply(cov.f10, 1, function(x) {
x[is.na(x)] <- 0
cumsum(x)
})
var.f1 <- diag(cov.f1)
areas <- c(tj[2:num.tj], tau) - tj
rmtl[g] <- sum(areas * cif1)
cov.weights <- outer(areas, areas)
cov.f1.weight <- cov.weights * cov.f1
rmtl.var <- sum(cov.f1.weight)
rmtl.se[g] <- sqrt(rmtl.var)
res.cif.g <- list()
res.cif.g[[length(res.cif.g) + 1]] <- c(cif1, cif1[num.tj])
res.cif.g[[length(res.cif.g) + 1]] <- c(sqrt(var.f1), sqrt(var.f1[num.tj]))
res.cif.g[[length(res.cif.g) + 1]] <- c(tj, tau)
res.cif.g[[length(res.cif.g) + 1]] <- c(
cif1 - z * sqrt(var.f1),
cif1[num.tj] - z * sqrt(var.f1[num.tj]))
res.cif.g[[length(res.cif.g) + 1]] <- c(
cif1 + z * sqrt(var.f1),
cif1[num.tj] + z * sqrt(var.f1[num.tj]))
names(res.cif.g) <- c("cif", "se.cif", "tj", "cif.cil", "cif.ciu")
} else {
res.cif.g <- list(cif = NA, se.cif = NA, tj = NA,
cif.cil = NA, cif.ciu = NA)
}
res.cif[[length(res.cif) + 1]] <- res.cif.g
names(res.cif)[g] <- groupval[g] # assign name here
}
# if(gg > 1) { # moved from here
res <- data.frame(
groupval,
rmtl,
rmtl.se,
cil = rmtl - (z * rmtl.se),
ciu = rmtl + (z * rmtl.se))
#names(res.cif) <- groupval # assign name above to allow for missing groups
pwc <- gg # all pairwise: ((gg ^ 2) - gg) / 2
# to here
if (gg > 1) {
if (pwc > 0) {
label.diff <- rep(NA, pwc)
rmtl.diff <- rep(NA, pwc)
rmtl.diff.se <- rep(NA, pwc)
rmtl.diff.cil <- rep(NA, pwc)
rmtl.diff.ciu <- rep(NA, pwc)
rmtl.diff.p <- rep(NA, pwc)
res.diff <- data.frame(
label.diff,
rmtl.diff,
rmtl.diff.se,
rmtl.diff.cil,
rmtl.diff.ciu,
rmtl.diff.p)
l <- 1
i <- 1 # make contrasts only to reference level (1st group)
for (ii in i:gg) { # (ii in (i + 1):gg) {
res.diff[l, ]$label.diff <- res[ii, ]$groupval
res.diff[l, ]$rmtl.diff <- (res[ii, ]$rmtl - res[i, ]$rmtl)
res.diff[l, ]$rmtl.diff.se <- sqrt(res[ii, ]$rmtl.se ^ 2 +
res[i, ]$rmtl.se ^2)
res.diff[l, ]$rmtl.diff.cil <- res.diff[l, ]$rmtl.diff -
z * res.diff[l, ]$rmtl.diff.se
res.diff[l, ]$rmtl.diff.ciu <- res.diff[l, ]$rmtl.diff +
z * res.diff[l, ]$rmtl.diff.se
res.diff[l, ]$rmtl.diff.p <- 2 * (1 - stats::pnorm(
abs(res.diff[l, ]$rmtl.diff) / res.diff[l, ]$rmtl.diff.se))
l <- l + 1
}
res.diff[1, c("rmtl.diff.se", "rmtl.diff.cil", "rmtl.diff.ciu",
"rmtl.diff.p")] <- NA
res.diff <- tibble::as_tibble(res.diff) %>%
dplyr::rename(exposure = .data$label.diff,
estimate = .data$rmtl.diff,
se = .data$rmtl.diff.se,
conf.low = .data$rmtl.diff.cil,
conf.high = .data$rmtl.diff.ciu,
p.value = .data$rmtl.diff.p)
}
} else {
res.diff <- tibble::tibble()
}
list(tau = tau,
rmtl = tibble::as_tibble(res) %>%
dplyr::rename(exposure = .data$groupval,
estimate = .data$rmtl,
se = .data$rmtl.se,
conf.low = .data$cil,
conf.high = .data$ciu),
rmtdiff = res.diff,
# add origin at (time = 0, cuminc = 0) for each exposure group
# (even if no estimation was possible there because tau was not reached)
cif = dplyr::bind_rows(tibble::tibble(exposure =
levels(alldat$group)) %>%
dplyr::mutate(tj = 0, cif = 0,
se.cif = NA,
cif.cil = NA, cif.ciu = NA),
res.cif %>%
purrr::transpose() %>%
tibble::as_tibble() %>%
dplyr::mutate(exposure = names(.data$cif)) %>%
tidyr::unnest(cols = c(-.data$exposure))) %>%
dplyr::select(.data$exposure,
time = .data$tj,
estimate = .data$cif,
se = .data$se.cif,
conf.low = .data$cif.cil,
conf.high = .data$cif.ciu))
}