/
surv-brier_survival_integrated.R
137 lines (127 loc) · 4.41 KB
/
surv-brier_survival_integrated.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#' Integrated Brier score for right censored data
#'
#' Compute the integrated Brier score for right censored data, which is an
#' overall calculation of model performance for all values of `.eval_time`.
#'
#' @family dynamic survival metrics
#' @templateVar fn brier_survival_integrated
#' @template return-dynamic-survival
#' @details
#'
#' The integrated time-dependent brier score is calculated in an "area under the
#' curve" fashion. The brier score is calculated for each value of `.eval_time`.
#' The area is calculated via the trapezoidal rule. The area is divided by the
#' largest value of `.eval_time` to bring it into the same scale as the
#' traditional brier score.
#'
#' Smaller values of the score are associated with better model performance.
#'
#' This formulation takes survival probability predictions at one or more
#' specific _evaluation times_ and, for each time, computes the Brier score.
#' To account for censoring, inverse probability of censoring weights
#' (IPCW) are used in the calculations.
#'
#' The column passed to `...` should be a list column with one element per
#' independent experiential unit (e.g. patient). The list column should contain
#' data frames with several columns:
#'
#' - `.eval_time`: The time that the prediction is made.
#' - `.pred_survival`: The predicted probability of survival up to `.eval_time`
#' - `.weight_censored`: The case weight for the inverse probability of censoring.
#'
#' The last column can be produced using [parsnip::.censoring_weights_graf()].
#' This corresponds to the weighting scheme of Graf _et al_ (1999). The
#' internal data set `lung_surv` shows an example of the format.
#'
#' This method automatically groups by the `.eval_time` argument.
#'
#' @inheritParams brier_survival
#'
#' @author Emil Hvitfeldt
#'
#' @references E. Graf, C. Schmoor, W. Sauerbrei, and M. Schumacher, “Assessment
#' and comparison of prognostic classification schemes for survival data,”
#' Statistics in Medicine, vol. 18, no. 17-18, pp. 2529–2545, 1999.
#'
#' @examples
#' library(dplyr)
#'
#' lung_surv %>%
#' brier_survival_integrated(
#' truth = surv_obj,
#' .pred
#' )
#' @export
brier_survival_integrated <- function(data, ...) {
UseMethod("brier_survival_integrated")
}
brier_survival_integrated <- new_integrated_survival_metric(
brier_survival_integrated,
direction = "minimize"
)
#' @rdname brier_survival_integrated
#' @export
brier_survival_integrated.data.frame <- function(data,
truth,
...,
na_rm = TRUE,
case_weights = NULL) {
dynamic_survival_metric_summarizer(
name = "brier_survival_integrated",
fn = brier_survival_integrated_vec,
data = data,
truth = !!enquo(truth),
...,
na_rm = na_rm,
case_weights = !!enquo(case_weights)
)
}
#' @export
#' @rdname brier_survival_integrated
brier_survival_integrated_vec <- function(truth,
estimate,
na_rm = TRUE,
case_weights = NULL,
...) {
check_dynamic_survival_metric(
truth, estimate, case_weights
)
num_eval_times <- get_unique_eval_times(estimate)
if (num_eval_times < 2) {
cli::cli_abort(
"At least 2 evaluation times are required. \\
Only {num_eval_times} unique time was given."
)
}
if (na_rm) {
result <- yardstick_remove_missing(
truth, seq_along(estimate), case_weights
)
truth <- result$truth
estimate <- estimate[result$estimate]
case_weights <- result$case_weights
} else {
any_missing <- yardstick_any_missing(
truth, estimate, case_weights
)
if (any_missing) {
return(NA_real_)
}
}
brier_survival_integrated_impl(truth, estimate, case_weights)
}
get_unique_eval_times <- function(x) {
# Since validate_surv_truth_list_estimate() makes sure they are all the same
length(x[[1]]$.eval_time)
}
brier_survival_integrated_impl <- function(truth,
estimate,
case_weights) {
res <- brier_survival_vec(
truth = truth,
estimate = estimate,
na_rm = FALSE,
case_weights = case_weights
)
auc(res$.eval_time, res$.estimate) / max(res$.eval_time)
}