/
survfit2.R
133 lines (124 loc) · 4.52 KB
/
survfit2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#' Create survival curves
#'
#' @description
#' Simple wrapper for `survival::survfit()` except the environment is also
#' included in the returned object.
#'
#' Use this function with all other functions in this package to ensure
#' all elements are calculable.
#'
#' @inheritParams survival::survfit.formula
#' @inheritDotParams survival::survfit.formula
#'
#' @section `survfit2()` vs `survfit()`:
#'
#' Both functions have identical inputs, so why do we need `survfit2()`?
#'
#' The *only* difference between `survfit2()` and `survival::survfit()` is that the
#' former tracks the environment from which the call to the function was made.
#'
#' The definition of `survfit2()` is unremarkably simple:
#'
#' ```r
#' survfit2 <- function(formula, ...) {
#' # construct survfit object
#' survfit <- survival::survfit(formula, ...)
#'
#' # add the environment
#' survfit$.Environment = <calling environment>
#'
#' # add class and return
#' class(survfit) <- c("survfit2", "survfit")
#' survfit
#' }
#' ```
#'
#' The environment is needed to ensure the survfit call can be accurately
#' reconstructed or parsed at any point post estimation.
#' The call is parsed when p-values are reported and when labels are created.
#' For example, the raw variable names appear in the output of a stratified
#' `survfit()` result, e.g. `"sex=Female"`. When using `survfit2()`, the
#' originating data frame and formula may be parsed and the raw variable
#' names removed.
#'
#' Most functions in the package work with both `survfit2()` and `survfit()`;
#' however, the output will be styled in a preferable format with `survfit2()`.
#'
#' @return survfit2 object
#' @export
#'
#' @seealso [`survival::survfit.formula()`]
#' @examples
#' # With `survfit()`
#' fit <- survfit(Surv(time, status) ~ sex, data = df_lung)
#' fit
#'
#' # With `survfit2()`
#' fit2 <- survfit2(Surv(time, status) ~ sex, data = df_lung)
#' fit2
#'
#' # Consistent behavior with other functions
#' summary(fit, times = c(10, 20))
#'
#' summary(fit2, times = c(10, 20))
survfit2 <- function(formula, ...) {
if (missing(formula)) {
cli::cli_abort("The {.code formula} argument cannot be missing.")
}
if (!rlang::is_formula(formula) & !inherits(formula, "coxph")) {
cli::cli_abort(
c("x" = "The {.code formula} argument must be class {.cls formula} or {.cls coxph}.",
"i" = "Argument is class {.cls {class(formula)}}")
)
}
# create call to `survfit()` -------------------------------------------------
# solution taken from https://adv-r.hadley.nz/evaluation.html#match.call
call <- match.call(survival::survfit, expand.dots = TRUE)
call[[1]] <- quote(survival::survfit)
# evaluate call --------------------------------------------------------------
survfit <- eval(call, parent.frame())
# checking if data was piped in with magrittr --------------------------------
if (lapply(as.list(call), function(x) identical(x, quote(.))) %>% unlist() %>% any()) {
# save the "dot" to the new environment, so it can be evaluated later in functions like `survfti2_p()`
env <- rlang::env(parent.frame(), `.` = eval(quote(.), parent.frame()))
}
else env <- parent.frame()
# update object with env and add another class -------------------------------
survfit %>%
utils::modifyList(val = list(.Environment = env)) %>%
structure(class = c("survfit2", class(survfit))) %>%
.check_PARAM_consistency()
}
.check_PARAM_consistency <- function(x) {
# get the formula and data
formula <- .extract_formula_from_survfit(x)
data <- .extract_data_from_survfit(x)
if (is.null(data) || is.null(formula)) return(x)
if (.is_CDISC_ADTTE(data) && !.is_PARAM_consistent(formula, data))
cli::cli_warn(c("!" = "Columns {.cls {c('PARAM', 'PARAMCD')}} are not unique and usage is likely incorrect."))
x
}
.is_CDISC_ADTTE <- function(data) {
all(c("AVAL", "CNSR") %in% names(data)) &&
any(c("PARAM", "PARAMCD") %in% names(data))
}
.is_PARAM_consistent <- function(formula, data) {
isTRUE(
(
# PARAM and PARAMCD both present with appropriate lengths
(
all(c("PARAM", "PARAMCD") %in% names(data)) &&
(length(unique(data[["PARAM"]])) == 1L) &&
(length(unique(data[["PARAMCD"]])) == 1L)
) ||
# PARAMCD only present, and is appropriate length
(
!"PARAM" %in% names(data) &&
"PARAMCD" %in% names(data) &&
(length(unique(data[["PARAMCD"]])) == 1L)
)
) ||
# or PARAM can be any length and it must appear in formula
(any(c("PARAM", "PARAMCD") %in% all.vars(formula)))
)
}