tidymodels · EmilHvitfeldt · Mar 14, 2023 · Mar 1, 2023 · Mar 1, 2023 · Mar 1, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: yardstick
 Title: Tidy Characterizations of Model Performance
-Version: 1.1.0.9000
+Version: 1.1.0.9001
 Authors@R: c(
     person("Max", "Kuhn", , "max@posit.co", role = "aut"),
     person("Davis", "Vaughan", , "davis@posit.co", role = "aut"),
@@ -19,6 +19,7 @@ BugReports: https://github.com/tidymodels/yardstick/issues
 Depends:
     R (>= 3.4.0)
 Imports:
+    cli,
     dplyr (>= 1.1.0),
     generics (>= 0.1.2),
     hardhat (>= 1.2.0.9000),

diff --git a/NAMESPACE b/NAMESPACE
@@ -66,6 +66,7 @@ S3method(roc_auc,data.frame)
 S3method(roc_aunp,data.frame)
 S3method(roc_aunu,data.frame)
 S3method(roc_curve,data.frame)
+S3method(roc_curve_survival,data.frame)
 S3method(rpd,data.frame)
 S3method(rpiq,data.frame)
 S3method(rsq,data.frame)
@@ -179,6 +180,7 @@ export(roc_aunp_vec)
 export(roc_aunu)
 export(roc_aunu_vec)
 export(roc_curve)
+export(roc_curve_survival)
 export(rpd)
 export(rpd_vec)
 export(rpiq)

diff --git a/NEWS.md b/NEWS.md
@@ -37,6 +37,9 @@
 
 * The Brier score for survival data was added with `brier_survival()`.
 
+* Time-dependent ROC curves for right-censored data can now be estimated with
+  `roc_curve_survival()`.
+
 # yardstick 1.1.0
 
 * Emil Hvitfeldt is now the maintainer (#315).

diff --git a/R/aaa.R b/R/aaa.R
@@ -38,6 +38,7 @@ utils::globalVariables(
   s3_register("ggplot2::autoplot", "gain_df")
   s3_register("ggplot2::autoplot", "lift_df")
   s3_register("ggplot2::autoplot", "roc_df")
+  s3_register("ggplot2::autoplot", "roc_survival_df")
   s3_register("ggplot2::autoplot", "pr_df")
   s3_register("ggplot2::autoplot", "conf_mat")
 

diff --git a/R/import-standalone-survival.R b/R/import-standalone-survival.R
@@ -0,0 +1,92 @@
+# Standalone file: do not edit by hand
+# Source: <https://github.com/tidymodels/parsnip/blob/main/R/standalone-survival.R>
+# ----------------------------------------------------------------------
+#
+# ---
+# repo: tidymodels/parsnip
+# file: standalone-survival.R
+# last-updated: 2023-02-28
+# license: https://unlicense.org
+# ---
+
+# This file provides a portable set of helper functions for Surv objects
+
+# ## Changelog
+
+# 2023-02-28:
+# * Initial version
+
+
+# @param surv A [survival::Surv()] object
+# @details
+# `.is_censored_right()` always returns a logical while
+# `.check_censored_right()` will fail if `FALSE`.
+#
+# `.extract_surv_status()` will return the data as 0/1 even if the original
+# object used the legacy encoding of 1/2. See [survival::Surv()].
+# @return
+# - `.extract_surv_status()` returns a vector.
+# - `.extract_surv_time()` returns a vector when the type is `"right"` or `"left"`
+#    and a tibble otherwise.
+# - Functions starting with `.is_` or `.check_` return logicals although the
+#   latter will fail when `FALSE`.
+
+# nocov start
+# These are tested in the extratests repo since it would require a dependency
+# on the survival package. https://github.com/tidymodels/extratests/pull/78
+# Report whether `surv` has censoring type "right". Delegates to
+# `.check_cens_type()` with `fail = FALSE`, so a different censoring type gives
+# `FALSE` rather than an error. `.check_cens_type()` still calls `.is_surv()`
+# with its default `fail = TRUE`, so a non-`Surv` input aborts.
+.is_censored_right <- function(surv) {
+  .check_cens_type(surv, fail = FALSE)
+}
+
+# Assert that `surv` has censoring type "right". Delegates to
+# `.check_cens_type()` with `fail = TRUE`, which aborts for any other type and
+# returns `TRUE` otherwise.
+.check_censored_right <- function(surv) {
+  .check_cens_type(surv, fail = TRUE)
+} # will add more as we need them
+
+# Pull the time column(s) out of a `Surv` object.
+#
+# Columns named "time", "start", "stop", "time1" or "time2" are selected. When
+# the selection has at most one column it is returned as subset; several
+# columns are wrapped in a tibble.
+.extract_surv_time <- function(surv) {
+  .is_surv(surv)
+  time_cols <- c("time", "start", "stop", "time1", "time2")
+  is_time_col <- colnames(surv) %in% time_cols
+  times <- surv[, is_time_col]
+  if (NCOL(times) <= 1) {
+    return(times)
+  }
+  dplyr::tibble(as.data.frame(times))
+}
+
+# Extract the "status" column of a `Surv` object.
+#
+# For censoring types other than "interval", "interval2" and "mstate", a status
+# whose distinct values are exactly 1 and 2 (the legacy encoding) is shifted
+# down by one so that it is returned as 0/1.
+.extract_surv_status <- function(surv) {
+  .is_surv(surv)
+  res <- surv[, "status"]
+  un_vals <- sort(unique(res))
+  event_type_to_01 <-
+    !(.extract_surv_type(surv) %in% c("interval", "interval2", "mstate"))
+  # `identical()` always yields a single logical, so the scalar,
+  # short-circuiting `||` is the appropriate operator inside `if ()`.
+  # The `1:2` comparison is against an integer vector, the `c(1.0, 2.0)`
+  # comparison against a double vector.
+  if (
+    event_type_to_01 &&
+    (identical(un_vals, 1:2) || identical(un_vals, c(1.0, 2.0)))
+  ) {
+    res <- res - 1
+  }
+  res
+}
+
+# Check whether `surv` inherits from class "Surv".
+#
+# Returns the logical result of the check. When the check fails and `fail` is
+# `TRUE`, aborts instead of returning.
+.is_surv <- function(surv, fail = TRUE) {
+  has_surv_class <- inherits(surv, "Surv")
+  if (has_surv_class || !fail) {
+    return(has_surv_class)
+  }
+  rlang::abort("The object does not have class `Surv`.", call = NULL)
+}
+
+# Return the "type" attribute of `surv` (`NULL` when the attribute is absent).
+# No class check is done here; callers run `.is_surv()` themselves.
+.extract_surv_type <- function(surv) {
+  attr(surv, "type")
+}
+
+# Check that the censoring type of `surv` is one of the allowed `type`s.
+#
+# Returns `TRUE`/`FALSE`. When the type is not allowed and `fail` is `TRUE`,
+# aborts with a message listing the allowed types instead of returning.
+.check_cens_type <- function(surv, type = "right", fail = TRUE) {
+  .is_surv(surv)
+  type_ok <- all(.extract_surv_type(surv) %in% type)
+  if (type_ok || !fail) {
+    return(type_ok)
+  }
+  # `c_list` is interpolated by name in the cli template below.
+  c_list <- paste0("'", type, "'")
+  msg <- cli::format_inline(
+    "For this usage, the allowed censoring type{?s} {?is/are}: {c_list}"
+  )
+  rlang::abort(msg, call = NULL)
+}
+
+# nocov end
diff --git a/R/surv-roc_survival_curve.R b/R/surv-roc_survival_curve.R
@@ -0,0 +1,160 @@
+#' ROC Survival Curve
+#'
+#' `roc_curve_survival()` computes sensitivity and specificity over a set of
+#' thresholds applied to the predicted survival probabilities. The thresholds
+#' are 0, 1, and every unique value of `estimate`.
+#'
+#' @family survival curve metrics
+#' @templateVar fn roc_curve_survival
+#'
+#' @inheritParams brier_survival
+#'
+#' @return
+#' A tibble with class `roc_survival_df` or `grouped_roc_survival_df` having
+#' columns `.threshold`, `sensitivity`, and `specificity`.
+#'
+#' @seealso
+#' Compute the area under the ROC survival curve with (TODO link to
+#' roc_survival_auc()).
+#'
+#' @author Emil Hvitfeldt
+#' @examples
+#' result <- roc_curve_survival(
+#'   lung_surv,
+#'   truth = surv_obj,
+#'   estimate = .pred_survival,
+#'   censoring_weights = ipcw,
+#'   eval_time = .time
+#' )
+#' result
+#'
+#' # ---------------------------------------------------------------------------
+#' # `autoplot()`
+#'
+#' # Visualize the curve using ggplot2 manually
+#' library(ggplot2)
+#' library(dplyr)
+#' result %>%
+#'   ggplot(aes(x = 1 - specificity, y = sensitivity)) +
+#'   geom_path() +
+#'   geom_abline(lty = 3) +
+#'   coord_equal() +
+#'   theme_bw()
+#'
+#' # Or use autoplot
+#' autoplot(result)
+#' @export
+roc_curve_survival <- function(data, ...) {
+  UseMethod("roc_curve_survival")
+}
+
+#' @export
+#' @rdname roc_curve_survival
+roc_curve_survival.data.frame <- function(data,
+                                          truth,
+                                          estimate,
+                                          censoring_weights,
+                                          eval_time,
+                                          na_rm = TRUE,
+                                          case_weights = NULL,
+                                          ...) {
+  # Capture the column selections as quosures so they can be unquoted into
+  # the summarizer call below.
+  truth_quo <- enquo(truth)
+  estimate_quo <- enquo(estimate)
+  censoring_weights_quo <- enquo(censoring_weights)
+  eval_time_quo <- enquo(eval_time)
+  case_weights_quo <- enquo(case_weights)
+
+  curve_tbl <- curve_survival_metric_summarizer(
+    name = "roc_curve_survival",
+    fn = roc_curve_survival_vec,
+    data = data,
+    truth = !!truth_quo,
+    estimate = !!estimate_quo,
+    censoring_weights = !!censoring_weights_quo,
+    eval_time = !!eval_time_quo,
+    na_rm = na_rm,
+    case_weights = !!case_weights_quo
+  )
+
+  # Attach the curve classes used for printing and `autoplot()` dispatch.
+  curve_finalize(curve_tbl, data, "roc_survival_df", "grouped_roc_survival_df")
+}
+
+# Vector interface behind `roc_curve_survival()`.
+#
+# Runs the input checks, applies the missing-value policy selected by `na_rm`,
+# and hands the remaining inputs to `roc_curve_survival_impl()`. With
+# `na_rm = FALSE` and any missing value present, `NA_real_` is returned instead
+# of a curve.
+#
+# NOTE(review): `case_weights` goes through the checks and the missing-value
+# handling but is not forwarded to `roc_curve_survival_impl()`.
+roc_curve_survival_vec <- function(truth,
+                                   estimate,
+                                   censoring_weights,
+                                   eval_time,
+                                   na_rm = TRUE,
+                                   case_weights = NULL,
+                                   ...) {
+  check_dynamic_survival_metric(
+    truth, estimate, censoring_weights, case_weights, eval_time
+  )
+
+  if (na_rm) {
+    # Replace every input with the version returned by
+    # `yardstick_remove_missing()`.
+    complete <- yardstick_remove_missing(
+      truth, estimate, case_weights, censoring_weights, eval_time
+    )
+
+    truth <- complete$truth
+    estimate <- complete$estimate
+    censoring_weights <- complete$censoring_weights
+    eval_time <- complete$eval_time
+    case_weights <- complete$case_weights
+  } else if (
+    yardstick_any_missing(
+      truth, estimate, case_weights, censoring_weights, eval_time
+    )
+  ) {
+    return(NA_real_)
+  }
+
+  roc_curve_survival_impl(
+    truth = truth,
+    estimate = estimate,
+    censoring_weights = censoring_weights,
+    eval_time = eval_time
+  )
+}
+
+# Build the ROC survival curve as a tibble with one row per threshold.
+#
+# The thresholds are 0, 1, and each distinct value of `estimate`, sorted in
+# increasing order. Sensitivity uses the censoring weights; specificity does
+# not.
+roc_curve_survival_impl <- function(truth,
+                                    estimate,
+                                    censoring_weights,
+                                    eval_time) {
+  thresholds <- sort(unique(c(0, 1, estimate)))
+
+  sensitivities <- vapply(
+    thresholds,
+    function(threshold) {
+      sensitivity_uno_2007(
+        threshold, eval_time, truth, estimate, censoring_weights
+      )
+    },
+    FUN.VALUE = numeric(1)
+  )
+
+  specificities <- vapply(
+    thresholds,
+    function(threshold) {
+      specificity_naive(threshold, eval_time, truth, estimate)
+    },
+    FUN.VALUE = numeric(1)
+  )
+
+  dplyr::tibble(
+    .threshold = thresholds,
+    sensitivity = sensitivities,
+    specificity = specificities
+  )
+}
+
+# Censoring-weighted sensitivity at a single `threshold`.
+#
+# Each observation is weighted by `status / (n * prob_cens)`. The result is
+# the weighted share of observations with observed time <= `eval_time` whose
+# predicted survival probability is <= `threshold`.
+# NOTE(review): presumably the estimator of Uno et al. (2007), going by the
+# function name - confirm against the reference.
+sensitivity_uno_2007 <- function(threshold,
+                                 eval_time,
+                                 surv_obj,
+                                 prob_surv,
+                                 prob_cens) {
+  n_obs <- length(prob_surv)
+  obs_time <- .extract_surv_time(surv_obj)
+  event_status <- .extract_surv_status(surv_obj)
+
+  # 1 when the observed time is at or before the evaluation time, else 0
+  is_before_eval <- ifelse(obs_time <= eval_time, 1, 0)
+  # The "marker" is the survival probability, so a value at or below the
+  # threshold counts as a predicted event
+  is_predicted_event <- ifelse(prob_surv <= threshold, 1, 0)
+  weight <- event_status / (n_obs * prob_cens)
+
+  weighted_hits <- sum(
+    is_before_eval * is_predicted_event * weight,
+    na.rm = TRUE
+  )
+  weighted_total <- sum(is_before_eval * weight, na.rm = TRUE)
+  weighted_hits / weighted_total
+}
+
+# Unweighted ("naive") specificity at a single `threshold`.
+#
+# Among observations whose observed time is greater than `eval_time`, returns
+# the share whose predicted survival probability is greater than `threshold`.
+# The event status is not needed for this calculation, so it is not extracted.
+specificity_naive <- function(threshold, eval_time, surv_obj, prob_surv) {
+  event_time <- .extract_surv_time(surv_obj)
+  obs_time_gt_time <- ifelse(event_time > eval_time, 1, 0)
+  # Since the "marker" X is the survival prob, X > C means no event
+  prob_gt_thresh <- ifelse(prob_surv > threshold, 1, 0)
+  numer <- sum(obs_time_gt_time * prob_gt_thresh, na.rm = TRUE)
+  denom <- sum(obs_time_gt_time, na.rm = TRUE)
+  numer / denom
+}
+
+# Dynamically exported: the method is registered for `ggplot2::autoplot` via
+# `s3_register()` in R/aaa.R. It reuses the `roc_df` method unchanged.
+autoplot.roc_survival_df <- autoplot.roc_df
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -72,6 +72,10 @@ reference:
     contents:
     - brier_survival
 
+  - title: Curve Survival Functions
+    contents:
+    - roc_curve_survival
+
   - title: Curve Functions
     contents:
     - roc_curve