add 'opt-out'

mdbrown · Jul 10, 2017 · 60f77a2 · 60f77a2
1 parent 84bac52
commit 60f77a2
Show file tree

Hide file tree

Showing 11 changed files with 527 additions and 186 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: DecisionCurve
 Type: Package
 Title: Calculate and Plot Decision Curves
-Version: 1.3
-Date: 2016-08-11
+Version: 1.4
+Date: 2017-07-10
 Author: Marshall Brown
 Maintainer: Marshall Brown <mdbrown@fredhutch.org>
 Description: Decision curves are a useful tool to evaluate the population impact

diff --git a/R/Add_CostBenefit_Axis.R b/R/Add_CostBenefit_Axis.R
@@ -17,16 +17,25 @@
 Add_CostBenefit_Axis <- function(xlim,
                                  cost.benefits,
                                  n.cost.benefits = 6,
-                                 line = 4, ...){
+                                 line = 4,
+                                 policy, ...){
 
   if(missing(cost.benefits)){
     #no cost.benefits are given, so we must choose them (wisely) from available defaults
     #listed here
+
+
     tmp <- c( 1/2, 1/3, 1/4, 2/3, 3/4, 1/5, 1/10, 2/5, 3/5, 4/5, 7/10, 9/10, 5/6, 1/15, 1/20,  1/25, 1/30, 1/40, 1/50, 1/60, 1/75, 1/80, 1/100)
     dd <-  data.frame( "name" = c("1:1", "1:2", "1:3", "1:4", "2:3", "3:4", "1:5", "1:10", "2:5", "3:5", "4:5", "7:10", "9:10", "5:6", "1:15", "1:20", "1:25", "1:30", "1:40", "1:50", "1:60", "1:75","1:80",  "1:100",
                                   "2:1", "3:1", "4:1", "3:2", "4:3", "5:1", "10:1", "5:2", "5:3", "5:4", "10:7", "10:9", "6:5", "15:1", "20:1", "25:1", "30:1", "40:1", "50:1",  "60:1","75:1", "80:1",  "100:1"),
                        "value" = c(1/1, tmp, 1/tmp))
-    dd$threshold <- dd$value/(1+dd$value)
+
+    if(policy == "opt-in"){
+      dd$threshold <- dd$value/(1+dd$value)
+    }else{
+      dd$threshold <- (1/(1+dd$value)) ##need to ask about this
+    }
+
     dd <- dd[order(dd$threshold),]
     threshold <- NULL
     dd.sub <- subset(dd, threshold >= min(xlim) & threshold <= max(xlim))
@@ -62,7 +71,11 @@ Add_CostBenefit_Axis <- function(xlim,
     B <- sapply(CB.split, function(x) as.numeric(x[2]))
 
     dd$value = C/B
-    dd$threshold <- dd$value/(1+dd$value)
+    if(policy == "opt-in"){
+      dd$threshold <- dd$value/(1+dd$value)
+    }else{
+      dd$threshold <- (1/(1+dd$value))
+    }
     dd <- dd[order(dd$threshold),]
     dd.sub <- subset(dd, threshold >= min(xlim) & threshold <= max(xlim))
     #check to make sure there are default choices within xlim

diff --git a/R/cv_decision_curve.R b/R/cv_decision_curve.R
@@ -45,7 +45,8 @@ cv_decision_curve <- function(formula,
                            thresholds = seq(0, 1, by = .01),
                            folds = 5,
                            study.design = c("cohort", "case-control"),
-                           population.prevalence){
+                           population.prevalence,
+                           policy = c("opt-in", "opt-out")){
   call <- match.call()
 
   stopifnot(is.numeric(folds))
@@ -55,7 +56,7 @@ cv_decision_curve <- function(formula,
   if(any( names.check <- !is.element(all.vars(formula), names(data)))) stop(paste("variable(s)", paste( all.vars(formula)[names.check], collapse = ", ") , "not found in 'data'"))
 
   study.design <- match.arg(study.design)
-
+  policy = match.arg(policy)
   #throw out missing data
   data <- data[,all.vars(formula)]
   #complete case indicator
@@ -89,8 +90,10 @@ cv_decision_curve <- function(formula,
                         thresholds = thresholds,
                         confidence.intervals = "none",
                         study.design = study.design,
-                        population.prevalence = population.prevalence)$derived.data
-  out$derived.data[, 2:8] <- 0
+                        population.prevalence = population.prevalence,
+                        policy =policy)$derived.data
+
+  out$derived.data[, 2:10] <- 0
 
   for(kk in 1:folds){
 
@@ -117,23 +120,25 @@ cv_decision_curve <- function(formula,
     #add measures for each fold to the first 8 columns, bc these are the
     #the only numeric estimates.
     #we divide by number of folds later
-    out$derived.data[,2:8] <- out$derived.data[,2:8] +
+    out$derived.data[,2:10] <- out$derived.data[,2:10] +
                              decision_curve(formula = outcome~risk.hat,
                              data = dat.cv,
                              family = family,
                              fitted.risk = TRUE,
                              thresholds = thresholds,
                              confidence.intervals = "none",
                              study.design = study.design,
-                             population.prevalence = population.prevalence)$derived.data[,2:8]
+                             population.prevalence = population.prevalence,
+                             policy = policy)$derived.data[,2:10]
   }
   #take the mean across folds as estimates
-  out$derived.data[,2:8] <- out$derived.data[,2:8]/folds
+  out$derived.data[,2:10] <- out$derived.data[,2:10]/folds
 
 
   #return list of elements
   out$call <- call
   out$folds <- folds
+  out$policy = policy
   out$confidence.intervals <- 'none'
 
   class(out) = "decision_curve"

diff --git a/R/decision_curve.R b/R/decision_curve.R
@@ -5,6 +5,7 @@
 #' @param formula an object of class 'formula' of the form outcome ~ predictors, giving the prediction model to be fitted using glm. The outcome must be a binary variable that equals '1' for cases and '0' for controls.
 #' @param data data.frame containing outcome and predictors. Missing data on any of the predictors will cause the entire observation to be removed.
 #' @param family a description of the error distribution and link function to pass to 'glm' used for model fitting. Defaults to binomial(link = "logit") for logistic regression.
+#' @param policy Either "opt-in" (default) or "opt-out", describing the type of policy for which to report the net benefit. An policy is "opt-in" (default) when the standard of care is to treat no-one, and the model is used to recommend high risk patients to 'opt-in' for a treatment. Alternatively, an "opt-out" policy is a policy for which the standard of care is to treat everyone in the population and the model is used to discover low risk patients to 'opt-out' of treatment.
 #' @param fitted.risk logical (default FALSE) indicating whether the predictor provided are estimated risks from an already established model. If set to TRUE, no model fitting will be done and all estimates will be conditional on the risks provided.  Risks must fall between 0 and 1.
 #' @param thresholds Numeric vector of high risk thresholds to use when plotting and calculating net benefit values.
 #' @param confidence.intervals Numeric (default 0.95 for 95\% confidence bands) level of bootstrap confidence intervals to plot. Set as NA or 'none' to remove confidence intervals. See details for more information.
@@ -63,6 +64,7 @@
 decision_curve <- function(formula,
                           data,
                           family = binomial(link = "logit"),
+                          policy = c("opt-in", "opt-out"),
                           fitted.risk = FALSE,
                           thresholds = seq(0, 1, by = .01),
                           confidence.intervals = 0.95,
@@ -76,10 +78,14 @@ decision_curve <- function(formula,
   stopifnot(class(formula) == "formula")  #check formula
   stopifnot(is.data.frame(data)) #check data
   stopifnot(is.logical(fitted.risk))
+
   stopifnot(is.numeric(thresholds))
   stopifnot(all(thresholds >= 0)); stopifnot(all(thresholds <= 1));
   if(is.numeric(confidence.intervals)) stopifnot(confidence.intervals > 0 & confidence.intervals < 1)
   stopifnot(is.numeric(bootstraps))
+  policy <- match.arg(policy)
+  opt.in = policy == "opt-in"
+
   study.design <- match.arg(study.design)
 
   if(!missing(population.prevalence)) {
@@ -155,10 +161,14 @@ decision_curve <- function(formula,
 
   n.out <- length(predictors)*length(thresholds)
   dc.data <- data.frame("thresholds" = numeric(n.out),
-                    "FPR" = numeric(n.out),"TPR" = numeric(n.out),
+                    "FPR" = numeric(n.out), "FNR" = numeric(n.out),
+                    "TPR" = numeric(n.out),"TNR" = numeric(n.out),
                     "NB" = numeric(n.out), "sNB" = numeric(n.out),
-                    "rho" = numeric(n.out),"prob.high.risk" = numeric(n.out),
+                    "rho" = numeric(n.out),
+                    "prob.high.risk" = numeric(n.out),
+                    "prob.low.risk" = numeric(n.out),
                     "DP" = numeric(n.out),
+                    "nonDP" = numeric(n.out),
                     "model"= numeric(n.out))
 
 
@@ -201,7 +211,8 @@ decision_curve <- function(formula,
                               family = family,
                               data = data,
                               formula.ind = formula.ind[i],
-                              casecontrol.rho = population.prevalence)
+                              casecontrol.rho = population.prevalence,
+                              opt.in = opt.in)
 
     tmpNBdata$model <- predictor.names[[i]]
 
@@ -220,7 +231,8 @@ decision_curve <- function(formula,
                    family = family,
                    data = data[x,],
                    formula.ind = formula.ind[i],
-                   casecontrol.rho = population.prevalence)})
+                   casecontrol.rho = population.prevalence,
+                   opt.in = opt.in)})
 
       alpha = 1- confidence.intervals
 
@@ -246,7 +258,7 @@ decision_curve <- function(formula,
   #no ci's but we need to fill in the data.frame anyway
   if(!is.numeric(confidence.intervals)){dc.data <- add.ci.columns(dc.data)}
 
-  dc.data$cost.benefit.ratio <- as.character(fractions(threshold_to_costbenefit(dc.data$thresholds)))
+  dc.data$cost.benefit.ratio <- as.character(fractions(threshold_to_costbenefit(dc.data$thresholds, policy)))
   #find indices without a fraction and make them "xx/1"
   add.dash1 <-  which(!is.element(1:nrow(dc.data), grep("/", dc.data$cost.benefit.ratio)))
   dc.data$cost.benefit.ratio[add.dash1] <- paste(dc.data$cost.benefit.ratio[add.dash1], "/1", sep = "")
@@ -257,6 +269,7 @@ decision_curve <- function(formula,
   #return list of elements
   out <- list("derived.data"  = dc.data,
               "confidence.intervals" = confidence.intervals,
+              "policy" = policy,
               "call" = call)
   class(out) = "decision_curve"
   invisible(out)

diff --git a/R/plot_functions_main.R b/R/plot_functions_main.R
@@ -82,8 +82,8 @@ plot_decision_curve <- function(x, curve.names,
                                col,
                                lty, lwd = 2,
                                xlim, ylim,
-                               xlab = "Risk Threshold", ylab,
-                               cost.benefit.xlab = "Cost:Benefit Ratio",
+                               xlab, ylab,
+                               cost.benefit.xlab,
                                legend.position = c("topright", "right", "bottomright", "bottom", "bottomleft", "left", "topleft", "top", "none"),
                                ...){
 
@@ -116,14 +116,15 @@ plot_decision_curve <- function(x, curve.names,
   if(length(lwd) == length(predictors)) lwd = c(lwd, 1, 1)
 
   if(missing(ylab)) ylab <- ifelse(standardize, "Standardized Net Benefit", "Net Benefit")
-
+  policy = ifelse(class(x)=="decision_curve", x$policy, x[[1]]$policy)
+  if(missing(xlab)) xlab <- ifelse(policy  == 'opt-in', "High Risk Threshold", "Low Risk Threshold")
+  if(missing(cost.benefit.xlab)) cost.benefit.xlab <- ifelse(policy == "opt-in", "Opt-in Cost:Benefit Ratio", "Opt-out Cost:Benefit Ratio")
     if(missing(ylim)){
 
     if(standardize) ylim = c(-1, 1)
     else ylim = c(-0.05, 1.1*max(dc.data[["NB"]][is.finite(dc.data[["NB"]])]))
 
   }
-
   plot_generic(xx = dc.data,
                predictors = predictors,
                value = ifelse(standardize, "sNB", "NB"),
@@ -138,7 +139,9 @@ plot_decision_curve <- function(x, curve.names,
                col = col,
                lty = lty, lwd = lwd,
                xlim = xlim, ylim = ylim,
-               legend.position = legend.position, ...)
+               legend.position = legend.position,
+               policy = policy,
+               ...)
 
 }
 
@@ -236,6 +239,7 @@ plot_roc_components <- function(x,
                lty = lty.tpr, lwd = lwd,
                xlim = xlim, ylim = ylim,
                legend.position = "none",
+               policy = x$policy,
                ...)
 
   plot_generic(xx = dc.data,
@@ -256,6 +260,7 @@ plot_roc_components <- function(x,
                lty.fpr = lty.fpr,
                lty.tpr = lty.tpr,
                tpr.fpr.legend = TRUE,
+               policy = x$policy,
                ...)
 
 
@@ -306,7 +311,7 @@ plot_clinical_impact <- function(x,
                             lty = 1,
                             lwd = 2,
                             xlim, ylim,
-                            xlab = "Risk Threshold", ylab,
+                            xlab, ylab,
                             cost.benefit.xlab = "Cost:Benefit Ratio",
                             legend.position = c("topright", "right", "bottomright", "bottom", "bottomleft", "left", "topleft", "top", "none"),
                             ...){
@@ -336,13 +341,21 @@ plot_clinical_impact <- function(x,
   if(length(lwd) ==1) lwd <- rep(lwd, length(predictors))
   if(length(lwd) == length(predictors)) lwd = c(lwd, 1, 1)
 
-  if(missing(ylab)) ylab <-  paste("Number high risk (out of ", population.size,  ")", sep = "")
+  if(missing(ylab)) {
+    if(x$policy == "opt-in") {
+      ylab <-  paste("Number high risk (out of ", population.size,  ")", sep = "")
+    }else if(x$policy == "opt-out"){
+        ylab <- paste("Number low risk (out of ", population.size,  ")", sep = "")
+    }
+  }
+  if(missing(xlab)) xlab <- ifelse(x$policy == 'opt-in', "High Risk Threshold", "Low Risk Threshold")
+
 
   if(missing(ylim)) ylim = c(0, population.size*1.05)
 
   plot_generic(xx = dc.data,
                predictors = predictors,
-               value = "prob.high.risk",
+               value = ifelse(x$policy == "opt-in", "prob.high.risk", "prob.low.risk"),
                plotNew = TRUE,
                standardize = FALSE,
                confidence.intervals,
@@ -356,11 +369,12 @@ plot_clinical_impact <- function(x,
                xlim = xlim, ylim = ylim,
                legend.position = "none",
                population.size = population.size,
+               policy = x$policy,
                ...) #add my own legend
 
   plot_generic(xx = dc.data,
                predictors = predictors,
-               value = "DP",
+               value = ifelse(x$policy == "opt-in", "DP", "nonDP"),
                plotNew = FALSE,
                standardize = FALSE,
                confidence.intervals,
@@ -376,8 +390,10 @@ plot_clinical_impact <- function(x,
                lty.fpr = 0,
                lty.tpr = 0,
                tpr.fpr.legend = FALSE,
-               impact.legend = TRUE,
+               impact.legend = (x$policy == "opt-in"),
+               impact.legend.2 = (x$policy == "opt-out"),
                population.size = population.size,
+               policy = x$policy,
                ...)
 
 }
diff --git a/R/plot_functions_sub.R b/R/plot_functions_sub.R
@@ -29,7 +29,8 @@ preparePlotData   <- function(x, curve.names, confidence.intervals){
     if(!all(sapply(x, FUN = function(xx) class(xx) == "decision_curve")) ){
       stop("One or more elements of the list provided is not an object of class 'decision_curve' (output from the function 'decision_curve').")
     }
-
+    policy.vec <- sapply(x, FUN = function(x) x$policy)
+    if(length(unique(policy.vec))>1) stop("Comparing decision curves with different opt-in/opt-out policies is not valid.")
     if(is.na(confidence.intervals[1])){
 
       ci.list <- sapply(x, FUN = function(x) x$confidence.intervals)
@@ -109,7 +110,9 @@ plot_generic<- function(xx, predictors, value, plotNew,
                         lty.fpr = 2, lty.tpr = 1,
                         tpr.fpr.legend = FALSE,
                         impact.legend = FALSE,
-                        population.size = 1000, ...){
+                        impact.legend.2 = FALSE,
+                        population.size = 1000,
+                        policy = policy, ...){
 ## xx is output from get_DecisionCurve,
 ## others are directly from the function call
 
@@ -174,7 +177,7 @@ plot_generic<- function(xx, predictors, value, plotNew,
   }
 
   #the clinical impact plots are on a different scale
-  if(is.element(value, c("DP" , "prob.high.risk"))){
+  if(is.element(value, c("DP" ,"nonDP", "prob.high.risk", "prob.low.risk"))){
     #population.size
     ps <- population.size
   }else{
@@ -185,8 +188,8 @@ plot_generic<- function(xx, predictors, value, plotNew,
   for(i in 1:length(predictors)){
     #plot ci's if asked for
 
-    j <- ifelse(is.element(value, c("TPR", "prob.high.risk")), 1, i)
-    j <- ifelse(is.element(value, c("FPR", "DP")), 2, i)
+    j <- ifelse(is.element(value, c("TPR", "prob.high.risk", "prob.low.risk")), 1, i)
+    j <- ifelse(is.element(value, c("FPR", "DP", "nonDP")), 2, i)
     if(is.numeric(confidence.intervals)){
       #get rid of cases missing for that predictor; this sometimes
        #happens due to different thresholds for each predictor
@@ -229,18 +232,26 @@ plot_generic<- function(xx, predictors, value, plotNew,
       legend(legend.position,
              lty = c( 1, 2),
              col = col, bg  = "white",
-             lwd = lwd, legend = c("Number high risk", "Number high risk with outcome"))
+             lwd = lwd, legend = c("Number high risk", "Number high risk with event"))
+
+    }else if(impact.legend.2){
+      legend(legend.position,
+             lty = c( 1, 2),
+             col = col, bg  = "white",
+             lwd = lwd, legend = c("Number low risk", "Number low risk without event"))
 
     }
 
   }
 
   #add cost benefit axis if wanted
   if(cost.benefit.axis){
+
     tmp <- Add_CostBenefit_Axis(xlim = xlim,
                                 cost.benefits = cost.benefits,
                                 n.cost.benefits = n.cost.benefits,
-                                line = 4)
+                                line = 4,
+                                policy = policy)
     mtext(xlab, 1, 2.2)
     mtext(cost.benefit.xlab, side = 1, 6.1)
   }else{