Skip to content

Commit

Permalink
add 'opt-out'
Browse files Browse the repository at this point in the history
  • Loading branch information
mdbrown committed Jul 10, 2017
1 parent 84bac52 commit 60f77a2
Show file tree
Hide file tree
Showing 11 changed files with 527 additions and 186 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: DecisionCurve
Type: Package
Title: Calculate and Plot Decision Curves
Version: 1.3
Date: 2016-08-11
Version: 1.4
Date: 2017-07-10
Author: Marshall Brown
Maintainer: Marshall Brown <mdbrown@fredhutch.org>
Description: Decision curves are a useful tool to evaluate the population impact
Expand Down
19 changes: 16 additions & 3 deletions R/Add_CostBenefit_Axis.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,25 @@
Add_CostBenefit_Axis <- function(xlim,
cost.benefits,
n.cost.benefits = 6,
line = 4, ...){
line = 4,
policy, ...){

if(missing(cost.benefits)){
#no cost.benefits are given, so we must choose them (wisely) from available defaults
#listed here


tmp <- c( 1/2, 1/3, 1/4, 2/3, 3/4, 1/5, 1/10, 2/5, 3/5, 4/5, 7/10, 9/10, 5/6, 1/15, 1/20, 1/25, 1/30, 1/40, 1/50, 1/60, 1/75, 1/80, 1/100)
dd <- data.frame( "name" = c("1:1", "1:2", "1:3", "1:4", "2:3", "3:4", "1:5", "1:10", "2:5", "3:5", "4:5", "7:10", "9:10", "5:6", "1:15", "1:20", "1:25", "1:30", "1:40", "1:50", "1:60", "1:75","1:80", "1:100",
"2:1", "3:1", "4:1", "3:2", "4:3", "5:1", "10:1", "5:2", "5:3", "5:4", "10:7", "10:9", "6:5", "15:1", "20:1", "25:1", "30:1", "40:1", "50:1", "60:1","75:1", "80:1", "100:1"),
"value" = c(1/1, tmp, 1/tmp))
dd$threshold <- dd$value/(1+dd$value)

if(policy == "opt-in"){
dd$threshold <- dd$value/(1+dd$value)
}else{
dd$threshold <- (1/(1+dd$value)) ##need to ask about this
}

dd <- dd[order(dd$threshold),]
threshold <- NULL
dd.sub <- subset(dd, threshold >= min(xlim) & threshold <= max(xlim))
Expand Down Expand Up @@ -62,7 +71,11 @@ Add_CostBenefit_Axis <- function(xlim,
B <- sapply(CB.split, function(x) as.numeric(x[2]))

dd$value = C/B
dd$threshold <- dd$value/(1+dd$value)
if(policy == "opt-in"){
dd$threshold <- dd$value/(1+dd$value)
}else{
dd$threshold <- (1/(1+dd$value))
}
dd <- dd[order(dd$threshold),]
dd.sub <- subset(dd, threshold >= min(xlim) & threshold <= max(xlim))
#check to make sure there are default choices within xlim
Expand Down
19 changes: 12 additions & 7 deletions R/cv_decision_curve.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ cv_decision_curve <- function(formula,
thresholds = seq(0, 1, by = .01),
folds = 5,
study.design = c("cohort", "case-control"),
population.prevalence){
population.prevalence,
policy = c("opt-in", "opt-out")){
call <- match.call()

stopifnot(is.numeric(folds))
Expand All @@ -55,7 +56,7 @@ cv_decision_curve <- function(formula,
if(any( names.check <- !is.element(all.vars(formula), names(data)))) stop(paste("variable(s)", paste( all.vars(formula)[names.check], collapse = ", ") , "not found in 'data'"))

study.design <- match.arg(study.design)

policy = match.arg(policy)
#throw out missing data
data <- data[,all.vars(formula)]
#complete case indicator
Expand Down Expand Up @@ -89,8 +90,10 @@ cv_decision_curve <- function(formula,
thresholds = thresholds,
confidence.intervals = "none",
study.design = study.design,
population.prevalence = population.prevalence)$derived.data
out$derived.data[, 2:8] <- 0
population.prevalence = population.prevalence,
policy =policy)$derived.data

out$derived.data[, 2:10] <- 0

for(kk in 1:folds){

Expand All @@ -117,23 +120,25 @@ cv_decision_curve <- function(formula,
#add measures for each fold to the first 8 columns, bc these are the
#the only numeric estimates.
#we divide by number of folds later
out$derived.data[,2:8] <- out$derived.data[,2:8] +
out$derived.data[,2:10] <- out$derived.data[,2:10] +
decision_curve(formula = outcome~risk.hat,
data = dat.cv,
family = family,
fitted.risk = TRUE,
thresholds = thresholds,
confidence.intervals = "none",
study.design = study.design,
population.prevalence = population.prevalence)$derived.data[,2:8]
population.prevalence = population.prevalence,
policy = policy)$derived.data[,2:10]
}
#take the mean across folds as estimates
out$derived.data[,2:8] <- out$derived.data[,2:8]/folds
out$derived.data[,2:10] <- out$derived.data[,2:10]/folds


#return list of elements
out$call <- call
out$folds <- folds
out$policy = policy
out$confidence.intervals <- 'none'

class(out) = "decision_curve"
Expand Down
23 changes: 18 additions & 5 deletions R/decision_curve.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' @param formula an object of class 'formula' of the form outcome ~ predictors, giving the prediction model to be fitted using glm. The outcome must be a binary variable that equals '1' for cases and '0' for controls.
#' @param data data.frame containing outcome and predictors. Missing data on any of the predictors will cause the entire observation to be removed.
#' @param family a description of the error distribution and link function to pass to 'glm' used for model fitting. Defaults to binomial(link = "logit") for logistic regression.
#' @param policy Either "opt-in" (default) or "opt-out", describing the type of policy for which to report the net benefit. An policy is "opt-in" (default) when the standard of care is to treat no-one, and the model is used to recommend high risk patients to 'opt-in' for a treatment. Alternatively, an "opt-out" policy is a policy for which the standard of care is to treat everyone in the population and the model is used to discover low risk patients to 'opt-out' of treatment.
#' @param fitted.risk logical (default FALSE) indicating whether the predictor provided are estimated risks from an already established model. If set to TRUE, no model fitting will be done and all estimates will be conditional on the risks provided. Risks must fall between 0 and 1.
#' @param thresholds Numeric vector of high risk thresholds to use when plotting and calculating net benefit values.
#' @param confidence.intervals Numeric (default 0.95 for 95\% confidence bands) level of bootstrap confidence intervals to plot. Set as NA or 'none' to remove confidence intervals. See details for more information.
Expand Down Expand Up @@ -63,6 +64,7 @@
decision_curve <- function(formula,
data,
family = binomial(link = "logit"),
policy = c("opt-in", "opt-out"),
fitted.risk = FALSE,
thresholds = seq(0, 1, by = .01),
confidence.intervals = 0.95,
Expand All @@ -76,10 +78,14 @@ decision_curve <- function(formula,
stopifnot(class(formula) == "formula") #check formula
stopifnot(is.data.frame(data)) #check data
stopifnot(is.logical(fitted.risk))

stopifnot(is.numeric(thresholds))
stopifnot(all(thresholds >= 0)); stopifnot(all(thresholds <= 1));
if(is.numeric(confidence.intervals)) stopifnot(confidence.intervals > 0 & confidence.intervals < 1)
stopifnot(is.numeric(bootstraps))
policy <- match.arg(policy)
opt.in = policy == "opt-in"

study.design <- match.arg(study.design)

if(!missing(population.prevalence)) {
Expand Down Expand Up @@ -155,10 +161,14 @@ decision_curve <- function(formula,

n.out <- length(predictors)*length(thresholds)
dc.data <- data.frame("thresholds" = numeric(n.out),
"FPR" = numeric(n.out),"TPR" = numeric(n.out),
"FPR" = numeric(n.out), "FNR" = numeric(n.out),
"TPR" = numeric(n.out),"TNR" = numeric(n.out),
"NB" = numeric(n.out), "sNB" = numeric(n.out),
"rho" = numeric(n.out),"prob.high.risk" = numeric(n.out),
"rho" = numeric(n.out),
"prob.high.risk" = numeric(n.out),
"prob.low.risk" = numeric(n.out),
"DP" = numeric(n.out),
"nonDP" = numeric(n.out),
"model"= numeric(n.out))


Expand Down Expand Up @@ -201,7 +211,8 @@ decision_curve <- function(formula,
family = family,
data = data,
formula.ind = formula.ind[i],
casecontrol.rho = population.prevalence)
casecontrol.rho = population.prevalence,
opt.in = opt.in)

tmpNBdata$model <- predictor.names[[i]]

Expand All @@ -220,7 +231,8 @@ decision_curve <- function(formula,
family = family,
data = data[x,],
formula.ind = formula.ind[i],
casecontrol.rho = population.prevalence)})
casecontrol.rho = population.prevalence,
opt.in = opt.in)})

alpha = 1- confidence.intervals

Expand All @@ -246,7 +258,7 @@ decision_curve <- function(formula,
#no ci's but we need to fill in the data.frame anyway
if(!is.numeric(confidence.intervals)){dc.data <- add.ci.columns(dc.data)}

dc.data$cost.benefit.ratio <- as.character(fractions(threshold_to_costbenefit(dc.data$thresholds)))
dc.data$cost.benefit.ratio <- as.character(fractions(threshold_to_costbenefit(dc.data$thresholds, policy)))
#find indices without a fraction and make them "xx/1"
add.dash1 <- which(!is.element(1:nrow(dc.data), grep("/", dc.data$cost.benefit.ratio)))
dc.data$cost.benefit.ratio[add.dash1] <- paste(dc.data$cost.benefit.ratio[add.dash1], "/1", sep = "")
Expand All @@ -257,6 +269,7 @@ decision_curve <- function(formula,
#return list of elements
out <- list("derived.data" = dc.data,
"confidence.intervals" = confidence.intervals,
"policy" = policy,
"call" = call)
class(out) = "decision_curve"
invisible(out)
Expand Down
36 changes: 26 additions & 10 deletions R/plot_functions_main.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ plot_decision_curve <- function(x, curve.names,
col,
lty, lwd = 2,
xlim, ylim,
xlab = "Risk Threshold", ylab,
cost.benefit.xlab = "Cost:Benefit Ratio",
xlab, ylab,
cost.benefit.xlab,
legend.position = c("topright", "right", "bottomright", "bottom", "bottomleft", "left", "topleft", "top", "none"),
...){

Expand Down Expand Up @@ -116,14 +116,15 @@ plot_decision_curve <- function(x, curve.names,
if(length(lwd) == length(predictors)) lwd = c(lwd, 1, 1)

if(missing(ylab)) ylab <- ifelse(standardize, "Standardized Net Benefit", "Net Benefit")

policy = ifelse(class(x)=="decision_curve", x$policy, x[[1]]$policy)
if(missing(xlab)) xlab <- ifelse(policy == 'opt-in', "High Risk Threshold", "Low Risk Threshold")
if(missing(cost.benefit.xlab)) cost.benefit.xlab <- ifelse(policy == "opt-in", "Opt-in Cost:Benefit Ratio", "Opt-out Cost:Benefit Ratio")
if(missing(ylim)){

if(standardize) ylim = c(-1, 1)
else ylim = c(-0.05, 1.1*max(dc.data[["NB"]][is.finite(dc.data[["NB"]])]))

}

plot_generic(xx = dc.data,
predictors = predictors,
value = ifelse(standardize, "sNB", "NB"),
Expand All @@ -138,7 +139,9 @@ plot_decision_curve <- function(x, curve.names,
col = col,
lty = lty, lwd = lwd,
xlim = xlim, ylim = ylim,
legend.position = legend.position, ...)
legend.position = legend.position,
policy = policy,
...)

}

Expand Down Expand Up @@ -236,6 +239,7 @@ plot_roc_components <- function(x,
lty = lty.tpr, lwd = lwd,
xlim = xlim, ylim = ylim,
legend.position = "none",
policy = x$policy,
...)

plot_generic(xx = dc.data,
Expand All @@ -256,6 +260,7 @@ plot_roc_components <- function(x,
lty.fpr = lty.fpr,
lty.tpr = lty.tpr,
tpr.fpr.legend = TRUE,
policy = x$policy,
...)


Expand Down Expand Up @@ -306,7 +311,7 @@ plot_clinical_impact <- function(x,
lty = 1,
lwd = 2,
xlim, ylim,
xlab = "Risk Threshold", ylab,
xlab, ylab,
cost.benefit.xlab = "Cost:Benefit Ratio",
legend.position = c("topright", "right", "bottomright", "bottom", "bottomleft", "left", "topleft", "top", "none"),
...){
Expand Down Expand Up @@ -336,13 +341,21 @@ plot_clinical_impact <- function(x,
if(length(lwd) ==1) lwd <- rep(lwd, length(predictors))
if(length(lwd) == length(predictors)) lwd = c(lwd, 1, 1)

if(missing(ylab)) ylab <- paste("Number high risk (out of ", population.size, ")", sep = "")
if(missing(ylab)) {
if(x$policy == "opt-in") {
ylab <- paste("Number high risk (out of ", population.size, ")", sep = "")
}else if(x$policy == "opt-out"){
ylab <- paste("Number low risk (out of ", population.size, ")", sep = "")
}
}
if(missing(xlab)) xlab <- ifelse(x$policy == 'opt-in', "High Risk Threshold", "Low Risk Threshold")


if(missing(ylim)) ylim = c(0, population.size*1.05)

plot_generic(xx = dc.data,
predictors = predictors,
value = "prob.high.risk",
value = ifelse(x$policy == "opt-in", "prob.high.risk", "prob.low.risk"),
plotNew = TRUE,
standardize = FALSE,
confidence.intervals,
Expand All @@ -356,11 +369,12 @@ plot_clinical_impact <- function(x,
xlim = xlim, ylim = ylim,
legend.position = "none",
population.size = population.size,
policy = x$policy,
...) #add my own legend

plot_generic(xx = dc.data,
predictors = predictors,
value = "DP",
value = ifelse(x$policy == "opt-in", "DP", "nonDP"),
plotNew = FALSE,
standardize = FALSE,
confidence.intervals,
Expand All @@ -376,8 +390,10 @@ plot_clinical_impact <- function(x,
lty.fpr = 0,
lty.tpr = 0,
tpr.fpr.legend = FALSE,
impact.legend = TRUE,
impact.legend = (x$policy == "opt-in"),
impact.legend.2 = (x$policy == "opt-out"),
population.size = population.size,
policy = x$policy,
...)

}
25 changes: 18 additions & 7 deletions R/plot_functions_sub.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ preparePlotData <- function(x, curve.names, confidence.intervals){
if(!all(sapply(x, FUN = function(xx) class(xx) == "decision_curve")) ){
stop("One or more elements of the list provided is not an object of class 'decision_curve' (output from the function 'decision_curve').")
}

policy.vec <- sapply(x, FUN = function(x) x$policy)
if(length(unique(policy.vec))>1) stop("Comparing decision curves with different opt-in/opt-out policies is not valid.")
if(is.na(confidence.intervals[1])){

ci.list <- sapply(x, FUN = function(x) x$confidence.intervals)
Expand Down Expand Up @@ -109,7 +110,9 @@ plot_generic<- function(xx, predictors, value, plotNew,
lty.fpr = 2, lty.tpr = 1,
tpr.fpr.legend = FALSE,
impact.legend = FALSE,
population.size = 1000, ...){
impact.legend.2 = FALSE,
population.size = 1000,
policy = policy, ...){
## xx is output from get_DecisionCurve,
## others are directly from the function call

Expand Down Expand Up @@ -174,7 +177,7 @@ plot_generic<- function(xx, predictors, value, plotNew,
}

#the clinical impact plots are on a different scale
if(is.element(value, c("DP" , "prob.high.risk"))){
if(is.element(value, c("DP" ,"nonDP", "prob.high.risk", "prob.low.risk"))){
#population.size
ps <- population.size
}else{
Expand All @@ -185,8 +188,8 @@ plot_generic<- function(xx, predictors, value, plotNew,
for(i in 1:length(predictors)){
#plot ci's if asked for

j <- ifelse(is.element(value, c("TPR", "prob.high.risk")), 1, i)
j <- ifelse(is.element(value, c("FPR", "DP")), 2, i)
j <- ifelse(is.element(value, c("TPR", "prob.high.risk", "prob.low.risk")), 1, i)
j <- ifelse(is.element(value, c("FPR", "DP", "nonDP")), 2, i)
if(is.numeric(confidence.intervals)){
#get rid of cases missing for that predictor; this sometimes
#happens due to different thresholds for each predictor
Expand Down Expand Up @@ -229,18 +232,26 @@ plot_generic<- function(xx, predictors, value, plotNew,
legend(legend.position,
lty = c( 1, 2),
col = col, bg = "white",
lwd = lwd, legend = c("Number high risk", "Number high risk with outcome"))
lwd = lwd, legend = c("Number high risk", "Number high risk with event"))

}else if(impact.legend.2){
legend(legend.position,
lty = c( 1, 2),
col = col, bg = "white",
lwd = lwd, legend = c("Number low risk", "Number low risk without event"))

}

}

#add cost benefit axis if wanted
if(cost.benefit.axis){

tmp <- Add_CostBenefit_Axis(xlim = xlim,
cost.benefits = cost.benefits,
n.cost.benefits = n.cost.benefits,
line = 4)
line = 4,
policy = policy)
mtext(xlab, 1, 2.2)
mtext(cost.benefit.xlab, side = 1, 6.1)
}else{
Expand Down
Loading

0 comments on commit 60f77a2

Please sign in to comment.