add note

add note to warn against resubstitution bias
mdbrown · Jul 17, 2018 · 5a0cf6c · 5a0cf6c
1 parent b2dd119
commit 5a0cf6c
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 7 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: rmda
 Type: Package
 Title: Risk Model Decision Analysis
-Version: 1.6
+Version: 1.7
 Date: 2018-05-30
 Author: Marshall Brown
 Maintainer: Marshall Brown <mdbrown@fredhutch.org>

diff --git a/NEWS b/NEWS
@@ -2,7 +2,7 @@ rmda v 1.6
 ========================
 * update default y-axis limits for sNB plots to show positive values only. 
 * print a message warning of bias due to overfitting when the same data is used to fit and evaluate a model using decision_curve. 
-* fix a bug where CI's for the policy of treating 'None' were not plotted when policy = "opt-out". 
+* fix a bug where CIs for the policy of treating 'None' were not plotted, and were not printed by summary(), when policy = "opt-out".
 
 rmda v 1.5
 ========================

diff --git a/R/decision_curve.R b/R/decision_curve.R
@@ -131,6 +131,11 @@ decision_curve <- function(formula,
 
   #retrieve outcome and check
   outcome <- data[[all.vars(formula[[2]])]];
+  #extract the model name from formula
+  predictors <- c(Reduce(paste, deparse(formula[[3]])), 'All', 'None')
+  predictor.names <- c(Reduce(paste, deparse(formula)), 'All', 'None')
+
+
   if(length(unique(outcome)) != 2) stop('outcome variable is not binary (it does not take two unique values).')
   stopifnot(is.numeric(outcome))
   if(min(outcome) != 0 | max(outcome) != 1) stop('outcome variable must be binary taking on values 0 for control and 1 for case.')
@@ -145,7 +150,9 @@ decision_curve <- function(formula,
     provided.risks <-  data[[Reduce(paste, deparse(formula[[3]]))]] #get the name of the fitted risk variable from formula.
     if(min(provided.risks) < 0 | max(provided.risks) > 1) stop('When fitted.risks = TRUE, all risks provided must be between 0 and 1.')
 
-  }else{
+  }else if(length(strsplit(predictors[[1]], "+", fixed = TRUE)[[1]])  > 1) {
+
+    message("Note:  The data provided is used to both fit a prediction model and to estimate the respective decision curve. This may cause bias in decision curve estimates leading to over-confidence in model performance. ")
    #print a message about potential bias due to overfitting when the same data is used to fit/evaluate a model.
   }
   #########
@@ -155,9 +162,6 @@ decision_curve <- function(formula,
   #calculate curves
   #first we fit the model
 
-  #extract the model name from formula
-  predictors <- c(Reduce(paste, deparse(formula[[3]])), 'All', 'None')
-  predictor.names <- c(Reduce(paste, deparse(formula)), 'All', 'None')
 
   #indicate whether we are fitting a model with a formula or not
   #the last two are FALSE since they correspond to 'all' and 'none'

diff --git a/inst/notes/tutorial.Rmd b/inst/notes/tutorial.Rmd
@@ -72,7 +72,7 @@ First we use the function `decision_curve` to create a decision curve object for
 ```{r, message=FALSE, warning = FALSE}
 set.seed(123)
 #first use rmda with the default settings (set bootstraps = 50 here to reduce computation time). 
-baseline.model <- decision_curve(Cancer~Age + Female + Smokes, #fitting a logistic model
+baseline.model <- decision_curve( Cancer~Age + Female + Smokes, #fitting a logistic model
                                 data = dcaData, 
                                 study.design = "cohort", 
                                 policy = "opt-in",  #default