Added confidence interval calculation and a new function to get inter…

…-rater reliability
ma-kopka · Apr 16, 2024 · 157f11b · 157f11b
1 parent e452db2
commit 157f11b
Show file tree

Hide file tree

Showing 22 changed files with 544 additions and 44 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -1,3 +1,3 @@
-Version: 0.1.0
-Date: 2024-01-10 10:22:24 UTC
-SHA: 64305e06ddc8f4ece7b7162e9a7f7172ec707543
+Version: 0.1.1
+Date: 2024-01-11 10:07:55 UTC
+SHA: af460d5dcc431afccd73040706420df0e4df889f
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: symptomcheckR
 Title: Analyzing and Visualizing Symptom Checker Performance
-Version: 0.1.1
+Version: 0.1.2
 Authors@R: c(
     person("Marvin", "Kopka", , "marvin.kopka@tu-berlin.de", role = c("cre", "aut"),
            comment = c(ORCID = "0000-0003-3848-1471")),
@@ -27,11 +27,14 @@ Imports:
     dplyr (>= 1.0.0),
     ggplot2 (>= 3.2.0),
     ggpubr (>= 0.6.0),
-    tidyr (>= 1.3.0)
+    tidyr (>= 1.3.0),
+    irr (>= 0.84.1)
 Suggests: 
     knitr,
-    rmarkdown
+    rmarkdown,
+    testthat (>= 3.0.0)
 Encoding: UTF-8
 Language: en-US
 LazyData: true
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
+Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ export(get_ccs)
 export(get_ccs_by_triage)
 export(get_comprehensiveness)
 export(get_inclination_to_overtriage)
+export(get_irr)
 export(get_item_difficulty)
 export(get_safety_of_advice)
 export(plot_accuracy)
@@ -19,6 +20,7 @@ export(plot_safety_of_advice)
 import(dplyr)
 import(ggplot2)
 import(ggpubr)
+import(irr)
 import(tidyr)
 importFrom(stats,na.omit)
 importFrom(stats,setNames)
diff --git a/NEWS.md b/NEWS.md
@@ -1,8 +1,12 @@
 # symptomcheckR 0.1.1
 
-* Revised CRAN submission.
+-   The package now includes confidence intervals for all metrics. These can be obtained by setting the paramter CI == TRUE.
+-   The package now contains get_irr() to calculate the inter-rater reliability if multiple raters were involved in the evaluation.
 
+# symptomcheckR 0.1.1
+
+-   Revised CRAN submission.
 
 # symptomcheckR 0.1.0
 
-* Initial CRAN submission.
+-   Initial CRAN submission.
diff --git a/R/.DS_Store b/R/.DS_Store
diff --git a/R/get_accuracy.R b/R/get_accuracy.R
@@ -5,19 +5,23 @@
 #' @param data A dataframe
 #' @param correct A string indicating the column name storing if the symptom checker solved the case (TRUE or FALSE)
 #' @param apps A string indicating the column name storing the app names (optional)
+#' @param CI A Boolean (TRUE or FALSE) indicating whether 95\% confidence intervals should be output (optional)
 #'
 #' @return A data frame object containing the accuracy of the symptom checker or the accuracy of multiple symptom checkers. Use the apps argument to calculate this metric for multiple symptom checkers.
 #' @examples
 #' data(symptomcheckRdata)
 #' accuracy <- get_accuracy(
 #'   data = symptomcheckRdata,
 #'   correct = "Correct_Triage_Advice_provided_from_app",
-#'   apps = "App_name"
+#'   apps = "App_name",
+#'   CI = TRUE
 #'   )
 #' @export
 #' @import dplyr
 
-get_accuracy <- function(data, correct, apps = NULL) {
+get_accuracy <- function(data, correct, apps = NULL, CI = FALSE) {
+  accuracy <- NULL
+  qnorm <- NULL
   # Code input for handling with dplyr
   correct_sym <- sym(correct)
 
@@ -49,16 +53,59 @@ get_accuracy <- function(data, correct, apps = NULL) {
       message("Only one app provided, calculating accuracy for this app")
     }
     # Calculate accuracy for each symptom checker
+    if (CI == TRUE) {
+      output <- data %>%
+        group_by(!!apps_sym) %>%
+        summarise(accuracy = mean(!!correct_sym, na.rm = TRUE),
+                  n = n()) %>%
+        rowwise() %>%
+        mutate(
+          lower_ci = ifelse(CI, accuracy - qnorm(0.975) * sqrt((accuracy * (1 - accuracy))/n), NA_real_),
+          upper_ci = ifelse(CI, accuracy + qnorm(0.975) * sqrt((accuracy * (1 - accuracy))/n), NA_real_)
+        ) %>%
+        mutate(lower_ci = case_when(
+          lower_ci < 0 ~ 0,
+          lower_ci > 1 ~ 1,
+          TRUE ~ lower_ci),
+          upper_ci = case_when(
+            upper_ci < 0 ~ 0,
+            upper_ci > 1 ~ 1,
+            TRUE ~ lower_ci)) %>%
+        select(-n)
+
+    } else {
     output <- data %>%
       group_by(!!apps_sym) %>%
       summarise(accuracy = mean(!!correct_sym, na.rm = TRUE))
+    }
   } else {
     # Output message that accuracy is calculated across dataset
     message("No apps vector specified, calculating accuracy across the entire dataset.")
 
     # Calculate accuracy
+    if (CI == TRUE) {
+      output <- data %>%
+        summarise(accuracy = mean(!!correct_sym, na.rm = TRUE),
+                  n = n()) %>%
+        rowwise() %>%
+        mutate(
+          lower_ci = ifelse(CI, accuracy - qnorm(0.975) * sqrt((accuracy * (1 - accuracy))/n), NA_real_),
+          upper_ci = ifelse(CI, accuracy + qnorm(0.975) * sqrt((accuracy * (1 - accuracy))/n), NA_real_)
+        ) %>%
+        mutate(lower_ci = case_when(
+          lower_ci < 0 ~ 0,
+          lower_ci > 1 ~ 1,
+          TRUE ~ lower_ci),
+          upper_ci = case_when(
+            upper_ci < 0 ~ 0,
+            upper_ci > 1 ~ 1,
+            TRUE ~ lower_ci)) %>%
+        select(-n)
+
+    } else {
     output <- data %>%
       summarise(accuracy = mean(!!correct_sym, na.rm = TRUE))
+    }
   }
 
   return(output)

diff --git a/R/get_accuracy_by_triage.R b/R/get_accuracy_by_triage.R
@@ -6,6 +6,7 @@
 #' @param correct A string indicating the column name storing if the symptom checker solved the case (TRUE or FALSE)
 #' @param triagelevel A string indicating the column name storing the correct triage solutions
 #' @param apps A string indicating the column name storing the app names (optional)
+#' @param CI A Boolean (TRUE or FALSE) indicating whether 95\% confidence intervals should be output (optional)
 #'
 #' @return A data frame object containing the accuracy on each triage level (of one or multiple symptom checkers) or the accuracy of multiple symptom checkers. Use the apps argument to calculate this metric for multiple symptom checkers.
 #' @examples
@@ -14,14 +15,17 @@
 #'   data = symptomcheckRdata,
 #'   correct = "Correct_Triage_Advice_provided_from_app",
 #'   triagelevel = "Goldstandard_solution",
-#'   apps = "App_name"
+#'   apps = "App_name",
+#'   CI = TRUE
 #'   )
 #' @export
 #' @import dplyr
 
-get_accuracy_by_triage <- function(data, correct, triagelevel,  apps = NULL) {
+get_accuracy_by_triage <- function(data, correct, triagelevel,  apps = NULL, CI = FALSE) {
   item_difficulty <- NULL
   ccs <- NULL
+  accuracy <- NULL
+  qnorm <- NULL
   # Code input for handling with dplyr
   correct_sym <- sym(correct)
   triagelevel_sym <- sym(triagelevel)
@@ -58,22 +62,68 @@ get_accuracy_by_triage <- function(data, correct, triagelevel,  apps = NULL) {
       message("Only one app provided, calculating accuracy for this app")
     }
 
+    if (CI == TRUE) {
     # Group by triage level and calculate accuracy for each symptom checker
     output <- data %>%
       filter(!is.na(!!triagelevel_sym)) %>%
       filter(!is.na(!!correct_sym)) %>%
       group_by(!!apps_sym, !!triagelevel_sym) %>%
-      summarise(accuracy = mean(!!correct_sym, na.rm = TRUE))
+      summarise(accuracy = mean(!!correct_sym, na.rm = TRUE),
+                n = n()) %>%
+      rowwise() %>%
+      mutate(
+        lower_ci = ifelse(CI, accuracy - qnorm(0.975) * sqrt((accuracy * (1 - accuracy))/n), NA_real_),
+        upper_ci = ifelse(CI, accuracy + qnorm(0.975) * sqrt((accuracy * (1 - accuracy))/n), NA_real_)
+      ) %>%
+      mutate(lower_ci = case_when(
+        lower_ci < 0 ~ 0,
+        lower_ci > 1 ~ 1,
+        TRUE ~ lower_ci),
+        upper_ci = case_when(
+          upper_ci < 0 ~ 0,
+          upper_ci > 1 ~ 1,
+          TRUE ~ lower_ci)) %>%
+      select(-n)
+    } else {
+      output <- data %>%
+        filter(!is.na(!!triagelevel_sym)) %>%
+        filter(!is.na(!!correct_sym)) %>%
+        group_by(!!apps_sym, !!triagelevel_sym) %>%
+        summarise(accuracy = mean(!!correct_sym, na.rm = TRUE))
+    }
   } else {
     # Output message that accuracy is calculated across dataset
     message("No apps vector specified, calculating accuracy across the entire dataset.")
+    if (CI == TRUE) {
+      output <- data %>%
+        filter(!is.na(!!triagelevel_sym)) %>%
+        filter(!is.na(!!correct_sym)) %>%
+        group_by(!!triagelevel_sym) %>%
+        summarise(accuracy = mean(!!correct_sym, na.rm = TRUE),
+                  n = n()) %>%
+        rowwise() %>%
+        mutate(
+          lower_ci = ifelse(CI, accuracy - qnorm(0.975) * sqrt((accuracy * (1 - accuracy))/n), NA_real_),
+          upper_ci = ifelse(CI, accuracy + qnorm(0.975) * sqrt((accuracy * (1 - accuracy))/n), NA_real_)
+        ) %>%
+        mutate(lower_ci = case_when(
+          lower_ci < 0 ~ 0,
+          lower_ci > 1 ~ 1,
+          TRUE ~ lower_ci),
+          upper_ci = case_when(
+            upper_ci < 0 ~ 0,
+            upper_ci > 1 ~ 1,
+            TRUE ~ lower_ci)) %>%
+        select(-n)
 
+    } else {
     # Group by triage level and calculate accuracy
-    output <- data %>%
-      filter(!is.na(!!triagelevel_sym)) %>%
-      filter(!is.na(!!correct_sym)) %>%
-      group_by(!!triagelevel_sym) %>%
-      summarise(accuracy = mean(!!correct_sym, na.rm = TRUE))
+      output <- data %>%
+        filter(!is.na(!!triagelevel_sym)) %>%
+        filter(!is.na(!!correct_sym)) %>%
+        group_by(!!triagelevel_sym) %>%
+        summarise(accuracy = mean(!!correct_sym, na.rm = TRUE))
+    }
   }
 
   return(output)

diff --git a/R/get_comprehensiveness.R b/R/get_comprehensiveness.R
@@ -6,6 +6,7 @@
 #' @param triagelevel_advice A string indicating the column name storing the recommendation of a symptom checker for a case
 #' @param vector_not_entered A vector indicating the values in which missing values are coded (e.g., as NA or a specified value such as -99)
 #' @param apps A string indicating the column name storing the app names
+#' @param CI A Boolean (TRUE or FALSE) indicating whether 95\% confidence intervals should be output (optional)
 #'
 #' @return A list containing both a raw number and the percentage of comprehensiveness for one or multiple symptom checkers
 #' @examples
@@ -14,14 +15,19 @@
 #'   data = symptomcheckRdata,
 #'   triagelevel_advice = "Triage_advice_from_app",
 #'   vector_not_entered = c(NA),
-#'   apps = "App_name"
+#'   apps = "App_name",
+#'   CI = TRUE
 #'   )
 #'
 #' @export
 #' @import dplyr
 
-get_comprehensiveness <- function(data, triagelevel_advice, vector_not_entered, apps = NULL) {
+get_comprehensiveness <- function(data, triagelevel_advice, vector_not_entered, apps = NULL, CI = FALSE) {
   gave_advice <- NULL
+  comprehensiveness_percentage <- NULL
+  qnorm <- NULL
+  lower_ci <- NULL
+  upper_ci <- NULL
   # Handle errors
   if (!is.data.frame(data)) {
     stop("The first argument must be a data frame.")
@@ -69,13 +75,42 @@ get_comprehensiveness <- function(data, triagelevel_advice, vector_not_entered,
       ungroup()
 
     # Calculate percentage of comprehensiveness
+    if (CI == TRUE) {
     percentage <- data %>%
       group_by(!!apps_sym, gave_advice) %>%
       count() %>%
       ungroup() %>%
       group_by(!!apps_sym) %>%
-      summarise(comprehensiveness_percentage = sum(n[gave_advice == "Advice given"]) / sum(n) * 100) %>%
-      ungroup()
+      summarise(comprehensiveness_percentage = sum(n[gave_advice == "Advice given"]) / sum(n),
+                n = sum(n)) %>%
+      ungroup() %>%
+      rowwise() %>%
+      mutate(
+        lower_ci = ifelse(CI, comprehensiveness_percentage - qnorm(0.975) * sqrt((comprehensiveness_percentage * (1 - comprehensiveness_percentage))/n), NA_real_),
+        upper_ci = ifelse(CI, comprehensiveness_percentage + qnorm(0.975) * sqrt((comprehensiveness_percentage * (1 - comprehensiveness_percentage))/n), NA_real_)
+      ) %>%
+      mutate(comprehensiveness_percentage = comprehensiveness_percentage*100,
+             lower_ci = lower_ci*100,
+             upper_ci = upper_ci*100) %>%
+      mutate(lower_ci = case_when(
+        lower_ci < 0 ~ 0,
+        lower_ci > 100 ~ 100,
+        TRUE ~ lower_ci),
+        upper_ci = case_when(
+          upper_ci < 0 ~ 0,
+          upper_ci > 100 ~ 100,
+          TRUE ~ lower_ci)) %>%
+      select(-n)
+
+    } else {
+      percentage <- data %>%
+        group_by(!!apps_sym, gave_advice) %>%
+        count() %>%
+        ungroup() %>%
+        group_by(!!apps_sym) %>%
+        summarise(comprehensiveness_percentage = sum(n[gave_advice == "Advice given"]) / sum(n) * 100) %>%
+        ungroup()
+    }
 
   } else {
     # Code missing values accordingly
@@ -91,11 +126,38 @@ get_comprehensiveness <- function(data, triagelevel_advice, vector_not_entered,
       count()
 
     # Calculate percentage of comprehensiveness
+    if (CI == TRUE) {
+      percentage <- data %>%
+        group_by(gave_advice) %>%
+        count() %>%
+        ungroup() %>%
+        summarise(comprehensiveness_percentage = sum(n[gave_advice == "Advice given"]) / sum(n),
+                  n = sum(n)) %>%
+        ungroup() %>%
+        rowwise() %>%
+        mutate(
+          lower_ci = ifelse(CI, comprehensiveness_percentage - qnorm(0.975) * sqrt((comprehensiveness_percentage * (1 - comprehensiveness_percentage))/n), NA_real_),
+          upper_ci = ifelse(CI, comprehensiveness_percentage + qnorm(0.975) * sqrt((comprehensiveness_percentage * (1 - comprehensiveness_percentage))/n), NA_real_)
+        ) %>%
+        mutate(comprehensiveness_percentage = comprehensiveness_percentage*100,
+               lower_ci = lower_ci*100,
+               upper_ci = upper_ci*100) %>%
+        mutate(lower_ci = case_when(
+          lower_ci < 0 ~ 0,
+          lower_ci > 100 ~ 100,
+          TRUE ~ lower_ci),
+          upper_ci = case_when(
+            upper_ci < 0 ~ 0,
+            upper_ci > 100 ~ 100,
+            TRUE ~ lower_ci)) %>%
+        select(-n)
+    } else {
     percentage <- data %>%
       group_by(gave_advice) %>%
       count() %>%
       ungroup() %>%
       summarise(comprehensiveness_percentage = sum(n[gave_advice == "Advice given"]) / sum(n) * 100)
+    }
 
   }
   # Output raw numbers and percentage as a list