updates.

nt-williams · Apr 7, 2021 · b2b5582 · b2b5582
1 parent a24b930
commit b2b5582
Show file tree

Hide file tree

Showing 4 changed files with 290 additions and 15 deletions.
diff --git a/R/results.R b/R/results.R
@@ -1,26 +1,38 @@
 read_results <- function(context, regex) {
   files <- find_files(context, regex)
-  out <- purrr::map(files, function(file) {
+  out <- purrr::map_dfr(files, function(file) {
     x <- readRDS(file)
     if (inherits(x, "try-error")) {
       return(NULL)
     }
+    cop <- x
     if (length(x$param) > 1) {
+      x$tse_tmle <- sd(x$tmle, na.rm = TRUE)
+      x$tse_param <- sd(x$param, na.rm = TRUE)
       x$tmle_mse <- mse(x$tmle, x$truth)
       x$param_mse <- mse(x$param, x$truth)
-      x$param <- mean(x$param, na.rm = TRUE)
-      x$tmle <- mean(x$tmle, na.rm = TRUE)
+      x$coverage_tmle <- mean(purrr::map_lgl(x$tmle, ~ covered(.x, x$tse_tmle, x$truth)), na.rm = TRUE)
+      x$coverage_param <- mean(purrr::map_lgl(x$param, ~ covered(.x, x$tse_param, x$truth)), na.rm = TRUE)
     }
     data.table(truth = x$truth, pos = x$pos, 
                vn1 = x$vn1, vn0 = x$vn0, vn_cate = x$vn_cate,
-               param = x$param, tmle = x$tmle, 
-               tmle_mse = x$tmle_mse, param_mse = x$param_mse)
+               param = mean(x$param, na.rm = TRUE), tmle = mean(x$tmle, na.rm = TRUE), 
+               tmle_mse = x$tmle_mse, param_mse = x$param_mse, 
+               tmle_coverage = x$coverage_tmle, 
+               param_coverage = x$coverage_param, 
+               tmle_estimates = list(x$tmle), 
+               param_estimates = list(x$param))
   })
   setDT(out)
   out[, `:=`(tmle_bias = tmle - truth, 
              param_bias = param - truth)][]
 }
 
+covered <- function(theta, se, truth) {
+  z <- qnorm(0.975)
+  (theta - z * se < truth) && (truth < theta + z * se)
+}
+
 mse <- function(x_i, x_bar) {
   mean((x_i - x_bar)^2)
 }

diff --git a/scripts/extract.R b/scripts/extract.R
@@ -0,0 +1,29 @@
+# Nick Williams
+# Research Biostatistician 
+# Department of Population Health Sciences 
+# Weill Cornell Medicine
+
+.libPaths("/home/niw4001/R_local")
+
+setwd("/home/niw4001/doesNPMatter")
+
+devtools::load_all()
+
+args <- commandArgs(trailingOnly = TRUE)
+
+size <- args[1]
+context <- args[2]
+binary_cnf <- as.numeric(args[3])
+cont_cnf <- as.numeric(args[4])
+randomized <- as.logical(args[5])
+parametric <- as.logical(args[6])
+crossfit <- as.logical(args[7])
+
+regex <- glue::glue("{size}_[[:digit:]]+_{binary_cnf}_{cont_cnf}_{randomized}_{parametric}_{crossfit}*")
+op <- glue::glue("{context}_{size}_{binary_cnf}_{cont_cnf}_{randomized}_{parametric}_{crossfit}.rds")
+
+out <- read_results(context, regex)
+
+saveRDS(out, paste0("/home/niw4001/doesNPMatter/data/res/", op))
+
+quit("no")
diff --git a/scripts/nbinary-results.R b/scripts/nbinary-results.R
@@ -19,15 +19,18 @@ f1op4_0 <- readRDS("./data/res/binary_finite1_obserParam_4_0.rds")
 f2op4_0 <- readRDS("./data/res/binary_finite2_obserParam_4_0.rds")
 f3op4_0 <- readRDS("./data/res/binary_finite3_obserParam_4_0.rds")
 
-ao4_0 <- merge(ao4_0, aop4_0[, .(truth, tmlep_bias = tmle_bias)], by = "truth")
-f1o4_0 <- merge(f1o4_0, f1op4_0[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
-f2o4_0 <- merge(f2o4_0, f2op4_0[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
-f3o4_0 <- merge(f3o4_0, f3op4_0[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
+ao4_0 <- merge(ao4_0, aop4_0[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias)], by = "truth")
+f1o4_0 <- merge(f1o4_0, f1op4_0[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
+f2o4_0 <- merge(f2o4_0, f2op4_0[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
+f3o4_0 <- merge(f3o4_0, f3op4_0[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
 
 o4_0 <- rbindlist(list(`100000` = ao4_0, `2500` = f1o4_0, 
                        `500` = f2o4_0, `250` = f3o4_0), 
                   fill = TRUE, id = "n")
 o4_0[, n := as.numeric(n)]
+o4_0[, `:=`(rel_bias_tmle = abs(tmle - truth) / max(abs(tmle), abs(truth)), 
+            rel_bias_tmlep = abs(tmlep - truth) / max(abs(tmlep), abs(truth)), 
+            rel_bias_param = abs(param - truth) / max(abs(param), abs(truth)))]
 
 ao0_100 <- readRDS("./data/res/binary_asymp_obser_0_100.rds")
 f1o0_100 <- readRDS("./data/res/binary_finite1_obser_0_100.rds")
@@ -39,23 +42,26 @@ f1op0_100 <- readRDS("./data/res/binary_finite1_obserParam_0_100.rds")
 f2op0_100 <- readRDS("./data/res/binary_finite2_obserParam_0_100.rds")
 f3op0_100 <- readRDS("./data/res/binary_finite3_obserParam_0_100.rds")
 
-ao0_100 <- merge(ao0_100, aop0_100[, .(truth, tmlep_bias = tmle_bias)], by = "truth")
-f1o0_100 <- merge(f1o0_100, f1op0_100[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
-f2o0_100 <- merge(f2o0_100, f2op0_100[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
-f3o0_100 <- merge(f3o0_100, f3op0_100[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
+ao0_100 <- merge(ao0_100, aop0_100[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias)], by = "truth")
+f1o0_100 <- merge(f1o0_100, f1op0_100[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
+f2o0_100 <- merge(f2o0_100, f2op0_100[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
+f3o0_100 <- merge(f3o0_100, f3op0_100[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
 
 o0_100 <- rbindlist(list(`100000` = ao0_100, `2500` = f1o0_100, 
                          `500` = f2o0_100, `250` = f3o0_100), 
                     fill = TRUE, id = "n")
 o0_100[, n := as.numeric(n)]
+o0_100[, `:=`(rel_bias_tmle = abs(tmle - truth) / max(abs(tmle), abs(truth)), 
+            rel_bias_tmlep = abs(tmlep - truth) / max(abs(tmlep), abs(truth)), 
+            rel_bias_param = abs(param - truth) / max(abs(param), abs(truth)))]
 
 # RCT
 ar4_0 <- readRDS("./data/res/binary_asymp_random_4_0.rds")
-f1r4_0 <- readRDS("./data/res/binary_finite1_random_4_0.rds")
+# f1r4_0 <- readRDS("./data/res/binary_finite1_random_4_0.rds")
 f2r4_0 <- readRDS("./data/res/binary_finite2_random_4_0.rds")
 f3r4_0 <- readRDS("./data/res/binary_finite3_random_4_0.rds")
 
-r4_0 <- rbindlist(list(`100000` = ar4_0, `2500` = f1r4_0, 
+r4_0 <- rbindlist(list(`100000` = ar4_0, # `2500` = f1r4_0, 
                        `500` = f2r4_0, `250` = f3r4_0), 
                   fill = TRUE, id = "n")
 r4_0[, n := as.numeric(n)]
@@ -70,6 +76,165 @@ r0_100 <- rbindlist(list(`100000` = ar0_100, `2500` = f1r0_100,
                     fill = TRUE, id = "n")
 r0_100[, n := as.numeric(n)]
 
+# plots -------------------------------------------------------------------
+
+ggplot(rbindlist(list("Scenario 1" = o0_100, "Scenario 2" = o4_0), 
+                 fill = TRUE, id = "scen")) +
+  geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf', 
+            alpha = 0.5) +
+  geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) + 
+  scale_y_continuous(expand = c(0.01, 0.01)) + 
+  facet_grid(cols = vars(n), 
+             rows = vars(scen)) + 
+  labs(x = "|est - true| / max(|est|, |true|)",
+       y = NULL, 
+       color = NULL) + 
+  scale_color_identity(breaks = c("blue", "red", "darkgreen"),
+                       labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
+                       guide = "legend")
+
+ggsave("./plots/bias_binary_obs.png", dpi = 600, width = 14, height = 5.5)
+
+
+ggplot(rbindlist(
+  list("Scenario 1" = o0_100, "Scenario 2" = o4_0), 
+  fill = TRUE, id = "scen"
+)[n != 1e5
+  ][, `:=`(tmle_mse_scaled = tmle_mse * n, 
+           tmlep_mse_scaled = tmlep_mse * n, 
+           param_mse_scaled = param_mse * n)]) +
+  geom_line(aes(x = abs(tmle_mse_scaled), y = 1 - ..y.., color = "blue"), stat = 'ecdf', 
+            alpha = 0.5) +
+  geom_line(aes(x = abs(param_mse_scaled), y = 1 - ..y.., color = "red"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  geom_line(aes(x = abs(tmlep_mse_scaled), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  scale_x_continuous(expand = c(0.025, 0), limits = c(0, 5)) + 
+  scale_y_continuous(expand = c(0.01, 0.01)) + 
+  facet_grid(cols = vars(n), 
+             rows = vars(scen)) + 
+  labs(x = expression(MSE %*% N),
+       y = NULL, 
+       color = NULL) + 
+  scale_color_identity(breaks = c("blue", "red", "darkgreen"),
+                       labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
+                       guide = "legend")
+
+ggsave("./plots/mse_binary_obs.png", dpi = 600, width = 14, height = 5.5)
+
+o0_100[, vn_cate_quart := 
+         cut(vn_cate, 
+             breaks = quantile(vn_cate, probs = seq(0, 1, by = 0.25), na.rm = TRUE), 
+                                 include.lowest = TRUE, 
+             labels = c("1st Quartile", "2nd Quartile", "3rd Quartile", "4th Quartile"))]
+
+ggplot(o0_100[!is.na(vn_cate_quart)]) +
+  geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf', 
+            alpha = 0.5) +
+  geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) + 
+  scale_y_continuous(expand = c(0.01, 0.01)) + 
+  facet_grid(cols = vars(vn_cate_quart),
+             rows = vars(n)) + 
+  labs(x = "|est - true| / max(|est|, |true|)",
+       y = NULL, 
+       color = NULL) + 
+  scale_color_identity(breaks = c("blue", "red", "darkgreen"),
+                       labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
+                       guide = "legend")
+
+ggsave("./plots/bias_binary_vn_cate_obs.png", dpi = 600, width = 14, height = 10)
+
+o0_100[, vn_max_trt := pmax(vn1, vn0, na.rm = TRUE)
+       ][, vn_max_trt_quart := 
+           cut(vn_max_trt, 
+               breaks = quantile(vn_max_trt, probs = seq(0, 1, by = 0.25), na.rm = TRUE), 
+               include.lowest = TRUE, 
+               labels = c("1st Quartile", "2nd Quartile", "3rd Quartile", "4th Quartile"))]
+
+ggplot(o0_100[!is.na(vn_max_trt_quart)]) +
+  geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf', 
+            alpha = 0.5) +
+  geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) + 
+  scale_y_continuous(expand = c(0.01, 0.01)) + 
+  facet_grid(cols = vars(vn_max_trt_quart), 
+             rows = vars(n)) + 
+  labs(x = "|est - true| / max(|est|, |true|)",
+       y = NULL, 
+       color = NULL) + 
+  scale_color_identity(breaks = c("blue", "red", "darkgreen"),
+                       labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
+                       guide = "legend")
+
+ggsave("./plots/bias_binary_max_vn_trt_obs.png", dpi = 600, width = 14, height = 10)
+
+o0_100[, pos_quart := 
+          cut(pos, 
+              breaks = quantile(
+                pos, probs = seq(0, 1, by = 0.25), na.rm = TRUE
+                ), 
+              include.lowest = TRUE, 
+              labels = c("1st Quartile", "2nd Quartile", "3rd Quartile", "4th Quartile"))]
+
+o4_0[, pos_quart := 
+          cut(pos, 
+              breaks = quantile(
+                pos, probs = seq(0, 1, by = 0.25), na.rm = TRUE
+              ), 
+              include.lowest = TRUE, 
+              labels = c("1st Quartile", "2nd Quartile", "3rd Quartile", "4th Quartile"))]
+
+ggplot(o0_100[!is.na(pos_quart)]) +
+  geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf', 
+            alpha = 0.5) +
+  geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) + 
+  scale_y_continuous(expand = c(0.01, 0.01)) + 
+  facet_grid(cols = vars(pos_quart), 
+             rows = vars(n)) + 
+  labs(x = "|est - true| / max(|est|, |true|)",
+       y = NULL, 
+       color = NULL) + 
+  scale_color_identity(breaks = c("blue", "red", "darkgreen"),
+                       labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
+                       guide = "legend")
+
+ggsave("./plots/bias_binary_pos_scen1_obs.png", dpi = 600, width = 14, height = 5.5)
+
+ggplot(o4_0[!is.na(pos_quart)]) +
+  geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf', 
+            alpha = 0.5) +
+  geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) + 
+  scale_y_continuous(expand = c(0.01, 0.01)) + 
+  facet_grid(cols = vars(pos_quart), 
+             rows = vars(n)) + 
+  labs(x = "|est - true| / max(|est|, |true|)",
+       y = NULL, 
+       color = NULL) + 
+  scale_color_identity(breaks = c("blue", "red", "darkgreen"),
+                       labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
+                       guide = "legend")
+
+ggsave("./plots/bias_binary_pos_scen2_obs.png", dpi = 600, width = 14, height = 5.5)
+
 # absolute bias -----------------------------------------------------------
 
 # observational

diff --git a/scripts/test2.R b/scripts/test2.R
@@ -0,0 +1,69 @@
+# Nick Williams
+# Research Biostatistician 
+# Department of Population Health Sciences 
+# Weill Cornell Medicine
+
+box::use(data.table[...], ggplot2[...])
+
+ao10_0 <- readRDS("./data/res/binary_asymp_10_0_FALSE_FALSE_FALSE.rds")
+f1o10_0 <- readRDS("./data/res/binary_finite1_10_0_FALSE_FALSE_FALSE.rds")
+f2o10_0 <- readRDS("./data/res/binary_finite2_10_0_FALSE_FALSE_FALSE.rds")
+f3o10_0 <- readRDS("./data/res/binary_finite3_10_0_FALSE_FALSE_FALSE.rds")
+
+aop10_0 <- readRDS("./data/res/binary_asymp_10_0_FALSE_TRUE_FALSE.rds")
+f1op10_0 <- readRDS("./data/res/binary_finite1_10_0_FALSE_TRUE_FALSE.rds")
+f2op10_0 <- readRDS("./data/res/binary_finite2_10_0_FALSE_TRUE_FALSE.rds")
+f3op10_0 <- readRDS("./data/res/binary_finite3_10_0_FALSE_TRUE_FALSE.rds")
+
+aoc10_0 <- readRDS("./data/res/binary_asymp_10_0_FALSE_FALSE_TRUE.rds")
+f1oc10_0 <- readRDS("./data/res/binary_finite1_10_0_FALSE_FALSE_TRUE.rds")
+f2oc10_0 <- readRDS("./data/res/binary_finite2_10_0_FALSE_FALSE_TRUE.rds")
+f3oc10_0 <- readRDS("./data/res/binary_finite3_10_0_FALSE_FALSE_TRUE.rds")
+
+for (dt in list(aop10_0, f1op10_0, f2op10_0, f3op10_0)) {
+  setnames(dt, c("tmle", "tmle_bias", "tmle_mse", "tmle_estimates"), 
+           c("tmlep", "tmlep_bias", "tmlep_mse", "tmlep_estimates"), 
+           skip_absent = TRUE)
+}
+
+for (dt in list(aoc10_0, f1oc10_0, f2oc10_0, f3oc10_0)) {
+  setnames(dt, c("tmle", "tmle_bias", "tmle_mse", "tmle_estimates"), 
+           c("tmlec", "tmlec_bias", "tmlec_mse", "tmlec_estimates"), 
+           skip_absent = TRUE)
+}
+
+ao10_0 <- merge(ao10_0, aop10_0[, .(truth, tmlep, tmlep_bias)], by = "truth", all = TRUE)
+f1o10_0 <- merge(f1o10_0, f1op10_0[, .(truth, tmlep, tmlep_bias, tmlep_mse)], by = "truth", all = TRUE)
+f1o10_0 <- merge(f1o10_0, f1oc10_0[, .(truth, tmlec, tmlec_bias, tmlec_mse)], by = "truth", all = TRUE)
+f2o10_0 <- merge(f2o10_0, f2op10_0[, .(truth, tmlep, tmlep_bias, tmlep_mse)], by = "truth", all = TRUE)
+f2o10_0 <- merge(f2o10_0, f2oc10_0[, .(truth, tmlec, tmlec_bias, tmlec_mse)], by = "truth", all = TRUE)
+f3o10_0 <- merge(f3o10_0, f3op10_0[, .(truth, tmlep, tmlep_bias, tmlep_mse)], by = "truth", all = TRUE)
+f3o10_0 <- merge(f3o10_0, f3oc10_0[, .(truth, tmlec, tmlec_bias, tmlec_mse)], by = "truth", all = TRUE)
+
+o10_0 <- rbindlist(list(`100000` = ao10_0, `2500` = f1o10_0, `500` = f2o10_0, `250` = f3o10_0), 
+                   fill = TRUE, id = "n")
+
+o10_0[, n := as.numeric(n)]
+o10_0[, `:=`(rel_bias_tmle = abs(tmle - truth) / max(abs(tmle), abs(truth), na.rm = TRUE), 
+             rel_bias_tmlep = abs(tmlep - truth) / max(abs(tmlep), abs(truth), na.rm = TRUE), 
+             rel_bias_param = abs(param - truth) / max(abs(param), abs(truth), na.rm = TRUE), 
+             rel_bias_tmlec = abs(tmlec - truth) / max(abs(tmlec), abs(truth), na.rm = TRUE))]
+
+ggplot(o10_0) +
+  geom_line(aes(x = abs(tmle_bias), y = 1 - ..y.., color = "blue"), stat = 'ecdf', 
+            alpha = 0.5) +
+  geom_line(aes(x = abs(param_bias), y = 1 - ..y.., color = "red"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  geom_line(aes(x = abs(tmlep_bias), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf', 
+            alpha = 0.5) + 
+  geom_line(aes(x = abs(tmlec_bias), y = 1 - ..y.., color = "orange"), stat = 'ecdf', 
+            alpha = 0.5) +
+  scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) + 
+  scale_y_continuous(expand = c(0.01, 0.01)) + 
+  facet_grid(cols = vars(n)) + 
+  labs(x = "|est - true| / max(|est|, |true|)",
+       y = NULL, 
+       color = NULL) + 
+  scale_color_identity(breaks = c("blue", "red", "darkgreen", "orange"),
+                       labels = c("TMLE+DA", "G-comp.", "TMLE+GLM", "CV-TMLE+DA"),
+                       guide = "legend")