Skip to content

Commit

Permalink
updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
nt-williams committed Apr 7, 2021
1 parent a24b930 commit b2b5582
Show file tree
Hide file tree
Showing 4 changed files with 290 additions and 15 deletions.
22 changes: 17 additions & 5 deletions R/results.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,38 @@
read_results <- function(context, regex) {
files <- find_files(context, regex)
out <- purrr::map(files, function(file) {
out <- purrr::map_dfr(files, function(file) {
x <- readRDS(file)
if (inherits(x, "try-error")) {
return(NULL)
}
cop <- x
if (length(x$param) > 1) {
x$tse_tmle <- sd(x$tmle, na.rm = TRUE)
x$tse_param <- sd(x$param, na.rm = TRUE)
x$tmle_mse <- mse(x$tmle, x$truth)
x$param_mse <- mse(x$param, x$truth)
x$param <- mean(x$param, na.rm = TRUE)
x$tmle <- mean(x$tmle, na.rm = TRUE)
x$coverage_tmle <- mean(purrr::map_lgl(x$tmle, ~ covered(.x, x$tse_tmle, x$truth)), na.rm = TRUE)
x$coverage_param <- mean(purrr::map_lgl(x$param, ~ covered(.x, x$tse_param, x$truth)), na.rm = TRUE)
}
data.table(truth = x$truth, pos = x$pos,
vn1 = x$vn1, vn0 = x$vn0, vn_cate = x$vn_cate,
param = x$param, tmle = x$tmle,
tmle_mse = x$tmle_mse, param_mse = x$param_mse)
param = mean(x$param, na.rm = TRUE), tmle = mean(x$tmle, na.rm = TRUE),
tmle_mse = x$tmle_mse, param_mse = x$param_mse,
tmle_coverage = x$coverage_tmle,
param_coverage = x$coverage_param,
tmle_estimates = list(x$tmle),
param_estimates = list(x$param))
})
setDT(out)
out[, `:=`(tmle_bias = tmle - truth,
param_bias = param - truth)][]
}

covered <- function(theta, se, truth) {
z <- qnorm(0.975)
(theta - z * se < truth) && (truth < theta + z * se)
}

mse <- function(x_i, x_bar) {
mean((x_i - x_bar)^2)
}
Expand Down
29 changes: 29 additions & 0 deletions scripts/extract.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Nick Williams
# Research Biostatistician
# Department of Population Health Sciences
# Weill Cornell Medicine

.libPaths("/home/niw4001/R_local")

setwd("/home/niw4001/doesNPMatter")

devtools::load_all()

args <- commandArgs(trailingOnly = TRUE)

size <- args[1]
context <- args[2]
binary_cnf <- as.numeric(args[3])
cont_cnf <- as.numeric(args[4])
randomized <- as.logical(args[5])
parametric <- as.logical(args[6])
crossfit <- as.logical(args[7])

regex <- glue::glue("{size}_[[:digit:]]+_{binary_cnf}_{cont_cnf}_{randomized}_{parametric}_{crossfit}*")
op <- glue::glue("{context}_{size}_{binary_cnf}_{cont_cnf}_{randomized}_{parametric}_{crossfit}.rds")

out <- read_results(context, regex)

saveRDS(out, paste0("/home/niw4001/doesNPMatter/data/res/", op))

quit("no")
185 changes: 175 additions & 10 deletions scripts/nbinary-results.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,18 @@ f1op4_0 <- readRDS("./data/res/binary_finite1_obserParam_4_0.rds")
f2op4_0 <- readRDS("./data/res/binary_finite2_obserParam_4_0.rds")
f3op4_0 <- readRDS("./data/res/binary_finite3_obserParam_4_0.rds")

ao4_0 <- merge(ao4_0, aop4_0[, .(truth, tmlep_bias = tmle_bias)], by = "truth")
f1o4_0 <- merge(f1o4_0, f1op4_0[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
f2o4_0 <- merge(f2o4_0, f2op4_0[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
f3o4_0 <- merge(f3o4_0, f3op4_0[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
ao4_0 <- merge(ao4_0, aop4_0[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias)], by = "truth")
f1o4_0 <- merge(f1o4_0, f1op4_0[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
f2o4_0 <- merge(f2o4_0, f2op4_0[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
f3o4_0 <- merge(f3o4_0, f3op4_0[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")

o4_0 <- rbindlist(list(`100000` = ao4_0, `2500` = f1o4_0,
`500` = f2o4_0, `250` = f3o4_0),
fill = TRUE, id = "n")
o4_0[, n := as.numeric(n)]
o4_0[, `:=`(rel_bias_tmle = abs(tmle - truth) / max(abs(tmle), abs(truth)),
rel_bias_tmlep = abs(tmlep - truth) / max(abs(tmlep), abs(truth)),
rel_bias_param = abs(param - truth) / max(abs(param), abs(truth)))]

ao0_100 <- readRDS("./data/res/binary_asymp_obser_0_100.rds")
f1o0_100 <- readRDS("./data/res/binary_finite1_obser_0_100.rds")
Expand All @@ -39,23 +42,26 @@ f1op0_100 <- readRDS("./data/res/binary_finite1_obserParam_0_100.rds")
f2op0_100 <- readRDS("./data/res/binary_finite2_obserParam_0_100.rds")
f3op0_100 <- readRDS("./data/res/binary_finite3_obserParam_0_100.rds")

ao0_100 <- merge(ao0_100, aop0_100[, .(truth, tmlep_bias = tmle_bias)], by = "truth")
f1o0_100 <- merge(f1o0_100, f1op0_100[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
f2o0_100 <- merge(f2o0_100, f2op0_100[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
f3o0_100 <- merge(f3o0_100, f3op0_100[, .(truth, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
ao0_100 <- merge(ao0_100, aop0_100[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias)], by = "truth")
f1o0_100 <- merge(f1o0_100, f1op0_100[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
f2o0_100 <- merge(f2o0_100, f2op0_100[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")
f3o0_100 <- merge(f3o0_100, f3op0_100[, .(truth, tmlep = tmle, tmlep_bias = tmle_bias, tmlep_mse = tmle_mse)], by = "truth")

o0_100 <- rbindlist(list(`100000` = ao0_100, `2500` = f1o0_100,
`500` = f2o0_100, `250` = f3o0_100),
fill = TRUE, id = "n")
o0_100[, n := as.numeric(n)]
o0_100[, `:=`(rel_bias_tmle = abs(tmle - truth) / max(abs(tmle), abs(truth)),
rel_bias_tmlep = abs(tmlep - truth) / max(abs(tmlep), abs(truth)),
rel_bias_param = abs(param - truth) / max(abs(param), abs(truth)))]

# RCT
ar4_0 <- readRDS("./data/res/binary_asymp_random_4_0.rds")
f1r4_0 <- readRDS("./data/res/binary_finite1_random_4_0.rds")
# f1r4_0 <- readRDS("./data/res/binary_finite1_random_4_0.rds")
f2r4_0 <- readRDS("./data/res/binary_finite2_random_4_0.rds")
f3r4_0 <- readRDS("./data/res/binary_finite3_random_4_0.rds")

r4_0 <- rbindlist(list(`100000` = ar4_0, `2500` = f1r4_0,
r4_0 <- rbindlist(list(`100000` = ar4_0, # `2500` = f1r4_0,
`500` = f2r4_0, `250` = f3r4_0),
fill = TRUE, id = "n")
r4_0[, n := as.numeric(n)]
Expand All @@ -70,6 +76,165 @@ r0_100 <- rbindlist(list(`100000` = ar0_100, `2500` = f1r0_100,
fill = TRUE, id = "n")
r0_100[, n := as.numeric(n)]

# plots -------------------------------------------------------------------

ggplot(rbindlist(list("Scenario 1" = o0_100, "Scenario 2" = o4_0),
fill = TRUE, id = "scen")) +
geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf',
alpha = 0.5) +
scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) +
scale_y_continuous(expand = c(0.01, 0.01)) +
facet_grid(cols = vars(n),
rows = vars(scen)) +
labs(x = "|est - true| / max(|est|, |true|)",
y = NULL,
color = NULL) +
scale_color_identity(breaks = c("blue", "red", "darkgreen"),
labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
guide = "legend")

ggsave("./plots/bias_binary_obs.png", dpi = 600, width = 14, height = 5.5)


ggplot(rbindlist(
list("Scenario 1" = o0_100, "Scenario 2" = o4_0),
fill = TRUE, id = "scen"
)[n != 1e5
][, `:=`(tmle_mse_scaled = tmle_mse * n,
tmlep_mse_scaled = tmlep_mse * n,
param_mse_scaled = param_mse * n)]) +
geom_line(aes(x = abs(tmle_mse_scaled), y = 1 - ..y.., color = "blue"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(param_mse_scaled), y = 1 - ..y.., color = "red"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(tmlep_mse_scaled), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf',
alpha = 0.5) +
scale_x_continuous(expand = c(0.025, 0), limits = c(0, 5)) +
scale_y_continuous(expand = c(0.01, 0.01)) +
facet_grid(cols = vars(n),
rows = vars(scen)) +
labs(x = expression(MSE %*% N),
y = NULL,
color = NULL) +
scale_color_identity(breaks = c("blue", "red", "darkgreen"),
labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
guide = "legend")

ggsave("./plots/mse_binary_obs.png", dpi = 600, width = 14, height = 5.5)

o0_100[, vn_cate_quart :=
cut(vn_cate,
breaks = quantile(vn_cate, probs = seq(0, 1, by = 0.25), na.rm = TRUE),
include.lowest = TRUE,
labels = c("1st Quartile", "2nd Quartile", "3rd Quartile", "4th Quartile"))]

ggplot(o0_100[!is.na(vn_cate_quart)]) +
geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf',
alpha = 0.5) +
scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) +
scale_y_continuous(expand = c(0.01, 0.01)) +
facet_grid(cols = vars(vn_cate_quart),
rows = vars(n)) +
labs(x = "|est - true| / max(|est|, |true|)",
y = NULL,
color = NULL) +
scale_color_identity(breaks = c("blue", "red", "darkgreen"),
labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
guide = "legend")

ggsave("./plots/bias_binary_vn_cate_obs.png", dpi = 600, width = 14, height = 10)

o0_100[, vn_max_trt := pmax(vn1, vn0, na.rm = TRUE)
][, vn_max_trt_quart :=
cut(vn_max_trt,
breaks = quantile(vn_max_trt, probs = seq(0, 1, by = 0.25), na.rm = TRUE),
include.lowest = TRUE,
labels = c("1st Quartile", "2nd Quartile", "3rd Quartile", "4th Quartile"))]

ggplot(o0_100[!is.na(vn_max_trt_quart)]) +
geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf',
alpha = 0.5) +
scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) +
scale_y_continuous(expand = c(0.01, 0.01)) +
facet_grid(cols = vars(vn_max_trt_quart),
rows = vars(n)) +
labs(x = "|est - true| / max(|est|, |true|)",
y = NULL,
color = NULL) +
scale_color_identity(breaks = c("blue", "red", "darkgreen"),
labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
guide = "legend")

ggsave("./plots/bias_binary_max_vn_trt_obs.png", dpi = 600, width = 14, height = 10)

o0_100[, pos_quart :=
cut(pos,
breaks = quantile(
pos, probs = seq(0, 1, by = 0.25), na.rm = TRUE
),
include.lowest = TRUE,
labels = c("1st Quartile", "2nd Quartile", "3rd Quartile", "4th Quartile"))]

o4_0[, pos_quart :=
cut(pos,
breaks = quantile(
pos, probs = seq(0, 1, by = 0.25), na.rm = TRUE
),
include.lowest = TRUE,
labels = c("1st Quartile", "2nd Quartile", "3rd Quartile", "4th Quartile"))]

ggplot(o0_100[!is.na(pos_quart)]) +
geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf',
alpha = 0.5) +
scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) +
scale_y_continuous(expand = c(0.01, 0.01)) +
facet_grid(cols = vars(pos_quart),
rows = vars(n)) +
labs(x = "|est - true| / max(|est|, |true|)",
y = NULL,
color = NULL) +
scale_color_identity(breaks = c("blue", "red", "darkgreen"),
labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
guide = "legend")

ggsave("./plots/bias_binary_pos_scen1_obs.png", dpi = 600, width = 14, height = 5.5)

ggplot(o4_0[!is.na(pos_quart)]) +
geom_line(aes(x = abs(rel_bias_tmle), y = 1 - ..y.., color = "blue"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_param), y = 1 - ..y.., color = "red"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(rel_bias_tmlep), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf',
alpha = 0.5) +
scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) +
scale_y_continuous(expand = c(0.01, 0.01)) +
facet_grid(cols = vars(pos_quart),
rows = vars(n)) +
labs(x = "|est - true| / max(|est|, |true|)",
y = NULL,
color = NULL) +
scale_color_identity(breaks = c("blue", "red", "darkgreen"),
labels = c("TMLE+DA", "G-comp.", "TMLE+GLM"),
guide = "legend")

ggsave("./plots/bias_binary_pos_scen2_obs.png", dpi = 600, width = 14, height = 5.5)

# absolute bias -----------------------------------------------------------

# observational
Expand Down
69 changes: 69 additions & 0 deletions scripts/test2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Nick Williams
# Research Biostatistician
# Department of Population Health Sciences
# Weill Cornell Medicine

box::use(data.table[...], ggplot2[...])

ao10_0 <- readRDS("./data/res/binary_asymp_10_0_FALSE_FALSE_FALSE.rds")
f1o10_0 <- readRDS("./data/res/binary_finite1_10_0_FALSE_FALSE_FALSE.rds")
f2o10_0 <- readRDS("./data/res/binary_finite2_10_0_FALSE_FALSE_FALSE.rds")
f3o10_0 <- readRDS("./data/res/binary_finite3_10_0_FALSE_FALSE_FALSE.rds")

aop10_0 <- readRDS("./data/res/binary_asymp_10_0_FALSE_TRUE_FALSE.rds")
f1op10_0 <- readRDS("./data/res/binary_finite1_10_0_FALSE_TRUE_FALSE.rds")
f2op10_0 <- readRDS("./data/res/binary_finite2_10_0_FALSE_TRUE_FALSE.rds")
f3op10_0 <- readRDS("./data/res/binary_finite3_10_0_FALSE_TRUE_FALSE.rds")

aoc10_0 <- readRDS("./data/res/binary_asymp_10_0_FALSE_FALSE_TRUE.rds")
f1oc10_0 <- readRDS("./data/res/binary_finite1_10_0_FALSE_FALSE_TRUE.rds")
f2oc10_0 <- readRDS("./data/res/binary_finite2_10_0_FALSE_FALSE_TRUE.rds")
f3oc10_0 <- readRDS("./data/res/binary_finite3_10_0_FALSE_FALSE_TRUE.rds")

for (dt in list(aop10_0, f1op10_0, f2op10_0, f3op10_0)) {
setnames(dt, c("tmle", "tmle_bias", "tmle_mse", "tmle_estimates"),
c("tmlep", "tmlep_bias", "tmlep_mse", "tmlep_estimates"),
skip_absent = TRUE)
}

for (dt in list(aoc10_0, f1oc10_0, f2oc10_0, f3oc10_0)) {
setnames(dt, c("tmle", "tmle_bias", "tmle_mse", "tmle_estimates"),
c("tmlec", "tmlec_bias", "tmlec_mse", "tmlec_estimates"),
skip_absent = TRUE)
}

ao10_0 <- merge(ao10_0, aop10_0[, .(truth, tmlep, tmlep_bias)], by = "truth", all = TRUE)
f1o10_0 <- merge(f1o10_0, f1op10_0[, .(truth, tmlep, tmlep_bias, tmlep_mse)], by = "truth", all = TRUE)
f1o10_0 <- merge(f1o10_0, f1oc10_0[, .(truth, tmlec, tmlec_bias, tmlec_mse)], by = "truth", all = TRUE)
f2o10_0 <- merge(f2o10_0, f2op10_0[, .(truth, tmlep, tmlep_bias, tmlep_mse)], by = "truth", all = TRUE)
f2o10_0 <- merge(f2o10_0, f2oc10_0[, .(truth, tmlec, tmlec_bias, tmlec_mse)], by = "truth", all = TRUE)
f3o10_0 <- merge(f3o10_0, f3op10_0[, .(truth, tmlep, tmlep_bias, tmlep_mse)], by = "truth", all = TRUE)
f3o10_0 <- merge(f3o10_0, f3oc10_0[, .(truth, tmlec, tmlec_bias, tmlec_mse)], by = "truth", all = TRUE)

o10_0 <- rbindlist(list(`100000` = ao10_0, `2500` = f1o10_0, `500` = f2o10_0, `250` = f3o10_0),
fill = TRUE, id = "n")

o10_0[, n := as.numeric(n)]
o10_0[, `:=`(rel_bias_tmle = abs(tmle - truth) / max(abs(tmle), abs(truth), na.rm = TRUE),
rel_bias_tmlep = abs(tmlep - truth) / max(abs(tmlep), abs(truth), na.rm = TRUE),
rel_bias_param = abs(param - truth) / max(abs(param), abs(truth), na.rm = TRUE),
rel_bias_tmlec = abs(tmlec - truth) / max(abs(tmlec), abs(truth), na.rm = TRUE))]

ggplot(o10_0) +
geom_line(aes(x = abs(tmle_bias), y = 1 - ..y.., color = "blue"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(param_bias), y = 1 - ..y.., color = "red"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(tmlep_bias), y = 1 - ..y.., color = "darkgreen"), stat = 'ecdf',
alpha = 0.5) +
geom_line(aes(x = abs(tmlec_bias), y = 1 - ..y.., color = "orange"), stat = 'ecdf',
alpha = 0.5) +
scale_x_continuous(expand = c(0.025, 0), limits = c(0, 0.075)) +
scale_y_continuous(expand = c(0.01, 0.01)) +
facet_grid(cols = vars(n)) +
labs(x = "|est - true| / max(|est|, |true|)",
y = NULL,
color = NULL) +
scale_color_identity(breaks = c("blue", "red", "darkgreen", "orange"),
labels = c("TMLE+DA", "G-comp.", "TMLE+GLM", "CV-TMLE+DA"),
guide = "legend")

0 comments on commit b2b5582

Please sign in to comment.