In [1]:
library(RPostgreSQL)
library(tableone)
library(tidyverse)

Loading required package: DBI
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.3.4     ✔ dplyr   0.7.4
✔ tidyr   0.7.2     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Relative path of the data directory; the final CSV export builds its
# output path from this value.
data_dir <- file.path("..", "data")

In [3]:
# Load the PostgreSQL driver and open a connection to the database "mimic".
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
# Set the schema search path for this session to echo, public, mimiciii.
# NOTE(review): the result object returned by dbSendQuery() is never passed to
# dbClearResult(); for a statement that returns no rows, dbExecute() may be
# the better fit -- confirm against the DBI/RPostgreSQL versions in use.
dbSendQuery(con, "set search_path=echo,public,mimiciii;")

<PostgreSQLResult>

In [4]:
# Read every row and column of the `subgroup` table into a data frame,
# then preview the first rows.
subgroup <- dbGetQuery(con, "select * from subgroup;")
head(subgroup)

icustay_id,hadm_id,echo,norepinephrine_max,dobutamine_flag,vasofreeday28,ventfreeday28,lactate_reduction,creatinine_reduction,sofa,sofa_drop_2,sofa_drop_3
200325,161851,1,0.0,0,,21.95833,1.4,0.4,1,-5,1
200339,150324,0,0.0,0,,21.1,0.0,0.3,1,1,0
200820,177351,0,0.0,0,27.94792,,1.4,0.2,5,3,3
201528,163480,1,0.0,0,27.95556,12.10417,0.3,0.1,4,2,4
201598,179631,0,0.3919988,0,27.59028,,0.0,0.1,7,4,7
201741,123870,1,0.0,0,0.0,0.0,,0.3,4,-1,0


In [5]:
# Close the database connection, then unload the driver it was created from.
dbDisconnect(con)
dbUnloadDriver(drv)

In [6]:
# Keep only the icustay_id and echo columns and save them as a CSV file.
# NOTE(review): "ids.csv" is written to the working directory rather than
# under data_dir -- confirm that this is intended.
ids <- select(subgroup, icustay_id, echo)
data.table::fwrite(x = ids, file = "ids.csv")

In [7]:
# Columns of `subgroup` that are summarised and compared between echo groups.
# The commented-out alternative below would instead take every column except
# the two identifiers and the grouping variable:
# features <- names(subgroup) %>%
#     discard(~ .x %in% c("hadm_id", "icustay_id", "echo"))
features <- c(
  "ventfreeday28",
  "vasofreeday28",
  "dobutamine_flag",
  "sofa_drop_2",
  "sofa_drop_3",
  "norepinephrine_max",
  "lactate_reduction",
  "creatinine_reduction"
)
features

In [8]:
# Summary table of the selected features, stratified by `echo`, with
# between-group tests enabled. dobutamine_flag is declared as a factor so it
# is tabulated as a category; oneway.test is the function handed to tableone
# as its `testNormal` test.
tab <- CreateTableOne(
  data = subgroup,
  vars = features,
  strata = "echo",
  factorVars = "dobutamine_flag",
  test = TRUE,
  testNormal = oneway.test
)
tab

                                  Stratified by echo
                                   0             1             p      test
  n                                 3099          3063                    
  ventfreeday28 (mean (sd))        13.47 (14.73) 14.72 (27.21)  0.072     
  vasofreeday28 (mean (sd))        10.99 (13.83) 15.41 (17.26) <0.001     
  dobutamine_flag = 1 (%)             23 (0.7)     126 (4.1)   <0.001     
  sofa_drop_2 (mean (sd))           1.22 (4.34)   0.89 (3.43)   0.001     
  sofa_drop_3 (mean (sd))           2.29 (4.49)   1.54 (4.09)  <0.001     
  norepinephrine_max (mean (sd))    0.81 (2.39)   1.76 (5.77)  <0.001     
  lactate_reduction (mean (sd))     0.84 (1.75)   1.35 (2.26)  <0.001     
  creatinine_reduction (mean (sd))  0.37 (0.75)   0.79 (2.78)  <0.001     

In [9]:
# Per-arm descriptive statistics for every feature.
#
# For each covariate in `features`, and separately for echo == 0 and
# echo == 1, this computes the mean, the standard deviation, a
# normal-approximation 95% confidence interval for the mean (f_* = lower
# bound, t_* = upper bound) and the empirical 2.5% / 97.5% quantiles
# (fstq_*, thrq_*). Missing values are dropped per covariate and per arm.
#
# Each arm is subset once and summarised by a helper instead of repeating the
# same subset in every expression, and the result is a plain data frame (one
# row per covariate) rather than a row-wise grouped one, so later verbs work
# on whole columns. Column names and order are unchanged.

# Fail early with a clear message if a requested feature is not a column.
stopifnot(all(features %in% names(subgroup)))

# Summarise one numeric vector: mean, sd, 95% CI of the mean and the
# 2.5% / 97.5% quantiles, all computed on the non-missing values.
summarise_arm <- function(values) {
  values <- values[!is.na(values)]
  centre <- mean(values)
  spread <- sd(values)
  # Half-width of the interval: standard error times the 97.5% normal quantile.
  half_width <- spread / sqrt(length(values)) * qnorm(.975)
  list(
    mean = centre,
    sd = spread,
    f = centre - half_width,
    t = centre + half_width,
    fstq = quantile(values, .025, names = FALSE),
    thrq = quantile(values, .975, names = FALSE)
  )
}

tb <- features %>%
  map(function(covariate) {
    values <- subgroup[[covariate]]
    # which() drops rows whose echo value is missing; those rows contributed
    # only NA values before and were removed by na.rm / na.omit anyway.
    arm_0 <- summarise_arm(values[which(subgroup$echo == 0)])
    arm_1 <- summarise_arm(values[which(subgroup$echo == 1)])
    data.frame(
      covariate = covariate,
      mean_0 = arm_0$mean,
      mean_1 = arm_1$mean,
      sd_0 = arm_0$sd,
      sd_1 = arm_1$sd,
      f_0 = arm_0$f,
      f_1 = arm_1$f,
      t_0 = arm_0$t,
      t_1 = arm_1$t,
      fstq_0 = arm_0$fstq,
      fstq_1 = arm_1$fstq,
      thrq_0 = arm_0$thrq,
      thrq_1 = arm_1$thrq,
      stringsAsFactors = FALSE
    )
  }) %>%
  bind_rows()

In [10]:
# Two-sample t-test (t.test defaults) of the echo == 1 rows against the
# echo == 0 rows, run once per feature. Each feature yields one row with:
#   dif     - first estimate minus second, i.e. echo == 1 minus echo == 0
#   ci      - distance from dif down to the lower confidence limit
#   ci2     - distance from dif up to the upper confidence limit
#   p.value - p-value reported by the test
cis <- as.data.frame(
  data.table::rbindlist(
    lapply(features, function(feature) {
      in_echo <- subgroup[subgroup$echo == 1, feature]
      in_non_echo <- subgroup[subgroup$echo == 0, feature]
      welch <- t.test(in_echo, in_non_echo)
      estimate_gap <- welch$estimate[[1]] - welch$estimate[[2]]
      data.frame(
        covariate = feature,
        dif = estimate_gap,
        ci = estimate_gap - welch$conf.int[1],
        ci2 = welch$conf.int[2] - estimate_gap,
        p.value = welch$p.value,
        stringsAsFactors = FALSE
      )
    })
  )
)

In [11]:
# Combine the per-arm statistics (tb) with the between-arm comparison (cis)
# into one formatted table, one row per covariate.
# In non_echo / echo the "+/-" value is the standard deviation; in
# effect_size it is `ci`, the distance from the difference to its lower
# confidence limit. p-values are printed with four decimals.
# Note: this reuses the name `tab`, replacing the CreateTableOne result.
tab <- inner_join(tb, cis, by = "covariate") %>%
  mutate(
    non_echo = sprintf("%.2f (+/- %.2f)", mean_0, sd_0),
    echo = sprintf("%.2f (+/- %.2f)", mean_1, sd_1),
    non_echo_ci = sprintf("%.2f - %.2f", f_0, t_0),
    echo_ci = sprintf("%.2f - %.2f", f_1, t_1),
    effect_size = sprintf("%.2f (+/- %.2f)", dif, ci),
    p.value = sprintf("%.4f", p.value)
  ) %>%
  select(
    covariate,
    non_echo, non_echo_ci,
    echo, echo_ci,
    effect_size, p.value
  )
tab

covariate,non_echo,non_echo_ci,echo,echo_ci,effect_size,p.value
ventfreeday28,13.47 (+/- 14.73),12.81 - 14.14,14.72 (+/- 27.21),13.61 - 15.84,1.25 (+/- 1.30),0.0587
vasofreeday28,10.99 (+/- 13.83),10.27 - 11.71,15.41 (+/- 17.26),14.63 - 16.19,4.42 (+/- 1.06),0.0
dobutamine_flag,0.01 (+/- 0.09),0.00 - 0.01,0.04 (+/- 0.20),0.03 - 0.05,0.03 (+/- 0.01),0.0
sofa_drop_2,1.22 (+/- 4.34),1.07 - 1.38,0.89 (+/- 3.43),0.77 - 1.02,-0.33 (+/- 0.20),0.001
sofa_drop_3,2.29 (+/- 4.49),2.13 - 2.45,1.54 (+/- 4.09),1.40 - 1.69,-0.75 (+/- 0.21),0.0
norepinephrine_max,0.81 (+/- 2.39),0.73 - 0.90,1.76 (+/- 5.77),1.56 - 1.97,0.95 (+/- 0.22),0.0
lactate_reduction,0.84 (+/- 1.75),0.77 - 0.92,1.35 (+/- 2.26),1.26 - 1.44,0.51 (+/- 0.12),0.0
creatinine_reduction,0.37 (+/- 0.75),0.34 - 0.39,0.79 (+/- 2.78),0.69 - 0.89,0.42 (+/- 0.10),0.0


In [12]:
# Write the formatted comparison table to subgroup_tableone.csv inside data_dir.
data.table::fwrite(tab, file.path(data_dir, "subgroup_tableone.csv"))