In [1]:
library(RPostgreSQL)
library(tableone)
library(tidyverse)

Loading required package: DBI
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.1     ✔ dplyr   0.7.4
✔ tidyr   0.7.2     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Directory (relative to the working directory) that receives the exported
# table written at the end of this notebook.
data_dir <- file.path("..", "data")

In [3]:
# Open a connection to the "mimic" PostgreSQL database.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")

# Resolve unqualified table names against echo, public, mimiciii (in that
# order). dbSendQuery() returns a result set that stays attached to the
# connection until it is cleared, so release it right away rather than leaving
# it pending while the later queries run.
search_path_rs <- dbSendQuery(con, "set search_path=echo,public,mimiciii;")
invisible(dbClearResult(search_path_rs))

<PostgreSQLResult>

In [4]:
# Query for the analysis table. It starts from the (hadm_id, icustay_id, echo)
# triples in merged_data and left-joins:
#   - subgroup   on (hadm_id, icustay_id, echo)
#   - serum_diff on hadm_id
#   - the day1/day2/day3/down2/down3 columns of fluid on icustay_id
# Because every join is a left join, rows of merged_data without a match are
# kept and the joined columns come back as NULL (NA in R).
sql <-
"
select hadm_id, icustay_id, echo,
    norepinephrine_max, dobutamine_flag,
    vasofreeday28, ventfreeday28,
    lactate_reduction, creatinine_reduction,
    sofa, sofa_drop_2, sofa_drop_3, day1, day2, day3,
    down2, down3,
    creatinine_diff,
    lactate_diff
from (select hadm_id, icustay_id, echo from merged_data) co
left join subgroup using (hadm_id,icustay_id,echo)
left join serum_diff using (hadm_id)
left join (select icustay_id, day1, day2, day3, down2, down3 from fluid) fl using (icustay_id)
"

In [5]:
# Fetch the analysis table defined by `sql` and preview the first rows.
# Only this one query is issued: `subgroup` is assigned once, so no result is
# fetched just to be overwritten.
subgroup <- dbGetQuery(con, sql)
head(subgroup)

hadm_id,icustay_id,echo,norepinephrine_max,dobutamine_flag,vasofreeday28,ventfreeday28,lactate_reduction,creatinine_reduction,sofa,sofa_drop_2,sofa_drop_3,day1,day2,day3,down2,down3,creatinine_diff,lactate_diff
161851,200325,1,0.0,0,28.0,21.95833,1.4,0.4,1,-5,1,466.1858,1797.677,1141.1389,-1331.4914,-674.953,,
150324,200339,0,0.0,0,28.0,21.1,0.0,0.3,1,1,0,4210.8575,2719.824,987.8543,1491.033,3223.0032,-0.1,0.0
177351,200820,0,0.0,0,27.94792,28.0,1.4,0.2,5,3,3,1473.0,1391.0,850.0,82.0,623.0,0.1,
163480,201528,1,0.0,0,27.95556,12.10417,0.3,0.1,4,2,4,1159.4792,7190.407,174.4947,-6030.9274,984.9844,,
179631,201598,0,0.3919988,0,27.59028,28.0,0.0,0.1,7,4,7,-210.9838,-835.0,-2065.0,624.0162,1854.0162,0.1,
123870,201741,1,0.0,0,0.0,0.0,,0.3,4,-1,0,887.0,2180.0,1180.0,-1293.0,-293.0,,


In [6]:
# Number of rows returned by the query (one row per joined record, not
# necessarily one per patient if any left join matched more than once).
nrow(subgroup)

In [7]:
# All data is now in memory in `subgroup`; close the connection first, then
# unload the driver that created it.
dbDisconnect(con)
dbUnloadDriver(drv)

In [8]:
# Columns of `subgroup` to summarise.
# Alternative kept for reference -- take every column except the identifiers:
#   names(subgroup) %>%
#       discard(~ .x %in% c("hadm_id", "icustay_id", "echo"))
features <- c(
  "ventfreeday28",
  "vasofreeday28",
  "dobutamine_flag",
  "day1",
  "day2",
  "day3",
  "sofa_drop_2",
  "sofa_drop_3",
  "norepinephrine_max",
  "lactate_diff",
  "creatinine_diff"
)
features

In [9]:
# Table-one style summary of `features`, stratified by the `echo` column.
# `dobutamine_flag` is declared as a factor variable so it is summarised as a
# categorical variable; group comparisons for the remaining variables use
# oneway.test.
tab <- CreateTableOne(
  data = subgroup,
  vars = features,
  factorVars = "dobutamine_flag",
  strata = "echo",
  test = TRUE,
  testNormal = oneway.test
)
tab

                                Stratified by echo
                                 0                 1                 p     
  n                                 3099              3063                 
  ventfreeday28 (mean (sd))        19.09 (13.53)     18.08 (24.22)    0.043
  vasofreeday28 (mean (sd))        20.25 (12.61)     20.30 (14.83)    0.888
  dobutamine_flag = 1 (%)             23 (0.7)         126 (4.1)     <0.001
  day1 (mean (sd))               1939.14 (3181.88) 2527.35 (3891.01) <0.001
  day2 (mean (sd))                835.26 (2429.91) 1294.66 (2964.32) <0.001
  day3 (mean (sd))                256.15 (2108.02)  687.19 (2623.07) <0.001
  sofa_drop_2 (mean (sd))           1.22 (4.34)       0.89 (3.43)     0.001
  sofa_drop_3 (mean (sd))           2.29 (4.49)       1.54 (4.09)    <0.001
  norepinephrine_max (mean (sd))    0.81 (2.39)       1.76 (5.77)    <0.001
  lactate_diff (mean (sd))          0.54 (2.44)       1.24 (2.50)     0.003
  creatinine_diff (mean (sd))       0

In [10]:
# Non-missing values of one covariate for the rows of `subgroup` whose `echo`
# equals `arm`. which() drops rows where `echo` itself is NA.
arm_values <- function(covariate, arm) {
  values <- subgroup[[covariate]][which(subgroup$echo == arm)]
  values[!is.na(values)]
}

# Descriptive statistics for one arm: mean, sd, normal-approximation 95% CI of
# the mean (f = lower bound, t = upper bound) and the empirical 2.5% / 97.5%
# quantiles (fstq / thrq).
summarise_arm <- function(values) {
  centre <- mean(values)
  spread <- sd(values)
  half_width <- spread / sqrt(length(values)) * qnorm(.975)
  tails <- quantile(values, c(.025, .975), names = FALSE)
  list(
    mean = centre,
    sd = spread,
    f = centre - half_width,
    t = centre + half_width,
    fstq = tails[1],
    thrq = tails[2]
  )
}

# One row per covariate, with the statistics of the echo == 0 arm (suffix _0)
# and the echo == 1 arm (suffix _1). Each arm is subset once per covariate, and
# the result is a plain tibble (no row-wise grouping left attached).
tb <- map_dfr(features, function(covariate) {
  arm_0 <- summarise_arm(arm_values(covariate, 0))
  arm_1 <- summarise_arm(arm_values(covariate, 1))
  tibble(
    covariate = covariate,
    mean_0 = arm_0$mean,
    mean_1 = arm_1$mean,
    sd_0 = arm_0$sd,
    sd_1 = arm_1$sd,
    f_0 = arm_0$f,
    f_1 = arm_1$f,
    t_0 = arm_0$t,
    t_1 = arm_1$t,
    fstq_0 = arm_0$fstq,
    fstq_1 = arm_1$fstq,
    thrq_0 = arm_0$thrq,
    thrq_1 = arm_1$thrq
  )
})

In [11]:
# Two-sample t-test (t.test defaults) of one covariate, echo == 1 versus
# echo == 0. Returns a one-row data frame with:
#   dif     - difference in means (echo == 1 minus echo == 0)
#   ci, ci2 - distance from `dif` to the lower / upper confidence limit
#   p.value - p-value of the test
compare_arms <- function(feature) {
  welch <- t.test(
    subgroup[subgroup$echo == 1, feature],
    subgroup[subgroup$echo == 0, feature]
  )
  delta <- unname(welch$estimate[1] - welch$estimate[2])
  data.frame(
    covariate = feature,
    dif = delta,
    ci = delta - welch$conf.int[1],
    ci2 = welch$conf.int[2] - delta,
    p.value = welch$p.value,
    stringsAsFactors = FALSE
  )
}

# Stack the per-covariate rows into a single plain data frame.
per_feature <- map(features, compare_arms)
cis <- as.data.frame(data.table::rbindlist(per_feature))

In [12]:
# Final presentation table: per-arm "mean (+/- sd)", the between-arm
# difference with the half-width of its confidence interval, and the p-value
# to four decimals. Only the columns that end up in the table are formatted.
tab <- inner_join(tb, cis, by = "covariate") %>%
  mutate(
    non_echo = sprintf("%.2f (+/- %.2f)", mean_0, sd_0),
    echo = sprintf("%.2f (+/- %.2f)", mean_1, sd_1),
    effect_size = sprintf("%.2f (+/- %.2f)", dif, ci),
    p.value = sprintf("%.4f", p.value)
  ) %>%
  select(covariate, non_echo, echo, effect_size, p.value)
tab

covariate,non_echo,echo,effect_size,p.value
ventfreeday28,19.09 (+/- 13.53),18.08 (+/- 24.22),-1.01 (+/- 0.98),0.0437
vasofreeday28,20.25 (+/- 12.61),20.30 (+/- 14.83),0.05 (+/- 0.69),0.8884
dobutamine_flag,0.01 (+/- 0.09),0.04 (+/- 0.20),0.03 (+/- 0.01),0.0
day1,1939.14 (+/- 3181.88),2527.35 (+/- 3891.01),588.21 (+/- 187.45),0.0
day2,835.26 (+/- 2429.91),1294.66 (+/- 2964.32),459.40 (+/- 149.72),0.0
day3,256.15 (+/- 2108.02),687.19 (+/- 2623.07),431.04 (+/- 146.75),0.0
sofa_drop_2,1.22 (+/- 4.34),0.89 (+/- 3.43),-0.33 (+/- 0.20),0.001
sofa_drop_3,2.29 (+/- 4.49),1.54 (+/- 4.09),-0.75 (+/- 0.21),0.0
norepinephrine_max,0.81 (+/- 2.39),1.76 (+/- 5.77),0.95 (+/- 0.22),0.0
lactate_diff,0.54 (+/- 2.44),1.24 (+/- 2.50),0.70 (+/- 0.46),0.0032


In [13]:
# Persist the formatted table as CSV under `data_dir`.
out_path <- file.path(data_dir, "subgroup_tableone.csv")
data.table::fwrite(x = tab, file = out_path)