In [1]:
library(RPostgreSQL)
library(survey)
library(tidyverse)

Loading required package: DBI
Loading required package: grid
Loading required package: Matrix
Loading required package: survival

Attaching package: ‘survey’

The following object is masked from ‘package:graphics’:

    dotchart

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.8.0     ✔ stringr 1.3.0
✔ readr   1.1.1     ✔ forcats 0.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Directory holding the files this notebook reads and writes, addressed
# relative to the working directory (one level up, then "data").
data_dir <- file.path("..", "data")

In [3]:
# Open a connection to the "mimic" PostgreSQL database.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
# Resolve unqualified table names against the echo, public and mimiciii
# schemas for the rest of this session. The statement returns no rows, so the
# result handle is released right away instead of being left open on the
# connection.
dbClearResult(dbSendQuery(con, "set search_path=echo,public,mimiciii;"))

<PostgreSQLResult>

In [4]:
# Query text for the secondary-outcome columns. It starts from the
# (hadm_id, icustay_id, echo) rows of merged_data and left-joins subgroup,
# serum_diff and the day1-3 / down2-3 columns of fluid, so rows of merged_data
# without a match in those tables are kept with NULLs.
# NOTE(review): serum_diff is joined on hadm_id only and fluid on icustay_id
# only - presumably both are unique per key; verify to rule out row fan-out.
sql <-
"select hadm_id, icustay_id, echo,
    norepinephrine_max, dobutamine_flag,
    vasofreeday28, ventfreeday28,
    lactate_reduction, creatinine_reduction,
    sofa, sofa_drop_2, sofa_drop_3, day1, day2, day3,
    down2, down3,
    creatinine_diff,
    lactate_diff
from (select hadm_id, icustay_id, echo from merged_data) co
left join subgroup using (hadm_id, icustay_id, echo)
left join serum_diff using (hadm_id)
left join (select icustay_id, day1, day2, day3, down2, down3 from fluid) fl using (icustay_id)"

In [5]:
# Run the query on the open connection and fetch the whole result as a
# data frame, then preview its first rows.
subgroup <- dbGetQuery(conn = con, statement = sql)
head(subgroup)

hadm_id,icustay_id,echo,norepinephrine_max,dobutamine_flag,vasofreeday28,ventfreeday28,lactate_reduction,creatinine_reduction,sofa,sofa_drop_2,sofa_drop_3,day1,day2,day3,down2,down3,creatinine_diff,lactate_diff
159895,228416,1,0.0,0,28.0,7.581632,0.0,1.3,3,1,0,-2110.0,-1930.0,-1475.0,-180.0,-635.0,,
169700,278148,0,0.0,0,0.0,0.0,,0.1,8,-16,8,,,,,,,
103722,274174,1,0.0,0,28.0,28.0,0.0,2.0,5,3,2,-300.0,-1827.0,-10.0,1527.0,-290.0,,
170515,214954,1,7.977352,0,26.04167,24.854167,1.8,0.4,8,4,5,4299.59,2899.645,-1283.263,1399.945,5582.853,0.5,
183493,222457,0,0.0,0,28.0,28.0,,0.1,1,0,1,5988.072,2102.761,,3885.311,,,
134244,264413,1,2.748114,0,27.37778,24.25,0.5,0.1,4,2,3,1507.392,-1400.0,,2907.392,,,


In [6]:
# Names of the outcome columns analysed below, in the order in which they
# are modelled and reported.
outcomes <- c(
  "ventfreeday28",
  "vasofreeday28",
  "dobutamine_flag",
  "day1",
  "day2",
  "day3",
  "sofa_drop_2",
  "sofa_drop_3",
  "norepinephrine_max",
  "lactate_diff",
  "creatinine_diff"
)
outcomes

In [7]:
# Reduce the query result to icustay_id plus the outcome columns. The column
# names are converted to symbols and spliced into select().
subgroup <- select(subgroup, !!! rlang::syms(c("icustay_id", outcomes)))

head(subgroup)

icustay_id,ventfreeday28,vasofreeday28,dobutamine_flag,day1,day2,day3,sofa_drop_2,sofa_drop_3,norepinephrine_max,lactate_diff,creatinine_diff
228416,7.581632,28.0,0,-2110.0,-1930.0,-1475.0,1,0,0.0,,
278148,0.0,0.0,0,,,,-16,8,0.0,,
274174,28.0,28.0,0,-300.0,-1827.0,-10.0,3,2,0.0,,
214954,24.854167,26.04167,0,4299.59,2899.645,-1283.263,4,5,7.977352,,0.5
222457,28.0,28.0,0,5988.072,2102.761,,0,1,0.0,,
264413,24.25,27.37778,0,1507.392,-1400.0,,2,3,2.748114,,


In [8]:
# Number of rows returned by the query.
NROW(subgroup)

In [9]:
# The database is not used past this point: close the connection, then
# release the driver.
dbDisconnect(conn = con)
dbUnloadDriver(drv = drv)

In [10]:
# Load the saved cohort table from the data directory and preview it.
full_data <- file.path(data_dir, "full_data_ps.rds") %>%
  readRDS()
head(full_data)

icustay_id,hadm_id,subject_id,first_careunit,intime,outtime,angus,age,icu_order,echo_time,⋯,lab_pco2_flag,lab_creatinine_flag,lab_potassium_flag,lab_troponin_flag,lab_po2_flag,lab_lactate_flag,echo_int,mort_28_day_int,ps,ps_weight
228416,159895,5491,MICU,2151-06-29 01:18:27,2151-07-27 16:00:56,1,76.58225,1,2151-06-30,⋯,1,1,1,0,1,1,1,0,0.5525232,1.809879
278148,169700,49395,MICU,2197-01-09 00:45:42,2197-01-10 06:46:29,1,82.72606,1,,⋯,0,1,1,0,0,0,0,1,0.2804613,1.389779
274174,103722,14855,MICU,2181-09-08 23:43:13,2181-09-26 16:52:02,1,61.8439,1,2181-09-11,⋯,1,1,1,0,1,1,1,0,0.301202,3.320031
214954,170515,54642,MICU,2176-03-11 04:58:23,2176-03-15 18:00:48,1,63.86507,1,2176-03-11,⋯,1,1,1,1,1,1,1,0,0.7982833,1.252688
222457,183493,96815,MICU,2105-06-04 20:57:55,2105-06-06 20:01:51,1,62.84018,1,,⋯,0,1,1,0,0,0,0,0,0.2244447,1.289399
264413,134244,81436,MICU,2133-03-27 16:31:37,2133-04-04 16:15:28,1,62.60148,1,2133-03-31,⋯,0,1,1,0,0,1,1,0,0.3320492,3.011602


In [11]:
# Attach the outcome columns to the cohort by ICU stay; cohort rows without a
# match keep NA in the new columns.
full_data <- left_join(full_data, subgroup, by = "icustay_id")
# Store dobutamine_flag as a factor; the modelling cells below use
# is.factor() to choose the binomial-type family for it.
full_data <- mutate(full_data, dobutamine_flag = as.factor(dobutamine_flag))
head(full_data)

icustay_id,hadm_id,subject_id,first_careunit,intime,outtime,angus,age,icu_order,echo_time,⋯,vasofreeday28,dobutamine_flag,day1,day2,day3,sofa_drop_2,sofa_drop_3,norepinephrine_max,lactate_diff,creatinine_diff
228416,159895,5491,MICU,2151-06-29 01:18:27,2151-07-27 16:00:56,1,76.58225,1,2151-06-30,⋯,28.0,0,-2110.0,-1930.0,-1475.0,1,0,0.0,,
278148,169700,49395,MICU,2197-01-09 00:45:42,2197-01-10 06:46:29,1,82.72606,1,,⋯,0.0,0,,,,-16,8,0.0,,
274174,103722,14855,MICU,2181-09-08 23:43:13,2181-09-26 16:52:02,1,61.8439,1,2181-09-11,⋯,28.0,0,-300.0,-1827.0,-10.0,3,2,0.0,,
214954,170515,54642,MICU,2176-03-11 04:58:23,2176-03-15 18:00:48,1,63.86507,1,2176-03-11,⋯,26.04167,0,4299.59,2899.645,-1283.263,4,5,7.977352,,0.5
222457,183493,96815,MICU,2105-06-04 20:57:55,2105-06-06 20:01:51,1,62.84018,1,,⋯,28.0,0,5988.072,2102.761,,0,1,0.0,,
264413,134244,81436,MICU,2133-03-27 16:31:37,2133-04-04 16:15:28,1,62.60148,1,2133-03-31,⋯,27.37778,0,1507.392,-1400.0,,2,3,2.748114,,


In [12]:
# Fraction of missing values per outcome column, reshaped to long form with
# one row per outcome (columns: outcome, na).
# The outcome names are spliced in as symbols, as in the earlier select() on
# `subgroup`, so they always refer to the columns named in `outcomes` and can
# never be mistaken for a data column that happens to be called "outcomes".
missing <- full_data %>%
  select(!!! rlang::syms(outcomes)) %>%
  summarise_all(funs(sum(is.na(.)) / n())) %>%
  gather("outcome", "na")

missing

outcome,na
ventfreeday28,0.0
vasofreeday28,0.0
dobutamine_flag,0.0
day1,0.1067442
day2,0.1888068
day3,0.3444427
sofa_drop_2,0.0
sofa_drop_3,0.0
norepinephrine_max,0.0
lactate_diff,0.8739192


In [13]:
# Covariate names used on the right-hand side of the outcome models: the
# `feature` column of features.csv.
features <- file.path(data_dir, "features.csv") %>%
  data.table::fread(data.table = FALSE) %>%
  .[["feature"]]

features
length(features)

In [14]:
# Read the table of per-covariate p-values and keep the names (`cov`) of the
# covariates whose p-value is below 0.05.
# NOTE(review): presumably these are balance tests on the weighted cohort, so
# pval < 0.05 marks covariates still unbalanced after weighting - confirm.
wtd_pval <- file.path(data_dir, "wtd_pval.csv") %>%
  data.table::fread(data.table = FALSE)
features_unbalanced <- pull(filter(wtd_pval, pval < 0.05), cov)

features_unbalanced

In [15]:
# Survey design over the cohort: one sampling unit per icustay_id, weighted
# by the ps_weight column.
ipw_svydesign <- svydesign(
  data = full_data,
  ids = ~ icustay_id,
  weights = ~ ps_weight
)

In [16]:
# Weighted regression of every outcome on echo plus all covariates in
# `features`; collects the p-value of the echo term, one row per outcome.
all_cov <- data.frame(
  outcome = outcomes,
  pval = map_dbl(outcomes, function(outcome) {
    # Model formula as text: "<outcome> ~ echo + <feature 1> + ...".
    rhs <- paste(c("echo", features), collapse = " + ")
    fml <- sprintf("%s ~ %s", outcome, rhs)

    # Factor outcomes get the quasibinomial family, all others `quasi`.
    if (is.factor(full_data[[outcome]])) {
      mod <- svyglm(as.formula(fml), family = quasibinomial, design = ipw_svydesign)
    } else {
      mod <- svyglm(as.formula(fml), family = quasi, design = ipw_svydesign)
    }

    # Row "echo1" of the coefficient table is the echo term; column 4 holds
    # its p-value.
    coefficients(summary(mod))["echo1", 4]
  }),
  stringsAsFactors = FALSE
)

all_cov

outcome,pval
ventfreeday28,0.1464278
vasofreeday28,0.0001394802
dobutamine_flag,0.001012378
day1,0.2712638
day2,0.01562755
day3,7.977669e-05
sofa_drop_2,0.002632704
sofa_drop_3,5.176941e-09
norepinephrine_max,2.367474e-05
lactate_diff,0.0006271623


In [17]:
# Weighted regression of every outcome on echo plus only the covariates in
# `features_unbalanced`; collects the p-value of the echo term, one row per
# outcome.
unb_cov <- data.frame(
  outcome = outcomes,
  pval = map_dbl(outcomes, function(outcome) {
    # Model formula as text: "<outcome> ~ echo + <unbalanced feature 1> + ...".
    rhs <- paste(c("echo", features_unbalanced), collapse = " + ")
    fml <- sprintf("%s ~ %s", outcome, rhs)

    # Factor outcomes get the quasibinomial family, all others `quasi`.
    if (is.factor(full_data[[outcome]])) {
      mod <- svyglm(as.formula(fml), family = quasibinomial, design = ipw_svydesign)
    } else {
      mod <- svyglm(as.formula(fml), family = quasi, design = ipw_svydesign)
    }

    # Row "echo1" of the coefficient table is the echo term; column 4 holds
    # its p-value.
    coefficients(summary(mod))["echo1", 4]
  }),
  stringsAsFactors = FALSE
)

unb_cov

outcome,pval
ventfreeday28,0.4802945
vasofreeday28,0.004531889
dobutamine_flag,1.846086e-05
day1,0.1057933
day2,0.0001423886
day3,5.007807e-07
sofa_drop_2,4.953871e-06
sofa_drop_3,6.1838519999999995e-24
norepinephrine_max,2.852326e-08
lactate_diff,0.04344303


In [18]:
# Unweighted regression of every outcome on echo plus all covariates in
# `features`, fitted on full_data directly; collects the p-value of the echo
# term, one row per outcome.
multivariate <- data.frame(
  outcome = outcomes,
  pval = map_dbl(outcomes, function(outcome) {
    # Model formula as text: "<outcome> ~ echo + <feature 1> + ...".
    rhs <- paste(c("echo", features), collapse = " + ")
    fml <- sprintf("%s ~ %s", outcome, rhs)

    # Factor outcomes are fitted with the binomial family, all others with
    # the gaussian family.
    if (is.factor(full_data[[outcome]])) {
      mod <- glm(as.formula(fml), family = binomial, data = full_data)
    } else {
      mod <- glm(as.formula(fml), family = gaussian, data = full_data)
    }

    # Row "echo1" of the coefficient table is the echo term; column 4 holds
    # its p-value.
    coefficients(summary(mod))["echo1", 4]
  }),
  stringsAsFactors = FALSE
)

multivariate

outcome,pval
ventfreeday28,0.09651383
vasofreeday28,5.794427e-05
dobutamine_flag,0.001374514
day1,0.3067124
day2,0.007482473
day3,4.250007e-06
sofa_drop_2,4.659638e-05
sofa_drop_3,6.415538e-12
norepinephrine_max,0.00338541
lactate_diff,0.006727393


In [19]:
# One row per outcome: the echo p-value from each of the three model sets
# (dbwa, dbwu, multi) next to the outcome's share of missing values (na).
# The four tables are full-joined on `outcome`, left to right.
tb_raw <- list(
  rename(all_cov, dbwa = pval),
  rename(unb_cov, dbwu = pval),
  rename(multivariate, multi = pval),
  missing
) %>%
  reduce(full_join, by = "outcome")

tb_raw

outcome,dbwa,dbwu,multi,na
ventfreeday28,0.1464278,0.4802945,0.09651383,0.0
vasofreeday28,0.0001394802,0.004531889,5.794427e-05,0.0
dobutamine_flag,0.001012378,1.846086e-05,0.001374514,0.0
day1,0.2712638,0.1057933,0.3067124,0.1067442
day2,0.01562755,0.0001423886,0.007482473,0.1888068
day3,7.977669e-05,5.007807e-07,4.250007e-06,0.3444427
sofa_drop_2,0.002632704,4.953871e-06,4.659638e-05,0.0
sofa_drop_3,5.176941e-09,6.1838519999999995e-24,6.415538e-12,0.0
norepinephrine_max,2.367474e-05,2.852326e-08,0.00338541,0.0
lactate_diff,0.0006271623,0.04344303,0.006727393,0.8739192


In [20]:
# Presentation version of tb_raw: p-values rendered as text, outcome names
# replaced by display labels, missingness expressed in percent, and columns
# given their final headers.
tb_fmt <- local({
  # Display label per outcome name; names not listed here are kept unchanged.
  outcome_labels <- c(
    ventfreeday28 = "Ventilation free days in 28 days",
    vasofreeday28 = "Vasopressor free days in 28 days",
    dobutamine_flag = "Dobutamine Use",
    day1 = "IV Fluid Day 1 (mL)",
    day2 = "IV Fluid Day 2 (mL)",
    day3 = "IV Fluid Day 3 (mL)",
    sofa_drop_2 = "SOFA Reduction Day 2",
    sofa_drop_3 = "SOFA Reduction Day 3",
    norepinephrine_max = "Norepinephrine (maximum dosage mg/min)",
    lactate_diff = "Serum Lactate Reduction",
    creatinine_diff = "Serum Creatinine Reduction"
  )

  # Text form of a p-value vector: "<0.001" below that threshold, otherwise
  # the value rounded to three decimals. Missing values stay missing.
  format_pval <- function(p) {
    txt <- as.character(round(p, 3))
    txt[which(p < 0.001)] <- "<0.001"
    txt
  }

  tb_raw %>%
    mutate_at(vars(dbwa, dbwu, multi), format_pval) %>%
    rename(cov = outcome) %>%
    mutate(
      cov = coalesce(unname(outcome_labels[cov]), cov),
      na = round(na * 100, 3)
    ) %>%
    # Headers are assigned by position: cov, dbwa, dbwu, multi, na.
    setNames(c("cov", "p value (Doubly Robust All Covariates)",
               "p value (Doubly Robust Unbalanced Covariates)",
               "p value (Multivariate)", "Missing data (%)"))
})

tb_fmt

cov,p value (Doubly Robust All Covariates),p value (Doubly Robust Unbalanced Covariates),p value (Multivariate),Missing data (%)
Ventilation free days in 28 days,0.146,0.48,0.097,0.0
Vasopressor free days in 28 days,<0.001,0.005,<0.001,0.0
Dobutamine Use,0.001,<0.001,0.001,0.0
IV Fluid Day 1 (mL),0.271,0.106,0.307,10.674
IV Fluid Day 2 (mL),0.016,<0.001,0.007,18.881
IV Fluid Day 3 (mL),<0.001,<0.001,<0.001,34.444
SOFA Reduction Day 2,0.003,<0.001,<0.001,0.0
SOFA Reduction Day 3,<0.001,<0.001,<0.001,0.0
Norepinephrine (maximum dosage mg/min),<0.001,<0.001,0.003,0.0
Serum Lactate Reduction,<0.001,0.043,0.007,87.392


In [21]:
# Read subgroup.csv from the data directory as a plain data frame and
# display it.
subgroup_tb1 <- file.path(data_dir, "subgroup.csv") %>%
  data.table::fread(data.table = FALSE)
subgroup_tb1

cov,Non-Echo,Echo,p value,SMD,Non-Echo (weighted cohort),Echo (weighted cohort),p value (IPW),SMD (weighted cohort),Non-Echo (PS matching),Echo (PS matching),SMD (PS matching),p value (PS matching),Missing data
n,3099,3262,,,5739.48,5842.36,,,1626,1626,,,
Ventilation free days in 28 days,19.09 (13.53),18.02 (25.13),0.033,0.053,14.50 (16.25),19.67 (52.07),0.903,0.003,18.10 (14.70),19.60 (32.64),0.059,0.089,0%
Vasopressor free days in 28 days,20.25 (12.61),20.13 (14.91),0.741,0.008,18.24 (13.80),19.00 (16.66),0.047,0.054,19.31 (12.73),20.81 (16.95),0.1,0.004,0%
Dobutamine Use,0.7%,4.2%,<0.001,0.224,1.1%,3.4%,<0.001,0.155,1.2%,2.1%,0.067,0.077,0%
IV Fluid Day 1 (mL),1939.14 (3181.88),2456.26 (3853.10),<0.001,0.146,3370.89 (3737.35),3199.19 (4899.17),0.001,0.096,2112.35 (3372.21),2432.39 (3768.86),0.089,0.047,10.7%
IV Fluid Day 2 (mL),835.26 (2429.91),1258.22 (2933.92),<0.001,0.157,2285.64 (2698.15),1514.82 (4086.83),<0.001,0.132,900.41 (2557.54),1275.30 (2872.91),0.138,0.002,18.9%
IV Fluid Day 3 (mL),256.15 (2108.02),686.56 (2614.26),<0.001,0.181,1113.52 (2598.95),609.35 (2817.57),<0.001,0.179,253.25 (2147.55),771.78 (2683.65),0.213,<0.001,34.4%
SOFA Reduction Day 2,1.22 (4.34),0.89 (3.39),0.001,0.086,0.92 (2.84),0.98 (2.66),<0.001,0.128,1.31 (4.55),0.68 (3.26),0.159,<0.001,0%
SOFA Reduction Day 3,2.29 (4.49),1.49 (4.07),<0.001,0.186,1.18 (3.05),1.15 (3.11),<0.001,0.247,2.46 (4.76),1.15 (3.99),0.299,<0.001,0%
Norepinephrine (maximum dosage mg/min),0.81 (2.39),1.78 (5.69),<0.001,0.221,1.81 (3.29),2.43 (4.20),<0.001,0.154,1.04 (2.68),1.38 (3.13),0.117,<0.001,0%


In [22]:
# Merge the table read from subgroup.csv with the model p-values formatted
# above and arrange the columns in presentation order.
# Columns are picked by name rather than by position, so a change in the
# column count or order of either input raises an error instead of silently
# selecting the wrong columns. The unadjusted "p value" column and tb_fmt's
# "Missing data (%)" column are left out, as before.
# NOTE(review): the "Missing data" column kept here comes from subgroup.csv,
# so rows that exist only in tb_fmt have NA there - confirm this is intended.
subgroup_tb <- subgroup_tb1 %>%
  full_join(tb_fmt, by = "cov") %>%
  select(
    "cov",
    "Non-Echo",
    "Echo",
    "SMD",
    "p value (Multivariate)",
    "Non-Echo (weighted cohort)",
    "Echo (weighted cohort)",
    "SMD (weighted cohort)",
    "p value (IPW)",
    "p value (Doubly Robust All Covariates)",
    "p value (Doubly Robust Unbalanced Covariates)",
    "Non-Echo (PS matching)",
    "Echo (PS matching)",
    "SMD (PS matching)",
    "p value (PS matching)",
    "Missing data"
  )

In [23]:
# Display the combined table before it is written to disk.
subgroup_tb

cov,Non-Echo,Echo,SMD,p value (Multivariate),Non-Echo (weighted cohort),Echo (weighted cohort),SMD (weighted cohort),p value (IPW),p value (Doubly Robust All Covariates),p value (Doubly Robust Unbalanced Covariates),Non-Echo (PS matching),Echo (PS matching),SMD (PS matching),p value (PS matching),Missing data
n,3099,3262,,,5739.48,5842.36,,,,,1626,1626,,,
Ventilation free days in 28 days,19.09 (13.53),18.02 (25.13),0.053,0.097,14.50 (16.25),19.67 (52.07),0.003,0.903,0.146,0.48,18.10 (14.70),19.60 (32.64),0.059,0.089,0%
Vasopressor free days in 28 days,20.25 (12.61),20.13 (14.91),0.008,<0.001,18.24 (13.80),19.00 (16.66),0.054,0.047,<0.001,0.005,19.31 (12.73),20.81 (16.95),0.1,0.004,0%
Dobutamine Use,0.7%,4.2%,0.224,0.001,1.1%,3.4%,0.155,<0.001,0.001,<0.001,1.2%,2.1%,0.067,0.077,0%
IV Fluid Day 1 (mL),1939.14 (3181.88),2456.26 (3853.10),0.146,0.307,3370.89 (3737.35),3199.19 (4899.17),0.096,0.001,0.271,0.106,2112.35 (3372.21),2432.39 (3768.86),0.089,0.047,10.7%
IV Fluid Day 2 (mL),835.26 (2429.91),1258.22 (2933.92),0.157,0.007,2285.64 (2698.15),1514.82 (4086.83),0.132,<0.001,0.016,<0.001,900.41 (2557.54),1275.30 (2872.91),0.138,0.002,18.9%
IV Fluid Day 3 (mL),256.15 (2108.02),686.56 (2614.26),0.181,<0.001,1113.52 (2598.95),609.35 (2817.57),0.179,<0.001,<0.001,<0.001,253.25 (2147.55),771.78 (2683.65),0.213,<0.001,34.4%
SOFA Reduction Day 2,1.22 (4.34),0.89 (3.39),0.086,<0.001,0.92 (2.84),0.98 (2.66),0.128,<0.001,0.003,<0.001,1.31 (4.55),0.68 (3.26),0.159,<0.001,0%
SOFA Reduction Day 3,2.29 (4.49),1.49 (4.07),0.186,<0.001,1.18 (3.05),1.15 (3.11),0.247,<0.001,<0.001,<0.001,2.46 (4.76),1.15 (3.99),0.299,<0.001,0%
Norepinephrine (maximum dosage mg/min),0.81 (2.39),1.78 (5.69),0.221,0.003,1.81 (3.29),2.43 (4.20),0.154,<0.001,<0.001,<0.001,1.04 (2.68),1.38 (3.13),0.117,<0.001,0%


In [24]:
# Save the combined table as secondary.csv in the data directory.
data.table::fwrite(
  x = subgroup_tb,
  file = file.path(data_dir, "secondary.csv")
)