In [1]:
library(RPostgreSQL)
library(tableone)
library(weights)
library(Matching)
library(tidyverse)

Loading required package: DBI
Loading required package: Hmisc
Loading required package: lattice
Loading required package: survival
Loading required package: Formula
Loading required package: ggplot2

Attaching package: ‘Hmisc’

The following objects are masked from ‘package:base’:

    format.pval, units

Loading required package: gdata
gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.

gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.

Attaching package: ‘gdata’

The following object is masked from ‘package:stats’:

    nobs

The following object is masked from ‘package:utils’:

    object.size

The following object is masked from ‘package:base’:

    startsWith

Loading required package: mice
Loading required package: MASS
## 
##  Matching (Version 4.9-2, Build Date: 2015-12-25)
##  See http://sekhon.berkeley.edu/matching for additional documentation.
##  Please cite software as:
##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
#

In [2]:
# Data directory, resolved relative to the current working directory.
data_dir <- file.path("..", "data")

In [3]:
# Open a connection to the "mimic" PostgreSQL database and set the session's
# search_path so unqualified table names resolve in echo, public, mimiciii.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
# SET returns no rows; clear the result handle immediately so it is not left
# open on the connection.
dbClearResult(dbSendQuery(con, "set search_path=echo,public,mimiciii;"))

<PostgreSQLResult>

In [4]:
# Subgroup-outcome query: starts from the (hadm_id, icustay_id, echo) keys in
# merged_data and left-joins `subgroup`, `serum_diff` (on hadm_id) and the
# day1..day3 / down2 / down3 columns of `fluid` (on icustay_id).
# NOTE(review): creatinine_diff / lactate_diff presumably come from serum_diff;
# the join on hadm_id alone could repeat rows if an admission has several ICU
# stays -- confirm against the schema.
sql <-
"
select hadm_id, icustay_id, echo,
    norepinephrine_max, dobutamine_flag,
    vasofreeday28, ventfreeday28,
    lactate_reduction, creatinine_reduction,
    sofa, sofa_drop_2, sofa_drop_3, day1, day2, day3,
    down2, down3,
    creatinine_diff,
    lactate_diff
from (select hadm_id, icustay_id, echo from merged_data) co
left join subgroup using (hadm_id,icustay_id,echo)
left join serum_diff using (hadm_id)
left join (select icustay_id, day1, day2, day3, down2, down3 from fluid) fl using (icustay_id)
"

In [5]:
# Fetch the subgroup-outcome data with the query held in `sql`.
# (A previous `select * from subgroup ...` fetch was removed: its result was
# overwritten immediately, so it only cost an extra database round trip.)
subgroup <- dbGetQuery(con, sql)
head(subgroup)

hadm_id,icustay_id,echo,norepinephrine_max,dobutamine_flag,vasofreeday28,ventfreeday28,lactate_reduction,creatinine_reduction,sofa,sofa_drop_2,sofa_drop_3,day1,day2,day3,down2,down3,creatinine_diff,lactate_diff
159895,228416,1,0.0,0,28.0,7.581632,0.0,1.3,3,1,0,-2110.0,-1930.0,-1475.0,-180.0,-635.0,,
169700,278148,0,0.0,0,0.0,0.0,,0.1,8,-16,8,,,,,,,
103722,274174,1,0.0,0,28.0,28.0,0.0,2.0,5,3,2,-300.0,-1827.0,-10.0,1527.0,-290.0,,
170515,214954,1,7.977352,0,26.04167,24.854167,1.8,0.4,8,4,5,4299.59,2899.645,-1283.263,1399.945,5582.853,0.5,
183493,222457,0,0.0,0,28.0,28.0,,0.1,1,0,1,5988.072,2102.761,,3885.311,,,
134244,264413,1,2.748114,0,27.37778,24.25,0.5,0.1,4,2,3,1507.392,-1400.0,,2907.392,,,


In [6]:
# Row count of the query result.
nrow(subgroup)

In [7]:
# All database reads are done: close the connection first, then unload the
# driver that created it.
dbDisconnect(con)
dbUnloadDriver(drv)

In [8]:
# Load the saved cohort data frame from data_dir; its preview shows it carries
# the propensity score (`ps`) and weight (`ps_weight`) columns used below.
full_data <- file.path(data_dir, "full_data_ps.rds") %>%
    readRDS()
full_data %>% head()

icustay_id,hadm_id,subject_id,first_careunit,intime,outtime,angus,age,icu_order,echo_time,⋯,lab_ph_flag,lab_ph_first,lab_ph_min,lab_ph_max,lab_ph_abnormal,sedative,echo_int,mort_28_day_int,ps,ps_weight
201220,125078,66690,0,2106-04-27 09:47:50,2106-05-01 19:25:46,1,62.67646,1,,⋯,1,7.45,7.42,7.45,1.0,1,0,1,0.4544669,1.833069
215842,151232,11663,0,2188-02-14 09:48:15,2188-02-16 03:02:48,1,86.76186,1,,⋯,0,,,,,0,0,0,0.1568838,1.186076
234312,164444,86645,1,2165-06-22 09:47:16,2165-07-07 21:55:20,1,56.08904,1,,⋯,1,7.49,7.49,7.49,1.0,0,0,0,0.3002571,1.429096
289157,146726,10304,0,2156-06-23 22:26:00,2156-06-30 17:26:00,1,45.91093,1,2156-06-25 08:00:00,⋯,1,7.44,7.44,7.44,1.0,0,1,0,0.6991412,1.430326
211964,160170,94534,0,2160-03-05 22:23:19,2160-03-07 06:48:41,1,59.38693,1,,⋯,0,,,,,0,0,0,0.3432285,1.5226
230173,112553,31544,0,2140-01-31 04:39:25,2140-02-03 02:41:39,1,91.5,1,,⋯,0,,,,,0,0,1,0.2464983,1.327137


In [9]:
# Attach each ICU stay's propensity score and weight to the outcome rows, then
# treat the dobutamine indicator as a categorical variable.
subgroup <- left_join(
    subgroup,
    select(full_data, icustay_id, ps_weight, ps),
    by = "icustay_id"
)
subgroup <- mutate(subgroup, dobutamine_flag = as.factor(dobutamine_flag))
head(subgroup)

hadm_id,icustay_id,echo,norepinephrine_max,dobutamine_flag,vasofreeday28,ventfreeday28,lactate_reduction,creatinine_reduction,sofa,⋯,sofa_drop_3,day1,day2,day3,down2,down3,creatinine_diff,lactate_diff,ps_weight,ps
159895,228416,1,0.0,0,28.0,7.581632,0.0,1.3,3,⋯,0,-2110.0,-1930.0,-1475.0,-180.0,-635.0,,,1.800733,0.5553295
169700,278148,0,0.0,0,0.0,0.0,,0.1,8,⋯,8,,,,,,,,1.443761,0.3073646
103722,274174,1,0.0,0,28.0,28.0,0.0,2.0,5,⋯,2,-300.0,-1827.0,-10.0,1527.0,-290.0,,,3.262187,0.3065429
170515,214954,1,7.977352,0,26.04167,24.854167,1.8,0.4,8,⋯,5,4299.59,2899.645,-1283.263,1399.945,5582.853,0.5,,1.24088,0.8058798
183493,222457,0,0.0,0,28.0,28.0,,0.1,1,⋯,1,5988.072,2102.761,,3885.311,,,,1.270479,0.2128954
134244,264413,1,2.748114,0,27.37778,24.25,0.5,0.1,4,⋯,3,1507.392,-1400.0,,2907.392,,,,3.148667,0.3175947


In [10]:
# Columns summarised in every comparison table below.
# Alternative kept for reference (every column except the identifiers):
# features <- names(subgroup) %>%
#     discard(~ .x %in% c("hadm_id", "icustay_id", "echo"))
features <- c(
    "ventfreeday28",
    "vasofreeday28",
    "dobutamine_flag",
    "day1",
    "day2",
    "day3",
    "sofa_drop_2",
    "sofa_drop_3",
    "norepinephrine_max",
    "lactate_diff",
    "creatinine_diff"
)
features

In [11]:
# Subset of `features` to be reported as categorical (counts / percentages).
factor_vars <- c("dobutamine_flag")
factor_vars

In [12]:
# Unweighted comparison of `features` between echo strata; continuous
# variables are tested with oneway.test. The categorical variables come from
# `factor_vars` so this table stays in sync with the later TableOne calls.
tab <- CreateTableOne(vars = features, strata = "echo",
                      factorVars = factor_vars,
                      data = subgroup, test = TRUE, testNormal = oneway.test)
tab

                                Stratified by echo
                                 0                 1                 p     
  n                                 3099              3063                 
  ventfreeday28 (mean (sd))        19.09 (13.53)     18.08 (24.22)    0.043
  vasofreeday28 (mean (sd))        20.25 (12.61)     20.30 (14.83)    0.888
  dobutamine_flag = 1 (%)             23 (0.7)         126 (4.1)     <0.001
  day1 (mean (sd))               1939.14 (3181.88) 2527.35 (3891.01) <0.001
  day2 (mean (sd))                835.26 (2429.91) 1294.66 (2964.32) <0.001
  day3 (mean (sd))                256.15 (2108.02)  687.19 (2623.07) <0.001
  sofa_drop_2 (mean (sd))           1.22 (4.34)       0.89 (3.43)     0.001
  sofa_drop_3 (mean (sd))           2.29 (4.49)       1.54 (4.09)    <0.001
  norepinephrine_max (mean (sd))    0.81 (2.39)       1.76 (5.77)    <0.001
  lactate_diff (mean (sd))          0.54 (2.44)       1.24 (2.50)     0.003
  creatinine_diff (mean (sd))       0

In [13]:
# Build the unweighted vs. ps_weight-weighted comparison table, one row per
# covariate in `features`, comparing the two `echo` groups.
#   * numeric covariate: mean (sd) per group and t.test p-value, plus the
#     weighted mean / sd and the wtd.t.test p-value;
#   * two-level factor (levels must parse as integers, e.g. "0"/"1"): mean of
#     the indicator * 100 per group and chisq.test p-value, plus the weighted
#     percentage and the wtd.chi.sq p-value.
# Any other covariate (other types, or factors without exactly two levels) is
# skipped with a warning; previously a non-numeric, non-factor column failed
# with "object 'res' not found".
tbone <- features %>%
map(function(var) {
    x <- subgroup[[var]]
    sym_var <- rlang::sym(var)
    if (is.numeric(x)) {
        res <- subgroup %>%
            group_by(echo) %>%
            summarise(main = mean(!!sym_var, na.rm = TRUE),
                      sub = sd(!!sym_var, na.rm = TRUE),
                      wtd_main = wtd.mean(!!sym_var, ps_weight),
                      wtd_sub = sqrt(wtd.var(!!sym_var, ps_weight))) %>%
            cbind(
                # Unweighted test: split() orders the groups by echo value and
                # exactly two groups are assumed (named x and y for t.test).
                subgroup %>%
                { split(.[[var]], .$echo) } %>%
                setNames(c("x", "y")) %>%
                do.call(t.test, .) %>%
                .$p.value %>%
                data.frame(p_val = .)
            ) %>%
            cbind(
                # Weighted test: each group's values with its own ps_weight.
                subgroup %>%
                { split(select(., !!sym_var, ps_weight), .$echo) } %>%
                setNames(c("fst", "sec")) %>%
                {
                    list(x = { pull(pluck(., "fst"), !!sym_var) },
                         y = { pull(pluck(., "sec"), !!sym_var) },
                         weight = { pull(pluck(., "fst"), ps_weight) },
                         weighty = { pull(pluck(., "sec"), ps_weight) },
                         samedata = FALSE)
                } %>%
                do.call(wtd.t.test, .) %>%
                .$coefficients %>%
                .["p.value"] %>%
                unname %>%
                data.frame(wtd_p_val = .)
            )
    } else if (is.factor(x) && length(levels(x)) == 2) {
        res <- subgroup %>%
            # Factor -> integer indicator so its mean is a proportion.
            mutate(!!sym_var := as.integer(as.character(!!sym_var))) %>%
            group_by(echo) %>%
            summarise(main = mean(!!sym_var, na.rm = TRUE) * 100,
                      sub = NA,
                      wtd_main = wtd.mean(!!sym_var, ps_weight) * 100,
                      wtd_sub = NA) %>%
            cbind(
                subgroup %>%
                {
                    list(x = pull(., !!sym_var),
                         y = pull(., echo))
                } %>%
                do.call(chisq.test, .) %>%
                pluck("p.value") %>%
                unname %>%
                data.frame(p_val = .)
            ) %>%
            cbind(
                subgroup %>%
                {
                    list(var1 = pull(., !!sym_var),
                         var2 = pull(., echo),
                         weight = pull(., ps_weight))
                } %>%
                do.call(wtd.chi.sq, .) %>%
                pluck("p.value") %>%
                unname %>%
                data.frame(wtd_p_val = .)
            )
    } else {
        warning("Skipping covariate '", var,
                "': not numeric and not a two-level factor", call. = FALSE)
        return(NULL)
    }

    # Two rows (one per echo group) -> one wide row with columns such as
    # main_0, main_1, sub_0, ...; the p-value columns are identical in both
    # rows and are carried through unchanged.
    res %>%
    gather("key", "value", main, sub, wtd_main, wtd_sub) %>%
    unite("key", key, echo) %>%
    spread(key, value) %>%
    mutate(var = var)
}) %>%
discard(is.null) %>%
data.table::rbindlist() %>%
as.data.frame(stringsAsFactors = FALSE) %>%
# A missing spread column (sub_* is NA) marks a categorical row, shown as a
# percentage; otherwise the cell is "mean (sd)".
mutate(raw_0 = ifelse(is.na(sub_0), sprintf("%.2f%%", main_0),
                      sprintf("%.2f (%.2f)", main_0, sub_0)),
       raw_1 = ifelse(is.na(sub_1), sprintf("%.2f%%", main_1),
                      sprintf("%.2f (%.2f)", main_1, sub_1)),
       wtd_raw_0 = ifelse(is.na(wtd_sub_0), sprintf("%.2f%%", wtd_main_0),
                          sprintf("%.2f (%.2f)", wtd_main_0, wtd_sub_0)),
       wtd_raw_1 = ifelse(is.na(wtd_sub_1), sprintf("%.2f%%", wtd_main_1),
                          sprintf("%.2f (%.2f)", wtd_main_1, wtd_sub_1)),
       p_val = ifelse(p_val < 0.001, "<0.001", as.character(round(p_val, 3))),
       wtd_p_val = ifelse(wtd_p_val < 0.001, "<0.001", as.character(round(wtd_p_val, 3)))) %>%
select(var, raw_1, raw_0, p_val, wtd_raw_1, wtd_raw_0, wtd_p_val) %>%
setNames(c("Covariate", "Echo", "Non-Echo", "p value",
           "Echo (weighted cohort)", "Non-Echo (weighted cohort)",
           "p value (weighted cohort)"))

tbone

Covariate,Echo,Non-Echo,p value,Echo (weighted cohort),Non-Echo (weighted cohort),p value (weighted cohort)
ventfreeday28,18.08 (24.22),19.09 (13.53),0.044,18.37 (23.79),18.43 (13.38),0.901
vasofreeday28,20.30 (14.83),20.25 (12.61),0.888,20.45 (14.70),19.67 (12.80),0.026
dobutamine_flag,4.11%,0.74%,<0.001,3.18%,1.10%,<0.001
day1,2527.35 (3891.01),1939.14 (3181.88),<0.001,2379.64 (3751.78),2032.59 (3267.94),<0.001
day2,1294.66 (2964.32),835.26 (2429.91),<0.001,1266.24 (2877.40),899.16 (2491.55),<0.001
day3,687.19 (2623.07),256.15 (2108.02),<0.001,714.09 (2588.89),301.67 (2138.92),<0.001
sofa_drop_2,0.89 (3.43),1.22 (4.34),<0.001,0.74 (3.36),1.25 (4.49),<0.001
sofa_drop_3,1.54 (4.09),2.29 (4.49),<0.001,1.35 (3.99),2.38 (4.59),<0.001
norepinephrine_max,1.76 (5.77),0.81 (2.39),<0.001,1.55 (5.21),0.96 (2.56),<0.001
lactate_diff,1.24 (2.50),0.54 (2.44),0.003,1.17 (2.31),0.55 (2.51),0.008


In [14]:
# Equal-variance two-sample t-test of creatinine_diff: echo == 0 (x) against
# echo == 1 (y).
t.test(
    subgroup %>% filter(echo == 0) %>% pull(creatinine_diff),
    subgroup %>% filter(echo == 1) %>% pull(creatinine_diff),
    var.equal = TRUE
)


	Two Sample t-test

data:  subgroup %>% filter(echo == 0) %>% pull(creatinine_diff) and subgroup %>% filter(echo == 1) %>% pull(creatinine_diff)
t = -1.099, df = 1770, p-value = 0.2719
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.1639749  0.0462049
sample estimates:
mean of x mean of y 
0.1114743 0.1703593 


In [15]:
# TableOne for the full (unmatched) cohort with unequal-variance tests for
# continuous variables; print() both shows the table and returns the
# character matrix (with SMD column) that is converted to a data frame.
tabs <- CreateTableOne(
    data = subgroup,
    vars = features,
    factorVars = factor_vars,
    strata = "echo",
    argsNormal = list(var.equal = FALSE)
)
tabs_df <- as.data.frame(print(tabs, smd = TRUE), stringsAsFactors = FALSE)

                                Stratified by echo
                                 0                 1                 p     
  n                                 3099              3063                 
  ventfreeday28 (mean (sd))        19.09 (13.53)     18.08 (24.22)    0.044
  vasofreeday28 (mean (sd))        20.25 (12.61)     20.30 (14.83)    0.888
  dobutamine_flag = 1 (%)             23 (0.7)         126 (4.1)     <0.001
  day1 (mean (sd))               1939.14 (3181.88) 2527.35 (3891.01) <0.001
  day2 (mean (sd))                835.26 (2429.91) 1294.66 (2964.32) <0.001
  day3 (mean (sd))                256.15 (2108.02)  687.19 (2623.07) <0.001
  sofa_drop_2 (mean (sd))           1.22 (4.34)       0.89 (3.43)     0.001
  sofa_drop_3 (mean (sd))           2.29 (4.49)       1.54 (4.09)    <0.001
  norepinephrine_max (mean (sd))    0.81 (2.39)       1.76 (5.77)    <0.001
  lactate_diff (mean (sd))          0.54 (2.44)       1.24 (2.50)     0.003
  creatinine_diff (mean (sd))       0

In [16]:
# Tidy the printed TableOne matrix for the unmatched cohort into a display
# table: covariate name, per-group cell, p value and SMD.
# NOTE(review): the positional renaming assumes the printed columns are
# strata 0, strata 1, p, test, SMD in that order -- confirm if print() options
# change.
tabs_df_fmt <- tabs_df %>%
setNames(c("control", "treated", "pval", "test", "smd")) %>%
mutate(cov = rownames(.)) %>%
# Row names look like "day1 (mean (sd))"; keep the part before the first space.
mutate(cov_name = str_split(cov, " ") %>% map_chr(first)) %>%
mutate(cate = cov_name %in% factor_vars) %>%
# Categorical rows are printed as "count (pct)": keep only the percentage.
mutate(ctrl = ifelse(cate,
                     str_extract(control, "\\((.*?)\\)") %>% str_replace_all("\\(|\\)", "") %>% sprintf("%s%%", .),
                     control)) %>%
mutate(trtd = ifelse(cate,
                     str_extract(treated, "\\((.*?)\\)") %>% str_replace_all("\\(|\\)", "") %>% sprintf("%s%%", .),
                     treated)) %>%
# mutate(cov_fmt = str_replace_all(cov_name, "_", " ")) %>%
# mutate(cov_fmt = str_replace_all(cov_fmt, "lab|vs|flag|icd|first", "")) %>%
# Strip leading/trailing padding from every column. The function is passed
# directly (extra arguments forwarded by mutate_all) instead of through the
# deprecated funs() wrapper.
mutate_all(str_replace_all, pattern = "\\s+$|^\\s+", replacement = "") %>%
# mutate(cov_fmt = tools::toTitleCase(cov_fmt)) %>%
select(cov_name, ctrl, trtd, pval, smd) %>%
setNames(c("Covariate", "Non-Echo", "Echo", "p value", "SMD"))

In [17]:
# Display the formatted table for the unmatched cohort.
tabs_df_fmt

Covariate,Non-Echo,Echo,p value,SMD
n,3099,3063,,
ventfreeday28,19.09 (13.53),18.08 (24.22),0.044,0.051
vasofreeday28,20.25 (12.61),20.30 (14.83),0.888,0.004
dobutamine_flag,0.7%,4.1%,<0.001,0.22
day1,1939.14 (3181.88),2527.35 (3891.01),<0.001,0.165
day2,835.26 (2429.91),1294.66 (2964.32),<0.001,0.17
day3,256.15 (2108.02),687.19 (2623.07),<0.001,0.181
sofa_drop_2,1.22 (4.34),0.89 (3.43),0.001,0.084
sofa_drop_3,2.29 (4.49),1.54 (4.09),<0.001,0.175
norepinephrine_max,0.81 (2.39),1.76 (5.77),<0.001,0.214


In [18]:
# Fix the RNG state before matching.
# NOTE(review): presumably so the propensity-score matching is reproducible --
# confirm that Match() draws random numbers with the settings used.
set.seed(4958)

In [19]:
# Propensity-score matching of echo (treated) to non-echo stays: one control
# per treated unit (M = 1), without replacement, within a caliper of 0.01 on
# the propensity score (see the Match() documentation for the caliper's
# units). No outcome is supplied (Y = NULL); only the matched indices are used.
ps_matches <- Match(
    Y = NULL,
    Tr = full_data[["echo_int"]],
    X = full_data[["ps"]],
    estimand = "ATT",
    M = 1,
    caliper = 0.01,
    exact = FALSE,
    replace = FALSE
)

In [20]:
# Cross-tabulate mort_28_day of each matched echo patient (rows) against the
# value for its matched control (columns).
# NOTE(review): the full_data preview shows a column named `mort_28_day_int`;
# if no exact `mort_28_day` column exists, `$` is relying on partial matching
# here -- confirm the intended column.
# NOTE(review): this reuses the name `tab`, replacing the TableOne object
# assigned earlier in the notebook.
tab <- table(full_data$mort_28_day[ps_matches$index.treated],
             full_data$mort_28_day[ps_matches$index.control],
             dnn = c("Echo", "Control"))
tab

    Control
Echo   0   1
   0 817 349
   1 261 123

In [21]:
# ICU stay ids of every matched observation: treated units first, then their
# matched controls.
icu_id_ps <- full_data[["icustay_id"]][c(ps_matches[["index.treated"]],
                                         ps_matches[["index.control"]])]

In [22]:
# Same TableOne as for the full cohort, restricted to the ICU stays retained
# by propensity-score matching; print() returns the matrix (with SMD) that is
# converted to a data frame.
tabs_ps <- CreateTableOne(
    data = filter(subgroup, icustay_id %in% icu_id_ps),
    vars = features,
    factorVars = factor_vars,
    strata = "echo",
    argsNormal = list(var.equal = FALSE)
)
tabs_ps_df <- as.data.frame(print(tabs_ps, smd = TRUE), stringsAsFactors = FALSE)

                                Stratified by echo
                                 0                 1                 p     
  n                                 1550              1550                 
  ventfreeday28 (mean (sd))        17.87 (12.68)     18.11 (14.72)    0.634
  vasofreeday28 (mean (sd))        19.29 (13.26)     20.62 (13.86)    0.007
  dobutamine_flag = 1 (%)             22 (1.4)          39 (2.5)      0.039
  day1 (mean (sd))               2142.91 (3314.23) 2322.70 (3624.03)  0.173
  day2 (mean (sd))                947.04 (2450.91) 1233.58 (2702.74)  0.005
  day3 (mean (sd))                256.93 (2105.38)  772.36 (2515.72) <0.001
  sofa_drop_2 (mean (sd))           1.33 (4.52)       0.68 (3.47)    <0.001
  sofa_drop_3 (mean (sd))           2.38 (4.79)       1.34 (3.89)    <0.001
  norepinephrine_max (mean (sd))    1.07 (2.73)       1.34 (3.05)     0.011
  lactate_diff (mean (sd))          0.57 (2.59)       1.11 (2.23)     0.107
  creatinine_diff (mean (sd))       0

In [23]:
# Tidy the printed TableOne matrix for the PS-matched cohort into a display
# table: covariate name, per-group cell, p value and SMD.
# NOTE(review): the positional renaming assumes the printed columns are
# strata 0, strata 1, p, test, SMD in that order -- confirm if print() options
# change.
tabs_ps_df_fmt <- tabs_ps_df %>%
setNames(c("control", "treated", "pval", "test", "smd")) %>%
mutate(cov = rownames(.)) %>%
# Row names look like "day1 (mean (sd))"; keep the part before the first space.
mutate(cov_name = str_split(cov, " ") %>% map_chr(first)) %>%
mutate(cate = cov_name %in% factor_vars) %>%
# Categorical rows are printed as "count (pct)": keep only the percentage.
mutate(ctrl = ifelse(cate,
                     str_extract(control, "\\((.*?)\\)") %>% str_replace_all("\\(|\\)", "") %>% sprintf("%s%%", .),
                     control)) %>%
mutate(trtd = ifelse(cate,
                     str_extract(treated, "\\((.*?)\\)") %>% str_replace_all("\\(|\\)", "") %>% sprintf("%s%%", .),
                     treated)) %>%
# mutate(cov_fmt = str_replace_all(cov_name, "_", " ")) %>%
# mutate(cov_fmt = str_replace_all(cov_fmt, "lab|vs|flag|icd|first", "")) %>%
# Strip leading/trailing padding from every column. The function is passed
# directly (extra arguments forwarded by mutate_all) instead of through the
# deprecated funs() wrapper.
mutate_all(str_replace_all, pattern = "\\s+$|^\\s+", replacement = "") %>%
# mutate(cov_fmt = tools::toTitleCase(cov_fmt)) %>%
select(cov_name, ctrl, trtd, pval, smd) %>%
setNames(c("Covariate", "Non-Echo (PS matching)", "Echo (PS matching)", "p value (PS matching)", "SMD (PS matching)"))

In [24]:
# Display the formatted table for the PS-matched cohort.
tabs_ps_df_fmt

Covariate,Non-Echo (PS matching),Echo (PS matching),p value (PS matching),SMD (PS matching)
n,1550,1550,,
ventfreeday28,17.87 (12.68),18.11 (14.72),0.634,0.017
vasofreeday28,19.29 (13.26),20.62 (13.86),0.007,0.098
dobutamine_flag,1.4%,2.5%,0.039,0.079
day1,2142.91 (3314.23),2322.70 (3624.03),0.173,0.052
day2,947.04 (2450.91),1233.58 (2702.74),0.005,0.111
day3,256.93 (2105.38),772.36 (2515.72),<0.001,0.222
sofa_drop_2,1.33 (4.52),0.68 (3.47),<0.001,0.163
sofa_drop_3,2.38 (4.79),1.34 (3.89),<0.001,0.238
norepinephrine_max,1.07 (2.73),1.34 (3.05),0.011,0.091


In [25]:
# Combine the three result tables on "Covariate", keeping rows that appear in
# any of them: unmatched SMD, then the raw / weighted comparison, then the
# PS-matched comparison.
tabs_subgroup_full <- list(
    select(tabs_df_fmt, Covariate, SMD),
    tbone,
    tabs_ps_df_fmt
) %>%
    reduce(full_join, by = "Covariate")

tabs_subgroup_full

Covariate,SMD,Echo,Non-Echo,p value,Echo (weighted cohort),Non-Echo (weighted cohort),p value (weighted cohort),Non-Echo (PS matching),Echo (PS matching),p value (PS matching),SMD (PS matching)
n,,,,,,,,1550,1550,,
ventfreeday28,0.051,18.08 (24.22),19.09 (13.53),0.044,18.37 (23.79),18.43 (13.38),0.901,17.87 (12.68),18.11 (14.72),0.634,0.017
vasofreeday28,0.004,20.30 (14.83),20.25 (12.61),0.888,20.45 (14.70),19.67 (12.80),0.026,19.29 (13.26),20.62 (13.86),0.007,0.098
dobutamine_flag,0.22,4.11%,0.74%,<0.001,3.18%,1.10%,<0.001,1.4%,2.5%,0.039,0.079
day1,0.165,2527.35 (3891.01),1939.14 (3181.88),<0.001,2379.64 (3751.78),2032.59 (3267.94),<0.001,2142.91 (3314.23),2322.70 (3624.03),0.173,0.052
day2,0.17,1294.66 (2964.32),835.26 (2429.91),<0.001,1266.24 (2877.40),899.16 (2491.55),<0.001,947.04 (2450.91),1233.58 (2702.74),0.005,0.111
day3,0.181,687.19 (2623.07),256.15 (2108.02),<0.001,714.09 (2588.89),301.67 (2138.92),<0.001,256.93 (2105.38),772.36 (2515.72),<0.001,0.222
sofa_drop_2,0.084,0.89 (3.43),1.22 (4.34),<0.001,0.74 (3.36),1.25 (4.49),<0.001,1.33 (4.52),0.68 (3.47),<0.001,0.163
sofa_drop_3,0.175,1.54 (4.09),2.29 (4.49),<0.001,1.35 (3.99),2.38 (4.59),<0.001,2.38 (4.79),1.34 (3.89),<0.001,0.238
norepinephrine_max,0.214,1.76 (5.77),0.81 (2.39),<0.001,1.55 (5.21),0.96 (2.56),<0.001,1.07 (2.73),1.34 (3.05),0.011,0.091


In [26]:
# Write the combined table as CSV into data_dir.
data.table::fwrite(
    x = tabs_subgroup_full,
    file = file.path(data_dir, "subgroup_full.csv")
)