In [1]:
library(RPostgreSQL)
library(twang)
library(Matching)
library(tidyverse)

Loading required package: DBI
Loading required package: gbm
Loading required package: survival
Loading required package: lattice
Loading required package: splines
Loading required package: parallel
Loaded gbm 2.1.3
Loading required package: survey
Loading required package: grid
Loading required package: Matrix

Attaching package: ‘survey’

The following object is masked from ‘package:graphics’:

    dotchart

Loading required package: xtable
Loading required package: latticeExtra
Loading required package: RColorBrewer
Loading required package: MASS
## 
##  Matching (Version 4.9-2, Build Date: 2015-12-25)
##  See http://sekhon.berkeley.edu/matching for additional documentation.
##  Please cite software as:
##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
##   Software with Automated Balance Optimization: The Matching package for R.''
##   Journal of Statistical Software, 42(7): 1-52. 
##

── Attaching packages ─────────────────────────────────────── tidyverse 1

In [2]:
# Relative paths to the project's data and SQL directories, both one level
# above the current working directory.
data_dir <- file.path("..", "data")
sql_dir <- file.path("..", "sql")

In [3]:
# Open a PostgreSQL connection to the "mimic" database and point the session
# at the echo, public and mimiciii schemas.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
# dbSendQuery() hands back an open result handle. Clear it immediately so the
# connection carries no pending result into the next query.
invisible(dbClearResult(
    dbSendQuery(con, "set search_path=echo,public,mimiciii;")
))

<PostgreSQLResult>

In [4]:
# Pull every row and column of merged_data into a data frame, then preview
# the first rows.
full_data <- dbGetQuery(con, "select * from merged_data")
head(full_data)

icustay_id,hadm_id,subject_id,first_careunit,intime,outtime,angus,age,icu_order,echo_time,⋯,lab_sodium_flag,lab_bun_flag,lab_bicarbonate_flag,lab_bnp_flag,lab_pco2_flag,lab_creatinine_flag,lab_potassium_flag,lab_troponin_flag,lab_po2_flag,lab_lactate_flag
228416,159895,5491,MICU,2151-06-29 01:18:27,2151-07-27 16:00:56,1,76.58225,1,2151-06-30,⋯,1,1,1,0,1,1,1,0,1,1
278148,169700,49395,MICU,2197-01-09 00:45:42,2197-01-10 06:46:29,1,82.72606,1,,⋯,1,1,1,0,0,1,1,0,0,0
274174,103722,14855,MICU,2181-09-08 23:43:13,2181-09-26 16:52:02,1,61.8439,1,2181-09-11,⋯,1,1,1,0,1,1,1,0,1,1
214954,170515,54642,MICU,2176-03-11 04:58:23,2176-03-15 18:00:48,1,63.86507,1,2176-03-11,⋯,1,1,1,0,1,1,1,1,1,1
222457,183493,96815,MICU,2105-06-04 20:57:55,2105-06-06 20:01:51,1,62.84018,1,,⋯,1,1,1,0,0,1,1,0,0,0
264413,134244,81436,MICU,2133-03-27 16:31:37,2133-04-04 16:15:28,1,62.60148,1,2133-03-31,⋯,1,1,1,0,0,1,1,0,0,1


In [5]:
# The query result is already held in full_data, so close the connection and
# unload the driver.
dbDisconnect(con)
dbUnloadDriver(drv)

In [6]:
# Convert a vector to a factor.
#
# A 0/1 indicator that shows at most one distinct non-missing value (for
# example a flag that is 0 on every row, or a column that is entirely NA)
# is given both levels, c(0, 1), so the full binary level set is always
# present. Every other vector is converted with factor(x), keeping its
# observed values as levels.
#
# x: an atomic vector.
# Returns: a factor with the same length as x.
to_factor <- function(x) {
    observed <- unique(x[!is.na(x)])
    # Force the 0/1 levels only when the single observed value really is 0 or
    # 1. Without this check a constant non-binary column (e.g. all "MICU")
    # would be mapped onto levels c(0, 1) and silently turn into all NA.
    is_constant_binary <- length(observed) <= 1 &&
        all(as.character(observed) %in% c("0", "1"))
    if (is_constant_binary) {
        return(factor(x, levels = c(0, 1)))
    }
    factor(x)
}

In [7]:
# Columns to convert to factors: every column whose name contains "flag",
# "abnormal", "icd" or "sedative", followed by a fixed list of named columns.
factor_vars <- c(
    grep("flag|abnormal|icd|sedative", names(full_data), value = TRUE),
    "gender", "first_careunit", "echo", "vent", "vaso",
    "icu_adm_weekday", "icu_adm_hour", "mort_28_day"
)
factor_vars

In [8]:
# Day names, Sunday first, built by appending "day" to each stem.
weekday <- paste0(
    c("sun", "mon", "tues", "wednes", "thurs", "fri", "satur"),
    "day"
)
weekday

In [9]:
# Recode full_data for modelling.
# - echo_int / mort_28_day_int keep integer copies of the exposure and the
#   outcome; they are taken before those columns are converted to factors.
# - icu_adm_weekday is replaced by the matching day name; the "+ 1" shifts the
#   stored value to R's 1-based indexing (presumably 0 = Sunday; confirm
#   against the SQL that produces it).
# - every column listed in factor_vars becomes a factor via to_factor().
# - reference levels: "M" for gender and "SICU" for first_careunit.
full_data <- full_data %>%
    mutate(echo_int = as.integer(echo),
           icu_adm_weekday = weekday[icu_adm_weekday + 1],
           mort_28_day_int = as.integer(mort_28_day)) %>%
    mutate_at(factor_vars, to_factor) %>%
    mutate(gender = relevel(gender, "M"),
           first_careunit = relevel(first_careunit, "SICU"))
head(pull(full_data, echo))

In [10]:
# Choose the model covariates purely by column-name pattern.
feature_names <- local({
    cols <- names(full_data)
    # Candidate covariate families.
    cols <- cols[grepl("vs|lab|icd|age|gender|weight|saps|sofa|elix_score|vent|vaso|icu_adm|careunit|sedative", cols)]
    # Drop vs/lab "flag" columns, except those for bnp, troponin, kinase, cvp.
    cols <- cols[!(grepl("vs|lab", cols) & grepl("flag", cols) &
                   !grepl("bnp|troponin|kinase|cvp", cols))]
    # For bnp, troponin, kinase and cvp keep only the "flag" columns.
    cols <- cols[!(grepl("bnp|troponin|kinase|cvp", cols) &
                   !grepl("flag", cols))]
    # Drop min/max summaries and "abnormal" indicators.
    cols <- cols[!grepl("min|max", cols)]
    cols[!grepl("abnormal", cols)]
})
feature_names
length(feature_names)

In [11]:
# Covariate-only view of full_data, with columns in feature_names order.
features <- full_data[feature_names]
head(features)

first_careunit,age,gender,weight,saps,sofa,elix_score,vent,vaso,icu_adm_weekday,⋯,lab_pco2_first,lab_creatinine_first,lab_potassium_first,lab_po2_first,lab_lactate_first,sedative,vs_cvp_flag,lab_creatinine_kinase_flag,lab_bnp_flag,lab_troponin_flag
MICU,76.58225,M,,24,3,10,1,0,tuesday,⋯,56.0,1.2,4.4,98.0,1.4,0,0,0,0,0
MICU,82.72606,F,90.3,25,8,15,0,0,monday,⋯,,1.6,5.7,,,0,0,0,0,0
MICU,61.8439,M,53.6,20,5,6,0,0,saturday,⋯,40.0,2.7,4.1,110.0,1.6,0,1,0,0,0
MICU,63.86507,M,92.9,24,8,6,1,1,monday,⋯,42.0,1.3,4.2,85.0,3.8,1,1,1,0,1
MICU,62.84018,F,75.0,10,1,6,0,0,thursday,⋯,,0.9,3.5,,,0,0,0,0,0
MICU,62.60148,F,80.0,15,4,2,0,1,friday,⋯,,0.8,3.5,,1.4,0,1,0,0,0


In [12]:
# Name of the exposure (treatment) column in full_data.
label_name <- "echo"

In [13]:
# Exposure column as a standalone vector, with a quick structure check.
label <- full_data[["echo"]]
str(label)

 Factor w/ 2 levels "0","1": 2 1 2 2 1 2 1 1 2 2 ...


In [14]:
# Formula text for the outcome model: 28-day mortality regressed on the
# exposure plus every selected covariate. The exposure name is taken from
# label_name instead of being typed out again, so this formula stays in step
# with the later formula that is built the same way.
fml <- feature_names %>%
    c(label_name, .) %>%
    paste(collapse = " + ") %>%
    sprintf("mort_28_day ~ %s", .)
fml

In [15]:
# Unweighted multivariable logistic regression of 28-day mortality using the
# formula text in fml. na.exclude handles rows with missing values.
unweighted <- glm(
    formula = as.formula(fml),
    family = binomial,
    data = full_data,
    na.action = na.exclude
)
summary(unweighted)
# Odds ratios alongside the 2.5% / 97.5% limits returned by confint().
cbind(OR = exp(coef(unweighted)), exp(confint(unweighted)))


Call:
glm(formula = as.formula(fml), family = binomial, data = full_data, 
    na.action = na.exclude)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.3687  -0.7486  -0.4555   0.7679   2.6551  

Coefficients:
                              Estimate Std. Error z value Pr(>|z|)    
(Intercept)                  0.7466943  6.1578443   0.121 0.903486    
echo1                       -0.4346195  0.1014609  -4.284 1.84e-05 ***
first_careunitMICU           0.3322350  0.1260193   2.636 0.008380 ** 
age                          0.0215216  0.0038762   5.552 2.82e-08 ***
genderF                     -0.1602286  0.0993127  -1.613 0.106663    
weight                      -0.0082306  0.0021732  -3.787 0.000152 ***
saps                         0.0573810  0.0127997   4.483 7.36e-06 ***
sofa                         0.1670255  0.0195313   8.552  < 2e-16 ***
elix_score                   0.0292912  0.0078026   3.754 0.000174 ***
vent1                        0.2739736  0.1658700   1.652

Waiting for profiling to be done...


Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),2.1100134,1.000516e-05,3.158235e+05
echo1,0.6475110,5.305195e-01,7.897433e-01
first_careunitMICU,1.3940804,1.090818e+00,1.788157e+00
age,1.0217549,1.014075e+00,1.029608e+00
genderF,0.8519490,7.010928e-01,1.034901e+00
weight,0.9918032,9.875320e-01,9.959827e-01
saps,1.0590592,1.032877e+00,1.086046e+00
sofa,1.1817844,1.137661e+00,1.228220e+00
elix_score,1.0297244,1.014111e+00,1.045621e+00
vent1,1.3151800,9.498565e-01,1.820424e+00


In [16]:
# Persist the unweighted multivariable model under data_dir.
saveRDS(unweighted, file = file.path(data_dir, "multivariate_model.rds"))

In [17]:
# Formula text for the propensity model: the integer exposure indicator
# regressed on every selected covariate.
fml <- sprintf("echo_int ~ %s", paste(feature_names, collapse = " + "))
fml

In [18]:
# Fit the propensity-score model for the exposure with twang::ps(), using the
# formula text in fml, targeting the ATE and evaluating four stopping rules.
# NOTE(review): train.fraction, cv.folds and n.cores do not appear to be
# documented ps() arguments; confirm they take effect rather than being
# absorbed by `...`.
# NOTE(review): no seed is set before this fit; confirm whether the result
# needs to be reproducible run to run.
echo_ps_ate <- ps(as.formula(fml),
                  data = full_data,
                  interaction.depth = 2,
                  shrinkage = 0.01,
                  perm.test.iters = 0,
                  estimand = "ATE",
                  verbose = FALSE,
                  stop.method = c("es.mean", "es.max", "ks.mean", "ks.max"),
                  n.trees = 10000,
                  train.fraction = 0.8,
                  cv.folds = 3,
                  n.cores = 8)

In [19]:
# Propensity scores from the es.mean stopping rule, stored on full_data as
# column ps.
pred <- echo_ps_ate[["ps"]][["es.mean.ATE"]]
full_data <- mutate(full_data, ps = pred)
# AUC of the propensity score against the observed exposure.
first(ROCR::performance(ROCR::prediction(pred, label), "auc")@y.values)

In [20]:
# Relative influence of each covariate in the propensity model, evaluated at
# the number of trees selected by the es.mean stopping rule.
# summary.gbm()'s argument is named `plotit`; the earlier `plot = FALSE` only
# worked through partial argument matching, so the name is spelled out here.
ft_importance <- summary(echo_ps_ate$gbm.obj,
                         n.trees = echo_ps_ate$desc$es.mean.ATE$n.trees,
                         plotit = FALSE)

In [21]:
# Attach the propensity weights from the es.mean stopping rule as ps_weight.
es_mean_weights <- NULL
full_data <- mutate(
    full_data,
    ps_weight = get.weights(echo_ps_ate, stop.method = "es.mean")
)
rm(es_mean_weights)

In [22]:
# Persist full_data, now including the ps and ps_weight columns.
saveRDS(full_data, file = file.path(data_dir, "full_data_ps.rds"))

In [23]:
# Persist the covariate relative-influence table from the propensity model.
saveRDS(ft_importance, file = file.path(data_dir, "feature_importance.rds"))

In [24]:
# Weighted logistic regression of 28-day mortality on the exposure alone,
# using ps_weight as the case weights, followed by odds ratios with the
# 2.5% / 97.5% limits returned by confint().
# NOTE(review): with non-integer weights, family = binomial triggers the
# "non-integer #successes" warning, and glm() standard errors do not account
# for the weighting. Consider fitting this through the survey design
# (svyglm with quasibinomial), as the later covariate-adjusted model does;
# confirm before changing, since it alters the saved model and its intervals.
primary_ipw <- glm(mort_28_day ~ echo, data = full_data,
                   weights = full_data$ps_weight, family = binomial)
summary(primary_ipw)
exp(cbind(OR = coef(primary_ipw), confint(primary_ipw)))

“non-integer #successes in a binomial glm!”


Call:
glm(formula = mort_28_day ~ echo, family = binomial, data = full_data, 
    weights = full_data$ps_weight)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.3814  -1.0784  -0.9486   1.7385   4.4970  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.88797    0.02904 -30.574  < 2e-16 ***
echo1       -0.16639    0.04167  -3.993 6.53e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 13620  on 6360  degrees of freedom
Residual deviance: 13604  on 6359  degrees of freedom
AIC: 13246

Number of Fisher Scoring iterations: 4


Waiting for profiling to be done...
“non-integer #successes in a binomial glm!”

Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),0.4114919,0.3886324,0.4354995
echo1,0.8467171,0.7802762,0.9187552


In [25]:
# Persist the weighted exposure-only model.
saveRDS(primary_ipw, file = file.path(data_dir, "ipw_model.rds"))

In [26]:
# Survey design that carries ps_weight as the sampling weight and uses
# icustay_id as the sampling-unit identifier.
ipw_svydesign <- svydesign(ids = ~ icustay_id, weights = ~ ps_weight, data = full_data)

In [27]:
# Persist the weighted survey design.
saveRDS(ipw_svydesign, file = file.path(data_dir, "ipw_svydesign.rds"))

In [28]:
# Formula text for the weighted outcome model: 28-day mortality regressed on
# the exposure (label_name) plus every selected covariate.
fml <- sprintf(
    "mort_28_day ~ %s",
    paste(c(label_name, feature_names), collapse = " + ")
)
fml

In [29]:
# Covariate-adjusted logistic regression fitted through the weighted survey
# design, so ps_weight is applied to the outcome model built from fml.
# quasibinomial is used as the family for the weighted fit.
logi <- svyglm(as.formula(fml),
               family = quasibinomial,
               design = ipw_svydesign)

In [30]:
# Coefficient table for the weighted, covariate-adjusted model.
summary(logi)


Call:
svyglm(formula = as.formula(fml), family = quasibinomial, design = ipw_svydesign)

Survey design:
svydesign(ids = ~icustay_id, weights = ~ps_weight, data = full_data)

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  2.9066769  6.6676398   0.436 0.662914    
echo1                       -0.4479433  0.1045921  -4.283 1.90e-05 ***
first_careunitMICU           0.4592459  0.1342266   3.421 0.000631 ***
age                          0.0209182  0.0041544   5.035 5.06e-07 ***
genderF                     -0.1387174  0.1066699  -1.300 0.193553    
weight                      -0.0102176  0.0023507  -4.347 1.43e-05 ***
saps                         0.0575511  0.0139018   4.140 3.57e-05 ***
sofa                         0.1825633  0.0202617   9.010  < 2e-16 ***
elix_score                   0.0272038  0.0081078   3.355 0.000803 ***
vent1                        0.3089768  0.1741606   1.774 0.076151 .  
vaso1                       -0

In [31]:
# Odds ratios for the weighted, covariate-adjusted model alongside the
# 2.5% / 97.5% limits returned by confint().
cbind(OR = exp(coef(logi)), exp(confint(logi)))

Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),18.2958976,3.862322e-05,8.666804e+06
echo1,0.6389409,5.205132e-01,7.843134e-01
first_careunitMICU,1.5828799,1.216730e+00,2.059216e+00
age,1.0211385,1.012858e+00,1.029487e+00
genderF,0.8704740,7.062497e-01,1.072885e+00
weight,0.9898344,9.852845e-01,9.944054e-01
saps,1.0592393,1.030768e+00,1.088497e+00
sofa,1.2002901,1.153558e+00,1.248915e+00
elix_score,1.0275772,1.011377e+00,1.044037e+00
vent1,1.3620307,9.681466e-01,1.916164e+00


In [32]:
# Persist the weighted, covariate-adjusted (doubly robust) model.
saveRDS(logi, file = file.path(data_dir, "doubly_robust_all_model.rds"))