In [1]:
library(RPostgreSQL)
library(twang)
library(Matching)
library(tidyverse)

Loading required package: DBI
Loading required package: gbm
Loading required package: survival
Loading required package: lattice
Loading required package: splines
Loading required package: parallel
Loaded gbm 2.1.3
Loading required package: survey
Loading required package: grid
Loading required package: Matrix

Attaching package: ‘survey’

The following object is masked from ‘package:graphics’:

    dotchart

Loading required package: xtable
Loading required package: latticeExtra
Loading required package: RColorBrewer
Loading required package: MASS
## 
##  Matching (Version 4.9-2, Build Date: 2015-12-25)
##  See http://sekhon.berkeley.edu/matching for additional documentation.
##  Please cite software as:
##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
##   Software with Automated Balance Optimization: The Matching package for R.''
##   Journal of Statistical Software, 42(7): 1-52. 
##

── Attaching packages ─────────────────────────────────────── tidyverse 1

In [2]:
data_dir <- file.path("..", "data")
sql_dir <- file.path("..", "sql")

In [3]:
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
dbSendQuery(con, "set search_path=echo,public,mimiciii;")

<PostgreSQLResult>

In [4]:
full_data <- dbGetQuery(con, "select * from merged_data")
head(full_data)

icustay_id,hadm_id,subject_id,first_careunit,intime,outtime,angus,age,icu_order,echo_time,⋯,lab_po2_first,lab_po2_min,lab_po2_max,lab_po2_abnormal,lab_lactate_flag,lab_lactate_first,lab_lactate_min,lab_lactate_max,lab_lactate_abnormal,sedative
228416,159895,5491,MICU,2151-06-29 01:18:27,2151-07-27 16:00:56,1,76.58225,1,2151-06-30,⋯,98.0,98.0,98.0,0.0,1,1.4,1.4,1.4,0.0,0
278148,169700,49395,MICU,2197-01-09 00:45:42,2197-01-10 06:46:29,1,82.72606,1,,⋯,,,,,0,,,,,0
274174,103722,14855,MICU,2181-09-08 23:43:13,2181-09-26 16:52:02,1,61.8439,1,2181-09-11,⋯,110.0,110.0,110.0,1.0,1,1.6,1.6,2.6,1.0,0
214954,170515,54642,MICU,2176-03-11 04:58:23,2176-03-15 18:00:48,1,63.86507,1,2176-03-11,⋯,85.0,85.0,149.0,1.0,1,3.8,2.2,3.8,1.0,1
222457,183493,96815,MICU,2105-06-04 20:57:55,2105-06-06 20:01:51,1,62.84018,1,,⋯,,,,,0,,,,,0
264413,134244,81436,MICU,2133-03-27 16:31:37,2133-04-04 16:15:28,1,62.60148,1,2133-03-31,⋯,,,,,1,1.4,1.4,1.4,0.0,0


In [5]:
dbDisconnect(con)
dbUnloadDriver(drv)

In [6]:
to_factor <- function(x) {
    if(length(na.omit(unique(x))) <= 1) return(factor(x, levels = c(0, 1)))
    return(factor(x))
}

In [7]:
factor_vars <- full_data %>%
    names %>%
    grep("flag|abnormal|icd|sedative", ., value = TRUE) %>%
    c("gender", "first_careunit", "echo", "vent", "vaso",
      "icu_adm_weekday", "icu_adm_hour", "mort_28_day")
factor_vars

In [8]:
weekday <- c("sun", "mon", "tues", "wednes", "thurs", "fri", "satur") %>%
paste0("day")
weekday

In [9]:
full_data <- full_data %>%
    mutate(echo_int = as.integer(echo)) %>%
    mutate(icu_adm_weekday = weekday[icu_adm_weekday + 1]) %>%
    mutate(mort_28_day_int = as.integer(mort_28_day)) %>%
    mutate_at(factor_vars, to_factor) %>%
    mutate(gender = relevel(gender, "M")) %>%
    mutate(first_careunit = relevel(first_careunit, "SICU"))
full_data %>% pull(echo) %>% head

In [10]:
feature_names <- full_data %>%
    names %>%
    keep(grepl("vs|lab|icd|age|gender|weight|saps|sofa|elix_score|vent|vaso|icu_adm|careunit|sedative", .)) %>%
    discard(grepl("vs|lab", .) & grepl("flag", .) & !grepl("bnp|troponin|kinase", .)) %>%
    discard(grepl("bnp|troponin|kinase", .) & !grepl("flag", .)) %>%
    discard(grepl("min|max", .)) %>%
    discard(grepl("abnormal", .))
feature_names
length(feature_names)

In [11]:
features <- full_data %>%
    select(!!!rlang::syms(feature_names))
head(features)

first_careunit,age,gender,weight,saps,sofa,elix_score,vent,vaso,icu_adm_weekday,⋯,lab_bun_first,lab_bicarbonate_first,lab_bnp_flag,lab_pco2_first,lab_creatinine_first,lab_potassium_first,lab_troponin_flag,lab_po2_first,lab_lactate_first,sedative
MICU,76.58225,M,,24,3,10,1,0,tuesday,⋯,35,40,0,56.0,1.2,4.4,0,98.0,1.4,0
MICU,82.72606,F,90.3,25,8,15,0,0,monday,⋯,32,17,0,,1.6,5.7,0,,,0
MICU,61.8439,M,53.6,20,5,6,0,0,saturday,⋯,64,23,0,40.0,2.7,4.1,0,110.0,1.6,0
MICU,63.86507,M,92.9,24,8,6,1,1,monday,⋯,34,18,0,42.0,1.3,4.2,1,85.0,3.8,1
MICU,62.84018,F,75.0,10,1,6,0,0,thursday,⋯,17,20,0,,0.9,3.5,0,,,0
MICU,62.60148,F,80.0,15,4,2,0,1,friday,⋯,17,21,0,,0.8,3.5,0,,1.4,0


In [12]:
label_name <- "echo"

In [13]:
label <- full_data %>% pull(echo)
str(label)

 Factor w/ 2 levels "0","1": 2 1 2 2 1 2 1 1 2 2 ...


In [14]:
fml <- feature_names %>%
    c("echo", .) %>%
    paste(collapse = " + ") %>%
    sprintf("mort_28_day ~ %s", .)
fml

In [15]:
unweighted <- glm(as.formula(fml), data = full_data, family = binomial, na.action = na.exclude)
summary(unweighted)
exp(cbind(OR = coef(unweighted), confint(unweighted)))


Call:
glm(formula = as.formula(fml), family = binomial, data = full_data, 
    na.action = na.exclude)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.0705  -0.7292  -0.4078   0.7568   2.8052  

Coefficients:
                              Estimate Std. Error z value Pr(>|z|)    
(Intercept)                  5.8103652  8.7855575   0.661 0.508385    
echo1                       -0.3164662  0.1478652  -2.140 0.032336 *  
first_careunitMICU           0.6409615  0.1768372   3.625 0.000289 ***
age                          0.0233073  0.0055442   4.204 2.62e-05 ***
genderF                     -0.1821133  0.1412358  -1.289 0.197250    
weight                      -0.0056337  0.0031028  -1.816 0.069419 .  
saps                         0.0636993  0.0177010   3.599 0.000320 ***
sofa                         0.1647531  0.0260785   6.318 2.66e-10 ***
elix_score                   0.0260520  0.0112065   2.325 0.020087 *  
vent1                        0.4009877  0.2637048   1.521

Waiting for profiling to be done...


Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),333.7409885,9.808999e-06,9.081806e+09
echo1,0.7287197,5.451337e-01,9.736551e-01
first_careunitMICU,1.8983052,1.347629e+00,2.697309e+00
age,1.0235810,1.012624e+00,1.034892e+00
genderF,0.8335069,6.316321e-01,1.099152e+00
weight,0.9943822,9.882848e-01,1.000391e+00
saps,1.0657719,1.029548e+00,1.103583e+00
sofa,1.1791019,1.120829e+00,1.241565e+00
elix_score,1.0263943,1.004066e+00,1.049196e+00
vent1,1.4932989,8.902867e-01,2.505506e+00


In [16]:
saveRDS(unweighted, file = file.path(data_dir, "multivariate_model.rds"))

In [17]:
fml <- feature_names %>%
    paste(collapse = " + ") %>%
    sprintf("echo_int ~ %s", .)
fml

In [18]:
echo_ps_ate <- ps(as.formula(fml),
                  data = full_data,
                  interaction.depth = 2,
                  shrinkage = 0.01,
                  perm.test.iters = 0,
                  estimand = "ATE",
                  verbose = FALSE,
                  stop.method = c("es.mean", "es.max", "ks.mean", "ks.max"),
                  n.trees = 10000,
                  train.fraction = 0.8,
                  cv.folds = 3,
                  n.cores = 8)

In [19]:
pred <- echo_ps_ate$ps$es.mean.ATE
full_data <- full_data %>% mutate(ps = pred)
ROCR::performance(ROCR::prediction(pred, label), "auc")@y.values %>% first

In [20]:
ft_importance <- summary(echo_ps_ate$gbm.obj,
                         n.trees = echo_ps_ate$desc$es.mean.ATE$n.trees,
                         plot = FALSE)

In [21]:
full_data <- full_data %>%
    mutate(ps_weight = get.weights(echo_ps_ate, stop.method = "es.mean"))

In [22]:
saveRDS(full_data, file = file.path(data_dir, "full_data_ps.rds"))

In [23]:
saveRDS(ft_importance, file = file.path(data_dir, "feature_importance.rds"))

In [24]:
primary_ipw <- glm(mort_28_day ~ echo, data = full_data,
                   weights = full_data$ps_weight, family = binomial)
summary(primary_ipw)
exp(cbind(OR = coef(primary_ipw), confint(primary_ipw)))

“non-integer #successes in a binomial glm!”


Call:
glm(formula = mort_28_day ~ echo, family = binomial, data = full_data, 
    weights = full_data$ps_weight)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.3470  -1.0797  -0.9491   1.7377   4.5638  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.88804    0.02904 -30.577  < 2e-16 ***
echo1       -0.16434    0.04166  -3.945 7.98e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 13628  on 6360  degrees of freedom
Residual deviance: 13613  on 6359  degrees of freedom
AIC: 13246

Number of Fisher Scoring iterations: 4


Waiting for profiling to be done...
“non-integer #successes in a binomial glm!”

Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),0.4114605,0.3886034,0.4354656
echo1,0.8484532,0.7819015,0.9206101


In [25]:
saveRDS(primary_ipw, file = file.path(data_dir, "ipw_model.rds"))

In [26]:
ipw_svydesign <- svydesign(ids = ~ icustay_id, weights = ~ ps_weight, data = full_data)

In [27]:
saveRDS(ipw_svydesign, file = file.path(data_dir, "ipw_svydesign.rds"))

In [28]:
fml <- feature_names %>%
    c(label_name, .) %>%
    paste(collapse = " + ") %>%
    sprintf("mort_28_day ~ %s", .)
fml

In [29]:
logi <- svyglm(as.formula(fml),
               family = quasibinomial,
               design = ipw_svydesign)

In [30]:
summary(logi)


Call:
svyglm(formula = as.formula(fml), family = quasibinomial, design = ipw_svydesign)

Survey design:
svydesign(ids = ~icustay_id, weights = ~ps_weight, data = full_data)

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  9.5185367  9.2642893   1.027 0.304375    
echo1                       -0.3574270  0.1498180  -2.386 0.017167 *  
first_careunitMICU           0.8356606  0.1855502   4.504 7.19e-06 ***
age                          0.0230922  0.0061296   3.767 0.000171 ***
genderF                     -0.1332097  0.1526063  -0.873 0.382857    
weight                      -0.0074648  0.0034933  -2.137 0.032768 *  
saps                         0.0605907  0.0190265   3.185 0.001479 ** 
sofa                         0.1812425  0.0273323   6.631 4.62e-11 ***
elix_score                   0.0213227  0.0118552   1.799 0.072282 .  
vent1                        0.6331011  0.2848776   2.222 0.026406 *  
vaso1                       -0

In [31]:
exp(cbind(OR = coef(logi), confint(logi)))

Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),1.360968e+04,0.0001770397,1.046226e+12
echo1,6.994738e-01,0.5214901854,9.382028e-01
first_careunitMICU,2.306337e+00,1.6031796877,3.317900e+00
age,1.023361e+00,1.0111399939,1.035730e+00
genderF,8.752815e-01,0.6490064427,1.180447e+00
weight,9.925630e-01,0.9857902958,9.993822e-01
saps,1.062464e+00,1.0235729218,1.102833e+00
sofa,1.198706e+00,1.1361804491,1.264672e+00
elix_score,1.021552e+00,0.9980887221,1.045566e+00
vent1,1.883442e+00,1.0776160439,3.291854e+00


In [32]:
saveRDS(logi, file = file.path(data_dir, "doubly_robust_all_model.rds"))