In [1]:
library(RPostgreSQL)
library(twang)
library(Matching)
library(tidyverse)

Loading required package: DBI
Loading required package: gbm
Loading required package: survival
Loading required package: lattice
Loading required package: splines
Loading required package: parallel
Loaded gbm 2.1.3
Loading required package: survey
Loading required package: grid
Loading required package: Matrix

Attaching package: ‘survey’

The following object is masked from ‘package:graphics’:

    dotchart

Loading required package: xtable
Loading required package: latticeExtra
Loading required package: RColorBrewer
Loading required package: MASS
## 
##  Matching (Version 4.9-2, Build Date: 2015-12-25)
##  See http://sekhon.berkeley.edu/matching for additional documentation.
##  Please cite software as:
##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
##   Software with Automated Balance Optimization: The Matching package for R.''
##   Journal of Statistical Software, 42(7): 1-52. 
##

── Attaching packages ─────────────────────────────────────── tidyverse 1

In [2]:
data_dir <- file.path("..", "data")
sql_dir <- file.path("..", "sql")

In [3]:
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
dbSendQuery(con, "set search_path=echo,public,mimiciii;")

<PostgreSQLResult>

In [4]:
full_data <- dbGetQuery(con, "select * from merged_data where vaso = 1")
head(full_data)

icustay_id,hadm_id,subject_id,first_careunit,intime,outtime,angus,age,icu_order,echo_time,⋯,lab_sodium_flag,lab_bun_flag,lab_bicarbonate_flag,lab_bnp_flag,lab_pco2_flag,lab_creatinine_flag,lab_potassium_flag,lab_troponin_flag,lab_po2_flag,lab_lactate_flag
214954,170515,54642,MICU,2176-03-11 04:58:23,2176-03-15 18:00:48,1,63.86507,1,2176-03-11,⋯,1,1,1,0,1,1,1,1,1,1
264413,134244,81436,MICU,2133-03-27 16:31:37,2133-04-04 16:15:28,1,62.60148,1,2133-03-31,⋯,1,1,1,0,0,1,1,0,0,1
214106,123152,74869,MICU,2150-09-05 17:50:32,2150-09-12 21:40:59,1,74.13371,1,2150-09-07,⋯,1,1,1,0,1,1,1,1,1,1
254478,128652,31,MICU,2108-08-22 23:28:42,2108-08-30 21:59:20,1,72.26709,1,,⋯,1,1,1,0,1,1,1,0,1,1
264446,165520,10013,MICU,2125-10-04 23:38:00,2125-10-07 15:13:52,1,87.08742,1,,⋯,1,1,1,0,1,1,1,0,1,0
218143,122936,10370,SICU,2145-01-15 03:36:30,2145-01-20 19:00:30,1,45.06368,1,2145-01-14,⋯,1,1,1,0,1,1,1,0,1,1


In [5]:
dbDisconnect(con)
dbUnloadDriver(drv)

In [6]:
to_factor <- function(x) {
    if(length(na.omit(unique(x))) <= 1) return(factor(x, levels = c(0, 1)))
    return(factor(x))
}

In [7]:
factor_vars <- full_data %>%
    names %>%
    grep("flag|abnormal|icd|sedative", ., value = TRUE) %>%
    c("gender", "first_careunit", "echo", "vent", "vaso",
      "icu_adm_hour", "mort_28_day")
factor_vars

In [8]:
weekday <- c("sun", "mon", "tues", "wednes", "thurs", "fri", "satur") %>%
paste0("day")
weekday

In [9]:
full_data <- full_data %>%
    mutate(echo_int = as.integer(echo)) %>%
    mutate(icu_adm_weekday = weekday[icu_adm_weekday + 1]) %>%
    mutate(mort_28_day_int = as.integer(mort_28_day)) %>%
    mutate_at(factor_vars, to_factor) %>%
    mutate(icu_adm_weekday = factor(icu_adm_weekday, levels = weekday)) %>%
    mutate(gender = relevel(gender, "M")) %>%
    mutate(first_careunit = relevel(first_careunit, "SICU"))
full_data %>% pull(echo) %>% head

In [10]:
feature_names <- full_data %>%
    names %>%
    keep(grepl("vs|lab|icd|age|gender|weight|saps|sofa|elix_score|vent|icu_adm|careunit|sedative", .)) %>%
    discard(grepl("vs|lab", .) & grepl("flag", .) & !grepl("bnp|troponin|kinase|cvp", .)) %>%
    discard(grepl("bnp|troponin|kinase|cvp", .) & !grepl("flag", .)) %>%
    discard(grepl("min|max", .)) %>%
    discard(grepl("abnormal", .))
feature_names
length(feature_names)

In [11]:
feature_names %>%
data.frame(feature = ., stringsAsFactors = FALSE) %>%
data.table::fwrite(file = file.path(data_dir, "features_subgroup_vaso.csv"))

In [12]:
features <- full_data %>%
    select(!!!rlang::syms(feature_names))
head(features)

first_careunit,age,gender,weight,saps,sofa,elix_score,vent,icu_adm_weekday,icu_adm_hour,⋯,lab_pco2_first,lab_creatinine_first,lab_potassium_first,lab_po2_first,lab_lactate_first,sedative,vs_cvp_flag,lab_creatinine_kinase_flag,lab_bnp_flag,lab_troponin_flag
MICU,63.86507,M,92.9,24,8,6,1,monday,4,⋯,42.0,1.3,4.2,85.0,3.8,1,1,1,0,1
MICU,62.60148,F,80.0,15,4,2,0,friday,16,⋯,,0.8,3.5,,1.4,0,1,0,0,0
MICU,74.13371,F,61.0,21,17,17,1,saturday,17,⋯,38.0,0.4,5.0,94.0,1.5,1,1,1,0,1
MICU,72.26709,M,74.0,17,2,9,1,wednesday,23,⋯,36.0,0.7,3.7,148.0,1.4,0,0,0,0,0
MICU,87.08742,F,95.0,20,7,8,1,thursday,23,⋯,63.0,1.7,4.2,60.0,,0,1,1,0,0
SICU,45.06368,F,49.4,28,9,18,1,friday,3,⋯,33.0,3.1,4.1,99.0,1.2,1,0,1,0,0


In [13]:
label_name <- "echo"

In [14]:
label <- full_data %>% pull(echo)
str(label)

 Factor w/ 2 levels "0","1": 2 2 2 1 1 2 2 2 2 1 ...


In [15]:
fml <- feature_names %>%
    c("echo", .) %>%
    paste(collapse = " + ") %>%
    sprintf("mort_28_day ~ %s", .)
fml

In [16]:
unweighted <- glm(as.formula(fml), data = full_data, family = binomial, na.action = na.exclude)
summary(unweighted)
exp(cbind(OR = coef(unweighted), confint(unweighted)))


Call:
glm(formula = as.formula(fml), family = binomial, data = full_data, 
    na.action = na.exclude)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.4191  -0.7801  -0.4057   0.8177   2.7933  

Coefficients:
                              Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -7.5760499  8.9517792  -0.846 0.397375    
echo1                       -0.6891420  0.1498730  -4.598 4.26e-06 ***
first_careunitMICU           0.5677816  0.1847124   3.074 0.002113 ** 
age                          0.0174680  0.0055491   3.148 0.001644 ** 
genderF                     -0.1695708  0.1414976  -1.198 0.230761    
weight                      -0.0079026  0.0029811  -2.651 0.008028 ** 
saps                         0.0673487  0.0179083   3.761 0.000169 ***
sofa                         0.1520795  0.0265866   5.720 1.06e-08 ***
elix_score                   0.0408843  0.0116811   3.500 0.000465 ***
vent1                        0.4364957  0.2653001   1.645

Waiting for profiling to be done...


Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),0.000512582,1.049117e-11,1.851588e+04
echo1,0.502006622,3.737021e-01,6.727457e-01
first_careunitMICU,1.764348658,1.233076e+00,2.545576e+00
age,1.017621476,1.006696e+00,1.028853e+00
genderF,0.844026978,6.392590e-01,1.113562e+00
weight,0.992128565,9.862417e-01,9.978426e-01
saps,1.069668355,1.032928e+00,1.108110e+00
sofa,1.164252804,1.105622e+00,1.227166e+00
elix_score,1.041731618,1.018241e+00,1.065994e+00
vent1,1.547275530,9.202075e-01,2.606099e+00


In [17]:
saveRDS(unweighted, file = file.path(data_dir, "multivariate_model_subgroup_vaso.rds"))

In [18]:
fml <- feature_names %>%
    paste(collapse = " + ") %>%
    sprintf("echo_int ~ %s", .)
fml

In [19]:
echo_ps_ate <- ps(as.formula(fml),
                  data = full_data,
                  interaction.depth = 2,
                  shrinkage = 0.01,
                  perm.test.iters = 0,
                  estimand = "ATE",
                  verbose = FALSE,
                  stop.method = c("es.mean", "es.max", "ks.mean", "ks.max"),
                  n.trees = 10000,
                  train.fraction = 0.8,
                  cv.folds = 3,
                  n.cores = 8)

In [20]:
pred <- echo_ps_ate$ps$es.mean.ATE
full_data <- full_data %>% mutate(ps = pred)
ROCR::performance(ROCR::prediction(pred, label), "auc")@y.values %>% first

In [21]:
ft_importance <- summary(echo_ps_ate$gbm.obj,
                         n.trees = echo_ps_ate$desc$es.mean.ATE$n.trees,
                         plot = FALSE)

In [22]:
full_data <- full_data %>%
    mutate(ps_weight = get.weights(echo_ps_ate, stop.method = "es.mean"))

In [23]:
saveRDS(full_data, file = file.path(data_dir, "full_data_ps_subgroup_vaso.rds"))

In [24]:
saveRDS(ft_importance, file = file.path(data_dir, "feature_importance_subgroup_vaso.rds"))

In [25]:
primary_ipw <- glm(mort_28_day ~ echo, data = full_data,
                   weights = full_data$ps_weight, family = binomial)
summary(primary_ipw)
exp(cbind(OR = coef(primary_ipw), confint(primary_ipw)))

“non-integer #successes in a binomial glm!”


Call:
glm(formula = mort_28_day ~ echo, family = binomial, data = full_data, 
    weights = full_data$ps_weight)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-2.937  -1.246  -1.017   1.670   3.243  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.25007    0.04888  -5.116 3.12e-07 ***
echo1       -0.45392    0.06939  -6.541 6.09e-11 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 4737.7  on 2066  degrees of freedom
Residual deviance: 4694.7  on 2065  degrees of freedom
AIC: 4573.1

Number of Fisher Scoring iterations: 4


Waiting for profiling to be done...
“non-integer #successes in a binomial glm!”

Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),0.7787484,0.707456,0.8568983
echo1,0.6351311,0.554263,0.7275499


In [26]:
saveRDS(primary_ipw, file = file.path(data_dir, "ipw_model_subgroup_vaso.rds"))

In [27]:
ipw_svydesign <- svydesign(ids = ~ icustay_id, weights = ~ ps_weight, data = full_data)

In [28]:
saveRDS(ipw_svydesign, file = file.path(data_dir, "ipw_svydesign_subgroup_vaso.rds"))

In [29]:
fml <- feature_names %>%
    c(label_name, .) %>%
    paste(collapse = " + ") %>%
    sprintf("mort_28_day ~ %s", .)
fml

In [30]:
logi <- svyglm(as.formula(fml),
               family = quasibinomial,
               design = ipw_svydesign)

In [31]:
summary(logi)


Call:
svyglm(formula = as.formula(fml), family = quasibinomial, design = ipw_svydesign)

Survey design:
svydesign(ids = ~icustay_id, weights = ~ps_weight, data = full_data)

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                 -2.9109858  9.3386097  -0.312 0.755304    
echo1                       -0.7210549  0.1531726  -4.707 2.76e-06 ***
first_careunitMICU           0.6858962  0.1925265   3.563 0.000380 ***
age                          0.0193006  0.0060662   3.182 0.001497 ** 
genderF                     -0.1313274  0.1521379  -0.863 0.388170    
weight                      -0.0081745  0.0031685  -2.580 0.009986 ** 
saps                         0.0667188  0.0194750   3.426 0.000631 ***
sofa                         0.1724999  0.0282177   6.113 1.27e-09 ***
elix_score                   0.0408743  0.0120360   3.396 0.000703 ***
vent1                        0.5357448  0.2784233   1.924 0.054534 .  
icu_adm_weekdaymonday       -0

In [32]:
exp(cbind(OR = coef(logi), confint(logi)))

Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),0.05442205,6.119782e-10,4.839649e+06
echo1,0.48623906,3.601381e-01,6.564937e-01
first_careunitMICU,1.98555055,1.361451e+00,2.895742e+00
age,1.01948806,1.007439e+00,1.031682e+00
genderF,0.87693058,6.508265e-01,1.181586e+00
weight,0.99185881,9.857182e-01,9.980377e-01
saps,1.06899487,1.028960e+00,1.110588e+00
sofa,1.18827173,1.124338e+00,1.255841e+00
elix_score,1.04172120,1.017434e+00,1.066588e+00
vent1,1.70872051,9.900945e-01,2.948936e+00


In [33]:
saveRDS(logi, file = file.path(data_dir, "doubly_robust_all_model_subgroup_vaso.rds"))