In [1]:
library(RPostgreSQL)
library(twang)
library(tidyverse)

Loading required package: DBI
Loading required package: gbm
Loading required package: survival
Loading required package: lattice
Loading required package: splines
Loading required package: parallel
Loaded gbm 2.1.3
Loading required package: survey
Loading required package: grid
Loading required package: Matrix

Attaching package: ‘survey’

The following object is masked from ‘package:graphics’:

    dotchart

Loading required package: xtable
Loading required package: latticeExtra
Loading required package: RColorBrewer
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.3.4     ✔ dplyr   0.7.4
✔ tidyr   0.7.2     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand()  masks Matrix::expand()
✖ dplyr::filter()  masks stats::filter()
✖ dplyr::lag()     masks stats::lag()
✖ ggplot2::layer() masks latticeExtra::layer()


In [70]:
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
dbSendQuery(con, "set search_path=echo,public,mimiciii;")

<PostgreSQLResult>

In [71]:
full_data <- dbGetQuery(con, "select * from merged_data")

In [72]:
dbDisconnect(con)
dbUnloadDriver(drv)

In [64]:
factor_vars <- full_data %>%
    names %>%
    grep("flag|abnormal|icd", ., value = TRUE) %>%
    c("gender", "first_careunit", "echo", "vent", "vaso",
      "icu_adm_weekday", "icu_adm_hour")
factor_vars

In [65]:
to_factor <- function(x) {
    res <- (x %>% as.factor %>% as.integer - 1) %>% as.factor
    if(length(levels(res)) <= 1) levels(res) <- c(0, 1)
    res
}

In [69]:
full_data <- full_data %>%
    mutate(echo_int = as.integer(echo)) %>%
    mutate_at(factor_vars, to_factor)
full_data %>% pull(echo) %>% head

In [86]:
feature_names <- full_data %>%
    names %>%
    keep(grepl("vs|lab|icd|age|gender|weight|saps|sofa|elix_score|vent|vaso|icu_adm", .)) %>%
    discard(grepl("vs|lab", .) & grepl("flag", .) & !grepl("bnp|troponin|kinase", .)) %>%
    discard(grepl("bnp|troponin|kinase", .) & !grepl("flag", .)) %>%
    discard(grepl("min|max", .)) %>%
    discard(grepl("abnormal", .))
feature_names
length(feature_names)

In [87]:
features <- full_data %>%
    select(!!!rlang::syms(feature_names), echo) %>%
    mutate(gender = as.integer(as.factor(gender)))
head(features)

age,gender,weight,saps,sofa,elix_score,vent,vaso,icu_adm_weekday,icu_adm_hour,⋯,lab_creatinine_first,lab_pco2_first,lab_bnp_flag,lab_bicarbonate_first,lab_bun_first,lab_platelet_first,lab_sodium_first,lab_chloride_first,lab_ph_first,echo
62.67646,2,1.780838,25,5,5,1,0,2,1,⋯,3.7,32.0,0,22,208,313,160,123,7.45,0
86.76186,1,1.18702,13,1,10,0,0,4,1,⋯,0.9,,0,27,17,189,139,105,,0
56.08904,2,1.44704,18,5,14,1,0,6,1,⋯,0.9,32.0,0,24,15,231,144,108,7.49,0
45.91093,2,1.435399,16,9,13,0,0,3,14,⋯,0.8,30.0,0,20,19,28,134,100,7.44,1
59.38693,2,1.497568,13,3,22,0,0,3,14,⋯,0.7,,0,26,7,40,138,103,,0
300.00345,1,1.314525,25,5,0,0,0,6,20,⋯,0.8,,0,20,19,249,147,118,,0


In [88]:
label_name <- "echo"

In [89]:
label <- full_data %>% pull(echo)
head(label)

In [95]:
fml <- feature_names %>%
    c("echo", .) %>%
    paste(collapse = " + ") %>%
    sprintf("mort_28_day ~ %s", .)
fml

In [96]:
unweighted <- glm(as.formula(fml), data = full_data, family = binomial, na.action = na.exclude)
summary(unweighted)
exp(cbind(OR = coef(unweighted), confint(unweighted)))


Call:
glm(formula = as.formula(fml), family = binomial, data = full_data, 
    na.action = na.exclude)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.4893  -0.7486  -0.4481   0.7799   2.6494  

Coefficients:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                -0.2906855  8.0750064  -0.036 0.971284    
echo                        0.0081572  0.1601857   0.051 0.959387    
age                         0.0046693  0.0011542   4.045 5.22e-05 ***
genderM                     0.2420212  0.1337974   1.809 0.070472 .  
weight                      0.3427170  0.0900850   3.804 0.000142 ***
saps                        0.0867994  0.0164025   5.292 1.21e-07 ***
sofa                        0.1460780  0.0243691   5.994 2.04e-09 ***
elix_score                  0.0258133  0.0108557   2.378 0.017414 *  
vent                       -0.0081165  0.1938314  -0.042 0.966599    
vaso                        0.0649022  0.1649038   0.394 0.693894  

Waiting for profiling to be done...


Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),0.7477508,8.637402e-08,5005508.0
echo,1.0081905,0.736417,1.38051
age,1.0046802,1.00241,1.006964
genderM,1.2738212,0.9802728,1.656754
weight,1.40877,1.182154,1.683838
saps,1.0906778,1.056373,1.126578
sofa,1.1572864,1.103693,1.214407
elix_score,1.0261493,1.004531,1.048231
vent,0.9919164,0.6798851,1.454631
vaso,1.0670546,0.7728758,1.475978


In [90]:
fml <- feature_names %>%
    paste(collapse = " + ") %>%
    sprintf("echo ~ %s", .)
fml

In [15]:
echo_ps_ate <- ps(as.formula(fml),
                  data = full_data,
                  interaction.depth = 2,
                  shrinkage = 0.01,
                  perm.test.iters = 0,
                  estimand = "ATE",
                  verbose = FALSE,
                  stop.method = c("es.mean", "es.max", "ks.mean", "ks.max"),
                  n.trees = 10000,
                  train.fraction = 0.8,
                  cv.folds = 3,
                  n.cores = 8)

In [16]:
pred <- echo_ps_ate$ps$es.mean.ATE
ROCR::performance(ROCR::prediction(pred, label), "auc")@y.values %>% first

In [None]:
ft_importance <- summary(echo_ps_ate$gbm.obj,
                         n.trees = echo_ps_ate$desc$es.mean.ATE$n.trees,
                         plot = FALSE)

In [73]:
full_data <- full_data %>%
    mutate(weight = get.weights(echo_ps_ate, stop.method = "es.mean"))

In [79]:
primary_ipw <- glm(mort_28_day ~ echo, data = full_data,
                   weights = full_data$weight, family = binomial)
summary(primary_ipw)
exp(cbind(OR = coef(primary_ipw), confint(primary_ipw)))

“non-integer #successes in a binomial glm!”


Call:
glm(formula = mort_28_day ~ echo, family = binomial, data = full_data, 
    weights = full_data$weight)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.2870  -1.0706  -0.9457   1.7277   4.5025  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.89524    0.02948 -30.364  < 2e-16 ***
echo        -0.18201    0.04250  -4.283 1.85e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 13129  on 6161  degrees of freedom
Residual deviance: 13111  on 6160  degrees of freedom
AIC: 12771

Number of Fisher Scoring iterations: 4


Waiting for profiling to be done...
“non-integer #successes in a binomial glm!”

Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),0.4085111,0.3854811,0.4327143
echo,0.8335917,0.7669299,0.9059739


In [75]:
design_echo_ps_ate <- svydesign(ids = ~ icustay_id, weights = ~ weight, data = full_data)

In [76]:
fml <- feature_names %>%
    c(label_name, .) %>%
    paste(collapse = " + ") %>%
    sprintf("mort_28_day ~ %s", .)
fml

In [77]:
logi <- svyglm(as.formula(fml),
               family = quasibinomial,
               design = design_echo_ps_ate)

In [81]:
summary(logi)


Call:
svyglm(formula = as.formula(fml), family = quasibinomial, design = design_echo_ps_ate)

Survey design:
svydesign(ids = ~icustay_id, weights = ~weight, data = full_data)

Coefficients: (1 not defined because of singularities)
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                 2.9377066  8.4636647   0.347 0.728565    
echo                        0.0162561  0.1649511   0.099 0.921507    
age                         0.0052253  0.0012169   4.294 1.86e-05 ***
genderM                     0.2128878  0.1445597   1.473 0.141039    
weight                      0.2879562  0.0807618   3.565 0.000374 ***
saps                        0.0782513  0.0174829   4.476 8.15e-06 ***
sofa                        0.1703063  0.0252277   6.751 2.05e-11 ***
elix_score                  0.0192496  0.0118125   1.630 0.103385    
vent                        0.1410992  0.2075338   0.680 0.496676    
vaso                        0.0109019  0.1740912   0.063 0.950075   

In [97]:
exp(cbind(OR = coef(logi), confint(logi)))

Unnamed: 0,OR,2.5 %,97.5 %
(Intercept),18.8725139,1.179086e-06,302074600.0
echo,1.016389,0.7356196,1.404322
age,1.005239,1.002844,1.007639
genderM,1.2372458,0.9319799,1.6425
weight,1.3336989,1.138448,1.562437
saps,1.0813944,1.044967,1.119092
sofa,1.185668,1.128468,1.245767
elix_score,1.019436,0.9961051,1.043313
vent,1.1515389,0.7667001,1.729544
vaso,1.0109616,0.7187005,1.422071
