In [1]:
library(twang)
library(tidyverse)

Loading required package: gbm
Loading required package: survival
Loading required package: lattice
Loading required package: splines
Loading required package: parallel
Loaded gbm 2.1.3
Loading required package: survey
Loading required package: grid
Loading required package: Matrix

Attaching package: ‘survey’

The following object is masked from ‘package:graphics’:

    dotchart

Loading required package: xtable
Loading required package: latticeExtra
Loading required package: RColorBrewer
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.1     ✔ dplyr   0.7.4
✔ tidyr   0.7.2     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand()  masks Matrix::expand()
✖ dplyr::filter()  masks stats::filter()
✖ dplyr::lag()     masks stats::lag()
✖ ggplot2::layer() masks latticeExtra::layer()


In [2]:
data_dir <- file.path("..", "data")
sql_dir <- file.path("..", "sql")

In [3]:
full_data <- readRDS(file.path(data_dir, "full_data_ps.rds"))

In [4]:
feature_names <- full_data %>%
    names %>%
    keep(grepl("vs|lab|icd|age|gender|weight|saps|sofa|elix_score|vent|vaso|icu_adm|sedative", .)) %>%
    discard(grepl("vs|lab", .) & grepl("flag", .) & !grepl("bnp|troponin|kinase", .)) %>%
    discard(grepl("bnp|troponin|kinase", .) & !grepl("flag", .)) %>%
    discard(grepl("min|max", .)) %>%
    discard(grepl("abnormal", .)) %>%
    discard(grepl("ps_weight", .))
feature_names
length(feature_names)

In [19]:
levels(full_data$gender) <- c("f", "m")

In [20]:
label_name <- "echo"

In [21]:
design_echo_ps_ate <- svydesign(ids = ~ icustay_id, weights = ~ ps_weight, data = full_data)
# design_echo_ps_ate <- svydesign(ids = ~ icustay_id, data = full_data)

In [22]:
fml <- feature_names %>%
    c(label_name, .) %>%
    paste(collapse = " + ") %>%
    sprintf("mort_28_day ~ %s", .)
fml

In [23]:
logi <- svyglm(as.formula(fml),
               family = quasibinomial,
               design = design_echo_ps_ate)

In [24]:
summary(logi)


Call:
svyglm(formula = as.formula(fml), family = quasibinomial, design = design_echo_ps_ate)

Survey design:
svydesign(ids = ~icustay_id, weights = ~ps_weight, data = full_data)

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                 13.0277014  9.2480994   1.409 0.159139    
echo1                       -0.3358510  0.1519932  -2.210 0.027284 *  
age                          0.0240723  0.0062279   3.865 0.000116 ***
genderm                      0.2022247  0.1568407   1.289 0.197475    
weight                      -0.0097747  0.0034972  -2.795 0.005257 ** 
saps                         0.0544894  0.0193176   2.821 0.004856 ** 
sofa                         0.1924874  0.0273976   7.026 3.25e-12 ***
elix_score                   0.0255830  0.0123188   2.077 0.037999 *  
vent1                        0.5547693  0.2896602   1.915 0.055656 .  
vaso1                       -0.0448572  0.1860674  -0.241 0.809526    
icu_adm_weekday1         

In [25]:
pvals <- logi %>%
summary %>%
pluck("coefficients") %>%
as.data.frame %>%
mutate(var = row.names(.)) %>%
select(5, 4) %>%
setNames(c("var", "p_value"))

nrow(pvals)
pvals

var,p_value
(Intercept),1.591390e-01
echo1,2.728389e-02
age,1.158407e-04
genderm,1.974751e-01
weight,5.257233e-03
saps,4.856182e-03
sofa,3.250781e-12
elix_score,3.799908e-02
vent1,5.565606e-02
vaso1,8.095264e-01


In [26]:
res <- exp(cbind(OR = coef(logi), confint(logi))) %>%
as.data.frame %>%
mutate(var = row.names(.))

nrow(res)
res

OR,2.5 %,97.5 %,var
4.548402e+05,0.006107484,3.387313e+13,(Intercept)
7.147296e-01,0.530597121,9.627613e-01,echo1
1.024364e+00,1.011936442,1.036945e+00,age
1.224123e+00,0.900164795,1.664670e+00,genderm
9.902730e-01,0.983508516,9.970839e-01,weight
1.056001e+00,1.016766673,1.096750e+00,saps
1.212261e+00,1.148881787,1.279137e+00,sofa
1.025913e+00,1.001439622,1.050985e+00,elix_score
1.741539e+00,0.987129314,3.072504e+00,vent1
9.561340e-01,0.663953880,1.376891e+00,vaso1


In [27]:
setdiff(pvals$var, res$var)

In [28]:
result <- pvals %>%
inner_join(res, "var") %>%
setNames(c("var", "p_val", "or", "lo", "up"))

head(result)

var,p_val,or,lo,up
(Intercept),0.1591390244,454840.2,0.006107484,33873130000000.0
echo1,0.0272838924,0.7147296,0.530597121,0.9627613
age,0.0001158407,1.024364,1.011936442,1.036945
genderm,0.1974750961,1.224123,0.900164795,1.66467
weight,0.0052572334,0.990273,0.983508516,0.9970839
saps,0.0048561825,1.056001,1.016766673,1.09675


In [29]:
result_fmt <- result %>% 
mutate(or = sprintf("%.2f (%.2f~%.2f)", or, lo, up)) %>%
mutate(p_val = round(p_val, 2)) %>%
select(var, or, p_val)

head(result_fmt)

var,or,p_val
(Intercept),454840.21 (0.01~33873132529181.97),0.16
echo1,0.71 (0.53~0.96),0.03
age,1.02 (1.01~1.04),0.0
genderm,1.22 (0.90~1.66),0.2
weight,0.99 (0.98~1.00),0.01
saps,1.06 (1.02~1.10),0.0


In [15]:
data.table::fwrite(result_fmt, file.path(data_dir, "logi.csv"))