In [1]:
library(twang)
library(weights)
library(tidyverse)

Loading required package: gbm
Loading required package: survival
Loading required package: lattice
Loading required package: splines
Loading required package: parallel
Loaded gbm 2.1.3
Loading required package: survey
Loading required package: grid
Loading required package: Matrix

Attaching package: ‘survey’

The following object is masked from ‘package:graphics’:

    dotchart

Loading required package: xtable
Loading required package: latticeExtra
Loading required package: RColorBrewer
Loading required package: Hmisc
Loading required package: Formula
Loading required package: ggplot2

Attaching package: ‘ggplot2’

The following object is masked from ‘package:latticeExtra’:

    layer


Attaching package: ‘Hmisc’

The following objects are masked from ‘package:xtable’:

    label, label<-

The following object is masked from ‘package:survey’:

    deff

The following objects are masked from ‘package:base’:

    format.pval, units

Loading required package: gdata
gdata: read.xls suppo

In [2]:
# Relative path to the data directory ("../data"); later cells read the
# input .rds file from it and write the CSV exports into it.
data_dir <- file.path("..", "data")

In [3]:
# Load the analysis data set and append 0/1 indicator factors:
#   * sunday ... saturday: one flag per value 0..6 of `icu_adm_weekday`
#     (0 is mapped to `sunday`, 6 to `saturday`)
#   * female: 1 when `gender` (converted via character to integer) equals 0
full_data <- local({
  # Turn a logical condition into a factor with fixed levels 0 and 1.
  as_flag <- function(cond) factor(as.integer(cond), levels = c(0, 1))

  readRDS(file.path(data_dir, "full_data_ps.rds")) %>%
    mutate(
      sunday = as_flag(icu_adm_weekday == 0),
      monday = as_flag(icu_adm_weekday == 1),
      tuesday = as_flag(icu_adm_weekday == 2),
      wednesday = as_flag(icu_adm_weekday == 3),
      thursday = as_flag(icu_adm_weekday == 4),
      friday = as_flag(icu_adm_weekday == 5),
      saturday = as_flag(icu_adm_weekday == 6),
      female = as_flag(as.integer(as.character(gender)) == 0)
    )
})
head(full_data)

icustay_id,hadm_id,subject_id,first_careunit,intime,outtime,angus,age,icu_order,echo_time,⋯,ps,ps_weight,sunday,monday,tuesday,wednesday,thursday,friday,saturday,female
228416,159895,5491,0,2151-06-29 01:18:27,2151-07-27 16:00:56,1,76.58225,1,2151-06-30,⋯,0.5444486,1.836721,0,0,1,0,0,0,0,0
278148,169700,49395,0,2197-01-09 00:45:42,2197-01-10 06:46:29,1,82.72606,1,,⋯,0.2569337,1.345775,0,1,0,0,0,0,0,1
274174,103722,14855,0,2181-09-08 23:43:13,2181-09-26 16:52:02,1,61.8439,1,2181-09-11,⋯,0.3264142,3.063592,0,0,0,0,0,0,1,0
214954,170515,54642,0,2176-03-11 04:58:23,2176-03-15 18:00:48,1,63.86507,1,2176-03-11,⋯,0.8063102,1.240217,0,1,0,0,0,0,0,0
222457,183493,96815,0,2105-06-04 20:57:55,2105-06-06 20:01:51,1,62.84018,1,,⋯,0.223952,1.28858,0,0,0,0,1,0,0,1
264413,134244,81436,0,2133-03-27 16:31:37,2133-04-04 16:15:28,1,62.60148,1,2133-03-31,⋯,0.3457691,2.892104,0,0,0,0,0,1,0,1


In [4]:
# Eyeball check: compare each derived indicator with the column it was
# built from (first 10 rows).
head(full_data[["sunday"]], n = 10)
head(full_data[["icu_adm_weekday"]], n = 10)

head(full_data[["female"]], n = 10)
head(full_data[["gender"]], n = 10)

In [5]:
library(tableone)

In [6]:
# tableone summary of the first recorded vital signs, first lab values and
# lab-ordered flags, stratified by `echo`. Only the continuous part of the
# table is displayed.
tab_features <- local({
  vital_vars <- c(
    "vs_map_first", "vs_heart_rate_first", "vs_temp_first", "vs_cvp_first"
  )
  lab_vars <- c(
    "lab_wbc_first", "lab_hemoglobin_first", "lab_platelet_first",
    "lab_sodium_first", "lab_potassium_first", "lab_bicarbonate_first",
    "lab_chloride_first", "lab_bun_first", "lab_lactate_first",
    "lab_creatinine_first", "lab_ph_first", "lab_po2_first", "lab_pco2_first"
  )
  # These three are treated as categorical by tableone.
  flag_vars <- c(
    "lab_bnp_flag", "lab_troponin_flag", "lab_creatinine_kinase_flag"
  )

  CreateTableOne(
    vars = c(vital_vars, lab_vars, flag_vars),
    strata = "echo",
    factorVars = flag_vars,
    data = full_data
  )
})
tab_features$ContTable

                                   Stratified by echo
                                    0               1               p      test
  n                                 3099            3262                       
  vs_map_first (mean (sd))           79.91 (19.44)   80.03 (20.48)   0.810     
  vs_heart_rate_first (mean (sd))    93.01 (19.81)   95.07 (21.79)  <0.001     
  vs_temp_first (mean (sd))          36.75 (1.05)    36.85 (1.90)    0.006     
  vs_cvp_first (mean (sd))           11.88 (17.09)   13.74 (20.45)   0.024     
  lab_wbc_first (mean (sd))          13.48 (14.03)   13.76 (12.41)   0.401     
  lab_hemoglobin_first (mean (sd))   10.55 (1.98)    10.60 (2.04)    0.407     
  lab_platelet_first (mean (sd))    223.96 (134.79) 211.55 (128.46) <0.001     
  lab_sodium_first (mean (sd))      139.09 (6.45)   138.56 (5.89)    0.001     
  lab_potassium_first (mean (sd))     4.10 (0.79)     4.15 (0.84)    0.017     
  lab_bicarbonate_first (mean (sd))  22.28 (5.40)    22.35 (5.71) 

In [7]:
# tableone summary of demographics, severity scores, treatments, ICD
# comorbidity flags and admission weekday, stratified by `echo`.
# NOTE: the table is printed and `tab_basics` receives whatever `print()`
# returns, not the object produced by CreateTableOne().
tab_basics <- local({
  treatment_vars <- c("vent", "vaso", "sedative")
  icd_vars <- c(
    "icd_chf", "icd_afib", "icd_renal", "icd_liver",
    "icd_copd", "icd_cad", "icd_stroke", "icd_malignancy"
  )

  basics <- CreateTableOne(
    vars = c(
      "age", "gender", "weight", "elix_score", "saps", "sofa",
      treatment_vars, icd_vars, "icu_adm_weekday"
    ),
    strata = "echo",
    factorVars = c("gender", treatment_vars, icd_vars, "icu_adm_weekday"),
    data = full_data
  )
  print(basics)
})

                        Stratified by echo
                         0             1             p      test
  n                       3099          3262                    
  age (mean (sd))        66.69 (17.21) 65.74 (16.55)  0.025     
  gender = 1 (%)          1524 (49.2)   1704 (52.2)   0.016     
  weight (mean (sd))     78.56 (23.58) 83.17 (26.87) <0.001     
  elix_score (mean (sd))  8.51 (7.45)  10.07 (7.67)  <0.001     
  saps (mean (sd))       19.63 (5.79)  20.76 (5.44)  <0.001     
  sofa (mean (sd))        5.31 (3.62)   6.33 (3.79)  <0.001     
  vent = 1 (%)            1459 (47.1)   1901 (58.3)  <0.001     
  vaso = 1 (%)             839 (27.1)   1228 (37.6)  <0.001     
  sedative = 1 (%)        1247 (40.2)   1623 (49.8)  <0.001     
  icd_chf = 1 (%)          563 (18.2)   1304 (40.0)  <0.001     
  icd_afib = 1 (%)         622 (20.1)   1056 (32.4)  <0.001     
  icd_renal = 1 (%)        436 (14.1)    536 (16.4)   0.010     
  icd_liver = 1 (%)        314 (10.1)    365 (1

In [8]:
# Build the raw balance table: one row per covariate with, for each level of
# `echo`, an unweighted and a `ps_weight`-weighted summary plus the
# corresponding p-values.
#   * numeric columns: mean (main) and SD (sub); Welch t-test and weighted
#     t-test p-values
#   * two-level factors: percentage of 1s (main), no spread statistic (sub is
#     NA); chi-squared and weighted chi-squared p-values
#   * factors with any other number of levels are skipped
# Result columns: p_val, wtd_p_val, {main,sub,wtd_main,wtd_sub}_{echo level},
# var.
table_one_raw <- local({
  # Per-`echo` mean/SD (raw and weighted) of a numeric column, with the
  # unweighted and weighted two-sample t-test p-values attached to every row.
  summarise_numeric <- function(data, var) {
    sym_var <- rlang::sym(var)
    by_echo <- split(select(data, !!sym_var, ps_weight), data[["echo"]])
    if (length(by_echo) != 2) {
      stop("`echo` must define exactly two groups, found ", length(by_echo),
           call. = FALSE)
    }
    first <- by_echo[[1]]
    second <- by_echo[[2]]

    p_raw <- t.test(x = first[[var]], y = second[[var]])$p.value
    p_wtd <- wtd.t.test(x = first[[var]],
                        y = second[[var]],
                        weight = first[["ps_weight"]],
                        weighty = second[["ps_weight"]],
                        samedata = FALSE)$coefficients[["p.value"]]

    data %>%
      group_by(echo) %>%
      summarise(main = mean(!!sym_var, na.rm = TRUE),
                sub = sd(!!sym_var, na.rm = TRUE),
                wtd_main = wtd.mean(!!sym_var, ps_weight, na.rm = TRUE),
                wtd_sub = sqrt(wtd.var(!!sym_var, ps_weight, na.rm = TRUE))) %>%
      mutate(p_val = !!p_raw, wtd_p_val = !!p_wtd)
  }

  # Per-`echo` percentage of 1s (raw and weighted) of a two-level factor whose
  # levels parse as integers, with the unweighted and weighted chi-squared
  # p-values attached to every row.
  summarise_binary <- function(data, var) {
    sym_var <- rlang::sym(var)

    p_raw <- chisq.test(x = data[[var]], y = data[["echo"]])$p.value
    p_wtd <- wtd.chi.sq(var1 = data[[var]],
                        var2 = data[["echo"]],
                        weight = data[["ps_weight"]])[["p.value"]]

    data %>%
      mutate(!!sym_var := as.integer(as.character(!!sym_var))) %>%
      group_by(echo) %>%
      summarise(main = mean(!!sym_var, na.rm = TRUE) * 100,
                sub = NA_real_,
                wtd_main = wtd.mean(!!sym_var, ps_weight, na.rm = TRUE) * 100,
                wtd_sub = NA_real_) %>%
      mutate(p_val = !!p_raw, wtd_p_val = !!p_wtd)
  }

  # Dispatch on column type and reshape the two per-group rows into a single
  # wide row. Returns NULL for factors that do not have exactly two levels.
  summarise_covariate <- function(var) {
    x <- full_data[[var]]
    if (is.null(x)) {
      stop("Column '", var, "' not found in full_data", call. = FALSE)
    }

    if (is.numeric(x)) {
      res <- summarise_numeric(full_data, var)
    } else if (is.factor(x)) {
      if (nlevels(x) != 2) {
        return(NULL)
      }
      res <- summarise_binary(full_data, var)
    } else {
      # Fail loudly instead of with "object 'res' not found".
      stop("Column '", var, "' has unsupported type '", class(x)[1],
           "'; expected numeric or factor", call. = FALSE)
    }

    # gather/unite/spread yields columns such as main_0, main_1, sub_0, ...
    res %>%
      gather("key", "value", main, sub, wtd_main, wtd_sub) %>%
      unite("key", key, echo) %>%
      spread(key, value) %>%
      mutate(var = !!var)
  }

  covariates <- c(
    "age", "female", "weight", "elix_score",
    "saps", "sofa", "vent", "vaso", "sedative",
    "icd_chf", "icd_afib", "icd_renal", "icd_liver",
    "icd_copd", "icd_cad", "icd_stroke", "icd_malignancy",
    "sunday", "monday", "tuesday", "wednesday",
    "thursday", "friday", "saturday",
    "vs_map_first", "vs_heart_rate_first", "vs_temp_first", "vs_cvp_first",
    "lab_wbc_first", "lab_hemoglobin_first", "lab_platelet_first",
    "lab_sodium_first", "lab_potassium_first", "lab_bicarbonate_first",
    "lab_chloride_first", "lab_bun_first", "lab_lactate_first",
    "lab_creatinine_first", "lab_ph_first", "lab_po2_first", "lab_pco2_first",
    "lab_bnp_flag", "lab_troponin_flag", "lab_creatinine_kinase_flag"
  )

  covariates %>%
    map(summarise_covariate) %>%
    discard(is.null) %>%
    # Bind by column name rather than by position.
    data.table::rbindlist(use.names = TRUE) %>%
    as.data.frame()
})

head(table_one_raw)

p_val,wtd_p_val,main_0,main_1,sub_0,sub_1,wtd_main_0,wtd_main_1,wtd_sub_0,wtd_sub_1,var
0.02555251,0.04837335,66.68924,65.74283,17.207039,16.554501,66.819125,65.992579,16.751317,16.619096,age
0.01571359,0.4336029,50.82285,47.762109,,,49.269948,48.288405,,,female
4.654788e-12,0.13519476,78.56295,83.168338,23.579777,26.872532,80.382578,81.361076,24.347537,25.392902,weight
2.00972e-16,0.07229033,8.50597,10.06867,7.451124,7.666592,9.142537,9.484882,7.554606,7.630381,elix_score
1.179391e-15,0.10326054,19.63246,20.764255,5.788347,5.438449,20.020541,20.24675,5.638692,5.42108,saps
3.538666e-28,0.0245159,5.30526,6.332618,3.618029,3.790436,5.678279,5.88631,3.671404,3.702144,sofa


In [9]:
# Turn the raw balance table into a display table:
#   * each summary becomes "mean (sd)", or "xx.xx%" when the spread column is
#     NA (the binary covariates)
#   * p-values are shown with exactly three decimals, or "<0.001"
#   * covariate names lose their vs_/lab_/icd_ prefix and _flag/_first suffix,
#     underscores become spaces, and the result is title-cased
table_one_fmt <- local({
  # "mean (sd)" when a spread value exists, otherwise a percentage.
  fmt_summary <- function(main, sub) {
    ifelse(is.na(sub),
           sprintf("%.2f%%", main),
           sprintf("%.2f (%.2f)", main, sub))
  }

  # Fixed three-decimal formatting keeps trailing zeros (e.g. "0.050"), so
  # every p-value in the column has the same width.
  fmt_p_value <- function(p) {
    ifelse(p < 0.001, "<0.001", sprintf("%.3f", p))
  }

  # Patterns are anchored so that only a leading prefix / trailing suffix is
  # removed, never a matching fragment in the middle of a name.
  fmt_covariate <- function(name) {
    name <- sub("^vs_", "", name)
    name <- sub("^lab_", "", name)
    name <- sub("^icd_", "", name)
    name <- sub("_flag$", "", name)
    name <- sub("_first$", "", name)
    name <- gsub("_", " ", name, fixed = TRUE)
    tools::toTitleCase(name)
  }

  table_one_raw %>%
    mutate(raw_0 = fmt_summary(main_0, sub_0),
           raw_1 = fmt_summary(main_1, sub_1),
           wtd_raw_0 = fmt_summary(wtd_main_0, wtd_sub_0),
           wtd_raw_1 = fmt_summary(wtd_main_1, wtd_sub_1),
           p_val = fmt_p_value(p_val),
           wtd_p_val = fmt_p_value(wtd_p_val)) %>%
    select(var, raw_1, raw_0, p_val, wtd_raw_1, wtd_raw_0, wtd_p_val) %>%
    mutate(var = fmt_covariate(var)) %>%
    setNames(c("Covariate", "Echo", "Non-Echo", "p value",
               "Echo (weighted cohort)", "Non-Echo (weighted cohort)",
               "p value (weighted cohort)"))
})

table_one_fmt

Covariate,Echo,Non-Echo,p value,Echo (weighted cohort),Non-Echo (weighted cohort),p value (weighted cohort)
Age,65.74 (16.55),66.69 (17.21),0.026,65.99 (16.62),66.82 (16.75),0.048
Female,47.76%,50.82%,0.016,48.29%,49.27%,0.434
Weight,83.17 (26.87),78.56 (23.58),<0.001,81.36 (25.39),80.38 (24.35),0.135
Elix Score,10.07 (7.67),8.51 (7.45),<0.001,9.48 (7.63),9.14 (7.55),0.072
Saps,20.76 (5.44),19.63 (5.79),<0.001,20.25 (5.42),20.02 (5.64),0.103
Sofa,6.33 (3.79),5.31 (3.62),<0.001,5.89 (3.70),5.68 (3.67),0.025
Vent,58.28%,47.08%,<0.001,53.63%,50.61%,0.016
Vaso,37.65%,27.07%,<0.001,33.64%,30.55%,0.008
Sedative,49.75%,40.24%,<0.001,45.83%,42.92%,0.019
Chf,39.98%,18.17%,<0.001,31.10%,26.09%,<0.001


In [10]:
# Write the formatted and the raw balance tables as CSV files into the data
# directory (formatted table first, then the raw one).
invisible(local({
  outputs <- list("tableone.csv" = table_one_fmt,
                  "tableone_raw.csv" = table_one_raw)
  for (file_name in names(outputs)) {
    data.table::fwrite(outputs[[file_name]], file.path(data_dir, file_name))
  }
}))