In [10]:
library(twang)
library(weights)
library(tidyverse)

In [11]:
# Relative path to the data directory ("../data"); the cohort file is read
# from here and the table-one CSVs are written back to it.
data_dir <- file.path("..", "data")

In [12]:
# Read the cohort from full_data_ps.rds and append 0/1 factor indicators:
#   * sunday ... saturday flag icu_adm_weekday == 0 ... 6 respectively
#   * female flags rows whose gender code (a factor holding digits) equals 0
# Each indicator is a factor with levels c(0, 1) so both levels always exist.
full_data <- file.path(data_dir, "full_data_ps.rds") %>%
    readRDS() %>%
    mutate(
        sunday    = factor(as.integer(icu_adm_weekday == 0), levels = c(0, 1)),
        monday    = factor(as.integer(icu_adm_weekday == 1), levels = c(0, 1)),
        tuesday   = factor(as.integer(icu_adm_weekday == 2), levels = c(0, 1)),
        wednesday = factor(as.integer(icu_adm_weekday == 3), levels = c(0, 1)),
        thursday  = factor(as.integer(icu_adm_weekday == 4), levels = c(0, 1)),
        friday    = factor(as.integer(icu_adm_weekday == 5), levels = c(0, 1)),
        saturday  = factor(as.integer(icu_adm_weekday == 6), levels = c(0, 1)),
        female    = factor(as.integer(as.integer(as.character(gender)) == 0),
                           levels = c(0, 1))
    )
full_data %>% head()

icustay_id,hadm_id,subject_id,first_careunit,intime,outtime,angus,age,icu_order,echo_time,⋯,ps,ps_weight,sunday,monday,tuesday,wednesday,thursday,friday,saturday,female
201220,125078,66690,0,2106-04-27 08:47:50,2106-05-01 18:25:46,1,62.67646,1,,⋯,0.4544669,1.833069,0,0,1,0,0,0,0,0
215842,151232,11663,0,2188-02-14 08:48:15,2188-02-16 02:02:48,1,86.76186,1,,⋯,0.1568838,1.186076,0,0,0,0,1,0,0,1
234312,164444,86645,1,2165-06-22 08:47:16,2165-07-07 20:55:20,1,56.08904,1,,⋯,0.3002571,1.429096,0,0,0,0,0,0,1,0
289157,146726,10304,0,2156-06-23 21:26:00,2156-06-30 16:26:00,1,45.91093,1,2156-06-25 07:00:00,⋯,0.6991412,1.430326,0,0,0,1,0,0,0,0
211964,160170,94534,0,2160-03-05 21:23:19,2160-03-07 05:48:41,1,59.38693,1,,⋯,0.3432285,1.5226,0,0,0,1,0,0,0,0
230173,112553,31544,0,2140-01-31 03:39:25,2140-02-03 01:41:39,1,91.5,1,,⋯,0.2464983,1.327137,0,0,0,0,0,0,1,1


In [15]:
# Spot-check the derived indicators against their source columns
# by showing the first 10 values of each.
head(pull(full_data, sunday), 10)
head(pull(full_data, icu_adm_weekday), 10)

head(pull(full_data, female), 10)
head(pull(full_data, gender), 10)

In [4]:
library(tableone)

In [5]:
# tableone summary of the `*_first` vital-sign / lab columns and the three
# lab flag columns, stratified by `echo`. The flag columns are declared as
# categorical through `factorVars`.
tab_features <- CreateTableOne(
    data = full_data,
    strata = "echo",
    vars = c(
        "vs_map_first", "vs_heart_rate_first", "vs_temp_first", "vs_cvp_first",
        "lab_wbc_first", "lab_hemoglobin_first", "lab_platelet_first",
        "lab_sodium_first", "lab_potassium_first", "lab_bicarbonate_first",
        "lab_chloride_first", "lab_bun_first", "lab_lactate_first",
        "lab_creatinine_first", "lab_ph_first", "lab_po2_first", "lab_pco2_first",
        "lab_bnp_flag", "lab_troponin_flag", "lab_creatinine_kinase_flag"
    ),
    factorVars = c(
        "lab_bnp_flag", "lab_troponin_flag", "lab_creatinine_kinase_flag"
    )
)
# Display only the continuous-variable part of the table.
tab_features[["ContTable"]]

                                   Stratified by echo
                                    0               1               p      test
  n                                 3099            3063                       
  vs_map_first (mean (sd))           79.79 (19.25)   80.07 (19.95)   0.569     
  vs_heart_rate_first (mean (sd))    93.01 (19.81)   94.96 (21.76)  <0.001     
  vs_temp_first (mean (sd))          36.75 (1.05)    36.83 (1.58)    0.013     
  vs_cvp_first (mean (sd))           11.88 (17.09)   13.57 (19.12)   0.034     
  lab_wbc_first (mean (sd))          13.48 (14.03)   13.91 (12.58)   0.215     
  lab_hemoglobin_first (mean (sd))   10.55 (1.98)    10.65 (2.04)    0.062     
  lab_platelet_first (mean (sd))    223.96 (134.79) 213.44 (127.53)  0.002     
  lab_sodium_first (mean (sd))      139.09 (6.45)   138.62 (5.92)    0.003     
  lab_potassium_first (mean (sd))     4.10 (0.79)     4.15 (0.85)    0.024     
  lab_bicarbonate_first (mean (sd))  22.28 (5.40)    22.26 (5.71) 

In [6]:
# tableone summary of demographics, severity scores, treatments, ICD flags
# and admission weekday, stratified by `echo`. The table is printed right
# away; note that `tab_basics` receives the value returned by print(), not
# the object returned by CreateTableOne().
tab_basics <- print(
    CreateTableOne(
        data = full_data,
        strata = "echo",
        vars = c(
            "age", "gender", "weight", "elix_score",
            "saps", "sofa", "vent", "vaso", "sedative",
            "icd_chf", "icd_afib", "icd_renal", "icd_liver",
            "icd_copd", "icd_cad", "icd_stroke", "icd_malignancy",
            "icu_adm_weekday"
        ),
        factorVars = c(
            "gender", "vent", "vaso", "sedative",
            "icd_chf", "icd_afib", "icd_renal", "icd_liver",
            "icd_copd", "icd_cad", "icd_stroke", "icd_malignancy",
            "icu_adm_weekday"
        )
    )
)

                        Stratified by echo
                         0             1             p      test
  n                       3099          3063                    
  age (mean (sd))        66.69 (17.21) 65.82 (16.62)  0.045     
  gender = 1 (%)          1524 (49.2)   1603 (52.3)   0.014     
  weight (mean (sd))     78.56 (23.58) 82.98 (26.70) <0.001     
  elix_score (mean (sd))  8.51 (7.45)  10.05 (7.68)  <0.001     
  saps (mean (sd))       19.63 (5.79)  20.78 (5.45)  <0.001     
  sofa (mean (sd))        5.30 (3.62)   6.32 (3.80)  <0.001     
  vent = 1 (%)            1459 (47.1)   1808 (59.0)  <0.001     
  vaso = 1 (%)             839 (27.1)   1176 (38.4)  <0.001     
  sedative = 1 (%)        1247 (40.2)   1540 (50.3)  <0.001     
  icd_chf = 1 (%)          563 (18.2)   1198 (39.1)  <0.001     
  icd_afib = 1 (%)         622 (20.1)    988 (32.3)  <0.001     
  icd_renal = 1 (%)        436 (14.1)    503 (16.4)   0.011     
  icd_liver = 1 (%)        314 (10.1)    340 (1

In [7]:
# Build the raw "table one": one row per covariate comparing the two `echo`
# groups, both unweighted and weighted by `ps_weight`.
#
# Output columns (one row per covariate):
#   p_val, wtd_p_val           unweighted / weighted test p-values
#   main_0, main_1             mean (numeric) or percentage of 1s (two-level
#                              factor) per echo group
#   sub_0, sub_1               SD (numeric) or NA (factor) per echo group
#   wtd_main_*, wtd_sub_*      the same quantities weighted by ps_weight
#   var                        covariate name
#
# Handling by column type:
#   * numeric           -> t.test / wtd.t.test between the echo groups
#   * two-level factor  -> chisq.test / wtd.chi.sq against echo; the factor
#                          labels must be digits (they are parsed as integers)
#   * other factors     -> skipped (NULL, removed by discard() below)
#   * anything else, or a missing column -> error. Previously this fell
#     through to an unassigned `res`, which either failed with an opaque
#     "object 'res' not found" or silently reused a stale global `res`.
table_one_raw <- c("age", "female", "weight", "elix_score",
  "saps", "sofa", "vent", "vaso", "sedative",
  "icd_chf", "icd_afib", "icd_renal", "icd_liver",
  "icd_copd", "icd_cad", "icd_stroke", "icd_malignancy",
  "sunday", "monday", "tuesday", "wednesday",
  "thursday", "friday", "saturday",
  "vs_map_first", "vs_heart_rate_first", "vs_temp_first", "vs_cvp_first",
  "lab_wbc_first", "lab_hemoglobin_first", "lab_platelet_first",
  "lab_sodium_first", "lab_potassium_first", "lab_bicarbonate_first",
  "lab_chloride_first", "lab_bun_first", "lab_lactate_first",
  "lab_creatinine_first", "lab_ph_first", "lab_po2_first", "lab_pco2_first",
  "lab_bnp_flag", "lab_troponin_flag", "lab_creatinine_kinase_flag") %>%
map(function(var) {
    x <- full_data[[var]]
    if (is.null(x)) {
        stop("Covariate '", var, "' not found in full_data", call. = FALSE)
    }
    sym_var <- rlang::sym(var)
    echo_group <- full_data[["echo"]]
    ps_wt <- full_data[["ps_weight"]]

    if (is.numeric(x)) {
        # Continuous covariate: mean and SD per echo group, raw and weighted.
        summary_by_echo <- full_data %>%
            group_by(echo) %>%
            summarise(main = mean(!!sym_var, na.rm = TRUE),
                      sub = sd(!!sym_var, na.rm = TRUE),
                      wtd_main = wtd.mean(!!sym_var, ps_weight),
                      wtd_sub = sqrt(wtd.var(!!sym_var, ps_weight)))

        # Values and weights split by echo; element 1 / 2 follow split()'s
        # group order, exactly as the x / y assignment did before.
        values <- split(x, echo_group)
        wts <- split(ps_wt, echo_group)

        p_val <- t.test(x = values[[1]], y = values[[2]])$p.value
        wtd_p_val <- wtd.t.test(x = values[[1]], y = values[[2]],
                                weight = wts[[1]], weighty = wts[[2]],
                                samedata = FALSE)$coefficients[["p.value"]]
    } else if (is.factor(x)) {
        if (length(levels(x)) != 2) {
            # Only binary factors are summarised; others are dropped later.
            return(NULL)
        }
        # Binary covariate: percentage of 1s per echo group, raw and weighted.
        summary_by_echo <- full_data %>%
            mutate(!!sym_var := as.integer(as.character(!!sym_var))) %>%
            group_by(echo) %>%
            summarise(main = mean(!!sym_var, na.rm = TRUE) * 100,
                      sub = NA,
                      wtd_main = wtd.mean(!!sym_var, ps_weight) * 100,
                      wtd_sub = NA)

        p_val <- unname(chisq.test(x = x, y = echo_group)$p.value)
        wtd_p_val <- wtd.chi.sq(var1 = x, var2 = echo_group,
                                weight = ps_wt)[["p.value"]]
    } else {
        stop("Covariate '", var, "' has unsupported type '",
             paste(class(x), collapse = "/"),
             "'; expected numeric or a two-level factor", call. = FALSE)
    }

    # Attach the (group-independent) p-values to both group rows, then
    # reshape the two rows into a single wide row keyed by "<stat>_<echo>".
    cbind(summary_by_echo,
          data.frame(p_val = p_val, wtd_p_val = wtd_p_val)) %>%
        gather("key", "value", main, sub, wtd_main, wtd_sub) %>%
        unite("key", key, echo) %>%
        spread(key, value) %>%
        mutate(var = !!var)
}) %>%
discard(is.null) %>%
data.table::rbindlist() %>%
as.data.frame

head(table_one_raw)

p_val,wtd_p_val,main_0,main_1,sub_0,sub_1,wtd_main_0,wtd_main_1,wtd_sub_0,wtd_sub_1,var
0.04472893,0.06372199,66.689241,65.824051,17.207039,16.620088,66.847503,66.057657,16.794715,16.639186,age
0.0141642,0.34952619,50.822846,47.665687,,,49.39086,48.199442,,,female
6.415559e-11,0.12782063,78.562949,82.979049,23.579777,26.702495,80.238931,81.250426,24.310882,25.25576,weight
1.705518e-15,0.07885866,8.50597,10.045054,7.451124,7.681609,9.098576,9.438641,7.547186,7.639513,elix_score
1.378591e-15,0.12030359,19.632462,20.778975,5.788347,5.446293,20.013436,20.232637,5.650021,5.423473,saps
7.217556e-27,0.03089029,5.304937,6.323865,3.6181,3.796365,5.659572,5.861969,3.662696,3.69584,sofa


In [8]:
# Turn the raw summary into a presentation table:
#   * summary cells become "mean (sd)" when an SD is present, otherwise
#     "xx.xx%" (rows whose sub_* value is NA)
#   * p-values below 0.001 are shown as "<0.001"; all others are printed with
#     exactly three decimals. sprintf() is used instead of
#     as.character(round(., 3)) because the latter drops trailing zeros
#     ("0.35" next to "0.045"), giving inconsistent precision in the table.
#   * covariate names lose their vs_/lab_/icd_ and _flag/_first fragments,
#     underscores become spaces, and the result is title-cased
#   * columns are ordered Echo (group 1) before Non-Echo (group 0)
table_one_fmt <- table_one_raw %>%
    mutate(raw_0 = ifelse(is.na(sub_0), sprintf("%.2f%%", main_0),
                          sprintf("%.2f (%.2f)", main_0, sub_0)),
           raw_1 = ifelse(is.na(sub_1), sprintf("%.2f%%", main_1),
                          sprintf("%.2f (%.2f)", main_1, sub_1)),
           wtd_raw_0 = ifelse(is.na(wtd_sub_0), sprintf("%.2f%%", wtd_main_0),
                              sprintf("%.2f (%.2f)", wtd_main_0, wtd_sub_0)),
           wtd_raw_1 = ifelse(is.na(wtd_sub_1), sprintf("%.2f%%", wtd_main_1),
                              sprintf("%.2f (%.2f)", wtd_main_1, wtd_sub_1)),
           p_val = ifelse(p_val < 0.001, "<0.001", sprintf("%.3f", p_val)),
           wtd_p_val = ifelse(wtd_p_val < 0.001, "<0.001",
                              sprintf("%.3f", wtd_p_val))) %>%
    select(var, raw_1, raw_0, p_val, wtd_raw_1, wtd_raw_0, wtd_p_val) %>%
    # mutate() evaluates these in order, so each step sees the previous result.
    mutate(var = gsub("vs_", "", var),
           var = gsub("lab_", "", var),
           var = gsub("icd_", "", var),
           var = gsub("_flag", "", var),
           var = gsub("_first", "", var),
           var = gsub("_", " ", var),
           var = tools::toTitleCase(var)) %>%
    setNames(c("Covariate", "Echo", "Non-Echo", "p value",
               "Echo (weighted cohort)", "Non-Echo (weighted cohort)",
               "p value (weighted cohort)"))

table_one_fmt

Covariate,Echo,Non-Echo,p value,Echo (weighted cohort),Non-Echo (weighted cohort),p value (weighted cohort)
Age,65.82 (16.62),66.69 (17.21),0.045,66.06 (16.64),66.85 (16.79),0.064
Female,47.67%,50.82%,0.014,48.20%,49.39%,0.35
Weight,82.98 (26.70),78.56 (23.58),<0.001,81.25 (25.26),80.24 (24.31),0.128
Elix Score,10.05 (7.68),8.51 (7.45),<0.001,9.44 (7.64),9.10 (7.55),0.079
Saps,20.78 (5.45),19.63 (5.79),<0.001,20.23 (5.42),20.01 (5.65),0.12
Sofa,6.32 (3.80),5.30 (3.62),<0.001,5.86 (3.70),5.66 (3.66),0.031
Vent,59.03%,47.08%,<0.001,54.11%,50.67%,0.007
Vaso,38.39%,27.07%,<0.001,33.91%,30.84%,0.01
Sedative,50.28%,40.24%,<0.001,46.18%,43.09%,0.015
Chf,39.11%,18.17%,<0.001,30.38%,25.43%,<0.001


In [9]:
# Persist both versions of the table next to the input data:
# the formatted table and the underlying numeric summary.
data.table::fwrite(x = table_one_fmt,
                   file = file.path(data_dir, "tableone.csv"))
data.table::fwrite(x = table_one_raw,
                   file = file.path(data_dir, "tableone_raw.csv"))