In [1]:
library(RPostgreSQL)
library(Matching)
library(tidyverse)

Loading required package: DBI
Loading required package: MASS
## 
##  Matching (Version 4.9-2, Build Date: 2015-12-25)
##  See http://sekhon.berkeley.edu/matching for additional documentation.
##  Please cite software as:
##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
##   Software with Automated Balance Optimization: The Matching package for R.''
##   Journal of Statistical Software, 42(7): 1-52. 
##

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.8.0     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ dplyr::select() masks MASS::select()


In [2]:
# Sibling directories of the notebook's working directory.
sql_dir <- file.path("..", "sql")    # SQL scripts
data_dir <- file.path("..", "data")  # serialized inputs and exported results

In [3]:
# Open a connection to the local "mimic" PostgreSQL database.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
# Resolve unqualified table names against echo, public, mimiciii (in that
# order). dbExecute() runs the statement and releases its result set, whereas
# dbSendQuery() would leave an un-cleared result object attached to `con`.
invisible(dbExecute(con, "set search_path=echo,public,mimiciii;"))

<PostgreSQLResult>

In [4]:
# Fetch the `ventfreeday28` column for every row of `subgroup`, keyed by
# `icustay_id`, then preview the first rows.
ventfreedays <- dbGetQuery(
  conn = con,
  statement = "select icustay_id, ventfreeday28 from subgroup;"
)
head(ventfreedays)

icustay_id,ventfreeday28
200003,24.20833
200014,0.0
200030,28.0
200033,0.0
200036,28.0
200075,27.80903


In [5]:
# Load the saved analysis table; the preview below shows it already carries
# the propensity score (`ps`) and its weight (`ps_weight`).
full_data <- readRDS(file = file.path(data_dir, "full_data_ps.rds"))
head(full_data)

icustay_id,hadm_id,subject_id,first_careunit,intime,outtime,angus,age,icu_order,echo_time,⋯,lab_pco2_flag,lab_creatinine_flag,lab_potassium_flag,lab_troponin_flag,lab_po2_flag,lab_lactate_flag,echo_int,mort_28_day_int,ps,ps_weight
228416,159895,5491,MICU,2151-06-29 01:18:27,2151-07-27 16:00:56,1,76.58225,1,2151-06-30,⋯,1,1,1,0,1,1,1,0,0.5525232,1.809879
278148,169700,49395,MICU,2197-01-09 00:45:42,2197-01-10 06:46:29,1,82.72606,1,,⋯,0,1,1,0,0,0,0,1,0.2804613,1.389779
274174,103722,14855,MICU,2181-09-08 23:43:13,2181-09-26 16:52:02,1,61.8439,1,2181-09-11,⋯,1,1,1,0,1,1,1,0,0.301202,3.320031
214954,170515,54642,MICU,2176-03-11 04:58:23,2176-03-15 18:00:48,1,63.86507,1,2176-03-11,⋯,1,1,1,1,1,1,1,0,0.7982833,1.252688
222457,183493,96815,MICU,2105-06-04 20:57:55,2105-06-06 20:01:51,1,62.84018,1,,⋯,0,1,1,0,0,0,0,0,0.2244447,1.289399
264413,134244,81436,MICU,2133-03-27 16:31:37,2133-04-04 16:15:28,1,62.60148,1,2133-03-31,⋯,0,1,1,0,0,1,1,0,0.3320492,3.011602


In [6]:
# Seed the RNG ahead of the matching step so reruns start from the same state.
set.seed(seed = 4958)

In [7]:
# One-to-one matching on the propensity score without replacement:
# treatment indicator is `echo_int`, the only matching variable is `ps`.
# No outcome is supplied (Y = NULL), so only the match indices are of interest.
# NOTE(review): the Matching package documents `caliper` in standard-deviation
# units of X, so 0.01 is not an absolute propensity-score difference -- confirm
# this is the intended tolerance.
ps_matches <- with(
  full_data,
  Match(
    Y = NULL, Tr = echo_int, X = ps,
    estimand = "ATT", M = 1,
    caliper = 0.01, exact = FALSE, replace = FALSE
  )
)

“replace==FALSE, but there are more (weighted) treated obs than control obs.  Some treated obs will not be matched.  You may want to estimate ATC instead.”

In [8]:
# Pair table: `icustay_id` of each matched treated row alongside the
# `icustay_id` of the control row it was matched to (`match`).
matches_df <- mutate(
  full_data[ps_matches$index.treated, "icustay_id", drop = FALSE],
  match = full_data[ps_matches$index.control, "icustay_id"]
)

head(matches_df)

icustay_id,match
228416,262436
274174,244460
214954,298177
264413,290165
214106,278397
266275,212022


In [9]:
# Columns of `full_data` exported as covariates; the order here is the column
# order of the exported table.
features <- c(
  # patient and admission basics
  "age", "gender", "first_careunit", "weight",
  # scores and interventions
  "saps", "sofa", "elix_score",
  "vent", "vaso", "sedative",
  # icd_* indicators
  "icd_chf", "icd_afib", "icd_renal", "icd_liver",
  "icd_copd", "icd_cad", "icd_stroke", "icd_malignancy",
  # ICU admission timing
  "icu_adm_weekday", "icu_adm_hour",
  # vs_* columns
  "vs_map_first", "vs_heart_rate_first", "vs_temp_first", "vs_cvp_flag",
  # lab_*_first columns
  "lab_wbc_first", "lab_hemoglobin_first", "lab_platelet_first",
  "lab_sodium_first", "lab_potassium_first", "lab_bicarbonate_first",
  "lab_chloride_first", "lab_bun_first", "lab_lactate_first",
  "lab_creatinine_first", "lab_ph_first", "lab_po2_first", "lab_pco2_first",
  # lab_*_flag columns
  "lab_bnp_flag", "lab_troponin_flag", "lab_creatinine_kinase_flag"
)

In [10]:
# Keep the stay identifier plus the covariate columns listed in `features`.
covariates <- select(full_data, c("icustay_id", features))

# Turn the raw column names into human-readable labels. The named vector is
# pattern = replacement; str_replace_all() applies the rules one after another
# in the order written, so later rules see the output of earlier ones.
names(covariates) <- str_replace_all(
  names(covariates),
  c(
    "lab_|vs_|icd_|_first" = " ",             # strip source prefixes / suffix
    "_" = " ",                                # remaining underscores -> spaces
    "\\s+$|^\\s+" = "",                       # trim leading/trailing blanks
    "vent" = "ventilation use",
    "vaso" = "vasopressor use",
    "sedative" = "sedative use",
    "elix score" = "elixhauser score",
    "flag" = "(tested)",
    "cvp \\(tested\\)" = "cvp (measured)",    # cvp is the one non-lab flag
    "icustay id" = "icustay_id"               # restore the join key's name
  )
)

head(covariates)
names(covariates)

icustay_id,age,gender,first careunit,weight,saps,sofa,elixhauser score,ventilation use,vasopressor use,⋯,chloride,bun,lactate,creatinine,ph,po2,pco2,bnp (tested),troponin (tested),creatinine kinase (tested)
228416,76.58225,M,MICU,,24,3,10,1,0,⋯,92,35,1.4,1.2,7.5,98.0,56.0,0,0,0
278148,82.72606,F,MICU,90.3,25,8,15,0,0,⋯,113,32,,1.6,,,,0,0,0
274174,61.8439,M,MICU,53.6,20,5,6,0,0,⋯,90,64,1.6,2.7,7.38,110.0,40.0,0,0,0
214954,63.86507,M,MICU,92.9,24,8,6,1,1,⋯,111,34,3.8,1.3,7.2,85.0,42.0,0,1,1
222457,62.84018,F,MICU,75.0,10,1,6,0,0,⋯,112,17,,0.9,,,,0,0,0
264413,62.60148,F,MICU,80.0,15,4,2,0,1,⋯,103,17,1.4,0.8,,,,0,0,0


In [11]:
# Assemble the export table: outcomes, match id and propensity score first
# (renamed to readable labels while selecting), then the relabelled covariates.
# Rows without an entry in `matches_df` get a missing "match id".
result <- full_data %>%
  left_join(ventfreedays, by = "icustay_id") %>%
  left_join(matches_df, by = "icustay_id") %>%
  select(
    icustay_id,
    echo = echo_int,
    `icu length of stay` = icu_los_day,
    `28 day mortality` = mort_28_day_int,
    `ventilation free days (28 days)` = ventfreeday28,
    `match id` = match,
    `propensity score` = ps
  ) %>%
  left_join(covariates, by = "icustay_id")

head(result)

icustay_id,echo,icu length of stay,28 day mortality,ventilation free days (28 days),match id,propensity score,age,gender,first careunit,⋯,chloride,bun,lactate,creatinine,ph,po2,pco2,bnp (tested),troponin (tested),creatinine kinase (tested)
228416,1,28.612836,0,7.581632,262436.0,0.5525232,76.58225,M,MICU,⋯,92,35,1.4,1.2,7.5,98.0,56.0,0,0,0
278148,0,1.250544,1,0.0,,0.2804613,82.72606,F,MICU,⋯,113,32,,1.6,,,,0,0,0
274174,1,17.714456,0,28.0,244460.0,0.301202,61.8439,M,MICU,⋯,90,64,1.6,2.7,7.38,110.0,40.0,0,0,0
214954,1,4.543345,0,24.854167,298177.0,0.7982833,63.86507,M,MICU,⋯,111,34,3.8,1.3,7.2,85.0,42.0,0,1,1
222457,0,1.961065,0,28.0,,0.2244447,62.84018,F,MICU,⋯,112,17,,0.9,,,,0,0,0
264413,1,7.988785,0,24.25,290165.0,0.3320492,62.60148,F,MICU,⋯,103,17,1.4,0.8,,,,0,0,0


In [12]:
# Matching diagnostics (observation counts, drops by caliper). The estimate
# fields are not meaningful here because Match() was called with Y = NULL.
summary(object = ps_matches)


Estimate...  0 
SE.........  0 
T-stat.....  NaN 
p.val......  NA 

Original number of observations..............  6361 
Original number of treated obs...............  3262 
Matched number of observations...............  1626 
Matched number of observations  (unweighted).  1626 

Number of obs dropped by 'exact' or 'caliper'  1636 



In [13]:
# Paired 2x2 table of 28-day mortality: rows are the matched treated (echo)
# stays, columns are their matched controls; each cell counts matched pairs.
# The column is addressed by its exact name with `[[`: `mort_28_day_int` is the
# name shown in the data preview and used when building `result`, whereas
# `$mort_28_day` would depend on `$` partial matching.
tab <- table(full_data[["mort_28_day_int"]][ps_matches$index.treated],
             full_data[["mort_28_day_int"]][ps_matches$index.control],
             dnn = c("Echo", "Control"))
tab

    Control
Echo   0   1
   0 869 343
   1 267 147

In [14]:
# Matched-pairs odds ratio: ratio of the two discordant cells of `tab`.
tab[2, 1] / tab[1, 2]
# 95% confidence limits, built on the log scale from the discordant counts.
# paste() is vectorised, so this yields one labelled string per limit.
local({
  log_or <- log(tab[2, 1] / tab[1, 2])
  half_width <- qnorm(0.975) * sqrt(1 / tab[1, 2] + 1 / tab[2, 1])
  paste("95% Confint",
        round(exp(c(log_or - half_width, log_or + half_width)), 7))
})

In [15]:
# McNemar's test on the paired mortality table (continuity correction on,
# which is the function's default).
mcnemar.test(x = tab, correct = TRUE)


	McNemar's Chi-squared test with continuity correction

data:  tab
McNemar's chi-squared = 9.2213, df = 1, p-value = 0.002392


In [16]:
# Export the assembled per-stay table as CSV into the data directory.
data.table::fwrite(x = result, file = file.path(data_dir, "ps_details.csv"))