In [1]:
library(RPostgreSQL)
library(Matching)
library(tidyverse)

Loading required package: DBI
Loading required package: MASS
## 
##  Matching (Version 4.9-2, Build Date: 2015-12-25)
##  See http://sekhon.berkeley.edu/matching for additional documentation.
##  Please cite software as:
##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
##   Software with Automated Balance Optimization: The Matching package for R.''
##   Journal of Statistical Software, 42(7): 1-52. 
##

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.8.0     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ dplyr::select() masks MASS::select()


In [2]:
# Relative locations of the project's data and SQL folders, both one level
# above the working directory.
data_dir <- file.path("..", "data")
# NOTE(review): sql_dir is not referenced by any of the visible cells.
sql_dir <- file.path("..", "sql")

In [3]:
# Open a connection to the local "mimic" PostgreSQL database.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
# Put the echo schema first on the search path so the queries below can use
# unqualified table names. dbSendQuery() returns a result handle even for a
# SET statement; clear it straight away so no open result set stays attached
# to the connection.
invisible(dbClearResult(
  dbSendQuery(con, "set search_path=echo,public,mimiciii;")
))
# NOTE(review): nothing in the visible cells closes `con`; call
# dbDisconnect(con) once the last query has run.

In [4]:
# Pull the 28-day ventilation-free-day value for every ICU stay in the
# `subgroup` table, then preview the first rows.
ventfreedays <- dbGetQuery(
  conn = con,
  statement = "select icustay_id, ventfreeday28 from subgroup;"
)
head(ventfreedays)

icustay_id,ventfreeday28
200003,24.20833
200014,0.0
200030,28.0
200033,0.0
200036,28.0
200075,27.80903


In [5]:
# Load the analysis data set. The cells below read its `icustay_id`,
# `echo_int`, `ps` and outcome columns.
full_data <- readRDS(file.path(data_dir, "full_data_ps.rds"))

In [6]:
# List the columns available in the loaded data set.
names(full_data)

In [7]:
# Seed the RNG before matching so repeated runs start from the same random
# state.
set.seed(4958)

In [8]:
# 1:1 propensity-score matching of echo (treated) stays to non-echo stays,
# without replacement. No outcome is passed (Y = NULL): only the matched
# row indices are used by the cells below.
# NOTE(review): Match() documents `caliper` in standard-deviation units of
# X; confirm that 0.01 SD of the propensity score is the intended width.
ps_matches <- Match(
  Tr = full_data[["echo_int"]],
  X = full_data[["ps"]],
  Y = NULL,
  estimand = "ATT",
  M = 1,
  caliper = 0.01,
  replace = FALSE,
  exact = FALSE
)

“replace==FALSE, but there are more (weighted) treated obs than control obs.  Some treated obs will not be matched.  You may want to estimate ATC instead.”

In [9]:
# Pair table: the id of each matched treated stay next to the id of the
# control stay it was matched to (index.treated / index.control are read
# position by position).
matches_df <- mutate(
  full_data[ps_matches$index.treated, "icustay_id", drop = FALSE],
  match = full_data[ps_matches$index.control, "icustay_id"]
)

head(matches_df)

icustay_id,match
228416,262436
274174,244460
214954,298177
264413,290165
214106,278397
266275,212022


In [10]:
# Assemble the per-stay export table: attach ventilation-free days and the
# matched control id, keep the reporting columns, and give them
# human-readable headers. `matches_df` is keyed by the treated stay's id, so
# "Match id" is only filled on treated rows; all other rows get NA from the
# left join.
result <- full_data %>%
  left_join(ventfreedays, by = "icustay_id") %>%
  left_join(matches_df, by = "icustay_id") %>%
  select(
    icustay_id,
    Echo = echo_int,
    `ICU length of stay` = icu_los_day,
    `28 day mortality` = mort_28_day_int,
    `Ventilation free days (28 days)` = ventfreeday28,
    `Match id` = match,
    `Propensity score` = ps
  )

head(result)

icustay_id,Echo,ICU length of stay,28 day mortality,Ventilation free days (28 days),Match id,Propensity score
228416,1,28.612836,0,7.581632,262436.0,0.5525232
278148,0,1.250544,1,0.0,,0.2804613
274174,1,17.714456,0,28.0,244460.0,0.301202
214954,1,4.543345,0,24.854167,298177.0,0.7982833
222457,0,1.961065,0,28.0,,0.2244447
264413,1,7.988785,0,24.25,290165.0,0.3320492


In [11]:
# Matching diagnostics. Match() was called with Y = NULL, so the
# estimate/SE/p-value lines of this summary carry no information; the
# observation counts are the useful part.
summary(ps_matches)


Estimate...  0 
SE.........  0 
T-stat.....  NaN 
p.val......  NA 

Original number of observations..............  6361 
Original number of treated obs...............  3262 
Matched number of observations...............  1626 
Matched number of observations  (unweighted).  1626 

Number of obs dropped by 'exact' or 'caliper'  1636 



In [12]:
# Cross-tabulate 28-day mortality within matched pairs: rows are the treated
# (echo) member of each pair, columns are its matched control. The
# off-diagonal cells hold the discordant pairs.
# The outcome column is addressed by its exact name with `[[` (the same
# `mort_28_day_int` column that is selected into `result`), so the lookup
# never depends on `$` partial name matching.
tab <- table(
  full_data[["mort_28_day_int"]][ps_matches$index.treated],
  full_data[["mort_28_day_int"]][ps_matches$index.control],
  dnn = c("Echo", "Control")
)
tab

    Control
Echo   0   1
   0 869 343
   1 267 147

In [13]:
# Matched-pairs odds ratio: the ratio of the two discordant cells of `tab`.
tab[2, 1] / tab[1, 2]
# 95% interval built on the log scale and exponentiated back; the standard
# error of the log odds ratio is sqrt(1/b + 1/c) over the discordant counts.
# local() keeps the intermediates out of the global environment while still
# returning the formatted bounds for display.
local({
  log_or <- log(tab[2, 1] / tab[1, 2])
  se_log_or <- sqrt(1 / tab[1, 2] + 1 / tab[2, 1])
  half_width <- qnorm(0.975) * se_log_or
  paste(
    "95% Confint",
    round(exp(c(log_or - half_width, log_or + half_width)), 7)
  )
})

In [14]:
# Paired test of 28-day mortality between echo stays and their matched
# controls, run on the pair-level 2x2 table.
mcnemar.test(tab)


	McNemar's Chi-squared test with continuity correction

data:  tab
McNemar's chi-squared = 9.2213, df = 1, p-value = 0.002392


In [15]:
# Export the per-stay table to CSV in the data folder.
data.table::fwrite(result, file.path(data_dir, "table-5-columns.csv"))