In [1]:
library(RPostgreSQL)
library(tidyverse)

Loading required package: DBI
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.8.0     ✔ stringr 1.3.0
✔ readr   1.1.1     ✔ forcats 0.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Locations of the project's data and SQL folders, both siblings of the
# directory this notebook runs from.
parent_dir <- ".."
data_dir <- file.path(parent_dir, "data")
sql_dir <- file.path(parent_dir, "sql")

In [3]:
# Open a connection to the "mimic" PostgreSQL database.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
# Set the schema search path for this session. SET returns no rows, so run it
# with dbExecute(), which clears its own result set, rather than leaving an
# uncleared result object behind from dbSendQuery(). invisible() suppresses
# the printed affected-row count.
invisible(dbExecute(con, "set search_path=echo,public,mimiciii;"))

<PostgreSQLResult>

In [4]:
# Query text that builds one row per ICU stay with an infection indicator.
#   infection_0: per hadm_id, 1 if any diagnoses_icd row has an icd9_code in
#                the listed set ('480' ... '488'), otherwise 0.
#   infection_1: merged_data left-joined to infection_0 on hadm_id.
#   infection:   icustay_id, echo, and the indicator, with admissions that
#                have no row in infection_0 coalesced to 0.
# NOTE(review): the IN list matches icd9_code exactly. If diagnoses_icd stores
# more specific codes under these categories (e.g. longer subcodes without a
# decimal point), those rows are not counted -- confirm this is intended.
sql <-
"
with infection_0 as (
    select hadm_id,
        max(case when icd9_code in ('480', '481', '482', '483', '484', '485', '486', '487', '488') then 1 else 0 end) as infection
    from diagnoses_icd
    group by hadm_id
)

, infection_1 as (
    select *
    from merged_data
    left join infection_0 using (hadm_id)
)

, infection as (
    select icustay_id, echo as echo, coalesce(infection, 0) as infection
    from infection_1
)

select * from infection;
"

In [5]:
# Run the infection query and pull the full result into a data frame.
infection <- dbGetQuery(conn = con, statement = sql)

# Quick look at the structure and the first rows of the result.
str(infection)
head(infection)

'data.frame':	6361 obs. of  3 variables:
 $ icustay_id: int  293543 280321 250198 245352 256601 240860 249040 209124 254710 288179 ...
 $ echo      : int  0 0 1 0 1 1 0 0 0 1 ...
 $ infection : int  0 0 0 0 0 0 0 1 0 0 ...


icustay_id,echo,infection
293543,0,0
280321,0,0
250198,1,0
245352,0,0
256601,1,0
240860,1,0


In [6]:
# Per echo group: number of ICU stays (n), number flagged with infection,
# and the resulting proportion (p).
infection %>%
  group_by(echo) %>%
  summarise(
    n = n(),
    infection = sum(infection)
  ) %>%
  mutate(p = infection / n)

echo,n,infection,p
0,3099,717,0.231365
1,3262,929,0.2847946


In [7]:
# Unmatched comparison: chi-squared test of association between the infection
# indicator and echo, with the (default) continuity correction made explicit.
chisq.test(x = infection$infection, y = infection$echo, correct = TRUE)


	Pearson's Chi-squared test with Yates' continuity correction

data:  infection$infection and infection$echo
X-squared = 23.375, df = 1, p-value = 1.333e-06


In [8]:
# Read the matched-pairs table from the data directory as a plain data.frame
# (data.table = FALSE) rather than a data.table.
ps_matches_path <- file.path(data_dir, "ps_matches_df.csv")
ps_matches_df <- data.table::fread(ps_matches_path, data.table = FALSE)

head(ps_matches_df)

icustay_id,group,match
262436,ctrl,1
244460,ctrl,2
298177,ctrl,3
290165,ctrl,4
278397,ctrl,5
212022,ctrl,6


In [9]:
# Attach the echo and infection columns to every matched ICU stay; rows of
# ps_matches_df are kept even when no infection row shares their icustay_id.
ps_df <- left_join(ps_matches_df, infection, by = "icustay_id")

head(ps_df)

icustay_id,group,match,echo,infection
262436,ctrl,1,0,0
244460,ctrl,2,0,0
298177,ctrl,3,0,0
290165,ctrl,4,0,0
278397,ctrl,5,0,0
212022,ctrl,6,0,0


In [10]:
# Echo side of each pair: its infection indicator becomes `trtd`.
echo_side <- ps_df %>%
  filter(echo == 1) %>%
  rename(trtd = infection)

# Non-echo side of each pair: its infection indicator becomes `ctrl`.
non_echo_side <- ps_df %>%
  filter(echo == 0) %>%
  rename(ctrl = infection)

# One row per match id, with both sides next to each other. Columns shared by
# the two sides get the default .x (echo) / .y (non-echo) suffixes.
matched <- inner_join(echo_side, non_echo_side, by = "match")

head(matched)

icustay_id.x,group.x,match,echo.x,trtd,icustay_id.y,group.y,echo.y,ctrl
228416,trtd,1,1,0,262436,ctrl,0,0
274174,trtd,2,1,0,244460,ctrl,0,0
214954,trtd,3,1,0,298177,ctrl,0,0
264413,trtd,4,1,0,290165,ctrl,0,0
214106,trtd,5,1,0,278397,ctrl,0,0
266275,trtd,6,1,0,212022,ctrl,0,0


In [11]:
# Paired contingency table of the infection indicator:
# rows = echo member of the pair, columns = non-echo member.
tab <- with(
  matched,
  table(trtd, ctrl, dnn = c("Echo", "Non-Echo"))
)
tab

    Non-Echo
Echo   0   1
   0 879 314
   1 332 101

In [12]:
# Discordant cells of the paired table.
n_echo_only <- tab[2, 1]      # second Echo level, first Non-Echo level
n_non_echo_only <- tab[1, 2]  # first Echo level, second Non-Echo level

# Ratio of the two discordant counts.
odds_ratio <- n_echo_only / n_non_echo_only
odds_ratio

# 95% interval: exponentiate log(ratio) -/+ z * sqrt(1/b + 1/c).
log_or_se <- sqrt(1 / n_non_echo_only + 1 / n_echo_only)
round(exp(log(odds_ratio) + c(-1, 1) * qnorm(0.975) * log_or_se), 7)

In [13]:
# McNemar's test on the paired table, with the (default) continuity
# correction made explicit.
mcnemar.test(x = tab, correct = TRUE)


	McNemar's Chi-squared test with continuity correction

data:  tab
McNemar's chi-squared = 0.44737, df = 1, p-value = 0.5036


In [14]:
# Release database resources: close the connection first, then unload the
# driver it was created from.
dbDisconnect(conn = con)
dbUnloadDriver(drv = drv)