In [1]:
library(RPostgreSQL)
library(tidyverse)

Loading required package: DBI
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.8.0     ✔ stringr 1.3.0
✔ readr   1.1.1     ✔ forcats 0.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Relative paths, one level above the working directory, to the folder of
# SQL scripts and to the folder where data files are written.
sql_dir  <- file.path("..", "sql")
data_dir <- file.path("..", "data")

In [3]:
# Open a connection to the "mimic" PostgreSQL database.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")

# Resolve unqualified table names against the echo, public and mimiciii
# schemas (in that order) for the rest of this session.
# dbExecute() is used instead of dbSendQuery() because SET returns no rows:
# dbSendQuery() hands back an open result handle that was never cleared,
# while dbExecute() runs the statement and releases the result itself.
# invisible() keeps the rows-affected count out of the cell output.
invisible(dbExecute(con, "set search_path=echo,public,mimiciii;"))

In [4]:
# SQL text for the infection / echo summary. Steps, one per CTE:
#   infection               - one row per hadm_id in diagnoses_icd; the flag is
#                             1 when any row of that admission has an
#                             icd9_code equal to one of the listed codes,
#                             otherwise 0.
#   merged_data_infection   - merged_data left-joined to that flag on hadm_id,
#                             so rows without a match keep a NULL flag.
#   infection_summary_1     - keeps icustay_id and echo, and turns a NULL flag
#                             into 0 via coalesce().
#   infection_summary       - per flag value: row count, sum of echo, and
#                             sum(echo) / count(*); the "* 1.0" presumably
#                             avoids integer division (assumes echo is an
#                             integer 0/1 column - TODO confirm).
#
# NOTE(review): the codes look like ICD-9 categories 480-488 and are compared
# by exact equality. If icd9_code stores full codes without the decimal point
# (e.g. 4-5 character values), only codes that are exactly three characters
# long can match and the remaining categories are silently missed - confirm
# against the table, and whether a prefix match was intended.
sql <-
"
with infection as (
    select hadm_id,
        max(case when icd9_code in ('480', '481', '482', '483', '484', '485', '486', '487', '488') then 1 else 0 end) as infection
    from diagnoses_icd
    group by hadm_id
)

, merged_data_infection as (
    select *
    from merged_data
    left join infection using (hadm_id)
)

, infection_summary_1 as (
    select icustay_id,
        coalesce(infection, 0) as infection,
        echo
    from merged_data_infection
)

, infection_summary as (
    select infection,
        count(*) as total,
        sum(echo) as echo,
        sum(echo) * 1.0 / count(*) as proportion
    from infection_summary_1
    group by infection
)

select * from infection_summary;
"

In [5]:
# Run the summary query on the open connection and keep the result as a
# data frame, then display it as the cell output.
infection <- dbGetQuery(conn = con, statement = sql)
infection

infection,total,echo,proportion
0,4715,2333,0.4948038
1,1646,929,0.5643985


In [6]:
# The query result is already held in memory, so release the database
# resources: close the connection first, then unload the driver.
dbDisconnect(conn = con)
dbUnloadDriver(drv = drv)

In [7]:
# Save the summary table as infection.csv inside the data directory.
data.table::fwrite(
  x = infection,
  file = file.path(data_dir, "infection.csv")
)