# Import Libraries

In [2]:
import getpass
import os

import pandas as pd
import psycopg2

# MIMIC III DB connection

In [16]:
# Connection settings for the local copy of the MIMIC-III database.
sqluser = 'postgres'
dbname = 'mimic'
hostname = 'localhost'
port_number = 5434

# Connect to postgres with a copy of the MIMIC-III database.
# The password is deliberately NOT stored in the notebook: it is taken from the
# PGPASSWORD environment variable, and if that is unset or empty the user is
# prompted for it (the prompt needs an interactive kernel, so set PGPASSWORD
# for unattended "Run All" executions).
con = psycopg2.connect(
    dbname=dbname,
    user=sqluser,
    host=hostname,
    port=port_number,
    password=os.environ.get('PGPASSWORD') or getpass.getpass('Postgres password: ')
)

# Read Data

In [17]:
# Column names of the static variables of interest.
vars_static = [u'is_male', u'emergency_admission', u'age',
               # services
               u'service_any_noncard_surg',
               u'service_any_card_surg',
               u'service_cmed',
               u'service_traum',
               u'service_nmed',
               # ethnicities
               u'race_black',u'race_hispanic',u'race_asian',u'race_other',
               # body size
               u'height', u'weight', u'bmi']

# Extract static vars into a separate dataframe.
# This query used to be issued twice in this cell (before and after the list
# above) with identical text; one read is enough and halves the database work.
df_static = pd.read_sql_query('select * from mp_static_data', con)

In [18]:
# Display the static-variable dataframe; the rendered output below abbreviates
# the middle rows and columns with "...".
df_static

Unnamed: 0,subject_id,hadm_id,icustay_id,is_male,curr_service,service_med,service_cmed,service_omed,service_nmed,service_nsurg,...,service_any_card_surg,age,race_black,race_hispanic,race_asian,race_other,emergency_admission,height,weight,bmi
0,3,145834,211552,1,VSURG,0,0,0,0,0,...,1,76.5246,0,0,0,0,1,179.07,96.8,30.187677
1,4,185777,294638,0,MED,1,0,0,0,0,...,0,47.8450,0,0,0,0,1,,53.6,
2,6,107064,228232,0,SURG,0,0,0,0,0,...,0,65.9398,0,0,0,0,0,,,
3,9,150750,220597,1,NMED,0,0,0,1,0,...,0,41.7887,0,0,0,1,1,182.88,104.0,31.095741
4,11,194540,229441,0,NSURG,0,0,0,0,1,...,0,50.1476,0,0,0,0,1,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52045,99985,176670,279638,1,MED,1,0,0,0,0,...,0,53.8136,0,0,0,0,1,,,
52046,99991,151118,226241,1,SURG,0,0,0,0,0,...,0,47.7273,0,0,0,0,0,,100.5,
52047,99992,197084,242052,0,MED,1,0,0,0,0,...,0,65.7701,0,0,0,0,1,,65.4,
52048,99995,137810,229633,0,VSURG,0,0,0,0,0,...,1,88.6974,0,0,0,0,0,159.00,68.0,26.897670


In [12]:
# Load every row of mp_data, discard the subject/admission identifiers and
# order the rows by ICU stay and then by the `hr` column.
df = (
    pd.read_sql_query('select * from mp_data', con)
    .drop(['subject_id', 'hadm_id'], axis=1)
    .sort_values(['icustay_id', 'hr'], axis=0, ascending=True)
)

In [20]:
# Death information, one row per non-excluded cohort ICU stay joined to its
# hospital admission:
#   dischtime_hours - hours from ICU intime to ICU outtime, rounded up
#   deathtime_hours - hours from ICU intime to the admission's deathtime, rounded up
#   death           - 1 when the admission has a deathtime, otherwise 0
# NOTE(review): the recorded run of this cell failed with
# 'relation "dm_cohort" does not exist' - confirm the cohort table's name and
# schema before relying on this query.
df_death = pd.read_sql_query(
    sql="""
select 
co.subject_id, co.hadm_id, co.icustay_id
, ceil(extract(epoch from (co.outtime - co.intime))/60.0/60.0) as dischtime_hours
, ceil(extract(epoch from (adm.deathtime - co.intime))/60.0/60.0) as deathtime_hours
, case when adm.deathtime is null then 0 else 1 end as death
from dm_cohort co
inner join admissions adm
on co.hadm_id = adm.hadm_id
where co.excluded = 0
""",
    con=con,
)

DatabaseError: Execution failed on sql '
select 
co.subject_id, co.hadm_id, co.icustay_id
, ceil(extract(epoch from (co.outtime - co.intime))/60.0/60.0) as dischtime_hours
, ceil(extract(epoch from (adm.deathtime - co.intime))/60.0/60.0) as deathtime_hours
, case when adm.deathtime is null then 0 else 1 end as death
from dm_cohort co
inner join admissions adm
on co.hadm_id = adm.hadm_id
where co.excluded = 0
': relation "dm_cohort" does not exist
LINE 7: from dm_cohort co
             ^


In [None]:
# Censoring information, one row per non-excluded cohort ICU stay that has at
# least one mp_code_status row where cmo+dnr+dni+dncpr+cmo_notes is positive:
#   censortime       - earliest charttime among those rows
#   censortime_hours - smallest (charttime - ICU intime) in hours, rounded up
df_censor = pd.read_sql_query(
    sql="""
select co.icustay_id, min(cs.charttime) as censortime
, ceil(extract(epoch from min(cs.charttime-co.intime) )/60.0/60.0) as censortime_hours
from dm_cohort co 
inner join mp_code_status cs
on co.icustay_id = cs.icustay_id
where cmo+dnr+dni+dncpr+cmo_notes>0
and co.excluded = 0
group by co.icustay_id
""",
    con=con,
)

In [None]:
# Load the full cohort table. The query itself applies no filter; the
# exclusion criteria noted for this cohort are:
#   - less than 15 years old
#   - stayed in the ICU less than 4 hours
#   - never have any chartevents data (i.e. likely administrative error)
#   - organ donor accounts (administrative "readmissions" for patients who died in hospital)
query = query_schema + """
select 
    *
from dm_cohort
"""
co = pd.read_sql_query(query, con)

# Cast every column whose name begins with 'inclusion_' to boolean.
for col in co.columns:
    if col.startswith('inclusion_'):
        co[col] = co[col].astype(bool)

# Pull the static variables into their own dataframe.
df_static = pd.read_sql_query(
    query_schema + 'select * from mp_static_data',
    con,
)
# Datetime conversion is currently disabled:
#for dtvar in ['intime','outtime','deathtime']:
#    df_static[dtvar] = pd.to_datetime(df_static[dtvar])

# Column names of the static variables of interest, grouped by theme.
vars_static = [
    u'is_male',
    u'emergency_admission',
    u'age',
    # services
    u'service_any_noncard_surg',
    u'service_any_card_surg',
    u'service_cmed',
    u'service_traum',
    u'service_nmed',
    # ethnicities
    u'race_black',
    u'race_hispanic',
    u'race_asian',
    u'race_other',
    # body size
    u'height',
    u'weight',
    u'bmi',
]


# Load the data for all patients (noted as ~5 million rows).
# Loading into memory is slow (~2 minutes); a previously recorded %%time gave:
#   CPU times: user 42.8 s, sys: 1min 3s, total: 1min 46s
#   Wall time: 2min 7s
# The subject/admission identifiers are dropped and the rows are ordered by
# ICU stay and then by the `hr` column.
df = (
    pd.read_sql_query(query_schema + 'select * from mp_data', con)
    .drop(['subject_id', 'hadm_id'], axis=1)
    .sort_values(['icustay_id', 'hr'], axis=0, ascending=True)
)

# Death information, one row per non-excluded cohort ICU stay joined to its
# hospital admission:
#   dischtime_hours - hours from ICU intime to ICU outtime, rounded up
#   deathtime_hours - hours from ICU intime to the admission's deathtime, rounded up
#   death           - 1 when the admission has a deathtime, otherwise 0
df_death = pd.read_sql_query(
    sql=query_schema + """
select 
co.subject_id, co.hadm_id, co.icustay_id
, ceil(extract(epoch from (co.outtime - co.intime))/60.0/60.0) as dischtime_hours
, ceil(extract(epoch from (adm.deathtime - co.intime))/60.0/60.0) as deathtime_hours
, case when adm.deathtime is null then 0 else 1 end as death
from dm_cohort co
inner join admissions adm
on co.hadm_id = adm.hadm_id
where co.excluded = 0
""",
    con=con,
)

# Censoring information, one row per non-excluded cohort ICU stay that has at
# least one mp_code_status row where cmo+dnr+dni+dncpr+cmo_notes is positive:
#   censortime       - earliest charttime among those rows
#   censortime_hours - smallest (charttime - ICU intime) in hours, rounded up
df_censor = pd.read_sql_query(query_schema + """
select co.icustay_id, min(cs.charttime) as censortime
, ceil(extract(epoch from min(cs.charttime-co.intime) )/60.0/60.0) as censortime_hours
from dm_cohort co 
inner join mp_code_status cs
on co.icustay_id = cs.icustay_id
where cmo+dnr+dni+dncpr+cmo_notes>0
and co.excluded = 0
group by co.icustay_id
""", con)

# df_static is not re-read here: the same 'select * from mp_static_data' query
# has already populated it earlier in this cell, so a second identical read
# only repeated the database work.