In [1]:
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2
import pandas as pd
import seaborn as sns

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Connection settings for the local MIMIC-III demo database.
dbname = 'mimic_demo_db'
username = 'nwespe'
schema_name = 'mimiciii_demo'

# 'engine' is a SQLAlchemy connection factory for the database.
# Postgres is used here, but SQLAlchemy can target other backends too.
# Replace localhost with an IP address if accessing a remote server.
engine = create_engine('postgresql://%s@localhost/%s' % (username, dbname))
# print() with a single argument works identically on Python 2 and Python 3.
print(engine.url)

# Raw psycopg2 connection and cursor used by every query cell below.
con = psycopg2.connect(database=dbname, user=username, host='localhost')  # , password=pswd
cur = con.cursor()
# Make unqualified table names resolve against the demo schema.
cur.execute('SET search_path to ' + schema_name)

postgresql://nwespe@localhost/mimic_demo_db


In [5]:
# Rebuild the `testing` materialized view: every column of infection_admissions
# plus the icd9_code of the seq_num = 1 row in diagnoses_icd for the same hadm_id.
# The LEFT JOIN keeps admissions that have no seq_num = 1 diagnosis row.
# NOTE: this reads from infection_admissions, a materialized view that this
# notebook only creates in a later cell; that view must already exist in the DB.
sql_query = """
DROP MATERIALIZED VIEW IF EXISTS testing CASCADE;
CREATE MATERIALIZED VIEW testing AS
WITH primary_diagnoses AS
(SELECT subject_id, hadm_id, icd9_code FROM diagnoses_icd WHERE seq_num = 1)
SELECT adm.*, diag.icd9_code 
FROM infection_admissions adm
LEFT JOIN primary_diagnoses diag ON adm.hadm_id = diag.hadm_id;
"""
cur.execute(sql_query)
# Commit so the DROP/CREATE is persisted before the view is read back.
con.commit()
details = pd.read_sql_query('SELECT * FROM testing', con)
details.head()

Unnamed: 0,row_id,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,religion,marital_status,ethnicity,edregtime,edouttime,diagnosis,hospital_expire_flag,has_chartevents_data,icd9_code
0,12286,10036,189483,2185-03-24 16:56:00,2185-03-26 09:15:00,2185-03-26 09:15:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,JEWISH,MARRIED,WHITE,2185-03-24 12:38:00,2185-03-24 19:14:00,SEPSIS,1,1,389
1,12295,10045,126949,2129-11-24 00:31:00,2129-12-01 01:45:00,2129-12-01 01:45:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,PROTESTANT QUAKER,MARRIED,WHITE,2129-11-23 18:26:00,2129-11-24 01:35:00,FEVER,1,1,383
2,12305,10056,100375,2129-05-02 00:12:00,2129-05-06 13:40:00,NaT,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,SNF,Medicare,,CHRISTIAN SCIENTIST,UNKNOWN (DEFAULT),WHITE,NaT,NaT,SEPSIS,0,1,389
3,12339,10088,149044,2107-05-12 18:00:00,2107-05-18 13:30:00,NaT,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,SNF,Private,,UNOBTAINABLE,UNKNOWN (DEFAULT),WHITE,NaT,NaT,UROSEPSIS,0,1,389
4,12337,10088,169938,2107-01-04 11:59:00,2107-01-11 15:45:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,,UNOBTAINABLE,UNKNOWN (DEFAULT),WHITE,2107-01-04 21:47:00,2107-01-05 01:38:00,SEPSIS;PNEUMONIA;TELEMETRY,0,1,389


In [4]:
# Load the whole labsfirst6h relation and preview its first rows.
sql_query = """
SELECT * FROM labsfirst6h;
"""
details = pd.read_sql_query(sql=sql_query, con=con)
details.head()

Unnamed: 0,subject_id,hadm_id,aniongap_min,aniongap_max,albumin_min,albumin_max,bands_min,bands_max,bicarbonate_min,bicarbonate_max,...,inr_min,inr_max,pt_min,pt_max,sodium_min,sodium_max,bun_min,bun_max,wbc_min,wbc_max
0,10006,142345,12.0,20.0,2.7,3.4,,,29.0,31.0,...,2.8,3.5,21.2,23.5,139.0,139.0,9.0,11.0,4.6,7.8
1,10011,105331,12.0,12.0,2.6,2.6,2.0,2.0,23.0,23.0,...,8.7,8.7,35.4,35.4,136.0,136.0,3.0,3.0,10.6,10.6
2,10013,165520,13.0,13.0,,,13.0,13.0,29.0,29.0,...,1.5,1.5,14.6,14.6,136.0,138.0,32.0,32.0,13.8,16.2
3,10017,199207,16.0,16.0,,,12.0,12.0,26.0,26.0,...,1.0,1.0,11.3,11.3,137.0,137.0,10.0,10.0,30.5,30.5
4,10019,177759,20.0,20.0,3.2,3.2,,,16.0,16.0,...,2.1,6.8,17.6,32.2,137.0,137.0,53.0,53.0,3.7,3.7


In [9]:
# One row per admission present in all three relations (inner joins on hadm_id):
# the height record, the first weight with its chart time, and the admission
# time, age and gender from admit_info.
sql_query = """
SELECT h.*
 , w.weight_first, w.charttime as weight_charttime
 , adm.admittime, adm.age, adm.gender
FROM height h
JOIN weight w ON w.hadm_id = h.hadm_id
JOIN admit_info adm ON adm.hadm_id = h.hadm_id;"""

adm_phys_info = pd.read_sql_query(sql=sql_query, con=con)

In [10]:
# Display the full height/weight/admission frame loaded by the previous query.
adm_phys_info

Unnamed: 0,hadm_id,subject_id,height_first,charttime,weight_first,weight_charttime,admittime,age,gender
0,165520,10013,165.1,2125-10-04 23:32:00,95.0,2125-10-04 23:30:00,2125-10-04 23:36:00,87.0874,F
1,177759,10019,180.34,2163-05-14 19:55:00,105.3,2163-05-14 19:45:00,2163-05-14 20:43:00,48.9014,M
2,126949,10045,165.1,2129-11-25 21:55:00,89.6,2129-11-25 08:55:00,2129-11-24 00:31:00,68.6669,F
3,126002,40655,154.94,2144-07-19 00:28:00,72.2,2144-07-19 00:28:00,2144-07-18 19:32:00,300.0033,F
4,125013,41976,175.26,2201-09-29 10:14:00,77.0,2201-09-28 21:27:00,2201-09-28 16:47:00,65.1697,M
5,151798,41976,175.26,2202-02-16 17:07:00,74.1,2202-02-16 17:07:00,2202-02-15 19:01:00,65.5532,M
6,176016,41976,167.64,2199-02-01 09:52:00,70.0,2199-02-01 02:09:00,2199-01-31 22:26:00,62.5145,M
7,179418,41976,175.26,2202-01-01 00:42:00,74.8,2202-01-01 00:59:00,2201-12-31 19:19:00,65.4273,M
8,163189,44212,157.48,2123-11-26 11:42:00,92.0,2123-11-26 11:42:00,2123-11-24 14:14:00,45.44,F


In [7]:
# Load the complete diagnoses_icd table and preview it.
sql_query = """
SELECT * FROM diagnoses_icd;
"""
diagnoses = pd.read_sql_query(sql_query,con) 
diagnoses.head()
# Goal: the set of subject_ids associated with one or more ICD codes whose
# first 3 digits are lower than 140.
# First remove rows with codes starting with V or E.
#
# The pandas implementation below is commented out and does not run; note that
# `infected_admits` is therefore not defined by this cell.

# diagnoses['icd9_letter'] = diagnoses.icd9_code.str[0]
# diagnoses = diagnoses[(diagnoses.icd9_letter != 'E') & (diagnoses.icd9_letter != 'V')]
# diagnoses['icd9_3digit'] = diagnoses.icd9_code.str[:3]
# diagnoses.icd9_3digit = pd.to_numeric(diagnoses.icd9_3digit)

# infection_diagnoses = diagnoses[(diagnoses.icd9_3digit < 140) & (diagnoses.seq_num == 1)]
# infected_admits = set(infection_diagnoses.hadm_id)  # admission ids that had an infection ICD-9 code

Unnamed: 0,row_id,subject_id,hadm_id,seq_num,icd9_code
0,112565,10032,140372,3,5070
1,112566,10032,140372,4,42830
2,112567,10032,140372,5,4280
3,112568,10032,140372,6,2851
4,112569,10032,140372,7,2765


In [5]:
# Flag each diagnoses_icd row as infection (1) or not (0) by matching the
# 3-, 4- or 5-character prefix of icd9_code against the hard-coded lists below,
# then keep only flagged rows with seq_num = 1.
# NOTE: this overwrites the `diagnoses` frame loaded from the full table above.
sql_query = """
WITH infection_group AS
(SELECT subject_id, hadm_id, icd9_code, seq_num,
	CASE
		WHEN substring(icd9_code,1,3) IN ('001','002','003','004','005','008',
			   '009','010','011','012','013','014','015','016','017','018',
			   '020','021','022','023','024','025','026','027','030','031',
			   '032','033','034','035','036','037','038','039','040','041',
			   '090','091','092','093','094','095','096','097','098','100',
			   '101','102','103','104','110','111','112','114','115','116',
			   '117','118','320','322','324','325','420','421','451','461',
			   '462','463','464','465','481','482','485','486','494','510',
			   '513','540','541','542','566','567','590','597','601','614',
			   '615','616','681','682','683','686','730') THEN 1
		WHEN substring(icd9_code,1,4) IN ('5695','5720','5721','5750','5990','7110',
				'7907','9966','9985','9993') THEN 1
		WHEN substring(icd9_code,1,5) IN ('49121','56201','56203','56211','56213',
				'56983') THEN 1
		ELSE 0 END AS infection
	FROM diagnoses_icd)
SELECT subject_id, hadm_id, icd9_code
FROM infection_group
WHERE infection = 1
AND seq_num = 1;
"""
diagnoses = pd.read_sql_query(sql_query,con)

In [6]:
# Display the infection primary-diagnosis rows returned by the query above.
diagnoses

Unnamed: 0,subject_id,hadm_id,icd9_code
0,10036,189483,389
1,10038,111115,99667
2,10045,126949,383
3,10056,100375,389
4,10088,149044,389
5,10088,169938,389
6,10094,122928,389
7,10013,165520,389
8,10076,198503,486
9,10019,177759,389


In [None]:
# Rebuild the infection_admissions materialized view: all admissions columns
# for the admissions whose hadm_id is in the supplied tuple.
sql_query = """
DROP MATERIALIZED VIEW IF EXISTS infection_admissions CASCADE;
CREATE MATERIALIZED VIEW infection_admissions AS 
SELECT * FROM admissions
WHERE hadm_id IN %s;"""

# The original cell read `infected_admits`, which is only assigned inside
# commented-out code, so it raised NameError on a fresh kernel. The ids are
# taken from `diagnoses` instead.
# NOTE(review): assumes `diagnoses` still holds the infection primary-diagnosis
# query result (cells run top to bottom) - confirm this is the intended cohort.
# Values are cast to plain int because psycopg2 cannot adapt numpy integers on
# Python 3; sorting makes the generated SQL deterministic.
hadm_ids = tuple(sorted({int(hadm_id) for hadm_id in diagnoses.hadm_id}))
if not hadm_ids:
    # An empty tuple would render as "IN ()", which is invalid SQL.
    raise ValueError('no infection admissions found; refusing to rebuild infection_admissions')

# psycopg2 expands a Python tuple parameter into a parenthesised SQL list.
cur.execute(sql_query, (hadm_ids,))

In [None]:
# Commit the open transaction on `con` (the preceding DROP/CREATE statements).
con.commit()

In [None]:
# Read the infection_admissions view back into pandas and preview it.
infection_admissions = pd.read_sql_query(
    sql='SELECT * FROM infection_admissions',
    con=con,
)
infection_admissions.head()

In [None]:
# Age at admission in years for every admissions/patients pair matched on
# subject_id: the DAY, HOUR and MINUTE components of (admittime - dob) are
# combined into fractional days and divided by 365.25.
sql_query = """
SELECT (extract(DAY FROM ad.admittime - p.dob)
            + extract(HOUR FROM ad.admittime - p.dob) / 24
            + extract(MINUTE FROM ad.admittime - p.dob) / 24 / 60
            ) / 365.25
            AS age
      FROM admissions ad
      INNER JOIN patients p
      ON ad.subject_id = p.subject_id
"""
ages = pd.read_sql_query(sql_query,con)
# Displays the whole single-column result.
ages

In [None]:
# Frequency tables for in-hospital death flag and admission source in the
# infection cohort. print() with one argument behaves the same on Python 2
# and Python 3, unlike the print statement.
print(infection_admissions.hospital_expire_flag.value_counts())
print(infection_admissions.admission_location.value_counts())

In [None]:
# Chart events for every infection admission, with the item label looked up
# in d_items. Both joins are LEFT JOINs, so admissions without chart events
# still produce a row.
sql_query = """
SELECT ia.hadm_id, ia.subject_id, ia.admittime, ce.icustay_id, ce.charttime, 
ia.hospital_expire_flag, di.itemid, di.label, ce.value, ce.valuenum, ce.valueuom, 
ce.warning, ce.error, ce.resultstatus
FROM infection_admissions ia
LEFT JOIN chartevents ce ON ia.hadm_id = ce.hadm_id
LEFT JOIN d_items di ON ce.itemid = di.itemid;"""

subset = pd.read_sql_query(sql=sql_query, con=con)

In [None]:
# Hours elapsed between admission and each chart event.
one_hour = np.timedelta64(1, 'h')
subset['chartlag'] = (subset['charttime'] - subset['admittime']) / one_hour

In [None]:
# Preview the chart-event frame, including the derived chartlag column.
subset.head()

In [None]:
# Keep chart events with chartlag below 3 (rows with a missing chartlag
# compare False and are dropped).
is_early = subset['chartlag'] < 3
early_charts = subset.loc[is_early]
early_charts

In [None]:
# Sorted distinct subject ids that have at least one early chart event.
# `unique` in the %pylab namespace is numpy.unique; spell it out explicitly.
np.unique(early_charts['subject_id'])

In [None]:
# Prescriptions of drug_type 'MAIN' for the infection admissions.
# NOTE(review): the WHERE clause filters on pr.drug_type, a column of the
# right-hand table, so rows where the LEFT JOIN found no prescription
# (pr.* is NULL) are discarded and the query behaves like an INNER JOIN.
# If admissions without a 'MAIN' prescription should be kept, move the
# predicate into the ON clause - confirm the intent before changing it.
sql_query = """
SELECT ia.hadm_id, ia.subject_id, ia.admittime, pr.icustay_id, pr.startdate, 
ia.hospital_expire_flag, pr.drug
FROM infection_admissions ia
LEFT JOIN prescriptions pr ON ia.hadm_id = pr.hadm_id
WHERE pr.drug_type = 'MAIN';"""

prescriptions = pd.read_sql_query(sql_query, con)

In [None]:
# Fetch the itemid of every labevents row, then list the sorted distinct ids.
sql_query = """
SELECT le.itemid
FROM labevents le;
"""
find_items = pd.read_sql_query(sql=sql_query, con=con)
# `unique` in the %pylab namespace is numpy.unique; spell it out explicitly.
np.unique(find_items['itemid'])

In [None]:
# Demographic and mortality columns for every row of the patients table.
sql_query = """
SELECT subject_id, gender, dob, dod, expire_flag FROM patients;
"""
patients = pd.read_sql_query(sql=sql_query, con=con)
patients.head()

In [None]:
# Age at death in years (NaT dod gives NaN).
# Dividing by np.timedelta64(1, 'Y') is rejected by current pandas because a
# year is not a fixed-length unit. Convert to days first and divide by
# 365.2425, the mean year length NumPy uses for the 'Y' unit, so values match
# what the old expression produced.
# NOTE(review): dod - dob is still a nanosecond timedelta, so spans beyond
# roughly 292 years cannot be represented - confirm how shifted birth dates
# should be handled.
DAYS_PER_YEAR = 365.2425
patients['age_at_death'] = (
    (patients.dod - patients.dob) / np.timedelta64(1, 'D') / DAYS_PER_YEAR
)

In [None]:
# Bucket age_at_death into labelled bands. Ages outside [-300, 100) and
# missing ages become NaN; the lowest band is labelled 'exclude'.
age_bins = [-300, 0, 18, 40, 50, 60, 70, 80, 90, 100]
age_labels = ['exclude', 'youth', '18-39', '40-49', '50-59', '60-69', '70-79', '80-89', '>89']
# right=False makes each band include its lower edge and exclude its upper
# edge, which is what labels such as '18-39' describe. The default
# right-closed bins put an age of exactly 18 in 'youth' and exactly 40 in
# '18-39'.
age_categories = pd.cut(patients['age_at_death'], age_bins, labels=age_labels, right=False)
# Reuse the single result instead of running the identical pd.cut twice.
patients['age_category'] = age_categories

In [None]:
# Preview the patients frame with the derived age columns.
patients.head()

In [None]:
# Attach patient-level columns (gender, dob, dod, age_category, ...) to each
# infection admission via subject_id.
# The original joined from `infect_admits_db`, a name no cell in this notebook
# defines (NameError on a fresh kernel).
# NOTE(review): `infection_admissions` is the admissions frame this notebook
# loads and it carries subject_id and hospital_expire_flag - confirm it is the
# frame that was intended.
infect_pts = infection_admissions.join(
    patients.set_index('subject_id'),
    on='subject_id',
    how='left',
    lsuffix='_ad',
    rsuffix='_pt',
    sort=False,
)

In [None]:
# Count of admissions per hospital_expire_flag value, split by age category.
outcome = 'hospital_expire_flag'
feature = 'age_category'
db = infect_pts

g = sns.countplot(data=db, x=outcome, hue=feature)
# Place the legend outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1, 0.5), loc='center left')
plt.title('Patient outcomes and age at death')
#plt.savefig('/Users/nwespe/Desktop/pt_expire_ethnicity.png', bbox_inches='tight')

In [None]:
# Count of admissions in each age category.
feature = 'age_category'
db = infect_pts

# Pass the column as x= (as the previous plot does). A bare positional
# argument is treated as `data` by seaborn versions with keyword-only plot
# variables, which then collides with data=db.
g = sns.countplot(x=feature, data=db)

In [None]:
# Load the complete labevents table and preview it.
sql_query = """
SELECT * FROM labevents;
"""
labevents = pd.read_sql_query(sql=sql_query, con=con)
labevents.head()

In [None]:
# Attach every lab event to its infection admission via hadm_id. This
# replaces the patient-level `infect_pts` frame built earlier.
# The original joined from `infect_admits_db`, a name no cell in this notebook
# defines (NameError on a fresh kernel).
# NOTE(review): `infection_admissions` is the admissions frame this notebook
# loads - confirm it is the frame that was intended.
infect_pts = infection_admissions.join(
    labevents.set_index('hadm_id'),
    on='hadm_id',
    how='left',
    lsuffix='_ad',
    rsuffix='_lab',
    sort=False,
)

In [None]:
# Show the joined rows whose hadm_id equals 10006.
# NOTE(review): in the table outputs earlier in this notebook 10006 appears as
# a subject_id while hadm_id values are six digits, so this filter may return
# nothing - confirm which id was meant.
infect_pts[infect_pts.hadm_id == 10006]