In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
import seaborn as sns
# for configuring connection 
from configobj import ConfigObj
import os

%matplotlib inline

In [2]:
# Create a database connection using settings from config file.
# `config` starts as the path to the ini file and, if that file exists,
# is rebound to the parsed ConfigObj.
config='../db/config.ini'

# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    # No config file: fall back to the built-in defaults (empty password).
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = ''
    conn_info["sqlhost"] = '192.168.60.144'
    conn_info["sqlport"] = 6432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu_crd'

# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))

# Keyword arguments shared by every connection attempt that names the
# host and port explicitly.
tcp_kwargs = dict(dbname=conn_info["dbname"],
                  host=conn_info["sqlhost"],
                  port=conn_info["sqlport"],
                  user=conn_info["sqluser"])

if conn_info["sqlpass"] == '':
    # try connecting without password, i.e. peer or OS authentication
    try:
        # NOTE(review): the port is compared against the *string* '6432'.
        # The built-in default above is the int 6432, so with the defaults
        # this test is False and the host/port connection below is used.
        # Kept as-is to preserve behaviour -- confirm whether omitting
        # host/port is really wanted for this address.
        if (conn_info["sqlhost"] == '192.168.60.144') and (conn_info["sqlport"] == '6432'):
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   user=conn_info["sqluser"])
        else:
            con = psycopg2.connect(**tcp_kwargs)
    except psycopg2.Error:
        # Only database errors trigger the password prompt; unrelated
        # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
        conn_info["sqlpass"] = getpass.getpass('Password: ')

        con = psycopg2.connect(password=conn_info["sqlpass"], **tcp_kwargs)
else:
    # A password was supplied by the config file: use it directly.
    # Previously this case created no connection, leaving `con` undefined.
    con = psycopg2.connect(password=conn_info["sqlpass"], **tcp_kwargs)

# Statement prepended to queries so unqualified table names resolve
# against the configured schema(s).
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres


In [5]:
from sqlalchemy import create_engine,text

# Build a SQLAlchemy engine from a fixed connection URL.
# Note: this rebinds `con`, replacing any connection object created earlier.
engine_url = 'postgresql://eicu@192.168.60.144:6432/eicu'
con = create_engine(engine_url)

In [6]:
# Directory of the eICU Collaborative Research Database 2.0 files.
datadir = (
    '/home/mei/nas/docker/dataset/EICU/'
    'eicu-collaborative-research-database-2.0/'
)
# Second processed-data directory (presumably where derived tables are
# written -- confirm against later cells). The variable name's spelling is
# kept unchanged because other cells may refer to it.
porcesseddir2 = (
    '/home/mei/nas/docker/'
    'processedData_2/'
)

In [7]:
def round_up(x, base=5):
    """Snap ``x`` to the nearest multiple of ``base``.

    Despite the name, the value is rounded to the *nearest* multiple, not
    upwards: 231 -> 230, 516 -> 515, 3 -> 5. Exact ties follow the rule of
    Python's built-in ``round``.

    Parameters
    ----------
    x : number
        Value to snap.
    base : number, default 5
        Step size of the grid.

    Returns
    -------
    number
        ``base`` multiplied by the rounded quotient ``x / base``.
    """
    steps = round(x / base)
    return steps * base

In [8]:
# SQL text: the search_path statement followed by a select that joins every
# `lab` row to its `patient_2` row on patientunitstayid, returning the lab
# name/result/offset alongside patient attributes, ordered by stay,
# hospital-admit offset and lab-result offset.
# NOTE(review): `patient_2` is not defined in this notebook -- presumably a
# pre-filtered copy of the patient table; confirm.
query = query_schema + """
select p.patientunitstayid, p.hospitaladmitOffset,l.labresultoffset,p.gender, p.age, 
      p.apacheadmissiondx,l.labname,l.labresult,p.unitadmitsource, p.admissionweight,	
      p.dischargeweight, p.unitdischargeoffset, p.unitdischargelocation,	
      p.unitdischargestatus
from lab l
join patient_2 p
  on l.patientunitstayid = p.patientunitstayid
  
order by p.patientunitstayid, p.hospitaladmitOffset,l.labresultoffset
"""

In [9]:
# Execute the lab/patient query over `con` and load the result set into a
# DataFrame.
df_lab = pd.read_sql_query(sql=query, con=con)

# Preview the first 20 rows.
df_lab.head(20)

Unnamed: 0,patientunitstayid,hospitaladmitoffset,labresultoffset,gender,age,apacheadmissiondx,labname,labresult,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus
0,141168,0,231,Female,70,"Rhythm disturbance (atrial, supraventricular)",PT - INR,1.7,Direct Admit,84.3,85.8,3596,Death,Expired
1,141168,0,231,Female,70,"Rhythm disturbance (atrial, supraventricular)",PT,17.1,Direct Admit,84.3,85.8,3596,Death,Expired
2,141168,0,231,Female,70,"Rhythm disturbance (atrial, supraventricular)",PTT,29.0,Direct Admit,84.3,85.8,3596,Death,Expired
3,141168,0,516,Female,70,"Rhythm disturbance (atrial, supraventricular)",WBC x 1000,9.8,Direct Admit,84.3,85.8,3596,Death,Expired
4,141168,0,516,Female,70,"Rhythm disturbance (atrial, supraventricular)",albumin,3.1,Direct Admit,84.3,85.8,3596,Death,Expired
5,141168,0,516,Female,70,"Rhythm disturbance (atrial, supraventricular)",bicarbonate,26.0,Direct Admit,84.3,85.8,3596,Death,Expired
6,141168,0,516,Female,70,"Rhythm disturbance (atrial, supraventricular)",-eos,1.0,Direct Admit,84.3,85.8,3596,Death,Expired
7,141168,0,516,Female,70,"Rhythm disturbance (atrial, supraventricular)",total protein,7.1,Direct Admit,84.3,85.8,3596,Death,Expired
8,141168,0,516,Female,70,"Rhythm disturbance (atrial, supraventricular)",BUN,26.0,Direct Admit,84.3,85.8,3596,Death,Expired
9,141168,0,516,Female,70,"Rhythm disturbance (atrial, supraventricular)",PT - INR,1.9,Direct Admit,84.3,85.8,3596,Death,Expired


In [10]:
# Index the raw lab rows by stay id, hospital-admit offset and lab offset.
# set_index returns a new frame, so df_lab itself is left untouched.
lab_index = ['patientunitstayid', 'hospitaladmitoffset', 'labresultoffset']
lab = df_lab.set_index(lab_index)

# Discard every row whose labresultoffset is negative.
negative_offset = lab.index.get_level_values('labresultoffset') < 0
lab = lab[~negative_offset]

# Pivot table: one column per labname holding the labresult values
# (pivot_table's default aggregation is applied to repeated keys).
lab = pd.pivot_table(lab, values='labresult', index=lab_index, columns='labname')
lab.columns.name = None

# Snap each labresultoffset onto the grid defined by round_up (default
# base of 5). Different original offsets can land on the same grid point,
# so the index may contain duplicate keys after this step.
lab = lab.rename(index=round_up, level='labresultoffset')
lab.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,-bands,-basos,-eos,-lymphs,-monos,-polys,24 h urine protein,24 h urine urea nitrogen,ALT (SGPT),ANF/ANA,...,total protein,transferrin,triglycerides,troponin - I,troponin - T,uric acid,urinary creatinine,urinary osmolality,urinary sodium,urinary specific gravity
patientunitstayid,hospitaladmitoffset,labresultoffset,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
141168,0,230,,,,,,,,,,,...,,,,,,,,,,
141168,0,515,,0.0,1.0,19.0,19.0,61.0,,,40.0,,...,7.1,,,,,,,,,
141168,0,1135,,0.0,0.0,6.0,14.0,80.0,,,358.0,,...,7.1,,,,,,,,,
141168,0,1135,,,,,,,,,,,...,,,,,,8.1,,,,
141168,0,1700,,,,,,,,,,,...,,,,,,,173.12,,12.0,


In [11]:
# Merge rows that share the same (stay, hospital-admit offset, lab offset)
# key by taking the per-column mean, so each key appears once.
stay_time_key = ['patientunitstayid', 'hospitaladmitoffset', 'labresultoffset']
lab_ = lab.groupby(level=stay_time_key).mean()
lab_.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,-bands,-basos,-eos,-lymphs,-monos,-polys,24 h urine protein,24 h urine urea nitrogen,ALT (SGPT),ANF/ANA,...,total protein,transferrin,triglycerides,troponin - I,troponin - T,uric acid,urinary creatinine,urinary osmolality,urinary sodium,urinary specific gravity
patientunitstayid,hospitaladmitoffset,labresultoffset,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
141168,0,230,,,,,,,,,,,...,,,,,,,,,,
141168,0,515,,0.0,1.0,19.0,19.0,61.0,,,40.0,,...,7.1,,,,,,,,,
141168,0,1135,,0.0,0.0,6.0,14.0,80.0,,,358.0,,...,7.1,,,,,8.1,,,,
141168,0,1700,,,,,,,,,,,...,,,,,,,173.12,,12.0,
141168,0,1805,,,,,,,,,,,...,,,,,,,,,,
