In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
import seaborn as sns
# for configuring connection 
from configobj import ConfigObj
import os

%matplotlib inline

In [2]:
# Create a database connection using settings from config file.
# When the config file is missing, fall back to local defaults.
config='../db/config.ini'

# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = ''
    conn_info["sqlhost"] = 'localhost'
    conn_info["sqlport"] = 5432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu_crd'

# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))

# Keyword arguments shared by every host/port based connection attempt.
connect_kwargs = dict(dbname=conn_info["dbname"],
                      host=conn_info["sqlhost"],
                      port=conn_info["sqlport"],
                      user=conn_info["sqluser"])

if conn_info["sqlpass"] == '':
    # try connecting without password, i.e. peer or OS authentication
    try:
        # The fallback port above is an int, whereas a value read from the
        # config file may be a string; normalise to str so both forms match.
        if conn_info["sqlhost"] == 'localhost' and str(conn_info["sqlport"]) == '5432':
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   user=conn_info["sqluser"])
        else:
            con = psycopg2.connect(**connect_kwargs)
    except psycopg2.Error:
        # Only database errors trigger the password prompt; anything else
        # (e.g. KeyboardInterrupt) propagates instead of being swallowed.
        conn_info["sqlpass"] = getpass.getpass('Password: ')

        con = psycopg2.connect(password=conn_info["sqlpass"], **connect_kwargs)
else:
    # A password was supplied in the config file: use it directly so that
    # `con` is always defined after this cell.
    con = psycopg2.connect(password=conn_info["sqlpass"], **connect_kwargs)

# Prefix prepended to every query so unqualified table names resolve.
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres


In [3]:
from sqlalchemy import create_engine

# Build a SQLAlchemy engine for the local eICU database.
# NOTE(review): this rebinds `con`, replacing the psycopg2 connection opened
# earlier, and the URL is hard-coded rather than derived from `conn_info`.
engine_url = 'postgresql://eicu@localhost:5432/eicu'
con = create_engine(engine_url)

In [4]:
def round_up(x, base=5):
    """Snap ``x`` to a multiple of ``base``.

    Despite the name, the value is rounded to the *nearest* multiple of
    ``base`` using the built-in ``round``; it is not always rounded upwards.

    Parameters
    ----------
    x : value to round.
    base : spacing of the multiples (default 5).

    Returns
    -------
    ``base`` multiplied by ``round(x / base)``.
    """
    n_multiples = round(x / base)
    return base * n_multiples

In [5]:
# Load the whole lab table, then report how many distinct ICU stays it covers.
query = query_schema + """
select *
from lab
"""

df_lab = pd.read_sql_query(sql=query, con=con)

# One patientunitstayid corresponds to one ICU stay in this count.
stay_ids = df_lab['patientunitstayid']
n_lab = stay_ids.nunique()
print("There are {} icu patients in the lab  record.".format(n_lab))

There are 195730 icu patients in the lab  record.


In [6]:
# Lab rows joined to patient attributes, restricted to stays whose unit
# admit source is the Emergency Department or a Direct Admit.
query = query_schema + """

with selected_patients as (
SELECT DISTINCT patientunitstayid,gender,age,apacheadmissiondx,unitadmitsource,admissionweight,	dischargeweight,unitdischargelocation,	unitdischargestatus
FROM patient
WHERE unitadmitsource IN ('Emergency Department', 'Direct Admit') 
)
select l.labid, l.labtypeid	,l.labname, l.labresult,l.labmeasurenamesystem, sp.*
from lab l
join selected_patients sp
  on l.patientunitstayid = sp.patientunitstayid
 """

df_lab_selected = pd.read_sql_query(sql=query, con=con)

# Preview the first rows of the joined result.
df_lab_selected.head()

Unnamed: 0,labid,labtypeid,labname,labresult,labmeasurenamesystem,patientunitstayid,gender,age,apacheadmissiondx,unitadmitsource,admissionweight,dischargeweight,unitdischargelocation,unitdischargestatus
0,52307161,3.0,fibrinogen,177.0,mg/dL,141168,Female,70,"Rhythm disturbance (atrial, supraventricular)",Direct Admit,84.3,85.8,Death,Expired
1,50363251,3.0,PT - INR,2.5,ratio,141168,Female,70,"Rhythm disturbance (atrial, supraventricular)",Direct Admit,84.3,85.8,Death,Expired
2,49149139,1.0,magnesium,2.0,mg/dL,141168,Female,70,"Rhythm disturbance (atrial, supraventricular)",Direct Admit,84.3,85.8,Death,Expired
3,50363250,3.0,PT,26.6,sec,141168,Female,70,"Rhythm disturbance (atrial, supraventricular)",Direct Admit,84.3,85.8,Death,Expired
4,66695374,7.0,pH,7.2,,141168,Female,70,"Rhythm disturbance (atrial, supraventricular)",Direct Admit,84.3,85.8,Death,Expired


In [7]:
# df_lab_selected.to_csv('/Users/meisun/Documents/study/thesis/master project/data-preprocessing/selected_patients_lab.csv')

In [None]:
# Reload the selected-patients lab extract from a CSV file on disk.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
selected_lab_csv = '/Users/meisun/Documents/study/thesis/master project/data-preprocessing/selected_patients_lab.csv'
df_lab_selected = pd.read_csv(selected_lab_csv)

In [None]:
# Reshape the long lab table to wide form: one row per
# (patientunitstayid, labid) and one column per lab name.
index_keys = ['patientunitstayid', 'labid']

# Set indices.
# Only move the keys into the index while they are still columns, so that
# re-running this cell does not raise a KeyError. The result is rebound rather
# than using `inplace=True`, whose return value is None.
if set(index_keys).issubset(df_lab_selected.columns):
    df_lab_selected = df_lab_selected.set_index(index_keys)

# Pivot table.
# pivot_table aggregates with the mean by default, so each (stay, lab) pair
# already appears exactly once; no further groupby/mean pass is needed.
lab = pd.pivot_table(df_lab_selected,
                     values='labresult',
                     index=index_keys,
                     columns='labname')
# Drop the 'labname' label from the column axis.
lab.columns.name = None