In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
import time
from configobj import ConfigObj
from glob import glob

In [2]:
# Create a PostgreSQL database connection using settings from the config file.
config='../db/config.ini'

# Placeholder password meaning "no real password configured": passwordless
# (peer / OS) authentication is attempted first and the user is only prompted
# interactively if that attempt fails.
PLACEHOLDER_PASSWORD = '243'

# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    # No config file found: fall back to local defaults.
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = PLACEHOLDER_PASSWORD
    conn_info["sqlhost"] = 'localhost'
    conn_info["sqlport"] = 5432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu'

# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == PLACEHOLDER_PASSWORD:
    # try connecting without password, i.e. peer or OS authentication
    try:
        # The port may be an int (built-in default) or text (config file),
        # so it is compared as a string.
        if conn_info["sqlhost"] == 'localhost' and str(conn_info["sqlport"]) == '5432':
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   user=conn_info["sqluser"])
        else:
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   host=conn_info["sqlhost"],
                                   port=conn_info["sqlport"],
                                   user=conn_info["sqluser"])
    except psycopg2.Error:
        # Passwordless login was rejected: ask for the password and retry.
        conn_info["sqlpass"] = getpass.getpass('Password: ')

        con = psycopg2.connect(dbname=conn_info["dbname"],
                               host=conn_info["sqlhost"],
                               port=conn_info["sqlport"],
                               user=conn_info["sqluser"],
                               password=conn_info["sqlpass"])
else:
    # A real password was configured: connect with it directly. Without this
    # branch `con` would never be defined for password-protected setups.
    con = psycopg2.connect(dbname=conn_info["dbname"],
                           host=conn_info["sqlhost"],
                           port=conn_info["sqlport"],
                           user=conn_info["sqluser"],
                           password=conn_info["sqlpass"])

# Prefix prepended to every query so unqualified table names resolve.
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres
Password: ········


In [3]:
# Import patient data. If allpatientfile.csv does not exist yet, execute the
# patientdata.ipynb notebook first.
# os.path.join keeps the path valid on every platform (the previous '\\'
# concatenation only formed a correct path on Windows).
df=pd.read_csv(os.path.join(os.getcwd(),'allpatientfile.csv'))

In [9]:
def main():
    """Collect vitalaperiodic rows for every patient in `df` and save them.

    Iterates over the module-level `df` (reads its 'patientunitstayid' and
    'Firstday' columns), passes a running DataFrame through
    `gettreatmentoffset` for each row, and finally writes the accumulated
    frame to vitalAperiodic.csv in the current working directory.
    """
    ftName = ['patientunitstayid', 'observationoffset', 'noninvasivesystolic',
              'noninvasivediastolic', 'noninvasivemean']
    df_VP2=pd.DataFrame(columns=ftName)
    total = len(df)
    for i in range(total):
        stay_id = df['patientunitstayid'][i]
        # '\r' rewrites the same output line so progress does not flood the cell.
        print('%d/%d:%s'%(i,total,stay_id),end='\r')
        result = gettreatmentoffset(stay_id,df['Firstday'][i],df_VP2)
        # The lookup may hand back None when nothing matched for a patient;
        # keep what has been accumulated instead of overwriting it with None.
        if result is not None:
            df_VP2 = result
    # NOTE: the Windows-style separator is kept on purpose so this path stays
    # identical to the one the later cells use to re-read the file.
    df_VP2.to_csv(os.getcwd()+'\\vitalAperiodic.csv',index=False)

In [10]:
def gettreatmentoffset(patientunitstayid,firstday,df_VP2):
    """Look up dialysis treatment offsets for one stay and fetch its vitals.

    Queries the `treatment` table for distinct `treatmentoffset` values whose
    `treatmentstring` contains 'dialysis' and that are not active upon
    discharge. If no row matches, the lookup is delegated to `celllabel`;
    otherwise the offsets are passed to `getvitalaperiodic`.

    Returns whatever the delegated function returns.
    """
    # timing of dialysis according to the treatment table
    sql_text = query_schema + """
    select distinct treatmentoffset
    from treatment
    where patientunitstayid = '{}' and treatmentstring like '%dialysis%' and activeupondischarge = 'False'
    order by treatmentoffset
    """.format(int(patientunitstayid))
    treatment_rows = pd.read_sql_query(sql_text, con)

    # Nothing recorded as a treatment: fall back to the intake/output lookup.
    if len(treatment_rows) == 0:
        return celllabel(patientunitstayid, firstday, df_VP2)

    offsets = [treatment_rows['treatmentoffset'][k] for k in range(len(treatment_rows))]
    return getvitalaperiodic(patientunitstayid, firstday, offsets, df_VP2)

In [11]:
def celllabel(patientunitstayid,firstday,df_VP2):
    """Look up dialysis offsets in `intakeoutput` for one stay and fetch its vitals.

    Queries the `intakeoutput` table for distinct `intakeoutputoffset` values
    whose `celllabel` contains 'dialysis' and forwards them to
    `getvitalaperiodic`.

    Returns:
        The result of `getvitalaperiodic`, or `df_VP2` unchanged when no row
        matches (returning None here would discard the caller's accumulated
        data and break the next append).
    """
    # timing of dialysis according to the intake/output table
    query = query_schema + """
    select distinct intakeoutputoffset
    from intakeoutput
    where patientunitstayid = '{}' and celllabel like '%dialysis%'
    order by intakeoutputoffset
    """.format(int(patientunitstayid))
    df_IO=pd.read_sql_query(query, con)
    if len(df_IO)==0:
        return df_VP2
    offset2 = df_IO['intakeoutputoffset'].tolist()
    return getvitalaperiodic(patientunitstayid,firstday,offset2,df_VP2)

In [12]:
# For each dialysis offset, compute the end of the day (in 1440-minute steps
# from `firstday`) that contains it and fetch the vitalaperiodic rows in a
# window around that boundary.
def getvitalaperiodic(patientunitstayid,firstday,offset,df_VP2):
    """Append vitalaperiodic rows around each dialysis offset to `df_VP2`.

    Args:
        patientunitstayid: stay identifier; converted with int() for the query.
        firstday: offset (minutes) from which 1440-minute days are counted.
        offset: iterable of dialysis offsets, expected in ascending order
            (the callers query them with ORDER BY).
        df_VP2: DataFrame accumulated so far.

    Returns:
        `df_VP2` followed by the fetched rows (restricted to the id, offset
        and three non-invasive pressure columns), or `df_VP2` itself when
        nothing was fetched.
    """
    minutes_per_day = 1440
    wanted_columns = ['patientunitstayid','observationoffset','noninvasivesystolic',
                      'noninvasivediastolic', 'noninvasivemean']
    hemodialysisdayend = -1000000
    fetched = []
    for dialysis_offset in offset:
        # Offsets already inside the previously handled day need no new query.
        if dialysis_offset <= hemodialysisdayend:
            continue

        elapsed = dialysis_offset - firstday
        if elapsed > minutes_per_day:
            # Smallest whole number of days whose boundary lies strictly after
            # the offset. Computed directly so that arbitrarily late offsets
            # are handled (a capped search would leave a stale boundary).
            days = int(elapsed // minutes_per_day) + 1
            hemodialysisdayend = firstday + minutes_per_day * days
        elif elapsed < 0:
            hemodialysisdayend = firstday
        else:
            hemodialysisdayend = firstday + minutes_per_day

        query = query_schema + """
        select *
        from vitalaperiodic
        where patientunitstayid = '{}' and observationoffset > {} and observationoffset <= {}
        order by observationoffset
        """.format(int(patientunitstayid),
                   hemodialysisdayend - minutes_per_day,
                   hemodialysisdayend + 2 * minutes_per_day)
        df_vitalaperiodic = pd.read_sql_query(query, con)
        fetched.append(df_vitalaperiodic[wanted_columns])
        # The next day count starts from the boundary just computed.
        firstday = hemodialysisdayend

    if not fetched:
        return df_VP2
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    return pd.concat([df_VP2] + fetched)

In [13]:
# Run the extraction only when this notebook/module is the entry point.
# Strings must be compared with `==`; `is` tests object identity and only
# worked by accident of string interning (SyntaxWarning on Python 3.8+).
if __name__ == '__main__':
    main()

2675/2676:3353226.0

In [14]:
# Remove duplicated rows from the exported vitals and overwrite the CSV.
vital_csv_path = os.getcwd()+'\\vitalAperiodic.csv'
df = pd.read_csv(vital_csv_path).drop_duplicates()
df.to_csv(vital_csv_path, index=False)

In [15]:
# Order the rows by patientunitstayid, then observationoffset, and overwrite the CSV.
vital_csv_path = os.getcwd()+'\\vitalAperiodic.csv'
df = pd.read_csv(vital_csv_path).sort_values(by=['patientunitstayid','observationoffset'])
df.to_csv(vital_csv_path, index=False)