In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
import time
from configobj import ConfigObj
from glob import glob

In [2]:
# Create a posgresql database connection using settings from config file 
# Create a database connection using settings from config file
config='../db/config.ini'
# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = '243'
    conn_info["sqlhost"] = 'localhost'
    conn_info["sqlport"] = 5432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu'
    
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '243':
    # try connecting without password, i.e. peer or OS authentication
    try:
        if (conn_info["sqlhost"] == 'localhost') & (conn_info["sqlport"]=='5432'):
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   user=conn_info["sqluser"])            
        else:
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   host=conn_info["sqlhost"],
                                   port=conn_info["sqlport"],
                                   user=conn_info["sqluser"])
    except:
        conn_info["sqlpass"] = getpass.getpass('Password: ')

        con = psycopg2.connect(dbname=conn_info["dbname"],
                               host=conn_info["sqlhost"],
                               port=conn_info["sqlport"],
                               user=conn_info["sqluser"],
                               password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres
Password: ········


In [3]:
#import patient data, if not exsit allpatientdata.csv file than please excute the patientdata.ipynb file first
df=pd.read_csv(os.getcwd()+'\\allpatientfile.csv')

In [4]:
def main():
    ftName_0 = ['observationoffset','temperature','sao2', 'heartrate', 'respiration','cvp','systemicsystolic','systemicdiastolic','systemicmean']
    ftName = ['patientunitstayid']
    for i in range(0,len(ftName_0)):
        ftName.append((ftName_0[i]))
    df_VP=pd.DataFrame(columns=ftName)
    df_VP3=pd.DataFrame(columns=ftName)
    for i in range(0,len(df)):
        print('%d/%d:%s'%(i,len(df),df['patientunitstayid'][i]),end='\r')
        df_VP=pd.DataFrame(columns=ftName)
        df_VP=gettreatmentoffset(df['patientunitstayid'][i],df['Firstday'][i],df_VP3)
        if df_VP is None:
            continue
        else:
            df_VP3=df_VP
    df_VP3.to_csv(os.getcwd()+'\\vitalperiodic.csv',index=False)

In [5]:
def gettreatmentoffset(patientunitstayid,firstday,df_VP2):
    #calculte the timing of dialysis
    query = query_schema + """
    select distinct treatmentoffset
    from treatment
    where patientunitstayid = '{}' and treatmentstring like '%dialysis%' and activeupondischarge = 'False'
    order by treatmentoffset
    """.format(int(patientunitstayid))
    df_treatment=pd.read_sql_query(query, con)
    offset=[]
    if(len(df_treatment)==0):
        df_IOcelllabel=celllabel(patientunitstayid,firstday,df_VP2)
        return df_IOcelllabel
    for i in range (0,len(df_treatment)):
        offset.append(df_treatment['treatmentoffset'][i])
    df_treatmentoffset=getvitalperiodic(patientunitstayid,firstday,offset,df_VP2)
    return df_treatmentoffset

In [6]:
def celllabel(patientunitstayid,firstday,df_VP2):
    #calculte the timing of dialysis
    query = query_schema + """
    select distinct intakeoutputoffset
    from intakeoutput
    where patientunitstayid = '{}' and celllabel like '%dialysis%'
    order by intakeoutputoffset
    """.format(int(patientunitstayid))
    df_IO=pd.read_sql_query(query, con)
    offset2=[]
    if(len(df_IO)==0):
        return None
    else:
        for i in range (0,len(df_IO)):
            offset2.append(df_IO['intakeoutputoffset'][i])
        df_check2=getvitalperiodic(patientunitstayid,firstday,offset2,df_VP2)
        return df_check2

In [7]:
#calculate the time interval after the patient had dialysis and the day after dialysis, and fetch all the data in the time interval
def getvitalperiodic(patientunitstayid,firstday,offset,df_VP2):
    hemodialysisdayend=-1000000
    flag=0
    for i in range(0,len(offset)):   
        if (offset[i] >hemodialysisdayend):
            if (offset[i]>firstday):
                if(offset[i]-firstday>1440):
                    for j in range(1,100):
                        temp=firstday+1440*j
                        if(temp>offset[i]):
                            hemodialysisdayend=temp
                            break
                else:
                    hemodialysisdayend=firstday+1440
            elif(offset[i]<firstday):
                hemodialysisdayend=firstday
            else:
                hemodialysisdayend=firstday+1440
            query = query_schema + """
            select *
            from vitalperiodic
            where patientunitstayid = '{}' and observationoffset > {} and observationoffset < {}
            order by observationoffset
            """.format(int(patientunitstayid),hemodialysisdayend-1440,hemodialysisdayend+2880)
            df_vitalperiodic=pd.read_sql_query(query, con)
            df_vitalperiodic=df_vitalperiodic[['patientunitstayid','observationoffset','temperature','sao2', 'heartrate', 'respiration','cvp','systemicsystolic','systemicdiastolic','systemicmean']]
            df_VP2=df_VP2.append(df_vitalperiodic)
            firstday=hemodialysisdayend
        else:
            continue;
    return df_VP2

In [8]:
if __name__ is '__main__':
    main()


2675/2676:3353226.0

In [9]:
#filtering duplicate data
df=pd.read_csv(os.getcwd()+'\\vitalperiodic.csv')
df=df.drop_duplicates()
df.to_csv(os.getcwd()+'\\vitalperiodic.csv',index=False)

In [10]:
#sort data by patientunitstayid and observationoffset
df=pd.read_csv(os.getcwd()+'\\vitalperiodic.csv')
df.sort_values(by=['patientunitstayid','observationoffset'],inplace=True)
df.to_csv(os.getcwd()+'\\vitalperiodic.csv',index=False)

In [11]:
df=pd.read_csv(os.getcwd()+'\\vitalperiodic.csv')
#if not exist vitalAperiodic.csv file, then excute vitalAperiodic.ipynb file first
df2=pd.read_csv(os.getcwd()+'\\vitalAperiodic.csv')


In [12]:
#merge two files, 
patientunitstayidlist=[]
patientunitstayidlist2=[]
for i in range(0,len(df)):
    patientunitstayidlist.append(df['patientunitstayid'][i])
for i in range(0,len(df2)):
    patientunitstayidlist2.append(df2['patientunitstayid'][i])
patientunitstayidlist=np.unique(patientunitstayidlist)


for i in range(0,len(patientunitstayidlist)):
    print("%d/%d:%s"%(i,len(patientunitstayidlist),patientunitstayidlist[i]),end='\r')
    if(patientunitstayidlist[i] in patientunitstayidlist2):
        flag=0
        vitalhead=0
        vitalAhead=0
        vitalcount=0
        vitalAcount=0
        for j in range(0,len(df)):
            if(df['patientunitstayid'][j]==patientunitstayidlist[i] and flag==0):
                vitalhead=j
                flag=1
            elif(df['patientunitstayid'][j]!=patientunitstayidlist[i] and flag==1):
                vitalcount=j
                flag=0
                break
        
        if(vitalcount==0 and j==len(df)-1):
            vitalcount=len(df)
        flag=0
        for j in range(0,len(df2)):
            if(df2['patientunitstayid'][j]==patientunitstayidlist[i] and flag==0):
                vitalAhead=j
                flag=1
            elif(df2['patientunitstayid'][j]!=patientunitstayidlist[i] and flag==1):
                vitalAcount=j
                flag=0
                break
        
        if(vitalAcount==0 and j==len(df2)-1):
            vitalAcount=len(df2)
        for j in range(vitalhead,vitalcount):

            for k in range(vitalAhead,vitalAcount):
                if(df['observationoffset'][j]<df2['observationoffset'][k]):
                    if(df2['patientunitstayid'][k-1]==df2['patientunitstayid'][k]):
                        if(pd.isna(df['systemicsystolic'][j])):
                            df['systemicsystolic'][j]=df2['noninvasivesystolic'][k-1]
                        if(pd.isna(df['systemicdiastolic'][j])):
                            df['systemicdiastolic'][j]=df2['noninvasivediastolic'][k-1]
                        if(pd.isna(df['systemicmean'][j])):
                            df['systemicmean'][j]=df2['noninvasivemean'][k-1]
                        break
                    elif(df2['patientunitstayid'][k-1]!=df2['patientunitstayid'][k] and pd.isna(df2['patientunitstayid'][k-1])):
                        if(pd.isna(df['systemicsystolic'][j])):
                            df['systemicsystolic'][j]=df2['noninvasivesystolic'][k]
                        if(pd.isna(df['systemicdiastolic'][j])):
                            df['systemicdiastolic'][j]=df2['noninvasivediastolic'][k]
                        if(pd.isna(df['systemicmean'][j])):
                            df['systemicmean'][j]=df2['noninvasivemean'][k]
                        break
            if(k+1==vitalAcount and pd.isna(df['systemicsystolic'][j])):
                df['systemicsystolic'][j]=df2['noninvasivesystolic'][k]
            if(k+1==vitalAcount and pd.isna(df['systemicdiastolic'][j])):
                df['systemicdiastolic'][j]=df2['noninvasivediastolic'][k]
            if(k+1==vitalAcount and pd.isna(df['systemicmean'][j])):
                df['systemicmean'][j]=df2['noninvasivemean'][k]
                
    else:
        continue
df.to_csv(os.getcwd()+'\\vitalperiodicexe.csv',index=False)

0/2676:251510

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

2675/2676:3353226

In [13]:
#filtering outliner
df=pd.read_csv(os.getcwd()+'\\vitalperiodicexe.csv')

for i in range(0,len(df.index)):
    if(df['systemicsystolic'][i]<40):
        df['systemicsystolic'][i]=40
    if(df['systemicsystolic'][i]>200):
        df['systemicsystolic'][i]=200      
    if(df['systemicdiastolic'][i]>200):
        df['systemicdiastolic'][i]=200
    if(df['systemicdiastolic'][i]<40):
        df['systemicdiastolic'][i]=40

df.to_csv(os.getcwd()+'\\vitalperiodicexe.csv',index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
