In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
import time
import re
from configobj import ConfigObj

In [2]:
# Create a posgresql database connection using settings from config file 

# Create a database connection using settings from config file
config='../db/config.ini'
# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = '243'
    conn_info["sqlhost"] = 'localhost'
    conn_info["sqlport"] = 5432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu'
    
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '243':
    # try connecting without password, i.e. peer or OS authentication
    try:
        if (conn_info["sqlhost"] == 'localhost') & (conn_info["sqlport"]=='5432'):
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                    user=conn_info["sqluser"])            
        else:
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                    host=conn_info["sqlhost"],
                                    port=conn_info["sqlport"],
                                    user=conn_info["sqluser"])
    except:
        conn_info["sqlpass"] = getpass.getpass('Password: ')
    
        con = psycopg2.connect(dbname=conn_info["dbname"],
                                host=conn_info["sqlhost"],
                                port=conn_info["sqlport"],
                                user=conn_info["sqluser"],
                                password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres
Password: ········


In [3]:
#Patient
#import patient data, if not exsit allpatientdata.csv file than please excute the patientdata.ipynb file first
df=pd.read_csv(os.getcwd()+'\\allpatientdata.csv')

#import patient data, if not exsit respiratorycharting_com.csv file than please excute the respiratorycharting.ipynb file first
df2=pd.read_csv(os.getcwd()+'\\respiratorycharting_com.csv')


In [11]:
def main():
    start = time.clock()
    ftName_0 = ['Nasal cannula O2', 'Compliance', 'FiO2', 'Mean airway pressure',
                'Minute ventilation', 'PC mode', 'Peak airway pressure', 'PEEP',
                'Plateau', 'Pressure support', 'Resistance', 'Respiratory rate', 
                'RSBI','SaO2', 'tidal volume', 'ROXindex']
    ftName = ['patientunitstayid']
    for i in range(0,len(ftName_0)):
        for j in range(0,48):
            ftName.append('%s %02d hours'%(ftName_0[i], j+1))
    df0 = pd.DataFrame(columns= ftName)
    th = 0
    for i in range(0,len(df)):
        print('%d/%d:%s'%(i,len(df),df['patientunitstayid'][i]),end='\r')
        dialysisday=[]
        dialysisoffset=[]
        dialysisday=getdialysisday(df['patientunitstayid'][i],df['Firstday'][i])
        dialysisday=np.unique(dialysisday)
        dialysisoffset=getdialysisoffset(df['patientunitstayid'][i],df['Firstday'][i])
        dialysisoffset=np.unique(dialysisoffset)
        if(len(dialysisday)>0 and len(dialysisoffset)>0): 
            for j in range(0,len(dialysisday)):
                #print(df['patientunitstayid'][i])
                #print(dialysisday[j])
                #print(dialysisoffset[j])
                if(df2['target D%02d'%(dialysisday[j])][i]==1):#consider only if the patient is using respirator 
                    df0 = Respiratorycharting(df['patientunitstayid'][i],dialysisoffset[j], th, df0, ftName_0)
                    th=th+1
    df0.to_csv(os.getcwd()+'\\respiratorycharting_hour.csv',index=False)
    end = time.clock() - start
    print(end)

In [12]:
#可調整輸出的方式
#adjust the extract data into output format
def Featrue_all_value_with_day(ft_name, hour, df_respiratorycharting):
    temp_1 = [-0.001]
    for j in range(0,len(df_respiratorycharting.index)):
        if df_respiratorycharting['respchartvaluelabel'][j] == ft_name:  
                if df_respiratorycharting['hour'][j] == hour: 
                    temp_ex = df_respiratorycharting['respchartvalue'][j].strip(' ')
                    temp_ex =  re.sub(u"([^\u0030-\u0039\.])","",temp_ex)
                    if temp_ex.strip() == '':
                        continue
                    else:
                        temp_1.append(float(temp_ex))
    if len(temp_1) != 1:
           temp_1.pop(0)
    return np.min(temp_1)

In [13]:
def Respiratorycharting(patientunitstayid, dialysisoffset, th, df_RC, ftName_0):
    query = query_schema + """
    select *
    from respiratorycharting
    where patientunitstayid = '{}' and respchartoffset>'{}' and respchartoffset<='{}'
    order by respchartoffset
    """.format(patientunitstayid,int(dialysisoffset),int(dialysisoffset+60*48))

    df_respiratorycharting = pd.read_sql_query(query, con)
    df_respiratorycharting = df_respiratorycharting[['patientunitstayid','respchartoffset','respchartvaluelabel', 'respchartvalue']]

    #計算該row是第幾天
    #calculate event time by days
    hour = np.zeros((len(df_respiratorycharting.index),), dtype=int)
    df_respiratorycharting['hour'] = hour
    for i in range(0,len(df_respiratorycharting.index)):
        temp = df_respiratorycharting['respchartoffset'][i]

        temp = temp - dialysisoffset
        df_respiratorycharting['hour'][i] = ((temp/60)+1)
            
    #創建表格及預設值       
    #create sheet and default value
    space_row = [patientunitstayid]
    for i in range(len(ftName_0)):
        for j in range(0,48):
            space_row.append(' ')
    droplist = []
    if df_respiratorycharting.size == 0:
        df_RC = df_RC.append(pd.Series(space_row, index=df_RC.columns), ignore_index=True)
    else:
        df_RC = df_RC.append(pd.Series(space_row, index=df_RC.columns), ignore_index=True)
        ##讀進來的名稱先轉小寫再進行比較，然後修改成需要的名稱格式
        #Trun the name in to lower class, than comparing. After that change it into specific format
        for i in range(0,len(df_respiratorycharting.index)):
            temp = df_respiratorycharting['respchartvaluelabel'][i].lower().strip(' ')
            if temp == 'lpm o2':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Nasal cannula O2'
                temp_ex = df_respiratorycharting['respchartvalue'][i].strip('%')
            elif temp == 'compliance' or temp == 'mechanical ventilator compliance' or temp == 'static compliance':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Compliance'
            elif temp == 'fio2' or temp == 'fio2 (%)' or temp == 'o2 percentage' or temp == 'set fraction of inspired oxygen (fio2)':
                df_respiratorycharting['respchartvaluelabel'][i] = 'FiO2'
                temp_ex = df_respiratorycharting['respchartvalue'][i].strip('%')
            elif temp == 'mean airway pressure':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Mean airway pressure'
            elif temp == 'exhaled mv or temp' or temp == 'minute volume, spontaneous' or temp == 'measured ve':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Minute ventilation'
            elif temp == 'pressure control':
                df_respiratorycharting['respchartvaluelabel'][i] = 'PC mode'
            elif temp == 'peak insp. pressure' or temp == 'peak pressure':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Peak airway pressure'
            elif temp == 'peep' or temp == 'peep/cpap':
                df_respiratorycharting['respchartvaluelabel'][i] = 'PEEP'
            elif temp == 'plateau pressure':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Plateau'
            elif temp == 'pressure support' or temp == 'PS':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Pressure support'
            elif temp == 'mechanical ventilator resistance':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Resistance'
            elif temp == 'vent rate' or temp == 'total rr' or temp == 'rr (patient)' or temp == 'resp rate total' or temp == 'f total' or temp == 'spontaneous respiratory rate' or temp == 'rr spont':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Respiratory rate'
            elif temp == 'total rsbi':
                df_respiratorycharting['respchartvaluelabel'][i] = 'RSBI'
            elif temp == 'sao2':
                df_respiratorycharting['respchartvaluelabel'][i] = 'SaO2'
            elif temp == 'tv/kg ibw':
                if df['gender'][0] == 'Female':
                    PBW = 45.5 + 0.91 * (df['height'][0] - 152.4)
                    df_respiratorycharting['respchartvalue'][i] = str(int(float(df_respiratorycharting['respchartvalue'][i]) * PBW))
                elif df['gender'][0] == 'Male':
                    PBW = 50.0 + 0.91 * (df['height'][0] - 152.4)
                    df_respiratorycharting['respchartvalue'][i] = str(int(float(df_respiratorycharting['respchartvalue'][i]) * PBW))
                df_respiratorycharting['respchartvaluelabel'][i] = 'tidal volume'
            elif temp == 'tidal volume observed (vt)' or temp == 'exhaled tv (patient)' or temp == 'tidal volume, delivered' or temp == 'exhaled vt' or temp == 'spont tv':
                '''
                if df['gender'][0] == 'Female':
                    PBW = 45.5 + 0.91 * (df['height'][0] - 152.4)
                    df_respiratorycharting['respchartvalue'][i] = '%.3f'%(float(df_respiratorycharting['respchartvalue'][i]) / PBW)
                elif df['gender'][0] == 'Male':
                    PBW = 50.0 + 0.91 * (df['height'][0] - 152.4)
                    df_respiratorycharting['respchartvalue'][i] = '%.3f'%(float(df_respiratorycharting['respchartvalue'][i]) / PBW)
                '''
                df_respiratorycharting['respchartvaluelabel'][i] = 'tidal volume'
        
    #將計算好的值放入表格中
    #put the calculated value into sheet and call "Feature_all_value_with_day" function to turning it into specific format
        for i in range(0,len(ftName_0)):
            for j in range(0,48):
                str_temp = '%s %02d hours'%(ftName_0[i],j+1)
                df_RC[str_temp][th] = Featrue_all_value_with_day(ftName_0[i],j+1,df_respiratorycharting)  
    return df_RC

In [14]:
#calculate the dialysis day
def getdialysisday(patientunitstayid,firstday):
    query = query_schema + """
    select distinct treatmentoffset
    from treatment
    where patientunitstayid = '{}' and treatmentstring like '%dialysis%' and activeupondischarge = 'False'
    order by treatmentoffset
    """.format(patientunitstayid)
    df_treatment=pd.read_sql_query(query, con)
    offset=[]
    if(len(df_treatment)==0):
        df_celllabel=celllabel(patientunitstayid,firstday)
        for i in range(0, len(df_celllabel)):
            for day in range(0,100):        
                if(firstday+1440*day>df_celllabel[i]):
                    break;
            if(day>27):
                continue
            offset.append(day+1)
        return offset
    for i in range(0, len(df_treatment)):
        for day in range(0,100):        
            if(firstday+1440*day>df_treatment['treatmentoffset'][i]):
                break;
        if(day>27):#since we only consider data within a month
            continue
        offset.append(day+1)
    '''    
    offset=list(dict.fromkeys(offset))#remove duplicate items
    for i in range(0,len(offset)):
        if(df5['target D%02d'%(offset[i])][postition]==0):
            #print(i)
            #dialysisday.remove(dialysisday[i])
            offset[i]=0.001
    offset=list(filter((0.001).__ne__, offset))
    '''
    return offset
    

In [15]:
#calculate the dialysis time
def getdialysisoffset(patientunitstayid,firstday):
    query = query_schema + """
    select distinct treatmentoffset
    from treatment
    where patientunitstayid = '{}' and treatmentstring like '%dialysis%' and activeupondischarge = 'False'
    order by treatmentoffset
    """.format(patientunitstayid)
    df_treatment=pd.read_sql_query(query, con)
    offset=[]
    offset2=[]
    if(len(df_treatment)==0):
        df_celllabel=celllabel(patientunitstayid,firstday)
        return df_celllabel
    for i in range (0,len(df_treatment)):
        offset.append(df_treatment['treatmentoffset'][i])
    hemodialysisdayend=-1000000
    flag=0
    for i in range(0,len(offset)):   
        if (offset[i] >hemodialysisdayend):
            if (offset[i]>firstday):
                if(offset[i]-firstday>1440):
                    for j in range(1,100):
                        temp=firstday+1440*j
                        if(temp>offset[i]):
                            hemodialysisdayend=temp
                            break
                else:
                    hemodialysisdayend=firstday+1440
            elif (offset[i]<firstday):
                hemodialysisdayend=firstday
            offset2.append(hemodialysisdayend-1440)
    """
    for i in range(0,len(dialysisday)):
        if(df5['target D%02d'%(dialysisday[i])][postition]==0):
            #print(i)
            #dialysisday.remove(dialysisday[i])
            offset2[i]=0.001
    offset2=list(filter((0.001).__ne__, offset2))
    """
    return offset2

In [16]:
#calculate the dialysis time
def celllabel(patientunitstayid,firstday):
    query = query_schema + """
    select distinct intakeoutputoffset
    from intakeoutput
    where patientunitstayid = '{}' and celllabel like '%dialysis%'
    order by intakeoutputoffset
    """.format(patientunitstayid)
    df_IO=pd.read_sql_query(query, con)
    offset=[]
    offset2=[]
    if(len(df_IO)==0):
        return None
    else:
        for i in range (0,len(df_IO)):
            offset.append(df_IO['intakeoutputoffset'][i])
    hemodialysisdayend=-1000000
    flag=0
    for i in range(0,len(offset)):   
        if (offset[i] >hemodialysisdayend):
            if (offset[i]>firstday):
                if(offset[i]-firstday>1440):
                    for j in range(1,100):
                        temp=firstday+1440*j
                        if(temp>offset[i]):
                            hemodialysisdayend=temp
                            break
                else:
                    hemodialysisdayend=firstday+1440
            elif (offset[i]<firstday):
                hemodialysisdayend=firstday
            offset2.append(hemodialysisdayend-1440)
    return offset2

In [17]:
if __name__ is '__main__':
    main()

  


0/2676:251510

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

21/2676:307232

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


25/2676:311838

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


137/2676:427977

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


145/2676:439487

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
1111111111111111111111111111111111

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
1111111111111111111111111111111111

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2447/2676:3204871

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
111111111111111111111111111111111111111111111
11544.54107613226




In [18]:
#mask
df=pd.read_csv(os.getcwd()+'\\respiratorycharting_hour.csv')
df= df.mask(df==-0.001)
df= df.mask(df=='-0.001')
df=df.mask(df==0.0)
df=df.mask(df=='0.0')
df.mask(df==' ').to_csv(os.getcwd()+'\\respiratorycharting_hour_mask.csv',index=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [13]:
#filtering outliner
df=pd.read_csv(os.getcwd()+'\\respiratorycharting_hour_mask.csv')
droplist = []
pid = []
for i in range(0,len(df.index)):
    for j in range(1,49):
        if df['Nasal cannula O2 %02d hours'%j][i] > 6:
            df['Nasal cannula O2 %02d hours'%j][i]=np.nan
            
        if df['Nasal cannula O2 %02d hours'%j][i] < 0:
            df['Nasal cannula O2 %02d hours'%j][i]=0
            
        if df['Compliance %02d hours'%j][i] > 100:
            df['Compliance %02d hours'%j][i]=np.nan
            
        if df['PEEP %02d hours'%j][i]<0:
            df['PEEP %02d hours'%j][i]=0
            
        if df['PEEP %02d hours'%j][i]>25:
            df['PEEP %02d hours'%j][i]=np.nan
            
        if df['FiO2 %02d hours'%j][i] < 20:
            df['FiO2 %02d hours'%j][i]=20
            
        if df['FiO2 %02d hours'%j][i] > 100:
            df['FiO2 %02d hours'%j][i]=np.nan
            
        if df['FiO2 %02d hours'%j][i] < 20:
            df['FiO2 %02d hours'%j][i]=20
            
        if df['Mean airway pressure %02d hours'%j][i] < 5:
            df['Mean airway pressure %02d hours'%j][i] = 5
            
        if df['Mean airway pressure %02d hours'%j][i] > 50:
            df['Mean airway pressure %02d hours'%j][i]=np.nan
            
        if df['Minute ventilation %02d hours'%j][i] > 50:
            df['Minute ventilation %02d hours'%j][i]=np.nan
            
        if df['PC mode %02d hours'%j][i] > 50:
            df['PC mode %02d hours'%j][i]=np.nan
            
        if df['Peak airway pressure %02d hours'%j][i] > 50:
            df['Peak airway pressure %02d hours'%j][i]=np.nan
            
        if df['Plateau %02d hours'%j][i] > 50:
            df['Plateau %02d hours'%j][i]=np.nan
            
        if df['Pressure support %02d hours'%j][i] > 50:
            df['Pressure support %02d hours'%j][i]=np.nan
            
        if df['RSBI %02d hours'%j][i] > 200:
            df['RSBI %02d hours'%j][i]=np.nan
            
        if df['Respiratory rate %02d hours'%j][i] < 7:
            df['Respiratory rate %02d hours'%j][i] = 7
            
        if df['Respiratory rate %02d hours'%j][i] > 55:
            df['Respiratory rate %02d hours'%j][i]=np.nan
            
        if df['tidal volume %02d hours'%j][i] < 20:
            df['tidal volume %02d hours'%j][i] = 20
            
        if df['tidal volume %02d hours'%j][i] > 1000:
            df['tidal volume %02d hours'%j][i]=np.nan

df.to_csv(os.getcwd()+'\\respiratorycharting_hour_mask.csv',index=False)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

725
