In [7]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
import time
import re
from configobj import ConfigObj

In [8]:
# Create a posgresql database connection using settings from config file 

# Create a database connection using settings from config file
config='../db/config.ini'
# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = '243'
    conn_info["sqlhost"] = 'localhost'
    conn_info["sqlport"] = 5432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu'
    
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '243':
    # try connecting without password, i.e. peer or OS authentication
    try:
        if (conn_info["sqlhost"] == 'localhost') & (conn_info["sqlport"]=='5432'):
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                    user=conn_info["sqluser"])            
        else:
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                    host=conn_info["sqlhost"],
                                    port=conn_info["sqlport"],
                                    user=conn_info["sqluser"])
    except:
        conn_info["sqlpass"] = getpass.getpass('Password: ')
    
        con = psycopg2.connect(dbname=conn_info["dbname"],
                                host=conn_info["sqlhost"],
                                port=conn_info["sqlport"],
                                user=conn_info["sqluser"],
                                password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres
Password: ········


In [9]:
#import patient data, if not exsit allpatientdata.csv file than please excute the patientdata.ipynb file first
df=pd.read_csv(os.getcwd()+'\\allpatientfile.csv')

In [10]:
def main():
    start = time.clock()
    ftName_0 = ['Nasal cannula O2', 'Compliance', 'FiO2', 'Mean airway pressure',
                'Minute ventilation', 'PC mode', 'Peak airway pressure', 'PEEP',
                'Plateau', 'Pressure support', 'Bipap', 'Respiratory rate', 
                'RSBI','SaO2', 'tidal volume', 'ROXindex']
    ftName = ['patientunitstayid']
    for i in range(0,len(ftName_0)):
        for j in range(0,30):
            ftName.append('%s D%02d'%(ftName_0[i], j+1))
    df_RC = pd.DataFrame(columns= ftName)
    th = 0
    for i in range(0,len(df)):
        print('%d/%d:%s'%(i,len(df),df['patientunitstayid'][i]),end='\r')
        df_RC = Respiratorycharting(df['patientunitstayid'][i], df['Firstday'][i], df['LoS'][i], th, df_RC, ftName_0)
        th=th+1
    df_RC.to_csv(os.getcwd()+'\\respiratorycharting.csv',index=False)
    end = time.clock() - start
    print(end)

In [11]:
#可調整輸出的方式
#adjust the extract data into output format
def Featrue_all_value_with_day(ft_name, day, df_respiratorycharting):
    temp_1 = [-0.001]
    for j in range(0,len(df_respiratorycharting.index)):
        if df_respiratorycharting['respchartvaluelabel'][j] == ft_name:  
            if df_respiratorycharting['date'][j] == day:
                if df_respiratorycharting['respchartvaluelabel'][j] == 'Bipap' and len(df_respiratorycharting['respchartvalue'][j])!=0:
                    temp_1.append(1)
                    continue
                temp_ex = df_respiratorycharting['respchartvalue'][j].strip(' ')
                temp_ex =  re.sub(u"([^\u0030-\u0039\.])","",temp_ex)
                if temp_ex.strip() == '':
                    continue
                else:
                    temp_1.append(float(temp_ex))
    if len(temp_1) != 1:
           temp_1.pop(0)
    return np.min(temp_1)

In [12]:
def Respiratorycharting(patientunitstayid, Firstday, lenofstay, th, df_RC, ftName_0):
    query = query_schema + """
    select *
    from respiratorycharting
    where patientunitstayid = '{}'
    order by respchartoffset
    """.format(int(patientunitstayid))

    df_respiratorycharting = pd.read_sql_query(query, con)
    df_respiratorycharting = df_respiratorycharting[['patientunitstayid','respchartoffset','respchartvaluelabel', 'respchartvalue']]

    #計算該row是第幾天
    #calculate event time by days
    date = np.zeros((len(df_respiratorycharting.index),), dtype=int)
    df_respiratorycharting['date'] = date
    for i in range(0,len(df_respiratorycharting.index)):
        temp = df_respiratorycharting['respchartoffset'][i]
        if temp < Firstday:
            df_respiratorycharting['date'][i] = 1
        elif temp >= Firstday:
            temp = temp - Firstday
            df_respiratorycharting['date'][i] = temp // (24*60) + 2
            
    #創建表格及預設值       
    #create sheet and default value
    space_row = [patientunitstayid]
    for i in range(len(ftName_0)):
        for j in range(0,30):
            if j >= int(lenofstay):
                space_row.append(' ')
            else:
                space_row.append(' ')
    droplist = []
    if df_respiratorycharting.size == 0:
        df_RC = df_RC.append(pd.Series(space_row, index=df_RC.columns), ignore_index=True)
    else:
        df_RC = df_RC.append(pd.Series(space_row, index=df_RC.columns), ignore_index=True)
        
        ##讀進來的名稱先轉小寫再進行比較，然後修改成需要的名稱格式
        #Trun the name in to lower class, than comparing. After that change it into specific format
        for i in range(0,len(df_respiratorycharting.index)):
            temp = df_respiratorycharting['respchartvaluelabel'][i].lower().strip(' ')
            if temp == 'lpm o2':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Nasal cannula O2'
                temp_ex = df_respiratorycharting['respchartvalue'][i].strip('%')
            elif temp == 'compliance' or temp == 'mechanical ventilator compliance' or temp == 'static compliance':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Compliance'
            elif temp == 'fio2' or temp == 'fio2 (%)' or temp == 'o2 percentage' or temp == 'set fraction of inspired oxygen (fio2)':
                df_respiratorycharting['respchartvaluelabel'][i] = 'FiO2'
                temp_ex = df_respiratorycharting['respchartvalue'][i].strip('%')
            elif temp == 'mean airway pressure':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Mean airway pressure'
            elif temp == 'exhaled mv or temp' or temp == 'minute volume, spontaneous' or temp == 'measured ve':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Minute ventilation'
            elif temp == 'pressure control':
                df_respiratorycharting['respchartvaluelabel'][i] = 'PC mode'
            elif temp == 'peak insp. pressure' or temp == 'peak pressure':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Peak airway pressure'
            elif temp == 'peep' or temp == 'peep/cpap':
                df_respiratorycharting['respchartvaluelabel'][i] = 'PEEP'
            elif temp == 'plateau pressure':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Plateau'
            elif temp == 'pressure support' or temp == 'PS':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Pressure support'
            elif temp == 'mechanical ventilator resistance':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Resistance'
            elif temp == 'vent rate' or temp == 'total rr' or temp == 'rr (patient)' or temp == 'resp rate total' or temp == 'f total' or temp == 'spontaneous respiratory rate' or temp == 'rr spont':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Respiratory rate'
            elif temp == 'total rsbi':
                df_respiratorycharting['respchartvaluelabel'][i] = 'RSBI'
            elif temp == 'sao2':
                df_respiratorycharting['respchartvaluelabel'][i] = 'SaO2'
            elif temp == 'bipap delivery mode':
                df_respiratorycharting['respchartvaluelabel'][i] = 'Bipap'
            elif temp == 'tv/kg ibw':
                if df['gender'][0] == 'Female':
                    PBW = 45.5 + 0.91 * (df['height'][0] - 152.4)
                    df_respiratorycharting['respchartvalue'][i] = str(int(float(df_respiratorycharting['respchartvalue'][i]) * PBW))
                elif df['gender'][0] == 'Male':
                    PBW = 50.0 + 0.91 * (df['height'][0] - 152.4)
                    df_respiratorycharting['respchartvalue'][i] = str(int(float(df_respiratorycharting['respchartvalue'][i]) * PBW))
                df_respiratorycharting['respchartvaluelabel'][i] = 'tidal volume'
            elif temp == 'tidal volume observed (vt)' or temp == 'exhaled tv (patient)' or temp == 'tidal volume, delivered' or temp == 'exhaled vt' or temp == 'spont tv':
                df_respiratorycharting['respchartvaluelabel'][i] = 'tidal volume'
        
    #將計算好的值放入表格中
    #put the calculated value into sheet and call "Feature_all_value_with_day" function to turning it into specific format
        for i in range(0,len(ftName_0)):
            for j in range(0,30):
                if j >= int(lenofstay):
                    break
                else:
                    str_temp = '%s D%02d'%(ftName_0[i],j+1)
                    df_RC[str_temp][th] = Featrue_all_value_with_day(ftName_0[i],j+1,df_respiratorycharting)  
    return df_RC

In [13]:
if __name__ is '__main__':
    main()

  


0/2676:251510.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

1/2676:255084.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


21/2676:307232.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


25/2676:311838.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


137/2676:427977.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


145/2676:439487.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


993/2676:960746.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


1465/2676:1553458.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2446/2676:3204862.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2447/2676:3204871.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


6621.849893200037.0




In [14]:
#mask
df=pd.read_csv(os.getcwd()+'\\respiratorycharting.csv')
df= df.mask(df=='-0.001')
df= df.mask(df==-0.001)
df=df.mask(df==0)
df=df.mask(df==0.0)
df=df.mask(df=='0')
df=df.mask(df=='0.0')
df.mask(df==' ').to_csv(os.getcwd()+'\\respiratorycharting_mask.csv',index=False)

In [15]:
#filtering the outliner
df=pd.read_csv(os.getcwd()+'\\respiratorycharting_mask.csv')
for i in range(0,len(df.index)):
    for j in range(1,31):
        if df['Nasal cannula O2 D%02d'%j][i] > 6:
            df['Nasal cannula O2 D%02d'%j][i]=6

        if df['Nasal cannula O2 D%02d'%j][i] < 0:
            df['Nasal cannula O2 D%02d'%j][i]=0

        if df['Compliance D%02d'%j][i] > 100:
            df['Compliance D%02d'%j][i]=100

        if df['PEEP D%02d'%j][i]<0 or df['PEEP D%02d'%j][i]>25:
            df['PEEP D%02d'%j][i]=0

        if df['PEEP D%02d'%j][i]>25:
            df['PEEP D%02d'%j][i]=25

        if df['FiO2 D%02d'%j][i] > 100 or df['FiO2 D%02d'%j][i] < 20:
            df['FiO2 D%02d'%j][i]=100

        if df['FiO2 D%02d'%j][i] < 20:
            df['FiO2 D%02d'%j][i]=20
            
        if df['Mean airway pressure D%02d'%j][i] < 5:
            df['Mean airway pressure D%02d'%j][i] = 5
            
        if df['Mean airway pressure D%02d'%j][i] > 50:
            df['Mean airway pressure D%02d'%j][i]=50

        if df['Minute ventilation D%02d'%j][i] > 50:
            df['Minute ventilation D%02d'%j][i]=50

        if df['PC mode D%02d'%j][i] > 50:
            df['PC mode D%02d'%j][i]=50

        if df['Peak airway pressure D%02d'%j][i] > 50:
            df['Peak airway pressure D%02d'%j][i]=50

        if df['Plateau D%02d'%j][i] > 50:
            df['Plateau D%02d'%j][i]=50

        if df['Pressure support D%02d'%j][i] > 50:
            df['Pressure support D%02d'%j][i]=50

        if df['RSBI D%02d'%j][i] > 200:
            df['RSBI D%02d'%j][i]=200

        if df['Respiratory rate D%02d'%j][i] < 7:
            df['Respiratory rate D%02d'%j][i] = 7
            
        if df['Respiratory rate D%02d'%j][i] > 55:
            df['Respiratory rate D%02d'%j][i]=55

        if df['tidal volume D%02d'%j][i] < 20:
            df['tidal volume D%02d'%j][i] = 20
            
        if df['tidal volume D%02d'%j][i] > 1000:
            df['tidal volume D%02d'%j][i]=1000

df.to_csv(os.getcwd()+'\\respiratorycharting_mask.csv',index=False)

In [21]:
#complement 
df = pd.read_csv(os.getcwd()+'\\respiratorycharting_mask.csv')
ftName_0 = ['Nasal cannula O2', 'Compliance', 'FiO2', 'Mean airway pressure',
                'Minute ventilation', 'PC mode', 'Peak airway pressure', 'PEEP',
                'Plateau', 'Pressure support', 'Respiratory rate', 
                'RSBI','SaO2','tidal volume', 'ROXindex']

for i in range(0,len(df)):
    print('%d/%d:%s'%(i,len(df),df['patientunitstayid'][i]),end='\r')
    for j in ftName_0:
        current=0
        flag=0
        for day in range(1,31):
            if(not pd.isna(df['%s D%02d'%(j,day)][i]) and flag==0):
                flag=1#find first value of a month
                current=df['%s D%02d'%(j,day)][i]
                #print(current,flag,day)
                break
        if(flag==1):
            #print(123)
            for missing in range(1,day):
                df['%s D%02d'%(j,missing)][i]=current
            flag=2#if there is a null value than put the value of previous day into it
        #print(df['%s D%02d'%('Nasal cannula O2',1)][i])
        if(flag==2):
            for missing in range(2,31):
                if(pd.isna(df['%s D%02d'%(j,missing)][i]) and not pd.isna(df['%s D%02d'%(j,missing-1)][i])):
                    df['%s D%02d'%(j,missing)][i]=df['%s D%02d'%(j,missing-1)][i]
        mean=[]
        if(flag==0):
            for k in range(0,len(df)):
                mean.append(df['%s D01'%j][k])
            for day in range(1,31):
                df['%s D%02d'%(j,day)][i]=np.nanmean(mean)
df.to_csv('respiratorycharting_com.csv',index=False)
                

0/2676:251510.01/2676:255084.0



2675/2676:3353226.0

In [22]:
#insert target to show wheather the patient using ventilator or not
df = pd.read_csv(os.getcwd()+'\\respiratorycharting_com.csv')
d=[]
for i in range(0,len(df)):
    d.append(np.nan)
for i in range(1,31):
    df.insert(len(df.columns),"target D%02d"%(i),d)
df.to_csv(os.getcwd()+'\\respiratorycharting_com.csv',index=False)

In [23]:
#insert target to show wheather the patient using ventilator or not
df2 = pd.read_csv(os.getcwd()+'\\respiratorycharting_com.csv')#target
df = pd.read_csv(os.getcwd()+'\\respiratorycharting_mask.csv')

for i in range(0,len(df)):
    for k in range(1,31):
        if(not pd.isna(df['PEEP D%02d'%(k)][i])):
            df2['target D%02d'%(k)][i]=1
            '''
        elif(pd.isna(df['PEEP D%02d'%(k)][i])):
            if(not pd.isna(df['Bipap D%02d'%(k)][i]) or not pd.isna(df['Nasal cannula O2 D%02d'%(k)][i])):
            '''
        else:
            df2['target D%02d'%(k)][i]=0
for i in range(0,len(df)):
#i=2498
    flag=0
    start=0
    end=0
    for k in range(2,30):
        if(not pd.isna(df['PEEP D%02d'%k][i]) and flag==0 and pd.isna(df['PEEP D%02d'%(k+1)][i])):
            start=k
            flag=1
        if(not pd.isna(df['PEEP D%02d'%k][i]) and flag==1and pd.isna(df['PEEP D%02d'%(k-1)][i])):
            #if(not pd.isna(df['Bipap D%02d'%(k-1)][i]) or not pd.isna(df['Nasal cannula O2 D%02d'%(k-1)][i])):
            end=k
        #print(k,start,end)
        if(0<end-start<=5 and flag==1):
            for j in range(start,end):
                df2['target D%02d'%j][i]=1.0
            start=0
            end=0
            flag=0
df2.to_csv(os.getcwd()+'\\respiratorycharting_com.csv',index=False)