In [4]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
import time
from configobj import ConfigObj

In [5]:
# Create a posgresql database connection using settings from config file 

# Create a database connection using settings from config file
config='../db/config.ini'
# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = '243'
    conn_info["sqlhost"] = 'localhost'
    conn_info["sqlport"] = 5432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu'
    
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '243':
    # try connecting without password, i.e. peer or OS authentication
    try:
        if (conn_info["sqlhost"] == 'localhost') & (conn_info["sqlport"]=='5432'):
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                    user=conn_info["sqluser"])            
        else:
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                    host=conn_info["sqlhost"],
                                    port=conn_info["sqlport"],
                                    user=conn_info["sqluser"])
    except:
        conn_info["sqlpass"] = getpass.getpass('Password: ')
    
        con = psycopg2.connect(dbname=conn_info["dbname"],
                                host=conn_info["sqlhost"],
                                port=conn_info["sqlport"],
                                user=conn_info["sqluser"],
                                password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres
Password: ········


In [7]:
#import patient data, if not exsit allpatientdata.csv file than please excute the patientdata.ipynb file first
df=pd.read_csv(os.getcwd()+'\\allpatientfile.csv')

In [8]:
def main():
    start = time.clock()
    #there are four features we need
    ftName_0 = ['Aspergillus', 'Candida', 'GNB', 'GPC']
    ftName = ['patientunitstayid']
    for i in range(0,len(ftName_0)):
        for j in range(0,30):
            ftName.append('%s D%02d'%(ftName_0[i], j+1))
    df_ML = pd.DataFrame(columns= ftName)
    th = 0
    for i in range(0,len(df)):
        print("%d/%d: %s"%(i,len(df),df['patientunitstayid'][i]),end='\r')
        df_ML = Microlab(df['patientunitstayid'][i], df['Firstday'][i], df['LoS'][i], th, df_ML, ftName_0)
        th=th+1
    df_ML.to_csv(os.getcwd()+'\\microlab.csv',index=False)
    end = time.clock() - start
    print(end)

In [9]:
#可調整輸出的方式
#adjust the extract data into output format
def Featrue_all_value_with_day(ft_name, day, df_microlab):
    temp_1 = [0]
    for j in range(0,len(df_microlab.index)):
        if df_microlab['organism'][j] == ft_name:  
                if df_microlab['date'][j] == day: 
                    temp_1.append(1)
    if len(temp_1) != 1:
        temp_1.pop(0)
    return np.max(temp_1)


In [10]:
def Microlab(patientunitstayid, Firstday, lenofstay, th, df_ML, ftName_0):
    query = query_schema + """
    select *
    from microlab
    where patientunitstayid = '{}'
    order by culturetakenoffset
    """.format(int(patientunitstayid))

    df_microlab = pd.read_sql_query(query, con)
    df_microlab = df_microlab[['patientunitstayid','culturetakenoffset','organism']]

    #計算該row是第幾天
    #calculate event time by days
    date = np.zeros((len(df_microlab.index),), dtype=int)
    df_microlab['date'] = date
    for i in range(0,len(df_microlab.index)):
        temp = df_microlab['culturetakenoffset'][i]
        if temp < Firstday:
            df_microlab['date'][i] = 1
        elif temp >= Firstday:
            temp = temp - Firstday
            df_microlab['date'][i] = temp // (24*60) + 2

    #創建表格及預設值
    #create sheet and default value
    space_row = [patientunitstayid]
    for i in range(len(ftName_0)):
        for j in range(0,30):
            if j >= int(lenofstay):
                space_row.append(-1)
            else:
                space_row.append(0)
    if df_microlab.size == 0:
        df_ML = df_ML.append(pd.Series(space_row, index=df_ML.columns), ignore_index=True)
    else:
        df_ML = df_ML.append(pd.Series(space_row, index=df_ML.columns), ignore_index=True)

        ##讀進來的名稱先轉小寫再進行比較，然後修改成需要的名稱格式
        #Trun the name in to lower class, than comparing. After that change it into specific format
        for i in range(0,len(df_microlab.index)):
            temp = df_microlab['organism'][i].lower().strip(' ')
            if temp == 'candida albicans' or temp == 'candida glabrata' or temp == 'candida parapsilosis' or temp == 'candida tropicalis' or temp == 'yeast':
                df_microlab['organism'][i] = 'Candida'
            elif temp == 'acinetobacter baumanii' or temp == 'bacteroides fragilis' or temp == 'campylobacter fetus' or temp == 'enterobacter cloacae' or temp == 'enterobacter sp.' or temp == 'escherichia coli' or temp == 'gram negative rods' or temp == 'haemophilus influenzae' or temp == 'klebsiella oxytoca' or temp == 'klebsiella pneumoniae' or temp == 'proteus mirabilis (indole +)' or temp == 'pseudomonas aeruginosa' or temp == 'serratia marcescens' or temp == 'stenotrophomonas maltophila':
                df_microlab['organism'][i] = 'GNB'
            elif temp == 'Aspergillus fumigatus':
                df_microlab['organism'][i] = 'Aspergillus'        
            elif temp == 'enterococcus faecalis' or temp == 'enterococcus fecium' or temp == 'gram positive cocci' or temp == 'gram positive cocci - in chains' or temp == 'gram positive cocci - in clusters' or temp == 'gram positive diplococci' or temp == 'staphylococcus aureus' or temp == 'staphylococcus hominis' or temp == 'streptococcus pneumoniae' or temp == 'streptococcus pyogenes' or temp == 'streptococcus species, other':
                df_microlab['organism'][i] = 'GPC'
                
        #將計算好的值放入表格中
        #put the calculated value into sheet and call "Feature_all_value_with_day" function to turning it into specific format
        for i in range(0,len(ftName_0)):
            for j in range(0,30):
                if j >= int(lenofstay):
                    break
                else:
                    str_temp = '%s D%02d'%(ftName_0[i],j+1)
                    df_ML[str_temp][th] = Featrue_all_value_with_day(ftName_0[i],j+1,df_microlab)  
    return df_ML

In [11]:
if __name__ is '__main__':
    main()

  


2359/2676: 3096377.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2360/2676: 3097187.02361/2676: 3097574.02362/2676: 3098272.02363/2676: 3100103.02364/2676: 3100461.02365/2676: 3101803.02366/2676: 3103378.02367/2676: 3103505.02368/2676: 3106481.02369/2676: 3108971.02370/2676: 3109267.02371/2676: 3112893.02372/2676: 3115193.02373/2676: 3117121.02374/2676: 3117247.02375/2676: 3117939.02376/2676: 3118107.02377/2676: 3120701.02378/2676: 3123953.02379/2676: 3124310.02380/2676: 3125093.02381/2676: 3125414.02382/2676: 3125415.02383/2676: 3125536.02384/2676: 3126102.02385/2676: 3126398.02386/2676: 3127325.02387/2676: 3127693.02388/2676: 3128486.02389/2676: 3128815.02390/2676: 3128816.02391/2676: 3128817.02392/2676: 3130750.02393/2676: 3131863.02394/2676: 3132909.02395/2676: 3134315.02396/2676: 3135068.02397/2676: 3136953.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


8.4299429000238896.0


  app.launch_new_instance()


In [12]:
#mask
df=pd.read_csv(os.getcwd()+'\\microlab.csv')
df= df.mask(df==-1)
df.mask(df==' ').to_csv(os.getcwd()+'\\microlab_mask.csv',index=False)

In [13]:
#imputation 補值
df2 = pd.read_csv(os.getcwd()+'\microlab_mask.csv')
ftName_0 = ['Aspergillus', 'Candida', 'GNB', 'GPC']

for i in range(0,len(df2)):
    print(df2['patientunitstayid'][i])
    for j in range(0,len(ftName_0)):
        start=2
        for day in range(start,31):
            if(pd.isna(df2[ftName_0[j]+" D%02d"%(day)][i])and df2[ftName_0[j]+" D%02d"%(day-1)][i]==0):
                start=day
                for k in range(start+1,31):
                    if(not pd.isna(df2[ftName_0[j]+" D%02d"%(k)][i]) and df2[ftName_0[j]+" D%02d"%(k)][i]==1):
                        temp=df2[ftName_0[j]+" D%02d"%(k)][i]
                        for com in range(start,k):
                            df2[ftName_0[j]+" D%02d"%(com)][i]=temp
                        day=k
                        break
                    else:
                        for com in range(start,k):
                            df2[ftName_0[j]+" D%02d"%(com)][i]=0
                        day=k
                        break
            elif(pd.isna(df2[ftName_0[j]+" D%02d"%(day)][i])and df2[ftName_0[j]+" D%02d"%(day-1)][i]==1):
                start=day
                #print(123)
                for k in range(start,31):
                    if(pd.isna(df2[ftName_0[j]+" D%02d"%(k)][i])):
                        df2[ftName_0[j]+" D%02d"%(k)][i]=1
                        if(k<30):
                            if(not pd.isna(df2[ftName_0[j]+" D%02d"%(k+1)][i])):
                                day=k
                                break
for i in range(0,len(df2)):
    for j in range(0,len(ftName_0)):                            
        for day in range(1,31):
            if(pd.isna((df2[ftName_0[j]+" D%02d"%(day)][i]))):
                df2[ftName_0[j]+" D%02d"%(day)][i]=df2[ftName_0[j]+" D%02d"%(day-1)][i]

df2.to_csv(os.getcwd()+'\\microlab_com.csv',index=False)

251510.0
255084.0
257541.0
258841.0
264445.0
264458.0
264459.0
264716.0
267580.0
272551.0
276269.0
277424.0
280609.0
282527.0
283919.0
288512.0
291580.0
292678.0
303932.0
304111.0
304112.0
307232.0
307233.0
310709.0
311060.0
311838.0
314472.0
315675.0
316110.0
316448.0
316820.0
317144.0
317597.0
317857.0
317917.0
319563.0
319656.0
320647.0
321556.0
323705.0
323900.0
326481.0
328870.0
329985.0
329997.0
330755.0
331961.0
333573.0
335033.0
335637.0
335866.0
336030.0
336291.0
337643.0
337886.0
340203.0
342637.0
343310.0
344692.0
345691.0
347236.0
349887.0
350148.0
351100.0
353173.0
353888.0
356510.0
356736.0
360614.0
360947.0
361518.0
362606.0
362904.0
363151.0
366048.0
366353.0
367300.0
368735.0
370199.0
370234.0
370755.0
371331.0
371615.0
371828.0
372372.0
373453.0
373663.0
373682.0
376110.0
376189.0
376239.0
377999.0
378048.0
378589.0
382032.0
382033.0
382551.0
383082.0
383198.0
383305.0
384523.0
384583.0
389297.0
389324.0
392433.0
393404.0
395242.0
397901.0
400311.0
400543.0
401481.0
4

933597.0
933813.0
934132.0
934303.0
934304.0
934344.0
935616.0
937161.0
937232.0
937428.0
937491.0
938863.0
939463.0
940700.0
941389.0
941784.0
941894.0
942809.0
942954.0
943176.0
943229.0
943847.0
944692.0
944694.0
945476.0
945517.0
946474.0
946535.0
947091.0
947151.0
947291.0
948052.0
948232.0
948251.0
948467.0
949655.0
949781.0
950897.0
952119.0
952312.0
952612.0
954165.0
954457.0
954829.0
954831.0
955568.0
955571.0
955846.0
955887.0
956763.0
957147.0
957800.0
957801.0
957987.0
958423.0
958514.0
958611.0
958612.0
958647.0
958719.0
958720.0
958860.0
958892.0
959106.0
959867.0
960238.0
960746.0
960778.0
960791.0
961241.0
961530.0
962179.0
962436.0
962806.0
962937.0
963490.0
963786.0
963868.0
965099.0
965216.0
965330.0
965603.0
965773.0
965947.0
965967.0
966370.0
966380.0
966801.0
967444.0
967715.0
967993.0
968356.0
968639.0
969769.0
969919.0
969934.0
970196.0
970932.0
971051.0
971204.0
971885.0
972000.0
972389.0
972709.0
972870.0
973219.0
974003.0
974484.0
975519.0
975837.0
976152.0
9

1780263.0
1780445.0
1780514.0
1780670.0
1780998.0
1781064.0
1781458.0
1781657.0
1781802.0
1783158.0
1783658.0
1785351.0
1785473.0
1785822.0
1785917.0
1785918.0
1786144.0
1786522.0
1787384.0
1788109.0
1788208.0
1788556.0
1788567.0
1788568.0
1788832.0
1789528.0
1789996.0
1790022.0
1790055.0
1792183.0
1792366.0
1792717.0
1792823.0
1793563.0
1793565.0
1793843.0
1794090.0
1794985.0
1795055.0
1795092.0
1795550.0
1796189.0
1796201.0
1796228.0
1797073.0
1797773.0
1800469.0
1801000.0
1801097.0
1801098.0
1801099.0
1801237.0
1801251.0
1801441.0
1802339.0
1802366.0
1803044.0
1803767.0
1804230.0
1805202.0
1805691.0
1805879.0
1806511.0
1806519.0
1806787.0
1806856.0
1806931.0
1807549.0
1807896.0
1808250.0
1808251.0
1808294.0
1808541.0
1808587.0
1809093.0
1809147.0
1809240.0
1809355.0
1809683.0
1809943.0
1810039.0
1810145.0
1811114.0
1811553.0
1812282.0
1813531.0
1814247.0
1814402.0
1814761.0
1815789.0
1816239.0
1817436.0
1817806.0
1817910.0
1818067.0
1818502.0
1819099.0
1820213.0
1820776.0
1820777.0


3338553.0
3338726.0
3338823.0
3339383.0
3340345.0
3340476.0
3341064.0
3341407.0
3341968.0
3342068.0
3342538.0
3342552.0
3342956.0
3343372.0
3343860.0
3344085.0
3344105.0
3344110.0
3344534.0
3344545.0
3344747.0
3344845.0
3344978.0
3345308.0
3345622.0
3345757.0
3346127.0
3346554.0
3346733.0
3346870.0
3347058.0
3347406.0
3347519.0
3348701.0
3348974.0
3349045.0
3349046.0
3349047.0
3349087.0
3349231.0
3349545.0
3349680.0
3349850.0
3350526.0
3350584.0
3350585.0
3350778.0
3350907.0
3350908.0
3350978.0
3351431.0
3351989.0
3352034.0
3352125.0
3352475.0
3352512.0
3352721.0
3352801.0
3353087.0
3353226.0
