In [1]:
import sqlite3
import matplotlib
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import utility as util
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix, f1_score


In [2]:
# Open the local SQLite database file that the queries below run against.
# A raw string is used because '\M' is not a valid escape sequence: newer Python
# versions warn about it (and will eventually reject it). The path value itself
# is unchanged.
conn = sqlite3.connect(r'D:\MIMIC2.db')

In [3]:
# First-day lab aggregation: one row per ICU stay with the min/max of each lab
# label, restricted to lab events charted within 24 hours after ICU admission
# (ie.intime).
# NOTE: `que` is reassigned by a later cell before it is ever executed, so this
# version is not the one that produces `lab_first_day`.
que = """

SELECT
  pvt.subject_id, pvt.hadm_id, pvt.icustay_id

  , min(CASE WHEN label = 'ANION GAP' THEN valuenum ELSE null END) as ANIONGAP_min
  , max(CASE WHEN label = 'ANION GAP' THEN valuenum ELSE null END) as ANIONGAP_max
  , min(CASE WHEN label = 'ALBUMIN' THEN valuenum ELSE null END) as ALBUMIN_min
  , max(CASE WHEN label = 'ALBUMIN' THEN valuenum ELSE null END) as ALBUMIN_max
  , min(CASE WHEN label = 'BANDS' THEN valuenum ELSE null END) as BANDS_min
  , max(CASE WHEN label = 'BANDS' THEN valuenum ELSE null END) as BANDS_max
  , min(CASE WHEN label = 'BICARBONATE' THEN valuenum ELSE null END) as BICARBONATE_min
  , max(CASE WHEN label = 'BICARBONATE' THEN valuenum ELSE null END) as BICARBONATE_max
  , min(CASE WHEN label = 'BILIRUBIN' THEN valuenum ELSE null END) as BILIRUBIN_min
  , max(CASE WHEN label = 'BILIRUBIN' THEN valuenum ELSE null END) as BILIRUBIN_max
  , min(CASE WHEN label = 'CREATININE' THEN valuenum ELSE null END) as CREATININE_min
  , max(CASE WHEN label = 'CREATININE' THEN valuenum ELSE null END) as CREATININE_max
  , min(CASE WHEN label = 'CHLORIDE' THEN valuenum ELSE null END) as CHLORIDE_min
  , max(CASE WHEN label = 'CHLORIDE' THEN valuenum ELSE null END) as CHLORIDE_max
  , min(CASE WHEN label = 'GLUCOSE' THEN valuenum ELSE null END) as GLUCOSE_min
  , max(CASE WHEN label = 'GLUCOSE' THEN valuenum ELSE null END) as GLUCOSE_max
  , min(CASE WHEN label = 'HEMATOCRIT' THEN valuenum ELSE null END) as HEMATOCRIT_min
  , max(CASE WHEN label = 'HEMATOCRIT' THEN valuenum ELSE null END) as HEMATOCRIT_max
  , min(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum ELSE null END) as HEMOGLOBIN_min
  , max(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum ELSE null END) as HEMOGLOBIN_max
  , min(CASE WHEN label = 'LACTATE' THEN valuenum ELSE null END) as LACTATE_min
  , max(CASE WHEN label = 'LACTATE' THEN valuenum ELSE null END) as LACTATE_max
  , min(CASE WHEN label = 'PLATELET' THEN valuenum ELSE null END) as PLATELET_min
  , max(CASE WHEN label = 'PLATELET' THEN valuenum ELSE null END) as PLATELET_max
  , min(CASE WHEN label = 'POTASSIUM' THEN valuenum ELSE null END) as POTASSIUM_min
  , max(CASE WHEN label = 'POTASSIUM' THEN valuenum ELSE null END) as POTASSIUM_max
  , min(CASE WHEN label = 'PTT' THEN valuenum ELSE null END) as PTT_min
  , max(CASE WHEN label = 'PTT' THEN valuenum ELSE null END) as PTT_max
  , min(CASE WHEN label = 'INR' THEN valuenum ELSE null END) as INR_min
  , max(CASE WHEN label = 'INR' THEN valuenum ELSE null END) as INR_max
  , min(CASE WHEN label = 'PT' THEN valuenum ELSE null END) as PT_min
  , max(CASE WHEN label = 'PT' THEN valuenum ELSE null END) as PT_max
  , min(CASE WHEN label = 'SODIUM' THEN valuenum ELSE null END) as SODIUM_min
  , max(CASE WHEN label = 'SODIUM' THEN valuenum ELSE null end) as SODIUM_max
  , min(CASE WHEN label = 'BUN' THEN valuenum ELSE null end) as BUN_min
  , max(CASE WHEN label = 'BUN' THEN valuenum ELSE null end) as BUN_max
  , min(CASE WHEN label = 'WBC' THEN valuenum ELSE null end) as WBC_min
  , max(CASE WHEN label = 'WBC' THEN valuenum ELSE null end) as WBC_max


FROM
(
  SELECT ie.subject_id, ie.hadm_id, ie.icustay_id

  , CASE
        WHEN itemid = 50868 THEN 'ANION GAP'
        WHEN itemid = 50862 THEN 'ALBUMIN'
        WHEN itemid = 51144 THEN 'BANDS'
        WHEN itemid = 50882 THEN 'BICARBONATE'
        WHEN itemid = 50885 THEN 'BILIRUBIN'
        WHEN itemid = 50912 THEN 'CREATININE'
        WHEN itemid = 50806 THEN 'CHLORIDE'
        WHEN itemid = 50902 THEN 'CHLORIDE'
        WHEN itemid = 50809 THEN 'GLUCOSE'
        WHEN itemid = 50931 THEN 'GLUCOSE'
        WHEN itemid = 50810 THEN 'HEMATOCRIT'
        WHEN itemid = 51221 THEN 'HEMATOCRIT'
        WHEN itemid = 50811 THEN 'HEMOGLOBIN'
        WHEN itemid = 51222 THEN 'HEMOGLOBIN'
        WHEN itemid = 50813 THEN 'LACTATE'
        WHEN itemid = 51265 THEN 'PLATELET'
        WHEN itemid = 50822 THEN 'POTASSIUM'
        WHEN itemid = 50971 THEN 'POTASSIUM'
        WHEN itemid = 51275 THEN 'PTT'
        WHEN itemid = 51237 THEN 'INR'
        WHEN itemid = 51274 THEN 'PT'
        WHEN itemid = 50824 THEN 'SODIUM'
        WHEN itemid = 50983 THEN 'SODIUM'
        WHEN itemid = 51006 THEN 'BUN'
        WHEN itemid = 51300 THEN 'WBC'
        WHEN itemid = 51301 THEN 'WBC'
      ELSE null
    END AS label
  ,
    CASE
      WHEN itemid = 50862 and valuenum >    10 THEN null
      WHEN itemid = 50868 and valuenum > 10000 THEN null
      WHEN itemid = 51144 and valuenum <     0 THEN null
      WHEN itemid = 51144 and valuenum >   100 THEN null
      WHEN itemid = 50882 and valuenum > 10000 THEN null
      WHEN itemid = 50885 and valuenum >   150 THEN null
      WHEN itemid = 50806 and valuenum > 10000 THEN null
      WHEN itemid = 50902 and valuenum > 10000 THEN null
      WHEN itemid = 50912 and valuenum >   150 THEN null
      WHEN itemid = 50809 and valuenum > 10000 THEN null
      WHEN itemid = 50931 and valuenum > 10000 THEN null
      WHEN itemid = 50810 and valuenum >   100 THEN null
      WHEN itemid = 51221 and valuenum >   100 THEN null
      WHEN itemid = 50811 and valuenum >    50 THEN null
      WHEN itemid = 51222 and valuenum >    50 THEN null
      WHEN itemid = 50813 and valuenum >    50 THEN null
      WHEN itemid = 51265 and valuenum > 10000 THEN null
      WHEN itemid = 50822 and valuenum >    30 THEN null
      WHEN itemid = 50971 and valuenum >    30 THEN null
      WHEN itemid = 51275 and valuenum >   150 THEN null
      WHEN itemid = 51237 and valuenum >    50 THEN null
      WHEN itemid = 51274 and valuenum >   150 THEN null
      WHEN itemid = 50824 and valuenum >   200 THEN null
      WHEN itemid = 50983 and valuenum >   200 THEN null
      WHEN itemid = 51006 and valuenum >   300 THEN null
      WHEN itemid = 51300 and valuenum >  1000 THEN null
      WHEN itemid = 51301 and valuenum >  1000 THEN null
    ELSE le.valuenum
    END AS valuenum

  FROM icustays ie

  LEFT JOIN labevents le
    ON le.subject_id = ie.subject_id AND le.hadm_id = ie.hadm_id
    -- first 24h of the ICU stay; both sides go through julianday() so the
    -- comparison is numeric (assumes charttime/intime are ISO-8601 text)
    AND julianday(le.charttime) BETWEEN julianday(ie.intime) AND julianday(ie.intime, '+1 day')
    AND le.ITEMID in
    (
      50868,
      50862,
      51144,
      50882,
      50885,
      50912,
      50902,
      50806,
      50931,
      50809,
      51221,
      50810,
      51222,
      50811,
      50813,
      51265,
      50971,
      50822,
      51275,
      51237,
      51274,
      50983,
      50824,
      51006,
      51301,
      51300
    )
    AND valuenum IS NOT null AND valuenum > 0 -- lab values cannot be 0 and cannot be negative
) pvt
GROUP BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id
ORDER BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id;
"""

In [31]:
# Lab aggregation that is actually executed below: one row per hospital
# admission (hadm_id) with the min/max of each lab label.
# NOTE(review): unlike the earlier version of this query there is no charttime
# window in the join, so the aggregates cover every matching lab event of the
# admission, not only the first ICU day. Confirm this is intended given the
# `lab_first_day` name.
que = """
SELECT
  -- the query groups by hadm_id only, so subject_id / icustay_id are aggregated
  -- explicitly instead of being left as arbitrary bare columns
  min(pvt.subject_id) AS subject_id, pvt.hadm_id, min(pvt.icustay_id) AS icustay_id

  , min(CASE WHEN label = 'ANION GAP' THEN valuenum ELSE null END) as ANIONGAP_min
  , max(CASE WHEN label = 'ANION GAP' THEN valuenum ELSE null END) as ANIONGAP_max
  , min(CASE WHEN label = 'ALBUMIN' THEN valuenum ELSE null END) as ALBUMIN_min
  , max(CASE WHEN label = 'ALBUMIN' THEN valuenum ELSE null END) as ALBUMIN_max
  , min(CASE WHEN label = 'BANDS' THEN valuenum ELSE null END) as BANDS_min
  , max(CASE WHEN label = 'BANDS' THEN valuenum ELSE null END) as BANDS_max
  , min(CASE WHEN label = 'BICARBONATE' THEN valuenum ELSE null END) as BICARBONATE_min
  , max(CASE WHEN label = 'BICARBONATE' THEN valuenum ELSE null END) as BICARBONATE_max
  , min(CASE WHEN label = 'BILIRUBIN' THEN valuenum ELSE null END) as BILIRUBIN_min
  , max(CASE WHEN label = 'BILIRUBIN' THEN valuenum ELSE null END) as BILIRUBIN_max
  , min(CASE WHEN label = 'CREATININE' THEN valuenum ELSE null END) as CREATININE_min
  , max(CASE WHEN label = 'CREATININE' THEN valuenum ELSE null END) as CREATININE_max
  , min(CASE WHEN label = 'CHLORIDE' THEN valuenum ELSE null END) as CHLORIDE_min
  , max(CASE WHEN label = 'CHLORIDE' THEN valuenum ELSE null END) as CHLORIDE_max
  , min(CASE WHEN label = 'GLUCOSE' THEN valuenum ELSE null END) as GLUCOSE_min
  , max(CASE WHEN label = 'GLUCOSE' THEN valuenum ELSE null END) as GLUCOSE_max
  , min(CASE WHEN label = 'HEMATOCRIT' THEN valuenum ELSE null END) as HEMATOCRIT_min
  , max(CASE WHEN label = 'HEMATOCRIT' THEN valuenum ELSE null END) as HEMATOCRIT_max
  , min(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum ELSE null END) as HEMOGLOBIN_min
  , max(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum ELSE null END) as HEMOGLOBIN_max
  , min(CASE WHEN label = 'LACTATE' THEN valuenum ELSE null END) as LACTATE_min
  , max(CASE WHEN label = 'LACTATE' THEN valuenum ELSE null END) as LACTATE_max
  , min(CASE WHEN label = 'PLATELET' THEN valuenum ELSE null END) as PLATELET_min
  , max(CASE WHEN label = 'PLATELET' THEN valuenum ELSE null END) as PLATELET_max
  , min(CASE WHEN label = 'POTASSIUM' THEN valuenum ELSE null END) as POTASSIUM_min
  , max(CASE WHEN label = 'POTASSIUM' THEN valuenum ELSE null END) as POTASSIUM_max
  , min(CASE WHEN label = 'PTT' THEN valuenum ELSE null END) as PTT_min
  , max(CASE WHEN label = 'PTT' THEN valuenum ELSE null END) as PTT_max
  , min(CASE WHEN label = 'INR' THEN valuenum ELSE null END) as INR_min
  , max(CASE WHEN label = 'INR' THEN valuenum ELSE null END) as INR_max
  , min(CASE WHEN label = 'PT' THEN valuenum ELSE null END) as PT_min
  , max(CASE WHEN label = 'PT' THEN valuenum ELSE null END) as PT_max
  , min(CASE WHEN label = 'SODIUM' THEN valuenum ELSE null END) as SODIUM_min
  , max(CASE WHEN label = 'SODIUM' THEN valuenum ELSE null end) as SODIUM_max
  , min(CASE WHEN label = 'BUN' THEN valuenum ELSE null end) as BUN_min
  , max(CASE WHEN label = 'BUN' THEN valuenum ELSE null end) as BUN_max
  , min(CASE WHEN label = 'WBC' THEN valuenum ELSE null end) as WBC_min
  , max(CASE WHEN label = 'WBC' THEN valuenum ELSE null end) as WBC_max


FROM
( -- begin query that extracts the data
  SELECT ie.subject_id, ie.hadm_id, ie.icustay_id
  -- here we assign labels to ITEMIDs
  -- this also fuses together multiple ITEMIDs containing the same data
  , CASE
        WHEN itemid = 50868 THEN 'ANION GAP'
        WHEN itemid = 50862 THEN 'ALBUMIN'
        WHEN itemid = 51144 THEN 'BANDS'
        WHEN itemid = 50882 THEN 'BICARBONATE'
        WHEN itemid = 50885 THEN 'BILIRUBIN'
        WHEN itemid = 50912 THEN 'CREATININE'
        WHEN itemid = 50806 THEN 'CHLORIDE'
        WHEN itemid = 50902 THEN 'CHLORIDE'
        WHEN itemid = 50809 THEN 'GLUCOSE'
        WHEN itemid = 50931 THEN 'GLUCOSE'
        WHEN itemid = 50810 THEN 'HEMATOCRIT'
        WHEN itemid = 51221 THEN 'HEMATOCRIT'
        WHEN itemid = 50811 THEN 'HEMOGLOBIN'
        WHEN itemid = 51222 THEN 'HEMOGLOBIN'
        WHEN itemid = 50813 THEN 'LACTATE'
        WHEN itemid = 51265 THEN 'PLATELET'
        WHEN itemid = 50822 THEN 'POTASSIUM'
        WHEN itemid = 50971 THEN 'POTASSIUM'
        WHEN itemid = 51275 THEN 'PTT'
        WHEN itemid = 51237 THEN 'INR'
        WHEN itemid = 51274 THEN 'PT'
        WHEN itemid = 50824 THEN 'SODIUM'
        WHEN itemid = 50983 THEN 'SODIUM'
        WHEN itemid = 51006 THEN 'BUN'
        WHEN itemid = 51300 THEN 'WBC'
        WHEN itemid = 51301 THEN 'WBC'
      ELSE null
    END AS label
  , -- add in some sanity checks on the values
  -- the where clause below requires all valuenum to be > 0, so these are only upper limit checks
    CASE
      WHEN itemid = 50862 and valuenum >    10 THEN null -- g/dL 'ALBUMIN'
      WHEN itemid = 50868 and valuenum > 10000 THEN null -- mEq/L 'ANION GAP'
      WHEN itemid = 51144 and valuenum <     0 THEN null -- immature band forms, %
      WHEN itemid = 51144 and valuenum >   100 THEN null -- immature band forms, %
      WHEN itemid = 50882 and valuenum > 10000 THEN null -- mEq/L 'BICARBONATE'
      WHEN itemid = 50885 and valuenum >   150 THEN null -- mg/dL 'BILIRUBIN'
      WHEN itemid = 50806 and valuenum > 10000 THEN null -- mEq/L 'CHLORIDE'
      WHEN itemid = 50902 and valuenum > 10000 THEN null -- mEq/L 'CHLORIDE'
      WHEN itemid = 50912 and valuenum >   150 THEN null -- mg/dL 'CREATININE'
      WHEN itemid = 50809 and valuenum > 10000 THEN null -- mg/dL 'GLUCOSE'
      WHEN itemid = 50931 and valuenum > 10000 THEN null -- mg/dL 'GLUCOSE'
      WHEN itemid = 50810 and valuenum >   100 THEN null -- % 'HEMATOCRIT'
      WHEN itemid = 51221 and valuenum >   100 THEN null -- % 'HEMATOCRIT'
      WHEN itemid = 50811 and valuenum >    50 THEN null -- g/dL 'HEMOGLOBIN'
      WHEN itemid = 51222 and valuenum >    50 THEN null -- g/dL 'HEMOGLOBIN'
      WHEN itemid = 50813 and valuenum >    50 THEN null -- mmol/L 'LACTATE'
      WHEN itemid = 51265 and valuenum > 10000 THEN null -- K/uL 'PLATELET'
      WHEN itemid = 50822 and valuenum >    30 THEN null -- mEq/L 'POTASSIUM'
      WHEN itemid = 50971 and valuenum >    30 THEN null -- mEq/L 'POTASSIUM'
      WHEN itemid = 51275 and valuenum >   150 THEN null -- sec 'PTT'
      WHEN itemid = 51237 and valuenum >    50 THEN null -- 'INR'
      WHEN itemid = 51274 and valuenum >   150 THEN null -- sec 'PT'
      WHEN itemid = 50824 and valuenum >   200 THEN null -- mEq/L == mmol/L 'SODIUM'
      WHEN itemid = 50983 and valuenum >   200 THEN null -- mEq/L == mmol/L 'SODIUM'
      WHEN itemid = 51006 and valuenum >   300 THEN null -- 'BUN'
      WHEN itemid = 51300 and valuenum >  1000 THEN null -- 'WBC'
      WHEN itemid = 51301 and valuenum >  1000 THEN null -- 'WBC'
    ELSE le.valuenum
    END AS valuenum

  FROM icustays ie

  LEFT JOIN labevents le
    ON le.subject_id = ie.subject_id AND le.hadm_id = ie.hadm_id
    AND le.ITEMID in
    (
      -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS
      50868, -- ANION GAP | CHEMISTRY | BLOOD | 769895
      50862, -- ALBUMIN | CHEMISTRY | BLOOD | 146697
      51144, -- BANDS - hematology
      50882, -- BICARBONATE | CHEMISTRY | BLOOD | 780733
      50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD | 238277
      50912, -- CREATININE | CHEMISTRY | BLOOD | 797476
      50902, -- CHLORIDE | CHEMISTRY | BLOOD | 795568
      50806, -- CHLORIDE, WHOLE BLOOD | BLOOD GAS | BLOOD | 48187
      50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981
      50809, -- GLUCOSE | BLOOD GAS | BLOOD | 196734
      51221, -- HEMATOCRIT | HEMATOLOGY | BLOOD | 881846
      50810, -- HEMATOCRIT, CALCULATED | BLOOD GAS | BLOOD | 89715
      51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD | 752523
      50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD | 89712
      50813, -- LACTATE | BLOOD GAS | BLOOD | 187124
      51265, -- PLATELET COUNT | HEMATOLOGY | BLOOD | 778444
      50971, -- POTASSIUM | CHEMISTRY | BLOOD | 845825
      50822, -- POTASSIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 192946
      51275, -- PTT | HEMATOLOGY | BLOOD | 474937
      51237, -- INR(PT) | HEMATOLOGY | BLOOD | 471183
      51274, -- PT | HEMATOLOGY | BLOOD | 469090
      50983, -- SODIUM | CHEMISTRY | BLOOD | 808489
      50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 71503
      51006, -- UREA NITROGEN | CHEMISTRY | BLOOD | 791925
      51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD | 753301
      51300  -- WBC COUNT | HEMATOLOGY | BLOOD | 2371
    )
    AND valuenum IS NOT null AND valuenum > 0 -- lab values cannot be 0 and cannot be negative
) pvt
GROUP BY  pvt.hadm_id 
ORDER BY  pvt.hadm_id
"""

In [32]:
# Run the aggregation query held in `que` on the open connection and load the
# result set into a DataFrame.
lab_first_day = pd.read_sql(sql=que, con=conn)


In [33]:
# Display the query result as the cell output.
lab_first_day

Unnamed: 0,subject_id,hadm_id,icustay_id,ANIONGAP_min,ANIONGAP_max,ALBUMIN_min,ALBUMIN_max,BANDS_min,BANDS_max,BICARBONATE_min,...,INR_min,INR_max,PT_min,PT_max,SODIUM_min,SODIUM_max,BUN_min,BUN_max,WBC_min,WBC_max
0,58526,100001,275225,9.0,30.0,,,,,11.0,...,1.0,1.0,12.1,12.1,135.0,145.0,16.0,48.0,6.1,15.9
1,54610,100003,209281,9.0,12.0,2.3,2.6,,,15.0,...,1.5,1.6,17.0,17.7,130.0,133.0,15.0,51.0,10.2,17.9
2,9895,100006,291788,6.0,19.0,2.0,2.0,21.0,21.0,24.0,...,1.3,3.3,13.9,22.2,128.0,134.0,12.0,23.0,7.3,16.6
3,23018,100007,217937,9.0,15.0,,,,,22.0,...,1.1,1.1,12.4,12.4,136.0,144.0,11.0,21.0,5.3,12.3
4,533,100009,253656,11.0,13.0,4.3,4.3,,,23.0,...,1.1,1.2,13.2,14.2,134.0,143.0,12.0,21.0,7.8,17.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57781,20785,199993,271752,7.0,19.0,3.3,3.3,10.0,10.0,21.0,...,1.0,1.6,12.2,15.4,123.0,139.0,8.0,40.0,7.3,22.6
57782,23761,199994,296723,12.0,18.0,3.0,3.0,7.0,7.0,17.0,...,1.2,1.3,13.0,13.7,123.0,141.0,12.0,36.0,6.3,14.8
57783,19412,199995,255092,10.0,16.0,3.5,3.8,,,24.0,...,1.0,1.8,12.6,16.8,132.0,141.0,9.0,18.0,5.7,11.4
57784,27200,199998,293589,8.0,16.0,4.2,4.2,,,21.0,...,1.0,1.3,11.7,14.8,135.0,141.0,14.0,25.0,4.8,12.4


In [35]:
# Load the label table from the parent directory; the displayed output shows
# the columns HADM_ID, SUBJECT_ID and 30readflag plus a leftover 'Unnamed: 0'.
neo_basic_label_df = pd.read_csv(filepath_or_buffer='../neo_basic_label_df.csv')

In [36]:
# Display the label table as the cell output.
neo_basic_label_df

Unnamed: 0.1,Unnamed: 0,HADM_ID,SUBJECT_ID,30readflag
0,0,100023,1189,0
1,1,100025,4528,0
2,2,100029,14509,0
3,3,100044,677,0
4,4,100055,1549,0
...,...,...,...,...
8083,8083,198806,22036,0
8084,8084,198876,20709,0
8085,8085,198892,60,0
8086,8086,199319,6415,0


In [37]:
# Pull the full icustays table into memory for inspection.
icustays = pd.read_sql(sql="SELECT * FROM icustays", con=conn)

In [38]:
# Show only the ICU stays whose admission id appears in the label table.
in_label_cohort = icustays['HADM_ID'].isin(neo_basic_label_df['HADM_ID'])
icustays.loc[in_label_cohort]

Unnamed: 0,index,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,DBSOURCE,FIRST_CAREUNIT,LAST_CAREUNIT,FIRST_WARDID,LAST_WARDID,INTIME,OUTTIME,LOS
9,9,374,277,171601,272866,carevue,NICU,NICU,56,56,2132-10-21 21:11:46,2132-10-22 14:44:48,0.7313
11,11,376,280,123506,247496,carevue,NICU,NICU,56,56,2155-12-08 18:22:09,2155-12-22 17:10:45,13.9504
22,22,387,288,110921,216651,carevue,NICU,NICU,56,56,2191-05-19 12:47:10,2191-05-27 15:15:03,8.1027
32,32,397,297,107224,203620,carevue,NICU,NICU,56,56,2135-10-05 14:55:47,2135-10-05 17:05:59,0.0904
34,34,399,299,195143,204434,carevue,NICU,NICU,56,56,2176-07-01 19:09:04,2176-07-01 20:21:13,0.0501
...,...,...,...,...,...,...,...,...,...,...,...,...,...
47131,47131,39147,30958,132158,295710,carevue,NICU,NICU,56,56,2162-07-08 13:44:57,2162-08-19 00:12:00,41.4355
47132,47132,39148,30959,176199,292990,carevue,NICU,NICU,56,56,2114-06-30 18:25:53,2114-06-30 19:50:10,0.0585
47133,47133,39149,30960,159129,254748,carevue,NICU,NICU,56,56,2166-06-25 13:14:04,2166-07-07 11:03:32,11.9094
47134,47134,39150,30961,168662,275888,carevue,NICU,NICU,56,56,2159-07-14 02:47:27,2159-07-15 18:35:45,1.6585


In [39]:
# The SQL result exposes the admission id as lowercase 'hadm_id', while the
# label table uses 'HADM_ID'; copy it under the uppercase name so both frames
# share the key used for the merge below.
hadm_ids = lab_first_day['hadm_id']
lab_first_day['HADM_ID'] = hadm_ids

In [40]:
### TODO: check how many rows remain after the merge; if none remain, do not restrict to the first day

In [41]:
# Combine the lab aggregates with the label table via the project helper
# `util.merge`.
# NOTE(review): presumably 'right' is the join type and 'HADM_ID' the join key
# (the displayed output keeps label rows that have no lab values) — confirm
# against utility.merge, which is not visible here.
neo_lab_first_day = util.merge(lab_first_day,neo_basic_label_df,'right','HADM_ID')
# Display the merged frame as the cell output.
neo_lab_first_day

Unnamed: 0.1,subject_id,hadm_id,icustay_id,ANIONGAP_min,ANIONGAP_max,ALBUMIN_min,ALBUMIN_max,BANDS_min,BANDS_max,BICARBONATE_min,...,SODIUM_min,SODIUM_max,BUN_min,BUN_max,WBC_min,WBC_max,HADM_ID,Unnamed: 0,SUBJECT_ID,30readflag
0,1189.0,100023.0,281211.0,,,,,6.0,6.0,,...,,,,,17.7,17.7,100023,0,1189,0
1,4528.0,100025.0,280315.0,,,,,,,,...,,,,,,,100025,1,4528,0
2,14509.0,100029.0,222038.0,,,,,,,,...,,,,,10.1,10.1,100029,2,14509,0
3,677.0,100044.0,289655.0,11.0,16.0,,,,,21.0,...,139.0,146.0,,,12.6,12.6,100044,3,677,0
4,1549.0,100055.0,215944.0,,,,,1.0,1.0,,...,,,,,17.3,17.3,100055,4,1549,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8083,,,,,,,,,,,...,,,,,,,119944,7569,16414,1
8084,,,,,,,,,,,...,,,,,,,140278,7693,20749,0
8085,,,,,,,,,,,...,,,,,,,150381,7750,29973,1
8086,,,,,,,,,,,...,,,,,,,169226,7873,11145,1


In [42]:
# Preview the merged frame without columns that are entirely NaN.
# The result is only displayed, not assigned, so `neo_lab_first_day` itself
# (and the CSV written from it) keeps every column.
neo_lab_first_day.dropna(axis='columns', how='all')

Unnamed: 0.1,subject_id,hadm_id,icustay_id,ANIONGAP_min,ANIONGAP_max,ALBUMIN_min,ALBUMIN_max,BANDS_min,BANDS_max,BICARBONATE_min,...,SODIUM_min,SODIUM_max,BUN_min,BUN_max,WBC_min,WBC_max,HADM_ID,Unnamed: 0,SUBJECT_ID,30readflag
0,1189.0,100023.0,281211.0,,,,,6.0,6.0,,...,,,,,17.7,17.7,100023,0,1189,0
1,4528.0,100025.0,280315.0,,,,,,,,...,,,,,,,100025,1,4528,0
2,14509.0,100029.0,222038.0,,,,,,,,...,,,,,10.1,10.1,100029,2,14509,0
3,677.0,100044.0,289655.0,11.0,16.0,,,,,21.0,...,139.0,146.0,,,12.6,12.6,100044,3,677,0
4,1549.0,100055.0,215944.0,,,,,1.0,1.0,,...,,,,,17.3,17.3,100055,4,1549,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8083,,,,,,,,,,,...,,,,,,,119944,7569,16414,1
8084,,,,,,,,,,,...,,,,,,,140278,7693,20749,0
8085,,,,,,,,,,,...,,,,,,,150381,7750,29973,1
8086,,,,,,,,,,,...,,,,,,,169226,7873,11145,1


In [43]:
# Persist the merged lab/label frame next to the notebook, without the index.
neo_lab_first_day.to_csv(path_or_buf='neo_lab_first_day.csv', index=False)