# Neural Network - Length of Stay (LOS)

This ML model predicts days from admission to discharge for the patients in the MIMIC dataset who lived

In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense

Using TensorFlow backend.


# Data Setup
Creates separate clean dataframes for both sets of patients - lived & died

In [2]:
# read in labs

labs = pd.read_csv('../Resources/labsNew.csv')
labs.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,Albumin,Anion Gap,Bicarbonate,"Bilirubin, Total",Chloride,"Chloride, Whole Blood",Creatinine,Glucose_Blood_Gas,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,2,163353.0,0.0,0.0,0.0,9.3,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.1
1,3,145834.0,1.8,17.0,25.0,0.8,99.0,114.0,3.2,265.0,...,14.8,125.7,4.8,179.0,5.4,3.7,136.0,139.0,36.0,15.1
2,4,185777.0,2.8,17.0,24.0,2.2,97.0,0.0,0.5,0.0,...,12.3,31.3,3.2,207.0,3.1,0.0,135.0,0.0,9.0,9.7
3,5,178980.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,309.0,0.0,0.0,0.0,0.0,0.0,13.9
4,6,107064.0,2.7,17.0,16.0,0.2,107.0,95.0,3.5,106.0,...,12.5,55.2,4.1,198.0,4.9,4.2,135.0,135.0,86.0,22.7


In [3]:
# read in patients that survived

lived = pd.read_csv('../Resources/admissions_survived.csv')
lived.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA,LENGTH_OF_STAY
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,1.144444
1,22,23,152223,2153-09-03 07:15:00,2153-09-08 19:10:00,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,CATHOLIC,MARRIED,WHITE,,,CORONARY ARTERY DISEASE\CORONARY ARTERY BYPASS...,0,1,5.496528
2,23,23,124321,2157-10-18 19:34:00,2157-10-25 14:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,ENGL,CATHOLIC,MARRIED,WHITE,,,BRAIN MASS,0,1,6.768056
3,24,24,161859,2139-06-06 16:14:00,2139-06-09 12:48:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,,PROTESTANT QUAKER,SINGLE,WHITE,,,INTERIOR MYOCARDIAL INFARCTION,0,1,2.856944
4,25,25,129635,2160-11-02 02:06:00,2160-11-05 14:55:00,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,UNOBTAINABLE,MARRIED,WHITE,2160-11-02 01:01:00,2160-11-02 04:27:00,ACUTE CORONARY SYNDROME,0,1,3.534028


In [4]:
# join labs.csv with the admissions_survived.csv

lived_df = pd.merge(lived, labs, on='HADM_ID')
lived_df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID_x,HADM_ID,ADMITTIME,DISCHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,...,12.4,30.1,3.7,259.0,4.4,0.0,140.0,0.0,17.0,5.1
1,22,23,152223,2153-09-03 07:15:00,2153-09-08 19:10:00,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,...,17.5,42.0,3.2,95.0,3.9,3.6,143.0,140.0,14.0,9.4
2,23,23,124321,2157-10-18 19:34:00,2157-10-25 14:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,ENGL,...,11.9,26.6,3.0,216.0,4.2,3.5,140.0,133.0,16.0,10.9
3,24,24,161859,2139-06-06 16:14:00,2139-06-09 12:48:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,,...,12.4,22.5,3.8,215.0,4.1,0.0,139.0,0.0,13.0,9.8
4,25,25,129635,2160-11-02 02:06:00,2160-11-05 14:55:00,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,...,12.8,34.6,4.5,269.0,3.3,0.0,134.0,0.0,50.0,12.2


In [5]:
# Check on what we have in our df

lived_df.columns

Index(['ROW_ID', 'SUBJECT_ID_x', 'HADM_ID', 'ADMITTIME', 'DISCHTIME',
       'ADMISSION_TYPE', 'ADMISSION_LOCATION', 'DISCHARGE_LOCATION',
       'INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY',
       'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'LENGTH_OF_STAY', 'SUBJECT_ID_y', 'Albumin',
       'Anion Gap', 'Bicarbonate', 'Bilirubin, Total', 'Chloride',
       'Chloride, Whole Blood', 'Creatinine', 'Glucose_Blood_Gas',
       'Glucose_Chemistry', 'Hematocrit', 'Hematocrit, Calculated',
       'Hemoglobin_Blood_Gas', 'Hemoglobin_Hematology', 'INR(PT)', 'Lactate',
       'Magnesium', 'PT', 'PTT', 'Phosphate', 'Platelet Count', 'Potassium',
       'Potassium, Whole Blood', 'Sodium', 'Sodium, Whole Blood',
       'Urea Nitrogen', 'White Blood Cells'],
      dtype='object')

In [6]:
# Get rid of attributes we don't need and/or are messing up the model

#lived_df = lived_df.drop(['ROW_ID', 'SUBJECT_ID_x', 'HADM_ID', 'DIAGNOSIS', 'ADMITTIME', 'MARITAL_STATUS', 'DISCHTIME', 'DISCHARGE_LOCATION',
#                         'ADMISSION_TYPE','ADMISSION_LOCATION','INSURANCE','RELIGION', 'ETHNICITY', 'EDREGTIME', 'EDOUTTIME', 'HOSPITAL_EXPIRE_FLAG',
#       'HAS_CHARTEVENTS_DATA', 'SUBJECT_ID_y', 'LANGUAGE'], axis = 1)

In [7]:
lived_df = lived_df[['LENGTH_OF_STAY',
                     'Chloride, Whole Blood',
                     'Glucose_Chemistry',
                     'Hematocrit, Calculated',
                     'Hemoglobin_Blood_Gas',
                     'Lactate',
                     'Potassium, Whole Blood',
                     'Sodium, Whole Blood',
                     'Anion Gap',
                     'Albumin',
                     'Bicarbonate',
                     'Bilirubin, Total',
                     'Creatinine',
                     'Chloride',
                     'Glucose_Blood_Gas',
                     'Magnesium',
                     'Phosphate',
                     'Potassium',
                     'Sodium',
                     'Urea Nitrogen',
                     'Hematocrit',
                     'Hemoglobin_Hematology',
                     'Platelet Count',
                     'PTT',
                     'INR(PT)',
                     'PT',
                     'White Blood Cells'
                    ]]

In [8]:
# Encode non-numeric values

from sklearn.preprocessing import LabelEncoder

x_data = lived_df.drop(['LENGTH_OF_STAY'], axis=1)
encodedData = x_data.apply(LabelEncoder().fit_transform)
encodedData.head()

Unnamed: 0,"Chloride, Whole Blood",Glucose_Chemistry,"Hematocrit, Calculated",Hemoglobin_Blood_Gas,Lactate,"Potassium, Whole Blood","Sodium, Whole Blood",Anion Gap,Albumin,Bicarbonate,...,Potassium,Sodium,Urea Nitrogen,Hematocrit,Hemoglobin_Hematology,Platelet Count,PTT,INR(PT),PT,White Blood Cells
0,0,89,0,0,0,0,0,14,0,33,...,31,38,17,249,95,256,141,9,37,53
1,0,115,27,82,0,27,36,13,0,26,...,25,41,14,109,52,92,260,26,90,132
2,39,128,37,115,90,26,29,14,32,32,...,29,38,16,279,107,213,104,7,32,167
3,0,87,37,117,0,0,0,15,0,27,...,27,37,13,294,114,212,62,7,37,142
4,0,365,23,71,63,0,0,20,23,26,...,19,32,50,269,104,266,186,7,41,187


In [9]:
# Round the y data
lived_df['LENGTH_OF_STAY'] = lived_df['LENGTH_OF_STAY'].round()

In [32]:
# Shape the data

X = encodedData
y = lived_df['LENGTH_OF_STAY']

from keras.utils import to_categorical
y_binary = to_categorical(y)

print(f"data shape: {X.shape}")
print(f"target shape: {y.shape}")

data shape: (52294, 26)
target shape: (52294,)


# Length of Stay Model

In [33]:
# Create test and train data and scale the data sets

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

X_train, X_test, y_train, y_test = train_test_split(X, y_binary, random_state=1)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

  return self.partial_fit(X, y)


In [46]:
# Construct the NN

model = Sequential()
model.add(Dense(units=161, activation='relu', input_dim=26))
model.add(Dense(units=161, activation='relu'))
model.add(Dense(units=296, activation='softmax'))

In [47]:
# Run the NN

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train,
    epochs=120,
    shuffle=True,
    verbose=2
)

Epoch 1/120
 - 12s - loss: 3.0369 - acc: 0.1397
Epoch 2/120
 - 11s - loss: 2.8830 - acc: 0.1531
Epoch 3/120
 - 10s - loss: 2.8467 - acc: 0.1555
Epoch 4/120
 - 10s - loss: 2.8208 - acc: 0.1629
Epoch 5/120
 - 10s - loss: 2.7993 - acc: 0.1638
Epoch 6/120
 - 11s - loss: 2.7784 - acc: 0.1682
Epoch 7/120
 - 11s - loss: 2.7580 - acc: 0.1719
Epoch 8/120
 - 11s - loss: 2.7371 - acc: 0.1773
Epoch 9/120
 - 11s - loss: 2.7159 - acc: 0.1789
Epoch 10/120
 - 11s - loss: 2.6945 - acc: 0.1860
Epoch 11/120
 - 11s - loss: 2.6746 - acc: 0.1895
Epoch 12/120
 - 10s - loss: 2.6517 - acc: 0.1938
Epoch 13/120
 - 10s - loss: 2.6323 - acc: 0.1974
Epoch 14/120
 - 10s - loss: 2.6101 - acc: 0.2036
Epoch 15/120
 - 11s - loss: 2.5921 - acc: 0.2078
Epoch 16/120
 - 11s - loss: 2.5721 - acc: 0.2131
Epoch 17/120
 - 11s - loss: 2.5530 - acc: 0.2173
Epoch 18/120
 - 11s - loss: 2.5358 - acc: 0.2198
Epoch 19/120
 - 11s - loss: 2.5178 - acc: 0.2237
Epoch 20/120
 - 11s - loss: 2.5021 - acc: 0.2290
Epoch 21/120
 - 11s - loss: 2

<keras.callbacks.History at 0xb41b74b00>

In [48]:
# Looking at first 5 predictions

#predictions = model.predict_classes(X_test[:5])
predictions = np.argmax(model.predict(X_test), axis=-1)
for x in range(5):
    print(f"Predicted Label: {predictions[x]} | Actual Label: {np.argmax(y_test[x])}")

Predicted Label: 3 | Actual Label: 17
Predicted Label: 0 | Actual Label: 2
Predicted Label: 3 | Actual Label: 8
Predicted Label: 8 | Actual Label: 7
Predicted Label: 3 | Actual Label: 4


In [49]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Loaded Model Neural Network - Loss: {model_loss}, Loaded Model Accuracy: {model_accuracy}")

Loaded Model Neural Network - Loss: 4.282653732221025, Loaded Model Accuracy: 0.1310998929178102


# Save the NN model

In [15]:
model.save("NeuralNetworkModels/LOS_model.h5")

In [None]:
# To test a model, load the desired model and then run the cells below

# ENTER THE NAME OF THE DESIRED MODEL TO LOAD HERE
NNModel = "LabValueModel#1.h5"

from keras.models import load_model
first_model = load_model(f"NeuralNetworkModels/{NNModel}")

In [None]:
model_loss, model_accuracy = first_model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Loaded Model Neural Network - Loss: {model_loss}, Loaded Model Accuracy: {model_accuracy}")

In [None]:
# Insert Confusion Matrix

In [None]:
# Insert AUROC