# Neural Network - Length of Stay (LOS)

This ML model predicts days from admission to discharge for the patients in the MIMIC dataset who lived

In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense

Using TensorFlow backend.


# Data Setup
Creates separate clean dataframes for both sets of patients - lived & died

In [2]:
# read in labs

labs = pd.read_csv('../Resources/labsNew.csv')
labs.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,Albumin,Anion Gap,Bicarbonate,"Bilirubin, Total",Chloride,"Chloride, Whole Blood",Creatinine,Glucose_Blood_Gas,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,2,163353.0,0.0,0.0,0.0,9.3,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.1
1,3,145834.0,1.8,17.0,25.0,0.8,99.0,114.0,3.2,265.0,...,14.8,125.7,4.8,179.0,5.4,3.7,136.0,139.0,36.0,15.1
2,4,185777.0,2.8,17.0,24.0,2.2,97.0,0.0,0.5,0.0,...,12.3,31.3,3.2,207.0,3.1,0.0,135.0,0.0,9.0,9.7
3,5,178980.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,309.0,0.0,0.0,0.0,0.0,0.0,13.9
4,6,107064.0,2.7,17.0,16.0,0.2,107.0,95.0,3.5,106.0,...,12.5,55.2,4.1,198.0,4.9,4.2,135.0,135.0,86.0,22.7


In [3]:
# read in patients that survived

lived = pd.read_csv('../Resources/admissions_survived.csv')
lived.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA,LENGTH_OF_STAY
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,1.144444
1,22,23,152223,2153-09-03 07:15:00,2153-09-08 19:10:00,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,CATHOLIC,MARRIED,WHITE,,,CORONARY ARTERY DISEASE\CORONARY ARTERY BYPASS...,0,1,5.496528
2,23,23,124321,2157-10-18 19:34:00,2157-10-25 14:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,ENGL,CATHOLIC,MARRIED,WHITE,,,BRAIN MASS,0,1,6.768056
3,24,24,161859,2139-06-06 16:14:00,2139-06-09 12:48:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,,PROTESTANT QUAKER,SINGLE,WHITE,,,INTERIOR MYOCARDIAL INFARCTION,0,1,2.856944
4,25,25,129635,2160-11-02 02:06:00,2160-11-05 14:55:00,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,UNOBTAINABLE,MARRIED,WHITE,2160-11-02 01:01:00,2160-11-02 04:27:00,ACUTE CORONARY SYNDROME,0,1,3.534028


In [4]:
# join labs.csv with the admissions_survived.csv

lived_df = pd.merge(lived, labs, on='HADM_ID')
lived_df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID_x,HADM_ID,ADMITTIME,DISCHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,...,12.4,30.1,3.7,259.0,4.4,0.0,140.0,0.0,17.0,5.1
1,22,23,152223,2153-09-03 07:15:00,2153-09-08 19:10:00,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,...,17.5,42.0,3.2,95.0,3.9,3.6,143.0,140.0,14.0,9.4
2,23,23,124321,2157-10-18 19:34:00,2157-10-25 14:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,ENGL,...,11.9,26.6,3.0,216.0,4.2,3.5,140.0,133.0,16.0,10.9
3,24,24,161859,2139-06-06 16:14:00,2139-06-09 12:48:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,,...,12.4,22.5,3.8,215.0,4.1,0.0,139.0,0.0,13.0,9.8
4,25,25,129635,2160-11-02 02:06:00,2160-11-05 14:55:00,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,...,12.8,34.6,4.5,269.0,3.3,0.0,134.0,0.0,50.0,12.2


In [5]:
# Check on what we have in our df

lived_df.columns

Index(['ROW_ID', 'SUBJECT_ID_x', 'HADM_ID', 'ADMITTIME', 'DISCHTIME',
       'ADMISSION_TYPE', 'ADMISSION_LOCATION', 'DISCHARGE_LOCATION',
       'INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY',
       'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'LENGTH_OF_STAY', 'SUBJECT_ID_y', 'Albumin',
       'Anion Gap', 'Bicarbonate', 'Bilirubin, Total', 'Chloride',
       'Chloride, Whole Blood', 'Creatinine', 'Glucose_Blood_Gas',
       'Glucose_Chemistry', 'Hematocrit', 'Hematocrit, Calculated',
       'Hemoglobin_Blood_Gas', 'Hemoglobin_Hematology', 'INR(PT)', 'Lactate',
       'Magnesium', 'PT', 'PTT', 'Phosphate', 'Platelet Count', 'Potassium',
       'Potassium, Whole Blood', 'Sodium', 'Sodium, Whole Blood',
       'Urea Nitrogen', 'White Blood Cells'],
      dtype='object')

In [6]:
# Get rid of attributes we don't need and/or are messing up the model

lived_df = lived_df.drop(['ROW_ID', 'SUBJECT_ID_x', 'HADM_ID', 'DIAGNOSIS', 'ADMITTIME', 'MARITAL_STATUS', 'DISCHTIME', 'DISCHARGE_LOCATION',
                         'RELIGION', 'ETHNICITY', 'EDREGTIME', 'EDOUTTIME', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'SUBJECT_ID_y', 'LANGUAGE'], axis = 1)

In [11]:
# Encode non-numeric values

from sklearn.preprocessing import LabelEncoder

x_data = lived_df.drop(['LENGTH_OF_STAY'], axis=1)
encodedData = x_data.apply(LabelEncoder().fit_transform)
encodedData.head()

Unnamed: 0,ADMISSION_TYPE,ADMISSION_LOCATION,INSURANCE,Albumin,Anion Gap,Bicarbonate,"Bilirubin, Total",Chloride,"Chloride, Whole Blood",Creatinine,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,1,2,3,0,14,33,4,41,0,6,...,37,141,36,256,31,0,38,0,17,53
1,0,4,2,0,13,26,0,50,0,7,...,90,260,31,92,25,27,41,36,14,132
2,1,5,2,32,14,32,8,42,39,7,...,32,104,29,213,29,26,38,29,16,167
3,1,5,3,0,15,27,5,43,0,9,...,37,62,37,212,27,0,37,0,13,142
4,1,2,3,23,20,26,4,33,0,16,...,41,186,44,266,19,0,32,0,50,187


In [21]:
# Round the y data
lived_df['LENGTH_OF_STAY'] = lived_df['LENGTH_OF_STAY'].round()

In [22]:
# Shape the data

X = encodedData
y = lived_df['LENGTH_OF_STAY']
print(f"data shape: {X.shape}")
print(f"target shape: {y.shape}")

data shape: (52294, 29)
target shape: (52294,)


# Length of Stay Model

In [24]:
# Create test and train data and scale the data sets

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

  return self.partial_fit(X, y)


In [25]:
from keras.utils import to_categorical
y_train_binary = to_categorical(y_train)
y_test_binary = to_categorical(y_test)

In [28]:
# Construct the NN

model = Sequential()
model.add(Dense(units=14, activation='relu', input_dim=29))
model.add(Dense(units=14, activation='relu'))
model.add(Dense(units=296, activation='softmax'))

In [29]:
# Run the NN

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_binary,
    epochs=70,
    shuffle=True,
    verbose=2
)

Epoch 1/70
 - 8s - loss: 3.2619 - acc: 0.1232
Epoch 2/70
 - 7s - loss: 2.9659 - acc: 0.1369
Epoch 3/70
 - 7s - loss: 2.9341 - acc: 0.1418
Epoch 4/70
 - 7s - loss: 2.9166 - acc: 0.1449
Epoch 5/70
 - 7s - loss: 2.9041 - acc: 0.1466
Epoch 6/70
 - 7s - loss: 2.8933 - acc: 0.1486
Epoch 7/70
 - 7s - loss: 2.8834 - acc: 0.1500
Epoch 8/70
 - 7s - loss: 2.8746 - acc: 0.1511
Epoch 9/70
 - 7s - loss: 2.8669 - acc: 0.1524
Epoch 10/70
 - 7s - loss: 2.8611 - acc: 0.1516
Epoch 11/70
 - 9s - loss: 2.8557 - acc: 0.1549
Epoch 12/70
 - 12s - loss: 2.8520 - acc: 0.1521
Epoch 13/70
 - 11s - loss: 2.8491 - acc: 0.1554
Epoch 14/70
 - 11s - loss: 2.8464 - acc: 0.1545
Epoch 15/70
 - 12s - loss: 2.8437 - acc: 0.1557
Epoch 16/70
 - 9s - loss: 2.8424 - acc: 0.1551
Epoch 17/70
 - 8s - loss: 2.8399 - acc: 0.1554
Epoch 18/70
 - 7s - loss: 2.8383 - acc: 0.1562
Epoch 19/70
 - 7s - loss: 2.8372 - acc: 0.1563
Epoch 20/70
 - 7s - loss: 2.8351 - acc: 0.1560
Epoch 21/70
 - 7s - loss: 2.8344 - acc: 0.1567
Epoch 22/70
 - 8s 

<keras.callbacks.History at 0xb35f23518>

In [33]:
# Looking at first 5 predictions

predictions = model.predict_classes(X_test[:5])

print(f"Predicted classes: {predictions}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: [2 2 1 3 1]
Actual Labels: [17.0, 2.0, 8.0, 7.0, 4.0]


# Save the NN model

In [None]:
model.save("NeuralNetworkModels/LOS_model.h5")

In [None]:
# To test a model, load the desired model and then run the cells below

# ENTER THE NAME OF THE DESIRED MODEL TO LOAD HERE
NNModel = "LabValueModel#1.h5"

from keras.models import load_model
first_model = load_model(f"NeuralNetworkModels/{NNModel}")

In [None]:
model_loss, model_accuracy = first_model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Loaded Model Neural Network - Loss: {model_loss}, Loaded Model Accuracy: {model_accuracy}")

In [None]:
# Insert Confusion Matrix

In [None]:
# Insert AUROC