# Neural Network Flag Classifier
-------------------------------------------
#### This neural network will use a dataset containing twenty-six separate input variables that are the initial blood lab tests taken upon a patient's admission to an Intensive Care Unit. Each input variable is a flag that indicates whether the particular test returned abnormal or not. Tests that were not performed are marked as "no test". The neural network will take in these twenty-six input variables and attempt to predict whether the patient died while in the ICU.

In [1]:
# Import all necessary dependencies

import pandas as pd
import numpy as np
import os
import tensorflow as tf
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense

Using TensorFlow backend.


# Data Pre-Processing
------------
#### Below is the process for combining the CSVs containing our desired information, and data cleaning so that it may be used in the neural network

In [2]:
# Load in and display the labs.csv file
labsData = pd.read_csv('../../labs.csv')

labsData.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,Albumin,Anion Gap,Bicarbonate,"Bilirubin, Total",Chloride,"Chloride, Whole Blood",Creatinine,Glucose_Blood_Gas,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,2,163353.0,no test,no test,no test,normal,no test,no test,no test,no test,...,no test,no test,no test,abnormal,no test,no test,no test,no test,no test,abnormal
1,3,145834.0,abnormal,normal,normal,normal,abnormal,abnormal,abnormal,abnormal,...,abnormal,normal,abnormal,normal,abnormal,normal,normal,normal,abnormal,abnormal
2,4,185777.0,abnormal,normal,normal,abnormal,normal,no test,normal,no test,...,normal,normal,normal,normal,abnormal,no test,normal,no test,normal,delta
3,5,178980.0,no test,no test,no test,no test,no test,no test,no test,no test,...,no test,no test,no test,normal,no test,no test,no test,no test,no test,normal
4,6,107064.0,abnormal,abnormal,abnormal,normal,normal,abnormal,abnormal,abnormal,...,abnormal,abnormal,abnormal,normal,abnormal,normal,normal,normal,abnormal,normal


In [3]:
# Load in the PATIENTS.csv
patientsData = pd.read_csv('../../Resources/PATIENTS.csv')

# Eliminate all columns in the resulting dataframe except for the SUBJECT_ID and EXPIRE_FLAG columns
patientsData = patientsData[['SUBJECT_ID','EXPIRE_FLAG']]

# Display the dataframe
patientsData.head()

Unnamed: 0,SUBJECT_ID,EXPIRE_FLAG
0,249,0
1,250,1
2,251,0
3,252,0
4,253,0


In [4]:
# Join the two dataframes together on SUBJECT_ID
joinedData = labsData.merge(patientsData, on='SUBJECT_ID')

# Display the joined dataframe
joinedData.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,Albumin,Anion Gap,Bicarbonate,"Bilirubin, Total",Chloride,"Chloride, Whole Blood",Creatinine,Glucose_Blood_Gas,...,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells,EXPIRE_FLAG
0,2,163353.0,no test,no test,no test,normal,no test,no test,no test,no test,...,no test,no test,abnormal,no test,no test,no test,no test,no test,abnormal,0
1,3,145834.0,abnormal,normal,normal,normal,abnormal,abnormal,abnormal,abnormal,...,normal,abnormal,normal,abnormal,normal,normal,normal,abnormal,abnormal,1
2,4,185777.0,abnormal,normal,normal,abnormal,normal,no test,normal,no test,...,normal,normal,normal,abnormal,no test,normal,no test,normal,delta,0
3,5,178980.0,no test,no test,no test,no test,no test,no test,no test,no test,...,no test,no test,normal,no test,no test,no test,no test,no test,normal,0
4,6,107064.0,abnormal,abnormal,abnormal,normal,normal,abnormal,abnormal,abnormal,...,abnormal,abnormal,normal,abnormal,normal,normal,normal,abnormal,normal,0


In [5]:
# Import the sklearn LabelEncoder
from sklearn.preprocessing import LabelEncoder

# Create a variable to hold the input variables, being sure to drop the SUBJECT_ID, HADM_ID & EXPIRE_FLAG columns
x_data = joinedData.drop(['SUBJECT_ID','HADM_ID','EXPIRE_FLAG'], axis=1)

# Encode the x data using the LabelEncoder
encodedData = x_data.apply(LabelEncoder().fit_transform)

# Display the encoded data
encodedData.head()

Unnamed: 0,Albumin,Anion Gap,Bicarbonate,"Bilirubin, Total",Chloride,"Chloride, Whole Blood",Creatinine,Glucose_Blood_Gas,Glucose_Chemistry,Hematocrit,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,1,1,1,3,1,1,2,1,1,0,...,1,1,2,0,1,1,1,1,1,0
1,0,2,2,3,0,0,0,0,2,0,...,0,2,0,3,0,2,2,2,0,0
2,0,2,2,0,2,1,3,1,0,0,...,2,2,3,3,0,1,2,1,2,1
3,1,1,1,2,1,1,2,1,1,3,...,1,1,2,3,1,1,1,1,1,3
4,0,0,0,3,2,0,0,0,0,0,...,0,0,0,3,0,2,2,2,0,3


In [6]:
# Print the shape of the data to ensure uniformity
X = encodedData
y = joinedData['EXPIRE_FLAG']
print(X.shape, y.shape)

(58112, 26) (58112,)


# Train Test Split
---------
#### Below splits the above data into train and test groups for training the neural network

In [7]:
# Import train_test_split from sklearn and to_categorical from keras
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

# Split the data between test and training data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

# Perform y One-Hot Encoding
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

# Neural Network Model Creation
--------
#### Below is the construction and training process for the neural network. This neural network will be a deep, sequential neural net, employing relu and softmax activation functions, the adam optimizer and the categorical crossentropy loss function. There will be four layers from input to output, with each hidden layer containing fourteen nodes.

### Note:
##### If you wish to test an existing model, and do not want to create a new model, do not run the next four cells. The four cells below are for building new models. Just below them is where one can load existing models for testing.

In [8]:
# Define the model and add all desired layers to the neural network
model = Sequential()
model.add(Dense(units=14, activation='relu', input_dim=26))
model.add(Dense(units=14, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [9]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Set the parameters for fitting the model
model.fit(
    X_train,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=2
)

Epoch 1/60
 - 8s - loss: 0.5923 - acc: 0.6824
Epoch 2/60
 - 4s - loss: 0.5641 - acc: 0.7022
Epoch 3/60
 - 4s - loss: 0.5563 - acc: 0.7041
Epoch 4/60
 - 4s - loss: 0.5500 - acc: 0.7068
Epoch 5/60
 - 4s - loss: 0.5442 - acc: 0.7084
Epoch 6/60
 - 4s - loss: 0.5407 - acc: 0.7112
Epoch 7/60
 - 4s - loss: 0.5389 - acc: 0.7090
Epoch 8/60
 - 4s - loss: 0.5365 - acc: 0.7097
Epoch 9/60
 - 4s - loss: 0.5352 - acc: 0.7107
Epoch 10/60
 - 4s - loss: 0.5339 - acc: 0.7123
Epoch 11/60
 - 4s - loss: 0.5333 - acc: 0.7118
Epoch 12/60
 - 4s - loss: 0.5321 - acc: 0.7126
Epoch 13/60
 - 4s - loss: 0.5312 - acc: 0.7146
Epoch 14/60
 - 4s - loss: 0.5311 - acc: 0.7125
Epoch 15/60
 - 4s - loss: 0.5304 - acc: 0.7148
Epoch 16/60
 - 4s - loss: 0.5296 - acc: 0.7147
Epoch 17/60
 - 4s - loss: 0.5295 - acc: 0.7157
Epoch 18/60
 - 4s - loss: 0.5290 - acc: 0.7157
Epoch 19/60
 - 4s - loss: 0.5284 - acc: 0.7166
Epoch 20/60
 - 4s - loss: 0.5278 - acc: 0.7176
Epoch 21/60
 - 4s - loss: 0.5275 - acc: 0.7158
Epoch 22/60
 - 4s - lo

<keras.callbacks.History at 0x1a3509e438>

In [10]:
# Evaluate the model and print the loss and accuracy scores to check model performance
model_loss, model_accuracy = model.evaluate(
    X_test, y_test_categorical, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 0.5327542816752379, Accuracy: 0.713931718061674


In [11]:
# Compare the first five predictions to the actual values for evaluative purposes
predictions = model.predict_classes(X_test[:5])

print(f"Predicted classes: {predictions}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: [0 0 1 1 0]
Actual Labels: [1, 0, 1, 1, 0]


# Save New Model
----------
#### If a new model proves satisfactory, the below code will save it for future use. Be sure to give it a unique name so it won't overwrite an existing saved model.

In [None]:
model.save("NeuralNetworkModels/FlagModel#1.h5)

# Existing Models
---------
#### Below is the code for loading an existing model for testing. Be sure to run the first seven cells of the notebook before running the code below.

In [12]:
# To test a model, load the desired model and then run the cells below

# ENTER THE NAME OF THE DESIRED MODEL TO LOAD HERE
NNModel = "FlagModel#1.h5"

from keras.models import load_model
first_model = load_model(f"NeuralNetworkModels/{NNModel}")

In [13]:
# Evaluate the model and print the loss and accuracy scores to check model performance
model_loss, model_accuracy = first_model.evaluate(
    X_test, y_test_categorical, verbose=2)
print(f"Loaded Model Neural Network - Loss: {model_loss}, Loaded Model Accuracy: {model_accuracy}")

Loaded Model Neural Network - Loss: 0.5282498722822131, Loaded Model Accuracy: 0.7144823788546255


In [14]:
# Create a confusion matrix for the model

# Predict the classes using the predict_classes method and create an array with the actual classes
predictions = first_model.predict_classes(X_test)
y_test_arr = list(y_test)

# Create placeholder variables for the confusion matrix, initialize them at zero
tp = 0
fp = 0
tn = 0
fn = 0

# Create a for loop that ranges the length of the test data
for i in range(0,14527):
    
    # Use conditionals to compare the predicted class to the actual class and increment the appropriate variable
    # depending on if the prediction was correct
    if predictions[i] == 0:
        if y_test_arr[i] == 0:
            tn += 1
        else:
            fn += 1
        
    elif predictions[i] == 1:
        if y_test_arr[i] == 0:
            fp += 1
        else:
            tp += 1

            
# Calculate the accuracy, precision and recall scores and save to variables
acc = (tp + tn) / 14528
pre = tp / (tp + fp)
rec = tp / (tp + fn)

# Calculate the f1 score
f1 = 2 * ((pre * rec) / (pre + rec))
  
# Print out the values to see the results of the confusion matrix
print(f"True Positives: {tp}")
print(f"False Positive: {fp}")
print(f"True Negatives: {tn}")
print(f"False Negatives: {fn}")
print(f"---------------------------------")
print(f"Accuracy: {round(acc,4)}")
print(f"Precision: {round(pre,4)}")
print(f"Recall: {round(rec,4)}")
print(f"f1 Score: {round(f1,4)}")

True Positives: 3178
False Positive: 1760
True Negatives: 7201
False Negatives: 2388
---------------------------------
Accuracy: 0.7144
Precision: 0.6436
Recall: 0.571
f1 Score: 0.6051
