# Neural Network - Days to Death (DDT)

This ML model predicts days from admission to death for the patients in the MIMIC dataset who expired

In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense

Using TensorFlow backend.


# Data Setup
Creates separate clean dataframes for both sets of patients - lived & died

In [2]:
labs = pd.read_csv('../Resources/labsNew.csv')
labs.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,Albumin,Anion Gap,Bicarbonate,"Bilirubin, Total",Chloride,"Chloride, Whole Blood",Creatinine,Glucose_Blood_Gas,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,2,163353.0,0.0,0.0,0.0,9.3,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.1
1,3,145834.0,1.8,17.0,25.0,0.8,99.0,114.0,3.2,265.0,...,14.8,125.7,4.8,179.0,5.4,3.7,136.0,139.0,36.0,15.1
2,4,185777.0,2.8,17.0,24.0,2.2,97.0,0.0,0.5,0.0,...,12.3,31.3,3.2,207.0,3.1,0.0,135.0,0.0,9.0,9.7
3,5,178980.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,309.0,0.0,0.0,0.0,0.0,0.0,13.9
4,6,107064.0,2.7,17.0,16.0,0.2,107.0,95.0,3.5,106.0,...,12.5,55.2,4.1,198.0,4.9,4.2,135.0,135.0,86.0,22.7


In [3]:
died = pd.read_csv('../Resources/admissions_died.csv')
died.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA,DAYS_TO_DEATH
0,30,31,128652,2108-08-22 23:27:00,2108-08-30 15:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,,CATHOLIC,MARRIED,WHITE,,,STATUS EPILEPTICUS,1,1,7.647917
1,55,56,181711,2104-01-02 02:01:00,2104-01-08 10:30:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,NOT SPECIFIED,,WHITE,2104-01-01 23:59:00,2104-01-02 03:33:00,HEAD BLEED,1,1,6.353472
2,61,61,189535,2119-01-04 18:12:00,2119-02-03 01:35:00,EMERGENCY,CLINIC REFERRAL/PREMATURE,DEAD/EXPIRED,Private,,CATHOLIC,MARRIED,WHITE,,,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,1,1,29.307639
3,68,67,155252,2157-12-02 00:45:00,2157-12-02 03:55:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,JEWISH,SINGLE,WHITE,2157-12-01 20:45:00,2157-12-02 00:55:00,SUBARACHNOID HEMORRHAGE,1,1,0.131944
4,86,84,166401,2196-04-14 04:02:00,2196-04-17 13:42:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Private,,OTHER,MARRIED,WHITE,2196-04-13 22:23:00,2196-04-14 04:31:00,"GLIOBLASTOMA,NAUSEA",1,1,3.402778


In [4]:
died_df = pd.merge(died, labs, on='HADM_ID')
died_df.head()

Unnamed: 0,ROW_ID,SUBJECT_ID_x,HADM_ID,ADMITTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,30,31,128652,2108-08-22 23:27:00,2108-08-30 15:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,,...,13.2,29.4,2.7,109.0,3.3,4.2,128.0,0.0,13.0,6.9
1,55,56,181711,2104-01-02 02:01:00,2104-01-08 10:30:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,...,13.0,27.0,3.9,210.0,4.0,3.4,128.0,138.0,21.0,10.0
2,61,61,189535,2119-01-04 18:12:00,2119-02-03 01:35:00,EMERGENCY,CLINIC REFERRAL/PREMATURE,DEAD/EXPIRED,Private,,...,11.1,28.7,2.7,21.0,3.3,0.0,139.0,0.0,17.0,0.1
3,68,67,155252,2157-12-02 00:45:00,2157-12-02 03:55:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,...,13.2,35.5,0.0,183.0,5.1,0.0,137.0,0.0,24.0,9.3
4,86,84,166401,2196-04-14 04:02:00,2196-04-17 13:42:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Private,,...,0.0,0.0,3.4,231.0,2.5,0.0,127.0,0.0,6.0,11.6


In [5]:
died_df.columns

Index(['ROW_ID', 'SUBJECT_ID_x', 'HADM_ID', 'ADMITTIME', 'DEATHTIME',
       'ADMISSION_TYPE', 'ADMISSION_LOCATION', 'DISCHARGE_LOCATION',
       'INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY',
       'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'DAYS_TO_DEATH', 'SUBJECT_ID_y', 'Albumin',
       'Anion Gap', 'Bicarbonate', 'Bilirubin, Total', 'Chloride',
       'Chloride, Whole Blood', 'Creatinine', 'Glucose_Blood_Gas',
       'Glucose_Chemistry', 'Hematocrit', 'Hematocrit, Calculated',
       'Hemoglobin_Blood_Gas', 'Hemoglobin_Hematology', 'INR(PT)', 'Lactate',
       'Magnesium', 'PT', 'PTT', 'Phosphate', 'Platelet Count', 'Potassium',
       'Potassium, Whole Blood', 'Sodium', 'Sodium, Whole Blood',
       'Urea Nitrogen', 'White Blood Cells'],
      dtype='object')

In [6]:
died_df = died_df.drop(['ROW_ID', 'SUBJECT_ID_x', 'SUBJECT_ID_y', 'HADM_ID', 'ADMITTIME', 'DEATHTIME', 'DISCHARGE_LOCATION',
                         'RELIGION', 'ETHNICITY', 'EDREGTIME', 'EDOUTTIME', 'HOSPITAL_EXPIRE_FLAG',
       'HAS_CHARTEVENTS_DATA', 'LANGUAGE'], axis = 1)

In [12]:
died_df_dummy = pd.get_dummies(died_df)

died_df_dummy = died_df_dummy[['DAYS_TO_DEATH', 'Chloride, Whole Blood','Glucose_Chemistry', 'Hematocrit, Calculated', 'Hemoglobin_Blood_Gas','Lactate', 'Potassium, Whole Blood', 
         'Sodium, Whole Blood', 'Anion Gap', 'Albumin','Bicarbonate', 'Bilirubin, Total', 'Creatinine', 'Chloride', 
         'Glucose_Blood_Gas', 'Magnesium','Phosphate', 'Potassium', 'Sodium', 'Urea Nitrogen', 'Hematocrit', 'Hemoglobin_Hematology',
         'Platelet Count', 'PTT', 'INR(PT)', 'PT', 'White Blood Cells']]

died_df_dummy.head()

Unnamed: 0,DAYS_TO_DEATH,"Chloride, Whole Blood",Glucose_Chemistry,"Hematocrit, Calculated",Hemoglobin_Blood_Gas,Lactate,"Potassium, Whole Blood","Sodium, Whole Blood",Anion Gap,Albumin,...,Potassium,Sodium,Urea Nitrogen,Hematocrit,Hemoglobin_Hematology,Platelet Count,PTT,INR(PT),PT,White Blood Cells
0,7.647917,0.0,110.0,0.0,0.0,1.4,4.2,0.0,9.0,2.7,...,3.3,128.0,13.0,30.0,10.6,109.0,29.4,1.2,13.2,6.9
1,6.353472,0.0,155.0,0.0,0.0,0.0,3.4,138.0,14.0,3.2,...,4.0,128.0,21.0,27.0,8.6,210.0,27.0,1.1,13.0,10.0
2,29.307639,0.0,98.0,0.0,0.0,2.7,0.0,0.0,9.0,3.0,...,3.3,139.0,17.0,24.0,8.6,21.0,28.7,0.8,11.1,0.1
3,0.131944,0.0,176.0,0.0,0.0,0.0,0.0,0.0,22.0,0.0,...,5.1,137.0,24.0,54.1,18.4,183.0,35.5,1.2,13.2,9.3
4,3.402778,0.0,119.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,...,2.5,127.0,6.0,35.1,12.5,231.0,0.0,0.0,0.0,11.6


In [13]:
# assignment and reshaping of DIED data
data = died_df_dummy.drop(["DAYS_TO_DEATH"], axis=1)
target = died_df_dummy["DAYS_TO_DEATH"].values.reshape(-1, 1)

print(f"data shape: {data.shape}")
print(f"target shape: {target.shape}")

data shape: (5818, 26)
target shape: (5818, 1)


# Days to Death Model

In [14]:
# Create test and train data and scale the data sets

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=1)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [15]:
from keras.utils import to_categorical
y_train_binary = to_categorical(y_train)
y_test_binary = to_categorical(y_test)

In [18]:
# Construct the NN

model = Sequential()
model.add(Dense(units=116, activation='relu', input_dim=26))
model.add(Dense(units=116, activation='relu'))
model.add(Dense(units=207, activation='softmax'))

In [19]:
# Run the NN

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_binary,
    epochs=60,
    shuffle=True,
    verbose=2
)

Epoch 1/60
 - 5s - loss: 3.5649 - acc: 0.1529
Epoch 2/60
 - 1s - loss: 3.0626 - acc: 0.1689
Epoch 3/60
 - 1s - loss: 3.0067 - acc: 0.1802
Epoch 4/60
 - 1s - loss: 2.9606 - acc: 0.1868
Epoch 5/60
 - 1s - loss: 2.9187 - acc: 0.1863
Epoch 6/60
 - 1s - loss: 2.8721 - acc: 0.1994
Epoch 7/60
 - 1s - loss: 2.8367 - acc: 0.2024
Epoch 8/60
 - 1s - loss: 2.7972 - acc: 0.2056
Epoch 9/60
 - 1s - loss: 2.7534 - acc: 0.2187
Epoch 10/60
 - 1s - loss: 2.7147 - acc: 0.2228
Epoch 11/60
 - 1s - loss: 2.6782 - acc: 0.2258
Epoch 12/60
 - 1s - loss: 2.6402 - acc: 0.2409
Epoch 13/60
 - 1s - loss: 2.6020 - acc: 0.2420
Epoch 14/60
 - 1s - loss: 2.5695 - acc: 0.2491
Epoch 15/60
 - 1s - loss: 2.5311 - acc: 0.2542
Epoch 16/60
 - 1s - loss: 2.4916 - acc: 0.2611
Epoch 17/60
 - 1s - loss: 2.4545 - acc: 0.2744
Epoch 18/60
 - 1s - loss: 2.4177 - acc: 0.2897
Epoch 19/60
 - 1s - loss: 2.3815 - acc: 0.2936
Epoch 20/60
 - 1s - loss: 2.3515 - acc: 0.3058
Epoch 21/60
 - 1s - loss: 2.3150 - acc: 0.3131
Epoch 22/60
 - 1s - lo

<keras.callbacks.History at 0x1a47578048>

In [20]:
# Looking at first 5 predictions

predictions = model.predict_classes(X_test[:5])

print(f"Predicted classes: {predictions}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: [0 4 0 0 4]
Actual Labels: [array([5.30972222]), array([19.82986111]), array([12.95208333]), array([15.97986111]), array([9.25138889])]


In [24]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_binary, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

ValueError: Error when checking target: expected dense_6 to have shape (207,) but got array with shape (167,)

In [26]:
predictions = model.predict_classes(X_test_scaled)
y_test_arr = list(y_test_binary)

tp = 0
fp = 0
tn = 0
fn = 0

for i in range(0,14527):
    if predictions[i] == 0:
        if y_test_arr[i] == 0:
            tn += 1
        else:
            fn += 1
        
    elif predictions[i] == 1:
        if y_test_arr[i] == 0:
            fp += 1
        else:
            tp += 1
            
acc = (tp + tn) / 14528
pre = tp / (tp + fp)
rec = tp / (tp + fn)

f1 = 2 * ((pre * rec) / (pre + rec))
            
print(f"True Positives: {tp}")
print(f"False Positive: {fp}")
print(f"True Negatives: {tn}")
print(f"False Negatives: {fn}")
print(f"---------------------------------")
print(f"Accuracy: {round(acc,4)}")
print(f"Precision: {round(pre,4)}")
print(f"Recall: {round(rec,4)}")
print(f"f1 Score: {round(f1,4)}")

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()