In [73]:
import os
import pickle
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# modelling libs
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam, RMSprop, Adagrad
from tensorflow.keras.layers import Flatten, Dense, LSTM, Dropout, Bidirectional, Conv1D, MaxPooling1D, Input, concatenate
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [75]:
# ----------------------
# load biosignal data
# ----------------------

# biosignals
with open('biosignals.pkl','rb') as f:
    bioData = pickle.load(f)

# dropped indexes
with open('droppedIndices.pkl','rb') as f:
    dropIdx = pickle.load(f)
    print('dropped patient IDs: ', dropIdx)
    
# reduce length of biosignals for demo purposes
bioData = bioData[:,0:5000,:]
print('bioData shape: ', bioData.shape)

dropped patient IDs:  [1242, 4452]
bioData shape:  (6386, 5000, 3)


In [76]:
# --------------------
# load ehr data 
# --------------------

# load all ehr data
dataFile = r'C:\Users\ta1031742\OneDrive - Bose Corporation\Documents\Admin\Northeastern\DS5500-Capstone\Data\vitaldb\vitaldb-a-high-fidelity-multi-parameter-vital-signs-database-in-surgical-patients-1.0.0\clinical_data.csv'
data = pd.read_csv(dataFile)

# downselect features for demo purposes
ehrData = data[['sex', 'height', 'weight', 'bmi', 'asa', 'icu_days']]

# replace gender values
ehrData['sex'] = ehrData['sex'].replace(to_replace={'M': 0, 'F': 1})

# drop bad indexes from biosisgnals data
ehrData = ehrData.drop(index=dropIdx).reset_index(drop=True)

# fill missing values
ehrData = ehrData.fillna(value=-999)

print('ehrData shape: ', ehrData.shape)

ehrData shape:  (6386, 6)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ehrData['sex'] = ehrData['sex'].replace(to_replace={'M': 0, 'F': 1})


In [77]:
# ---------------------------
# create train, test sets
# ---------------------------

# randomly specify train, test indices
random.seed(42)
allIdx = np.linspace(0,6385,6385, dtype=int) # create list of all patient id's
testIdx = random.sample(range(1, len(df)), int(np.floor(len(df)*0.2))) # randomly generate test indices
trainIdx = list(set(allIdx) - set(testIdx)) # get train indices

# separate biosignal data into train, test sets
testBio = bioData[testIdx, :, :]
trainBio = bioData[trainIdx, :, :]
print('testBio shape: ', testBio.shape)
print('trainBio shape: ', trainBio.shape); print()

# separate y (icu_days) from data
y = ehrData['icu_days']
ehrData = ehrData.drop(columns='icu_days')

# separate y into train, test sets
yTest = y[testIdx]
yTrain = y[trainIdx]
print('yTest shape: ', yTest.shape)
print('yTrain shape: ', yTrain.shape)

# separate ehr data into train, test sets
ehrData = ehrData.to_numpy()
testEhr = ehrData[testIdx, :]
trainEhr = ehrData[trainIdx, :]
print('testEhr shape: ', testEhr.shape)
print('trainEhr shape: ', trainEhr.shape); print()

testBio shape:  (1277, 5000, 3)
trainBio shape:  (5108, 5000, 3)

yTest shape:  (1277,)
yTrain shape:  (5108,)
testEhr shape:  (1277, 5)
trainEhr shape:  (5108, 5)



In [71]:
# ---------------------
# build model
# ---------------------

maxLen = 5000 # length of biosignal data (change to 28215 if you keep the original length of the biosignals)

seq_inp = Input(shape=(maxLen,3)) # input layer for sequential data (biosignal data)
nonseq_inp = Input(shape=(trainEhr.shape[1],)) # input layer for non-sequential data (ehr data)
x = Bidirectional(LSTM(10, input_shape=(trainBio.shape[1], trainBio.shape[2])))(seq_inp) # sequential data goes through lstm
x = concatenate([x, nonseq_inp]) # then concatenate with non-seq data
x = Dense(32, activation="relu")(x) # dense layer
out = Dense(1, activation="sigmoid")(x) # output layer
model = Model(inputs=[seq_inp, nonseq_inp], outputs=out)

model.compile(optimizer="Adam", loss='binary_crossentropy', metrics=['accuracy'])
print(model.summary())

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 5000, 3)]    0           []                               
                                                                                                  
 bidirectional_1 (Bidirectional  (None, 20)          1120        ['input_5[0][0]']                
 )                                                                                                
                                                                                                  
 input_6 (InputLayer)           [(None, 5)]          0           []                               
                                                                                                  
 concatenate_1 (Concatenate)    (None, 25)           0           ['bidirectional_1[0][0]',  

In [72]:
# -------
# train
# -------
model.fit([trainBio, trainEhr], yTrain, epochs=10, batch_size=256, validation_split=0.2)

  return t[start:end]


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x215d6efbe20>