# Build LSTM model

## Read data

In [1]:
import os
from pathlib import Path

import pandas as pd


dataDirName = Path(os.environ['DATA_DIR'], 'sepsis_prediction', 'lstm_initial_trials', '02_data_matrix')
dataDirName.mkdir(exist_ok=True, parents=True)

dataDf = pd.read_csv(Path(dataDirName, 'data_temp_variables.csv'))
dataDf = dataDf.sort_values(by=['PATIENT_ID', 'EPISODE_ID', 'measurement_datetime'], ascending=True)
dataDf

Unnamed: 0,PATIENT_ID,EPISODE_ID,measurement_datetime,concept_name,value_mean,value_std,value_open,value_high,value_low,value_close,Organism_FIRST_NOTED,sepsis,target
0,18849,12790706,2019-09-01,Temperature,37.477778,1.605286,36.9,40.6,35.5,35.5,2019-09-01,0.0,0.0
1,18849,12790706,2019-09-02,Temperature,36.875000,0.853913,36.4,38.1,36.2,36.8,2019-09-01,0.0,0.0
2,18849,12790706,2019-09-03,Temperature,36.750000,0.695222,36.1,37.7,36.1,37.7,2019-09-01,0.0,0.0
3,41308,14824675,2021-05-29,Temperature,36.576819,0.000000,0.0,0.0,0.0,0.0,2021-05-31,0.0,0.0
4,41308,14824675,2021-05-30,Temperature,36.576819,0.000000,0.0,0.0,0.0,0.0,2021-05-31,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
228981,2684042,17565390,2023-03-31,Temperature,36.576819,0.000000,0.0,0.0,0.0,0.0,2023-04-01,0.0,0.0
228982,2684042,17565390,2023-04-01,Temperature,36.100000,0.089443,36.1,36.2,36.0,36.0,2023-04-01,0.0,0.0
228983,2684042,17565632,2023-03-30,Temperature,36.576819,0.000000,0.0,0.0,0.0,0.0,2023-04-01,0.0,0.0
228984,2684042,17565632,2023-03-31,Temperature,36.576819,0.000000,0.0,0.0,0.0,0.0,2023-04-01,0.0,0.0


In [2]:
dataDf[dataDf.target == 1][200:210]

Unnamed: 0,PATIENT_ID,EPISODE_ID,measurement_datetime,concept_name,value_mean,value_std,value_open,value_high,value_low,value_close,Organism_FIRST_NOTED,sepsis,target
7015,309825,481728,2017-11-21,Temperature,36.576819,0.0,0.0,0.0,0.0,0.0,2017-11-05,1.0,1.0
7016,309825,481728,2017-11-22,Temperature,36.576819,0.0,0.0,0.0,0.0,0.0,2017-11-05,1.0,1.0
7017,309825,481728,2017-11-23,Temperature,36.0,0.0,36.0,36.0,36.0,36.0,2017-11-05,1.0,1.0
7021,310094,499315,2018-01-23,Temperature,36.576819,0.0,0.0,0.0,0.0,0.0,2018-01-18,1.0,1.0
7022,310094,499315,2018-01-24,Temperature,36.576819,0.0,0.0,0.0,0.0,0.0,2018-01-18,1.0,1.0
7023,310094,499315,2018-01-25,Temperature,36.4,0.0,36.4,36.4,36.4,36.4,2018-01-18,1.0,1.0
7041,311670,373021,2017-02-15,Temperature,36.576819,0.0,0.0,0.0,0.0,0.0,2017-01-02,1.0,1.0
7042,311670,373021,2017-02-16,Temperature,36.576819,0.0,0.0,0.0,0.0,0.0,2017-01-02,1.0,1.0
7043,311670,373021,2017-02-17,Temperature,36.0,0.0,36.0,36.0,36.0,36.0,2017-01-02,1.0,1.0
7079,313406,13355118,2020-03-11,Temperature,37.514286,0.569879,38.6,38.6,36.9,37.3,2020-03-11,1.0,1.0


In [14]:
rowsX = []
rowsY = []
for name, group in dataDf.groupby(by=['PATIENT_ID', 'EPISODE_ID']):
    low = 0
    high = 3
    while high <= group.shape[0]:
        rowsX.append(
            [[*x] for x in zip(list(group.value_mean[low: high]), list(group.value_std[low: high]), list(group.value_open[low: high]), list(group.value_high[low: high]), list(group.value_low[low: high]), list(group.value_close[low: high]))]
            )
        rowsY.append(group.target[(high - 1): high].values[0])
        high += 1
        low += 1

In [15]:
rowsX

[[[37.47777777777778, 1.605286405737134, 36.9, 40.6, 35.5, 35.5],
  [36.875, 0.8539125638299683, 36.4, 38.1, 36.2, 36.8],
  [36.75, 0.6952217871538089, 36.1, 37.7, 36.1, 37.7]],
 [[36.57681874218282, 0.0, 0.0, 0.0, 0.0, 0.0],
  [36.57681874218282, 0.0, 0.0, 0.0, 0.0, 0.0],
  [29.9, 0.0, 29.9, 29.9, 29.9, 29.9]],
 [[36.26666666666667, 0.37859388972002, 36.1, 36.7, 36.0, 36.0],
  [36.73333333333333, 0.461880215351699, 37.0, 37.0, 36.2, 36.2],
  [36.8, 0.2828427124746155, 37.0, 37.0, 36.6, 36.6]],
 [[36.73333333333333, 0.461880215351699, 37.0, 37.0, 36.2, 36.2],
  [36.8, 0.2828427124746155, 37.0, 37.0, 36.6, 36.6],
  [21.65, 21.14249275747777, 6.7, 36.6, 6.7, 36.6]],
 [[36.8, 0.2828427124746155, 37.0, 37.0, 36.6, 36.6],
  [21.65, 21.14249275747777, 6.7, 36.6, 6.7, 36.6],
  [36.1, 0.0, 36.1, 36.1, 36.1, 36.1]],
 [[21.65, 21.14249275747777, 6.7, 36.6, 6.7, 36.6],
  [36.1, 0.0, 36.1, 36.1, 36.1, 36.1],
  [36.35, 0.2121320343559622, 36.5, 36.5, 36.2, 36.2]],
 [[36.1, 0.0, 36.1, 36.1, 36.1, 36

In [16]:
import numpy as np


trainX = np.array(rowsX)
testX = np.array(rowsX)
trainY = np.array(rowsY)
testY = np.array(rowsY)


In [17]:
trainX.shape, trainY.shape

((189056, 3, 6), (189056,))

In [19]:
# LSTM for international airline passengers problem with time step regression framing
import numpy as np
import matplotlib.pyplot as plt
from pandas import read_csv
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error


look_back = 3

model = Sequential()
model.add(LSTM(4, input_shape=(look_back, 6)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(trainX, trainY, epochs=10, batch_size=1, verbose=2)
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

Epoch 1/10
189056/189056 - 184s - 972us/step - accuracy: 0.9452 - loss: 0.2123
Epoch 2/10
189056/189056 - 180s - 955us/step - accuracy: 0.9454 - loss: 0.2120
Epoch 3/10
189056/189056 - 174s - 922us/step - accuracy: 0.9454 - loss: 0.2106
Epoch 4/10
189056/189056 - 171s - 905us/step - accuracy: 0.9454 - loss: 0.2103
Epoch 5/10
189056/189056 - 171s - 903us/step - accuracy: 0.9454 - loss: 0.2104
Epoch 6/10
189056/189056 - 170s - 902us/step - accuracy: 0.9454 - loss: 0.2102
Epoch 7/10
189056/189056 - 170s - 901us/step - accuracy: 0.9454 - loss: 0.2100
Epoch 8/10
189056/189056 - 171s - 903us/step - accuracy: 0.9454 - loss: 0.2100
Epoch 9/10
189056/189056 - 172s - 911us/step - accuracy: 0.9454 - loss: 0.2102
Epoch 10/10
189056/189056 - 171s - 906us/step - accuracy: 0.9454 - loss: 0.2103
[1m5908/5908[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 501us/step
[1m5908/5908[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 525us/step


In [20]:
[x[0] for x in trainPredict]

[np.float32(0.07469904),
 np.float32(0.040050186),
 np.float32(0.074856795),
 np.float32(0.04648021),
 np.float32(0.04085309),
 np.float32(0.040050186),
 np.float32(0.07483187),
 np.float32(0.07486712),
 np.float32(0.074793756),
 np.float32(0.07485902),
 np.float32(0.0748017),
 np.float32(0.07484045),
 np.float32(0.07481022),
 np.float32(0.07484005),
 np.float32(0.07481977),
 np.float32(0.074817896),
 np.float32(0.07485183),
 np.float32(0.07486111),
 np.float32(0.07485514),
 np.float32(0.074845254),
 np.float32(0.07483777),
 np.float32(0.074864596),
 np.float32(0.040050186),
 np.float32(0.07482802),
 np.float32(0.07484702),
 np.float32(0.074838184),
 np.float32(0.0748422),
 np.float32(0.07486199),
 np.float32(0.074816585),
 np.float32(0.074713096),
 np.float32(0.074722394),
 np.float32(0.07482852),
 np.float32(0.0748291),
 np.float32(0.07484072),
 np.float32(0.07483598),
 np.float32(0.07482719),
 np.float32(0.07478351),
 np.float32(0.07481262),
 np.float32(0.074804),
 np.float32(0.0748

In [21]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score


print('accuracy_score', accuracy_score(trainY, [1 if (x[0] > 0.5) else 0 for x in trainPredict]))
print('balanced_accuracy_score', balanced_accuracy_score(trainY, [1 if (x[0] > 0.5) else 0 for x in trainPredict]))
print('precision_score', precision_score(trainY, [1 if (x[0] > 0.5) else 0 for x in trainPredict]))
print('recall_score', recall_score(trainY, [1 if (x[0] > 0.5) else 0 for x in trainPredict]))
print('precision_recall_fscore_support', precision_recall_fscore_support(trainY, [1 if (x[0] > 0.5) else 0 for x in trainPredict]))
print('f1_score', f1_score(trainY, [1 if (x[0] > 0.5) else 0 for x in trainPredict]))
print('roc_auc_score', roc_auc_score(trainY, [1 if (x[0] > 0.5) else 0 for x in trainPredict]))

accuracy_score 0.9454024204468517
balanced_accuracy_score 0.5
precision_score 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


recall_score 0.0
precision_recall_fscore_support (array([0.94540242, 0.        ]), array([1., 0.]), array([0.97193507, 0.        ]), array([178734,  10322]))
f1_score 0.0
roc_auc_score 0.5
