In [None]:
# Mount Google Drive at /content/drive so the notebook can read and write
# files stored there. `google.colab` is only available inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
import sklearn as skm
%matplotlib inline

from sklearn.utils import resample
import math
import pandas as pd
import random
from sklearn.metrics import f1_score,roc_auc_score


In [None]:
def getpatientstay(patient,df):
  """Return how many rows of ``df`` belong to ``patient``.

  Args:
    patient: Value to look up in the ``Patient_ID`` column.
    df: DataFrame containing a ``Patient_ID`` column.

  Returns:
    The number of rows whose ``Patient_ID`` equals ``patient``, or 0 when the
    patient does not occur in ``df``.
  """
  # One entry per distinct Patient_ID, holding that patient's row count.
  rows_per_patient = df['Patient_ID'].value_counts()
  stay_length = rows_per_patient.get(patient, 0)
  return stay_length

In [None]:
# Load data: read the training CSV from the mounted Google Drive into `df`.
df =pd.read_csv('/content/drive/MyDrive/DataScience/project/no_additional_features.zip (Unzipped Files)/train_set_interpolation_with_multivariate.csv')


In [None]:
### DataFrame funcs

# Keep only patients that can supply useful training windows:
#   * a stay strictly longer than 12 readings, and
#   * at least 6 readings labelled as sepsis.
# The per-patient statistics are computed in a single grouped pass instead of
# re-scanning and re-filtering the whole frame once for every patient.
min_stay_exclusive = 12   # stays of this many readings or fewer are dropped
min_sepsis_readings = 6   # patients with fewer positive readings are dropped

by_patient = df.groupby('Patient_ID')
stay_per_patient = by_patient.size()                   # rows per patient
sepsis_per_patient = by_patient['SepsisLabel'].sum()   # positive readings per patient

# Both series are indexed by Patient_ID, so the boolean masks align.
patients_to_keep = stay_per_patient.index[
    (stay_per_patient > min_stay_exclusive)
    & (sepsis_per_patient >= min_sepsis_readings)
]
df = df.loc[df['Patient_ID'].isin(patients_to_keep)]


# Return the cleaned patient df and separate it into train and test sets
# y = df.SepsisLabel
# X = df.drop(['SepsisLabel'],axis=1)


In [None]:
# Persist the cleaned frame in the Drive project folder. The split cell reads
# 'cleanedLSTMdata.csv' from this exact location, so writing it to the
# working directory would leave that cell reading a missing or stale file.
df.to_csv('/content/drive/MyDrive/DataScience/project/cleanedLSTMdata.csv', index=False)

In [None]:
# Define the proportion of patients to be allocated to each set.
# The test set receives whatever remains after train and validation, so every
# patient is assigned to exactly one set even when the products truncate.
train_proportion = 0.7
val_proportion = 0.15
test_proportion = 0.15

# Load the data into a pandas dataframe
df = pd.read_csv("/content/drive/MyDrive/DataScience/project/cleanedLSTMdata.csv")

# Shuffle the patient IDs, not the rows. Shuffling rows inside each patient
# would destroy the time order that the 12-reading windows rely on, and
# splitting by row could put one patient's readings into two different sets.
patient_ids = pd.Series(df["Patient_ID"].unique())
shuffled_ids = patient_ids.sample(frac=1, random_state=42).to_numpy()

# Set sizes are counted in patients.
train_size = int(len(shuffled_ids) * train_proportion)
val_size = int(len(shuffled_ids) * val_proportion)
test_size = len(shuffled_ids) - train_size - val_size

train_groups = shuffled_ids[:train_size]
val_groups = shuffled_ids[train_size:train_size + val_size]
test_groups = shuffled_ids[train_size + val_size:]

# Select each set's rows by patient membership; the original row order (and
# therefore each patient's time order) is preserved.
train_df = df.loc[df["Patient_ID"].isin(train_groups)]
val_df = df.loc[df["Patient_ID"].isin(val_groups)]
test_df = df.loc[df["Patient_ID"].isin(test_groups)]

In [None]:
### Define model
from keras.models import Sequential
from keras.layers import Dense,LSTM, Dropout

# One sample is a window of 12 consecutive readings. Every column of train_df
# except SepsisLabel is used as a feature.
window_length = 12
n_features = train_df.shape[1] - 1

# Conv1D -> LSTM stack over (timesteps, features) windows.
#   * The convolution and pooling are 1-D so their output stays 3-D
#     (batch, timesteps, channels), which is what an LSTM layer requires.
#   * The first LSTM returns the full sequence so the second LSTM also
#     receives 3-D input; the second LSTM returns only its final state.
#   * All layers come from tf.keras so they match the tf.keras model.
model = tf.keras.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(window_length, n_features)))
model.add(tf.keras.layers.Conv1D(64, 1, activation="relu"))
model.add(tf.keras.layers.MaxPooling1D(2, padding="same"))
model.add(tf.keras.layers.LSTM(64, activation="tanh", return_sequences=True))
model.add(tf.keras.layers.LSTM(32, activation="tanh"))
model.add(tf.keras.layers.Dropout(0.2))
# Single linear output per window.
# NOTE(review): SepsisLabel looks binary; a sigmoid output with
# binary cross-entropy may be a better fit than linear + mse -- confirm.
model.add(tf.keras.layers.Dense(1))

model.compile(loss='mse', optimizer='adam')
model.summary()


In [None]:
y_val, y_test = val_df.SepsisLabel, test_df.SepsisLabel
X_val,X_test = val_df.drop(['SepsisLabel'],axis=1), test_df.drop(['SepsisLabel'],axis=1)


def _patient_arrays(frame):
  """Return one (features, labels) pair of NumPy arrays per patient in ``frame``.

  Patients are visited in order of first appearance and each patient's rows
  keep their original order. Features are every column except SepsisLabel.
  """
  arrays = []
  for _, patient_rows in frame.groupby('Patient_ID', sort=False):
    features = patient_rows.drop(['SepsisLabel'],axis=1).values
    labels = patient_rows.SepsisLabel.values
    arrays.append((features, labels))
  return arrays


def _consecutive_windows(frame, size):
  """Cut each patient in ``frame`` into consecutive, non-overlapping windows.

  Returns ``(X, y)`` with shapes ``(n_windows, size, n_features)`` and
  ``(n_windows, size)``. Readings left over at the end of a stay that do not
  fill a whole window are dropped.
  """
  X_windows, y_windows = [], []
  for features, labels in _patient_arrays(frame):
    for start in range(0, len(features) - size + 1, size):
      X_windows.append(features[start:start + size])
      y_windows.append(labels[start:start + size])
  return np.array(X_windows), np.array(y_windows)


# Training configuration
windowsize = 12            # readings per window
epochs = 50
batches_per_patient = 30   # random windows drawn per patient per epoch
random.seed(42)            # make the random window starts reproducible

# Split the frames once up front instead of re-filtering them every epoch.
train_patients = _patient_arrays(train_df)
X_val_windows, y_val_windows = _consecutive_windows(val_df, windowsize)

for epoch in range(epochs):
  print('Epoch: ',epoch)
  for features, labels in train_patients:
    patient_stay = features.shape[0]
    if patient_stay < windowsize:
      # Too short to supply a single full window.
      continue
    for batch_per_patient in range(batches_per_patient):
      # Randomly choose a start inside THIS patient's stay and take the next
      # `windowsize` readings as one batch.
      batch_start = random.randint(0,patient_stay-windowsize)
      batch_stop = batch_start + windowsize
      X_batch = features[batch_start:batch_stop].reshape(1,windowsize,features.shape[1])
      # NOTE(review): the target keeps all `windowsize` labels, as before.
      # If the model emits one value per window, the loss broadcasts that
      # value against every label of the window -- confirm this is intended.
      y_batch = labels[batch_start:batch_stop].reshape(1,windowsize)
      model.fit(X_batch,y_batch,epochs=3,verbose=0)

  # Validate model every 5th epoch
  if epoch % 5 != 0:
    continue
  if len(X_val_windows) == 0:
    print('| No validation windows of length ', windowsize, ' available')
    continue

  # Predict on windows (the model cannot consume the flat X_val frame) and
  # score per reading, broadcasting a single per-window output across the
  # window exactly as the training loss does.
  preds = model.predict(X_val_windows, verbose=0)
  scores = np.broadcast_to(preds.reshape(len(preds), -1), y_val_windows.shape).ravel()
  y_true = y_val_windows.ravel().astype(int)
  # f1_score needs class labels, so threshold the continuous outputs.
  y_pred = (scores >= 0.5).astype(int)
  # roc_auc_score is undefined when only one class is present.
  auc = roc_auc_score(y_true, scores) if np.unique(y_true).size > 1 else float('nan')
  print('| F1 Score: ',f1_score(y_true,y_pred,average = None), ' | roc auc score: ', auc)

In [None]:
# Display the current value of the training-loop variable `epoch`.
epoch