In [None]:
# Mount Google Drive at /content/drive so the CSV paths used further down
# resolve. This import only exists inside the Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
import sklearn as skm
%matplotlib inline

In [None]:
from sklearn.utils import resample
import math
import pandas as pd

def balance_patient(df_patient,max_patient_stay):
    """Upsample the rarer SepsisLabel class within one patient's rows.

    Args:
        df_patient: rows of a single patient; must have a ``SepsisLabel``
            column (the code looks up the counts of labels 0 and 1).
        max_patient_stay: row-count threshold, also used as the number of
            minority rows to draw.

    Returns:
        ``None`` when the patient has at most one distinct label, or when the
        patient's row count is not strictly below ``max_patient_stay``.
        Otherwise the majority-class rows followed by ``max_patient_stay``
        rows sampled with replacement (seed 123) from the minority class.

    NOTE(review): the sample size is ``max_patient_stay``, not the size of the
    majority class, so the result is not class-balanced and is longer than
    ``max_patient_stay`` rows - confirm this is intended.
    """
    label_counts = df_patient.SepsisLabel.value_counts()
    if label_counts.shape[0] <= 1:
        return None

    sepsis_rows = label_counts[1]
    total_rows = sum(label_counts)
    healthy_rows = label_counts[0]
    if healthy_rows == 0 or not (total_rows < max_patient_stay):
        return None

    # Label 1 is treated as the majority only when it covers strictly more
    # than half of the rows (integer-truncated half).
    if sepsis_rows > int(total_rows / 2):
        majority_label, minority_label = 1, 0
    else:
        majority_label, minority_label = 0, 1

    df_majority = df_patient[df_patient.SepsisLabel == majority_label]
    df_minority = df_patient[df_patient.SepsisLabel == minority_label]

    # Draw with replacement; the fixed seed keeps the draw reproducible.
    df_minority_upsampled = resample(
        df_minority,
        replace=True,
        n_samples=max_patient_stay,
        random_state=123,
    )
    return pd.concat([df_majority, df_minority_upsampled])


def get_patient_df(df,patient_ID):
    """Return the rows of ``df`` whose ``Patient_ID`` equals ``patient_ID``."""
    belongs_to_patient = df.Patient_ID == patient_ID
    return df.loc[belongs_to_patient]


def import_data(filename):
    """Read the CSV at ``filename`` into a DataFrame with pandas' defaults."""
    loaded = pd.read_csv(filename)
    return loaded


def get_max_patient_stay(df):
    """Return the largest number of rows recorded for any single ``Patient_ID``."""
    rows_per_patient = df.Patient_ID.value_counts()
    return rows_per_patient.max()



def create_ts_data(dataset, lookback, predicted_col):
    """Pair each row with the target value found ``lookback`` rows later.

    Args:
        dataset: DataFrame whose rows are consecutive observations. It is not
            modified.
        lookback: non-negative number of rows between a feature row and the
            target it is paired with.
        predicted_col: positional (``iloc``) selector of the single target
            column - an int or a one-element list.

    Returns:
        DataFrame indexed by ``id`` (1..n) holding every column of the first
        ``len(dataset) - lookback`` rows plus a ``Predicted`` column with the
        target taken ``lookback`` rows ahead.

    Raises:
        ValueError: if ``lookback`` is negative.
    """
    if lookback < 0:
        raise ValueError("lookback must be non-negative")

    # Slice by explicit length: ``iloc[:-lookback]`` would return an empty
    # frame for lookback == 0.
    n_pairs = max(len(dataset) - lookback, 0)

    features = dataset.iloc[:n_pairs, :].copy()
    features["id"] = range(1, n_pairs + 1)
    features = features.set_index("id")

    # An int selector yields a Series; wrap it in a DataFrame *before*
    # renaming, otherwise the rename is silently lost and the result carries
    # two columns with the same name.
    target = pd.DataFrame(dataset.iloc[lookback:, predicted_col]).copy()
    target.columns = ["Predicted"]
    target["id"] = range(1, len(target) + 1)
    target = target.set_index("id")

    return pd.concat([features, target], axis=1)


def normalise_data(df):
    """Min-max scale every column of ``df`` into the range [0, 1].

    The values are cast to float32 and a fresh ``MinMaxScaler`` is fitted on
    them on every call. The returned DataFrame is built from the bare scaled
    array, so it carries default integer row and column labels rather than
    those of ``df``.
    """
    as_float = df.values.astype('float32')
    scaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(as_float)
    return pd.DataFrame(scaled)


def make_3d_df(num_patients, max_patient_stay,num_features):
    """Create an empty long-format frame addressed by (sample, time_step, feature).

    Returns:
        A ``(frame, index)`` tuple: ``frame`` has a single all-missing
        ``value`` column and one row per combination of the three ranges;
        ``index`` is the MultiIndex the frame was built on.
    """
    level_values = [
        range(num_patients),
        range(max_patient_stay),
        range(num_features),
    ]
    level_names = ['sample', 'time_step', 'feature']
    index = pd.MultiIndex.from_product(level_values, names=level_names)

    empty_frame = pd.DataFrame(index=index, columns=['value'])
    return empty_frame, index


def build_3d_ts_df(df):
    """Stack every usable patient's scaled feature matrix into one long frame.

    For each ``Patient_ID`` the patient's rows are balanced, turned into
    (features at t, label at t+1) pairs, min-max scaled, and the feature part
    is flattened into a ``value`` column addressed by a
    ``(sample, time_step, feature)`` MultiIndex. ``sample`` is the patient ID,
    ``time_step`` the row position inside that patient's windowed frame and
    ``feature`` the column position.

    Patients rejected by ``balance_patient`` (it returns ``None``) are skipped.

    Returns:
        DataFrame with one ``value`` column; empty (but with the same index
        level names) when no patient is usable.

    NOTE(review): time steps are not padded or truncated to a common length,
    so patients can contribute different numbers of rows.
    """
    max_patient_stay = get_max_patient_stay(df)
    label_col = df.columns.get_loc("SepsisLabel")
    level_names = ['sample', 'time_step', 'feature']

    patient_frames = []
    for patient in df.Patient_ID.unique():
        patient_data = balance_patient(get_patient_df(df, patient), max_patient_stay)
        if patient_data is None:
            continue

        # Window the balanced *patient* frame, not the whole dataset.
        patient_data = create_ts_data(dataset=patient_data, lookback=1, predicted_col=label_col)

        # normalise_data returns a DataFrame; positional [:, :-1] slicing
        # needs the underlying ndarray. The last column is the target.
        scaled = normalise_data(patient_data).values
        X = scaled[:, :-1]

        # Build the index from the actual matrix shape so it always matches
        # the flattened values (row-major: all features of step 0, then 1...).
        n_steps, n_features = X.shape
        index = pd.MultiIndex.from_product(
            [[patient], range(n_steps), range(n_features)], names=level_names
        )
        patient_frames.append(pd.DataFrame(X.reshape(-1, 1), index=index, columns=['value']))

    if not patient_frames:
        empty_index = pd.MultiIndex.from_arrays([[], [], []], names=level_names)
        return pd.DataFrame(index=empty_index, columns=['value'])

    # One concat at the end; DataFrame.append no longer exists in pandas 2.x.
    return pd.concat(patient_frames)

In [None]:
# Training CSV on the mounted Drive. Two variants of the file sit in the same
# folder (judging by the file names: constant vs. multivariate interpolation);
# exactly one of the two `file` assignments below should be active.
# file = '/content/drive/MyDrive/DataScience/project/no_additional_features.zip (Unzipped Files)/train_set_interpolation_with_constant.csv'
file = '/content/drive/MyDrive/DataScience/project/no_additional_features.zip (Unzipped Files)/train_set_interpolation_with_multivariate.csv'
# `df` is the global frame read by the model-definition and training cells.
df = import_data(file)

In [None]:
from keras.models import Sequential
from keras.layers import Dense,LSTM, Dropout

# Three stacked LSTM layers (20 -> 20 -> 6 units) with dropout before the
# last one, followed by a single linear output unit. The input is declared as
# sequences of get_max_patient_stay(df) time steps with 32 features each.
model_lstm = Sequential([
    LSTM(20, activation='tanh', return_sequences=True,
         input_shape=(get_max_patient_stay(df), 32)),
    LSTM(20, return_sequences=True),
    Dropout(0.2),
    LSTM(6),
    Dense(1),
])

model_lstm.compile(loss='mse', optimizer='adam')
model_lstm.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 336, 20)           4240      
                                                                 
 lstm_1 (LSTM)               (None, 336, 20)           3280      
                                                                 
 dropout (Dropout)           (None, 336, 20)           0         
                                                                 
 lstm_2 (LSTM)               (None, 6)                 648       
                                                                 
 dense (Dense)               (None, 1)                 7         
                                                                 
Total params: 8,175
Trainable params: 8,175
Non-trainable params: 0
_________________________________________________________________


In [None]:
max_patient_stay = get_max_patient_stay(df)
label_col = df.columns.get_loc("SepsisLabel")

# Balancing, windowing and scaling are deterministic, so prepare every
# patient's (X, y) arrays once instead of redoing it in every epoch.
training_pairs = []
for patient in df.Patient_ID.unique():
  patient_data = get_patient_df(df, patient)
  patient_data = balance_patient(patient_data, max_patient_stay)
  if patient_data is None:
    continue
  # Window the balanced *patient* frame, not the whole dataset.
  patient_data = create_ts_data(dataset=patient_data, lookback=1, predicted_col=label_col)

  # normalise_data returns a DataFrame; `frame[:, :-1]` on a DataFrame raises
  # InvalidIndexError, so slice the underlying ndarray instead.
  patient_values = normalise_data(patient_data).values
  training_pairs.append((patient_values[:, :-1], patient_values[:, -1]))

epochs = 3000
# Separate loop variable so `epochs` keeps its configured value.
for epoch in range(epochs):
  for X, y in training_pairs:
    # NOTE(review): X is 2-D (rows, features) here, while model_lstm was built
    # with input_shape=(max_patient_stay, 32) and emits one value per
    # sequence. How rows are grouped into fixed-length sequences (and which
    # label each sequence gets) is a modelling decision that still has to be
    # made before this call can train successfully.
    lstm_history = model_lstm.fit(X, y, epochs=10, batch_size=64, shuffle=False)



    

              0         1         2         3         4         5         6   \
0       0.237702  0.963153  0.745470  0.289356  0.197987  0.269297  0.264399   
1       0.296154  0.936752  0.745535  0.222231  0.197607  0.272714  0.264706   
2       0.265385  0.986712  0.747515  0.292409  0.235714  0.289365  0.308824   
3       0.269231  0.936752  0.738503  0.292409  0.245232  0.302151  0.426471   
4       0.319231  0.855567  0.745326  0.292409  0.254750  0.310713  0.345588   
...          ...       ...       ...       ...       ...       ...       ...   
792093  0.234615  0.952365  0.736398  0.479551  0.382143  0.409196  0.286765   
792094  0.284615  0.942997  0.739915  0.350891  0.235714  0.300681  0.205882   
792095  0.146154  0.999202  0.743433  0.350891  0.250000  0.324796  0.235294   
792096  0.167308  0.999202  0.746951  0.264630  0.221429  0.297667  0.235294   
792097  0.215385  0.999202  0.750469  0.479551  0.342857  0.366996  0.205882   

              7         8         9   .

InvalidIndexError: ignored