_**Preprocessing the patient file so that it can be fed into the model.**_

In [43]:
# Defining a function to extract PatientID from the file path
import re
def extractPatientID(path):
    """Return the integer patient ID embedded in a ``pNNNNNN.psv`` file path.

    Parameters
    ----------
    path : str
        File path or file name containing ``p`` followed by exactly six
        digits and the ``.psv`` extension, e.g. ``...\\p000009.psv``.

    Returns
    -------
    int
        The six-digit number with leading zeros ignored (``p000009`` -> 9,
        ``p000000`` -> 0).

    Raises
    ------
    ValueError
        If ``path`` does not contain a ``pNNNNNN.psv`` component.
    """
    match = re.search(r'p(\d{6})\.psv', path)
    if match is None:
        raise ValueError(
            f"No patient ID of the form 'pNNNNNN.psv' found in path: {path!r}"
        )
    # int() already ignores leading zeros. Stripping them by hand turned an
    # all-zero ID ('000000') into '' and made int() raise.
    return int(match.group(1))

def extractWindowSequence(df, window):
    """Cut a patient's rows into overlapping fixed-length feature windows.

    Parameters
    ----------
    df : pandas.DataFrame
        One patient's rows in time order. Must contain a ``SepsisLabel``
        column; every other column is treated as a feature.
    window : int
        Number of consecutive rows per window.

    Returns
    -------
    sequence : list of numpy.ndarray
        One ``(window, n_features)`` array per start offset
        ``0 .. len(df) - window - 1`` (``SepsisLabel`` removed). Empty when
        ``len(df) <= window``.
    window_label : list
        The patient-level label (maximum of ``SepsisLabel`` over all rows),
        repeated once per window.

    Notes
    -----
    Rows are sliced by position, not by index label.

    NOTE(review): the window that ends on the last row (start offset
    ``len(df) - window``) is not emitted. This is kept unchanged so the
    window count matches the existing results; confirm whether it is
    intentional.
    """
    # Patient-level label: every window gets the same value.
    patient_label = df['SepsisLabel'].max()
    # Drop the label column once instead of once per window; each window is
    # then a cheap positional slice of the same feature matrix.
    features = df.drop(['SepsisLabel'], axis=1).values
    num_rows = df.shape[0]
    sequence = [features[start:start + window] for start in range(num_rows - window)]
    window_label = [patient_label] * len(sequence)
    return sequence, window_label

In [54]:
from sklearn.impute import KNNImputer
import time
import pandas as pd

# --- Configuration and accumulators ---------------------------------------
window = 12  # rows per sliding window; equals the first dimension of the model's input_shape
imputer = KNNImputer(n_neighbors=10)
sequences, window_labels = [], []
label = []  # NOTE(review): never read in this cell; confirm it is still needed
start_time = time.time()  # NOTE(review): no elapsed time is computed in this cell

# NOTE(review): absolute, user-specific path; this cell only runs on the
# author's machine. Consider a configurable data directory.
file = "C:\\Users\\nandi\\OneDrive\\Documents\\training_setA\\training\\p000009.psv"

# --- Load the pipe-separated file and drop the columns that are not used ---
df_raw = pd.read_csv(file, sep='|').drop(
    columns=['Bilirubin_direct', 'TroponinI', 'Fibrinogen', 'Unit1', 'Unit2', 'HospAdmTime', 'Gender']
)

# --- Impute missing values ---------------------------------------------------
# Columns with no observed value at all are held out of the KNN imputation
# (presumably because the imputer cannot fill a column with nothing to learn
# from) and re-attached afterwards.
null_cols = list(df_raw.columns[df_raw.isna().all()])
df = df_raw.drop(columns=null_cols)
df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

# NOTE(review): re-attached columns are appended at the end, so the column
# order differs from the file whenever null_cols is non-empty. Confirm this
# matches the feature order the model was trained with.
for col in null_cols:
    df[col] = df_raw[col]

# PatientID is stored as a regular column; extractWindowSequence removes only
# SepsisLabel, so PatientID ends up among the window features.
patient_id = extractPatientID(file)
df['PatientID'] = patient_id

if not patient_id % 1000:
    print('Reading patient file: ', patient_id)

# Anything still missing (the all-NaN columns re-attached above) becomes 0.
df = df.fillna(0)

# --- Slice into fixed-length windows and accumulate -------------------------
x, y = extractWindowSequence(df, window)
sequences.extend(x)
window_labels.extend(y)

In [55]:
# Patient-level label: the maximum of SepsisLabel over all rows of this file,
# i.e. the same value extractWindowSequence assigns to every window.
print("Sepsis Label: ",df['SepsisLabel'].max())

Sepsis Label:  1.0


In [56]:
import numpy as np

# Stack the per-window feature matrices and their labels into NumPy arrays.
# NOTE(review): each window is a 2-D (rows, features) matrix, so X has no
# trailing channel axis, while the model declares input_shape=(12, 34, 1).
# Confirm Keras adds that axis as intended, or add it explicitly.
X, y = np.asarray(sequences), np.asarray(window_labels)

### Defining our best Model using CNN

In [3]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.layers import Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization
from keras.optimizers import RMSprop, SGD
from keras.layers import Dropout
from keras.layers import GRU, Dense
from keras.regularizers import L1


# CNN over a (12, 34, 1) input: two Conv2D -> MaxPooling2D -> BatchNormalization
# stages, dropout, then a dense head ending in one sigmoid unit.
# The leading 12 equals the `window` length used during preprocessing.
# NOTE(review): 34 is presumably the number of feature columns per row; verify
# it against the preprocessed frame.
CNN_model = Sequential([
    # Stage 1: 32 filters, 3x3 kernels
    Conv2D(32, kernel_size=(3, 3), activation='relu', kernel_initializer='he_uniform',
           input_shape=(12, 34, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(center=True, scale=True),
    # Stage 2: 64 filters, 3x3 kernels
    Conv2D(64, kernel_size=(3, 3), activation='relu', kernel_initializer='he_uniform'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(center=True, scale=True),
    Dropout(0.5),
    # Classification head
    Flatten(),
    Dense(256, activation='relu', kernel_initializer='he_uniform'),
    Dense(32, activation='relu', kernel_initializer='he_uniform'),
    Dense(1, activation='sigmoid'),
])

CNN_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 10, 32, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 5, 16, 32)        0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 5, 16, 32)        128       
 ormalization)                                                   
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 14, 64)         18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 1, 7, 64)         0         
 2D)                                                             
                                                      

In [6]:
# Attach the optimizer, loss and reported metric to the model.
CNN_model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['acc'],
)

### Loading the weights from the saved file

In [8]:
# Load the weights stored in the saved .h5 file into the model.
# NOTE(review): absolute, user-specific path; this fails on any other machine.
# Consider making the location configurable.
CNN_model.load_weights("C:\\Users\\nandi\\Downloads\\CNNmodel.h5")

### Predicting using the best model

In [57]:
# Score every window in X. The model's last layer is a single sigmoid unit,
# so each row of y_pred holds one value between 0 and 1.
y_pred = CNN_model.predict(X)



_**The length of `y_pred` is the number of 12-hour windows extracted from the patient file.**_

In [58]:
# Number of prediction rows, one per window passed to predict().
len(y_pred)

246

In [59]:
# Show the raw per-window model outputs in window order.
# NOTE(review): this dumps every row; a plot or a slice would be easier to read.
y_pred

array([[0.1740593 ],
       [0.17560491],
       [0.19658607],
       [0.18819073],
       [0.20555185],
       [0.21571784],
       [0.21004932],
       [0.21222363],
       [0.20352378],
       [0.18315145],
       [0.21166866],
       [0.21305016],
       [0.22416255],
       [0.2386801 ],
       [0.25545946],
       [0.25273287],
       [0.31554574],
       [0.34118927],
       [0.3115991 ],
       [0.34902152],
       [0.35984173],
       [0.40308505],
       [0.4451098 ],
       [0.48212525],
       [0.5209741 ],
       [0.5485367 ],
       [0.6212362 ],
       [0.62510514],
       [0.6615203 ],
       [0.703287  ],
       [0.7541293 ],
       [0.7678331 ],
       [0.79183185],
       [0.8083937 ],
       [0.8292263 ],
       [0.83172303],
       [0.83174103],
       [0.8600486 ],
       [0.8585552 ],
       [0.8716212 ],
       [0.8783935 ],
       [0.8790147 ],
       [0.89239407],
       [0.88828313],
       [0.88879347],
       [0.8884432 ],
       [0.89120287],
       [0.891

### Interpreting the results
<br>_**For this patient, who was found to be septic, the model predicts a high probability of sepsis onset starting at the 41st hour. <br>The probability keeps increasing and peaks at around 98% towards the end.**_