In [5]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import os
from scipy.io import loadmat
import numpy as np

## Long Short Term Memory Networks (LSTM)

In [3]:
data_df = pd.read_csv('preprocessed_data.csv')

In [3]:
def preprocess_data(data_folder, output_length=30000):
    def downsample_signal(signal_data, downsample_factor):
        downsampled_signal = signal_data[:, ::downsample_factor]
        return downsampled_signal

    def process_patient_data(patient_folder_path):
        patient_signals = []
        for file_entry in os.listdir(patient_folder_path):
            if file_entry.endswith('.mat'):
                file_path = os.path.join(patient_folder_path, file_entry)
                mat_data = loadmat(file_path)
                signal_data = mat_data['val']
                patient_signals.append(signal_data)

        full_signal = np.concatenate(patient_signals, axis=1)
        downsample_factor = int(full_signal.shape[1] / output_length)
        downsampled_signal = downsample_signal(full_signal, downsample_factor)

        return downsampled_signal

    X_data = []
    for patient_folder_entry in os.listdir(data_folder):
        patient_folder_path = os.path.join(data_folder, patient_folder_entry)
        if os.path.isdir(patient_folder_path):
            patient_data = process_patient_data(patient_folder_path)
            print('Patient:', patient_folder_entry, '| Data shape:', patient_data.shape)
            X_data.append(patient_data)

    return np.array(X_data)


In [None]:
data_folder = '../dataset/icare1.0/training'
X_signal = preprocess_data(data_folder)

Patient: ICARE_0284 | Data shape: (18, 30000)
Patient: ICARE_0286 | Data shape: (18, 30000)
Patient: ICARE_0296 | Data shape: (18, 30000)
Patient: ICARE_0299 | Data shape: (18, 30000)
Patient: ICARE_0303 | Data shape: (18, 30000)
Patient: ICARE_0306 | Data shape: (18, 30000)
Patient: ICARE_0311 | Data shape: (18, 30000)
Patient: ICARE_0312 | Data shape: (18, 30000)
Patient: ICARE_0313 | Data shape: (18, 30000)
Patient: ICARE_0316 | Data shape: (18, 30000)
Patient: ICARE_0319 | Data shape: (18, 30000)
Patient: ICARE_0320 | Data shape: (18, 30000)
Patient: ICARE_0326 | Data shape: (18, 30000)
Patient: ICARE_0328 | Data shape: (18, 30000)
Patient: ICARE_0332 | Data shape: (18, 30000)
Patient: ICARE_0334 | Data shape: (18, 30000)
Patient: ICARE_0335 | Data shape: (18, 30000)
Patient: ICARE_0337 | Data shape: (18, 30000)
Patient: ICARE_0340 | Data shape: (18, 30000)
Patient: ICARE_0341 | Data shape: (18, 30000)
Patient: ICARE_0342 | Data shape: (18, 30000)
Patient: ICARE_0344 | Data shape: 

In [None]:
np.save('X_signal.npy', X_signal)

In [6]:
X_signal = np.load('X_signal.npy')

In [7]:
print(X_signal.shape)

(607, 18, 30000)


In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X_signal.reshape(X_signal.shape[0], X_signal.shape[2], X_signal.shape[1]),
    LabelEncoder().fit_transform(data_df['Outcome']),
    test_size=0.1, random_state=42)

In [9]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(546, 30000, 18) (61, 30000, 18) (546,) (61,)


In [15]:
custom_lstm_model = Sequential()
custom_lstm_model.add(LSTM(units=32, input_shape=(X_train.shape[1], X_train.shape[2])))
custom_lstm_model.add(Dropout(0.1))
custom_lstm_model.add(Dense(units=16, activation='relu'))
custom_lstm_model.add(Dropout(0.1))
custom_lstm_model.add(Dense(units=8, activation='relu'))
custom_lstm_model.add(Dropout(0.1))
custom_lstm_model.add(Dense(units=1, activation='sigmoid'))
custom_lstm_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 32)                6528      
                                                                 
 dropout_3 (Dropout)         (None, 32)                0         
                                                                 
 dense_3 (Dense)             (None, 16)                528       
                                                                 
 dropout_4 (Dropout)         (None, 16)                0         
                                                                 
 dense_4 (Dense)             (None, 8)                 136       
                                                                 
 dropout_5 (Dropout)         (None, 8)                 0         
                                                                 
 dense_5 (Dense)             (None, 1)                

In [16]:
custom_lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = custom_lstm_model.fit(X_train, y_train, epochs=1, batch_size=16, validation_split=0.1)



In [None]:
# Save the trained model to disk
model_filename = 'custom_lstm_model.h5'
custom_lstm_model.save(model_filename)

In [13]:
history.history

{'loss': [0.7356374859809875],
 'accuracy': [0.45824846625328064],
 'val_loss': [0.6848698854446411],
 'val_accuracy': [0.581818163394928]}

In [17]:
# Predict on the test set
y_pred = custom_lstm_model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)
# Print the classification report
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.50      0.18      0.26        17
           1       0.75      0.93      0.83        44

    accuracy                           0.72        61
   macro avg       0.62      0.55      0.54        61
weighted avg       0.68      0.72      0.67        61

