# Import needed modules

In [None]:
# Mount Google Drive at /content/drive so the dataset folder read later in this
# notebook (under /content/drive/MyDrive/...) is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import warnings
# Suppress every warning for the rest of the session to keep cell output short.
# NOTE(review): this is a blanket filter, so it also hides warnings that may
# matter (e.g. deprecations) — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [None]:
import glob
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, RepeatedStratifiedKFold
from IPython.display import display, HTML

from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Conv1D, MaxPool1D, Dropout, Flatten
from tensorflow.keras.layers import Dense, Dropout, LSTM, Embedding

import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
# List the dataset folder to confirm the mount worked; one sub-folder per subject is expected.
!ls /content/drive/MyDrive/Data/6389_Data_project3

AD1   AD2  AD4	AD6  AD8  HC1	HC2  HC4  HC6  HC8
AD10  AD3  AD5	AD7  AD9  HC10	HC3  HC5  HC7  HC9


# Load all data

In [None]:
path = '/content/drive/MyDrive/Data/6389_Data_project3/'

# Every loaded matrix is zero-padded (bottom/right) to this common shape so that
# all matrices can later be stacked into a single rectangular array.
PADDED_SHAPE = (150, 194)

X = []  # one entry per subject folder: list of padded matrices (one per .txt file)
y = []  # one label per subject folder: 0 if the folder name contains 'HC', else 1

# os.listdir and glob.glob return entries in arbitrary, filesystem-dependent
# order. Sorting both makes the sample order — and which file ends up first in
# each subject's list — reproducible from run to run.
for patient_dir in sorted(os.listdir(path=path)):
  patient_path = os.path.join(path, patient_dir)
  if not os.path.isdir(patient_path):
    continue  # ignore stray files next to the subject folders

  time_series_files_for_this_patient = sorted(glob.glob(os.path.join(patient_path, '*.txt')))
  this_patient_matrices = []
  for filename in time_series_files_for_this_patient:
    this_roi_matrix = np.loadtxt(filename)
    # Copy the loaded values into the top-left corner of a zero matrix; the
    # remaining cells stay 0 and act as padding.
    padded_matrix = np.zeros(PADDED_SHAPE)
    padded_matrix[:this_roi_matrix.shape[0], :this_roi_matrix.shape[1]] = this_roi_matrix
    this_patient_matrices.append(padded_matrix)

  # Folder names look like 'HC<n>' / 'AD<n>'; presumably healthy-control vs.
  # patient groups — TODO confirm the label meaning with the dataset description.
  this_patient_category = 0 if 'HC' in patient_dir else 1
  y.append(this_patient_category)
  X.append(this_patient_matrices)

In [None]:
# Stack the per-subject lists into one array and keep only the first matrix of
# each subject, giving a 3-D array (subjects, rows, columns).
X = np.asarray(X)
if X.ndim == 4:
  # First run of this cell: (subjects, files, rows, columns) -> drop the file axis.
  X = np.array(X[:, 0, :, :])
elif X.ndim != 3:
  # ndim == 3 means the cell was already run (re-running is now a no-op instead
  # of an IndexError); anything else indicates ragged or malformed input.
  raise ValueError(f"Expected a 3-D or 4-D array of matrices, got shape {X.shape}")
y, X.shape

([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], (20, 150, 194))

# Define the parameters used

In [None]:
# NOTE(review): both values below appear to be stale. `epochs` is reassigned to
# 12 in the later compile cell before any training happens, and `folds` is not
# read by any visible cell (the cross-validation loop uses its own `num_folds`).
epochs = 70  # maximum number of training epochs (overwritten later before use)
folds = 2  # the number of folds for k-fold cross validation (not read by visible code)

In [None]:
# Binary classifier: two stacked 20-unit LSTM layers, each followed by 50%
# dropout, feeding a single sigmoid output unit. The first LSTM returns the
# full sequence so the second LSTM can consume it.
model = Sequential([
  LSTM(20, input_shape=(X.shape[1], X.shape[2]), activation='sigmoid', return_sequences=True),
  Dropout(0.5),
  LSTM(20, activation='sigmoid'),
  Dropout(0.5),
  Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_26 (LSTM)              (None, 150, 20)           17200     
                                                                 
 dropout_26 (Dropout)        (None, 150, 20)           0         
                                                                 
 lstm_27 (LSTM)              (None, 20)                3280      
                                                                 
 dropout_27 (Dropout)        (None, 20)                0         
                                                                 
 dense_13 (Dense)            (None, 1)                 21        
                                                                 
Total params: 20,501
Trainable params: 20,501
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Training hyper-parameters. `epochs` here replaces the value assigned in the
# earlier parameters cell.
batch_size, epochs = 6, 12

# Binary cross-entropy with the Adam optimizer; accuracy is tracked per epoch.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Stop a fit early once validation loss has failed to improve for 2 epochs.
callback = EarlyStopping(patience=2, monitor='val_loss')

In [None]:
def plot_history(history):
  """Show training vs. validation curves for accuracy and then loss.

  `history` must expose a `.history` mapping containing the keys
  'accuracy', 'val_accuracy', 'loss' and 'val_loss'. One figure is drawn and
  shown per metric.
  """
  for metric in ('accuracy', 'loss'):
    curves = history.history
    plt.plot(curves[metric])
    plt.plot(curves['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [None]:
# Convert the label list to a NumPy array so it can be indexed with the integer
# index arrays produced by the cross-validation splitter.
y = np.array(y)
X.shape

(20, 150, 194)

# Train the model

In [None]:
num_folds = 4
num_repeats = 5

test_accuracies = []
test_losses = []
histories = []

# Repeated stratified k-fold: num_folds splits, repeated num_repeats times with
# different shuffles. A fixed random_state makes the fold assignment reproducible.
skfold = RepeatedStratifiedKFold(n_splits=num_folds, n_repeats=num_repeats, random_state=42)
for fold_number, (train, test) in enumerate(skfold.split(X, y), start=1):
  print('*'*5, '--- Fold {} ----'.format(fold_number), '*' * 5)
  print(y[train], y[test])

  # Train a freshly initialised copy of the architecture for every fold.
  # Re-using one model object across folds would carry over weights that were
  # already fitted on samples which are in the current test split, leaking
  # test data into training and inflating the reported accuracy.
  fold_model = tf.keras.models.clone_model(model)
  fold_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

  # NOTE(review): the held-out fold is also used as validation data for early
  # stopping, so the stopping point is tuned on the test samples. With a larger
  # dataset, carve a separate validation split out of the training fold.
  history = fold_model.fit(
      X[train], y[train],
      batch_size=batch_size,
      epochs=epochs,
      validation_data=(X[test], y[test]),
      callbacks=[callback],
  )

  # score is [loss, accuracy], matching the metrics passed to compile().
  score = fold_model.evaluate(X[test], y[test], verbose=0)

  test_losses.append(score[0])
  test_accuracies.append(score[1])
  print("Test loss:", score[0])
  print("Test accuracy:", score[1])

  histories.append(history)

***** --- Fold 1 ---- *****
[1 1 1 1 1 1 1 0 0 0 0 0 0 0 0] [1 1 1 0 0]
Epoch 1/12
Epoch 2/12
Epoch 3/12
[1 1 1 0 0]
Test loss: 0.8572067022323608
Test accuracy: 0.800000011920929
***** --- Fold 2 ---- *****
[1 1 1 1 1 1 1 0 0 0 0 0 0 0 0] [1 1 1 0 0]
Epoch 1/12
Epoch 2/12
Epoch 3/12
[1 1 1 0 0]
Test loss: 0.003217028919607401
Test accuracy: 1.0
***** --- Fold 3 ---- *****
[1 1 1 1 1 1 1 1 0 0 0 0 0 0 0] [1 1 0 0 0]
Epoch 1/12
Epoch 2/12
Epoch 3/12
[1 1 0 0 0]
Test loss: 0.0045117540284991264
Test accuracy: 1.0
***** --- Fold 4 ---- *****
[1 1 1 1 1 1 1 1 0 0 0 0 0 0 0] [1 1 0 0 0]
Epoch 1/12
Epoch 2/12
Epoch 3/12
[1 1 0 0 0]
Test loss: 0.003234587609767914
Test accuracy: 1.0
***** --- Fold 5 ---- *****
[1 1 1 1 1 1 1 0 0 0 0 0 0 0 0] [1 1 1 0 0]
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
[1 1 1 0 0]
Test loss: 0.003517447505146265
Test accuracy: 1.0
***** --- Fold 6 ---- *****
[1 1 1 1 1 1 1 0 

# Evaluation of Results

In [None]:
# Report mean and standard deviation of the per-fold test accuracy in percent.
# Fixed-point formatting (.1f) is used deliberately: a bare precision such as
# ':.3' selects general format, which renders 100.0 as '1e+02'.
mean_accuracy_pct = np.mean(test_accuracies) * 100
std_accuracy_pct = np.std(test_accuracies) * 100
print(
    f"Accuracy over all folds mean: {mean_accuracy_pct:.1f}% and std: {std_accuracy_pct:.1f}%"
)

Accuracy over all folds mean: 99.0% and std: 4.4%
