In [1]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using {}.".format(device.type))

Using cuda.


In [2]:
from pathlib import Path

LOCAL_RUNTIME = False

if not LOCAL_RUNTIME:
    from google.colab import drive
    data_dir = Path("/content/Data/")

    if not (data_dir.is_dir() and any(data_dir.iterdir())):
        drive.mount('/content/drive/')

        if not data_dir.is_dir():
            !mkdir /content/Data/

        intra_dir = data_dir / "Intra"
        cross_dir = data_dir / "Cross"

        if not (
            intra_dir.is_dir()
            and any(subdir.is_file() for subdir in intra_dir.iterdir())
            and cross_dir.is_dir()
            and any(subdir.is_file() for subdir in cross_dir.iterdir())
        ):
            !unzip '/content/drive/MyDrive/Data/DL/Intra.zip' -d '/content/Data/' > /dev/null
            !unzip '/content/drive/MyDrive/Data/DL/Cross.zip' -d '/content/Data/' > /dev/null

        drive.flush_and_unmount()
else:
    raise ValueError("This code is intended to run in Google Colab. Please switch to a Colab environment.")

assert data_dir.is_dir()


Mounted at /content/drive/


In [23]:
# Importing Necessary Libraries
import h5py
import numpy as np
import torch
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import KFold
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Layer
import tensorflow.keras.backend as K
from tensorflow.keras.layers import GRU, Bidirectional, Dense, Dropout, TimeDistributed

data_dir = Path("./Data/")
intra_dir = data_dir / "Intra"
cross_dir = data_dir / "Cross"

In [24]:
def load_h5(path):
    with h5py.File(path, 'r') as f:
        keys = list(f.keys())
        assert len(keys) == 1, "Only one key per file is expected"
        matrix = f[keys[0]][()]
    return matrix

def load_labels(path: Path) -> np.ndarray:
    *task, subject_identifier, chunk = path.stem.split("_")
    if "rest" in task:
        y = 0
    elif 'math' in task:
        y = 1
    elif 'working' in task:
        y = 2
    elif 'motor' in task:
        y = 3
    else:
        assert False, 'unknown task'
    return np.array([y, int(subject_identifier), int(chunk)])

In [25]:
def downsample(data, old_freq, new_freq):
    # Calculate the downsampling factor
    downsample_factor = int(np.round(old_freq / new_freq))
    # Ensure that timesteps are divisible by the downsampling factor
    data = data[:,:,:data.shape[2]//downsample_factor*downsample_factor]
    # Reshape
    reshaped_data = data.reshape(data.shape[0], data.shape[1], -1, downsample_factor)
    # Take the mean along the last axis
    downsampled_data = reshaped_data.mean(axis=-1)
    return downsampled_data

def z_score_normalize(data):
    # Convert to PyTorch tensor
    data_tensor = torch.tensor(data, dtype=torch.float32)
    # Calculate mean and std along the timesteps
    mean = torch.mean(data_tensor, dim=2, keepdim=True)
    std = torch.std(data_tensor, dim=2, keepdim=True)
    # Perform z-score norm
    normalized_data = (data_tensor - mean) / std
    return normalized_data

In [169]:
## INTRA Data Loading and Preprocessing

intra_train_glob = list((intra_dir / "train").glob("*.h5"))
intra_test_glob = list((intra_dir / "test").glob("*.h5"))

intra_train_X = np.stack([load_h5(path) for path in intra_train_glob])
intra_train_labels = np.array([load_labels(path)[0] for path in intra_train_glob])
intra_test_X = np.stack([load_h5(path) for path in intra_test_glob])
intra_test_labels = np.array([load_labels(path)[0] for path in intra_test_glob])

In [170]:
import gc

intra_train_X_downsampled = downsample(intra_train_X, 2034, 125)
intra_train_X_norm = z_score_normalize(intra_train_X_downsampled)

intra_test_X_downsampled = downsample(intra_test_X, 2034, 125)
intra_test_X_norm = z_score_normalize(intra_test_X_downsampled)

del intra_train_X, intra_test_X, intra_train_X_downsampled, intra_test_X_downsampled
gc.collect()

63569

In [171]:
intra_train_X_preprocessed = intra_train_X_norm.numpy()
intra_train_X_preprocessed.shape
intra_test_X_preprocessed = intra_test_X_norm.numpy()
intra_test_X_preprocessed.shape

(8, 248, 2226)

In [172]:
num_classes = 4
intra_train_labels_one_hot = to_categorical(intra_train_labels, num_classes)
intra_test_labels_one_hot = to_categorical(intra_test_labels, num_classes)

del intra_train_labels, intra_test_labels
gc.collect()

7206

# Model

In [173]:
def build_lstm_model(input_shape, num_classes):
    model = Sequential()
    model.add(LSTM(256, input_shape=input_shape, return_sequences=True))
    Dropout(0.5),
    model.add(LSTM(256))
    Dropout(0.5),
    model.add(Dense(4, activation='softmax'))  # 4 classes
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Training

In [174]:
# Initialize KFold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

In [175]:
# Initialize KFold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

In [176]:
fold_no = 1
for train, val in kfold.split(intra_train_X_preprocessed, intra_train_labels_one_hot):
    # Build models for each fold
    model_lstm = build_lstm_model(intra_train_X_preprocessed.shape[1:], 4)

    # Training
    print(f'Training for fold {fold_no} ...')
    history_lstm = model_lstm.fit(intra_train_X_preprocessed[train], intra_train_labels_one_hot[train], epochs=10, batch_size=32, validation_data=(intra_train_X_preprocessed[val], intra_train_labels_one_hot[val]))

    fold_no += 1

Training for fold 1 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 2 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 3 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 4 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 5 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [177]:
# Evaluate models
performance_lstm = model_lstm.evaluate(intra_test_X_preprocessed, intra_test_labels_one_hot)
print("Standard LSTM Model Performance:", performance_lstm)

Standard LSTM Model Performance: [2.1094112396240234, 0.375]


# Cross Data

In [178]:
#del intra_train_X_norm, intra_test_X_norm, intra_train_labels, intra_test_labels, intra_train_labels_one_hot, intra_test_labels_one_hot
gc.collect()

74098

In [179]:
cross_train_glob = list((cross_dir / "train").glob("*.h5")) + list((cross_dir / "test1").glob("*.h5")) + list((cross_dir / "test2").glob("*.h5"))
cross_test_glob = list((cross_dir / "test3").glob("*.h5"))

In [180]:
cross_train_X = np.stack([load_h5(path) for path in cross_train_glob])
cross_train_labels = np.array([load_labels(path)[0] for path in cross_train_glob])

cross_test_X = np.stack([load_h5(path) for path in cross_test_glob])
cross_test_labels = np.array([load_labels(path)[0] for path in cross_test_glob])

In [181]:
# Preprocess Cross data
cross_train_X_downsampled = downsample(cross_train_X, 2034, 125)
cross_train_X_norm = z_score_normalize(cross_train_X_downsampled)

cross_test_X_downsampled = downsample(cross_test_X, 2034, 125)
cross_test_X_norm = z_score_normalize(cross_test_X_downsampled)

del cross_train_X, cross_test_X, cross_train_X_downsampled, cross_test_X_downsampled
gc.collect()

0

In [182]:
cross_train_X_norm = cross_train_X_norm.numpy()
cross_test_X_norm = cross_test_X_norm.numpy()

In [183]:
# Convert labels to categorical
cross_train_labels_cat = to_categorical(cross_train_labels)
cross_test_labels_cat = to_categorical(cross_test_labels)

In [184]:
# Parameters
num_folds = 5
kfold = KFold(n_splits=num_folds, shuffle=True)

# K-Fold Cross-validation
fold_no = 1
for train, val in kfold.split(cross_train_X_norm, cross_train_labels_cat):
    # Build models for each fold
    model_lstm_cross = build_lstm_model(cross_train_X_norm.shape[1:], 4)

    # Training
    print(f'Training for fold {fold_no} ...')
    history_lstm_cross = model_lstm_cross.fit(cross_train_X_norm[train], cross_train_labels_cat[train], epochs=10, batch_size=32, validation_data=(cross_train_X_norm[val], cross_train_labels_cat[val]))

    fold_no += 1

Training for fold 1 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 2 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 3 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 4 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training for fold 5 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [185]:
# Evaluate models
performance_lstm = model_lstm_cross.evaluate(cross_test_X_norm, cross_test_labels_cat)
print("Standard LSTM Model Performance:", performance_lstm)

Standard LSTM Model Performance: [3.253880500793457, 0.5]
