# Notebook Description

In this notebook we tried exprimenting a generalized subject indpendent model. 

## setting up the environment

In [10]:
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('../src')  
from data_loading import load_and_combine_mat_data
from preprocessing import preprocess_data
from sklearn.preprocessing import RobustScaler

from evaluation import evaluate_model
from feature_selection import get_subject_indices
from sklearn.neural_network import MLPClassifier
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_selection import RFECV
from sklearn.model_selection import train_test_split, StratifiedKFold


## data loading

In [2]:
# List of file paths
mat_file_paths = [
    '../data/Subject_10.mat', '../data/Subject_9.mat', '../data/Subject_8.mat', '../data/Subject_7.mat', '../data/Subject_6.mat', 
    '../data/Subject_5.mat', '../data/Subject_4.mat', '../data/Subject_3.mat', '../data/Subject_2.mat', '../data/Subject_1.mat'
]

X, y, Channels = load_and_combine_mat_data(mat_file_paths)

Combined EEG Data Shape (Samples, Channels, Trials): (512, 128, 2236)
Combined Labels Shape: (2236,)
Channels: ['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10', 'A11', 'A12', 'A13', 'A14', 'A15', 'A16', 'A17', 'A18', 'A19', 'A20', 'A21', 'A22', 'A23', 'A24', 'A25', 'A26', 'A27', 'A28', 'A29', 'A30', 'A31', 'A32', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10', 'B11', 'B12', 'B13', 'B14', 'B15', 'B16', 'B17', 'B18', 'B19', 'B20', 'B21', 'B22', 'B23', 'B24', 'B25', 'B26', 'B27', 'B28', 'B29', 'B30', 'B31', 'B32', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'C32', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'D16', 'D17', 'D18', 'D19', 'D20', 'D21', 'D22', 'D23', 'D24', 'D25', 'D26', 'D27', 'D28', 'D29', 'D30', 'D31', 'D32']


selecting all unique channels selected using RFE per subject

In [3]:
# Selected channels for each subject
selected_channels = {
    'Subject 1': ['A29', 'B10', 'B27', 'C5', 'D4', 'D9', 'D24', 'D27'],
    'Subject 2': ['A1', 'A6', 'A12', 'B23', 'B27', 'C29', 'D10', 'D22'],
    'Subject 3': ['A15', 'A20', 'B10', 'B16', 'C3', 'C7', 'D24', 'D32'],
    'Subject 4': ['A14', 'A23', 'B7', 'B11', 'C8', 'C15', 'D6', 'D32'],
    'Subject 5': ['A11', 'A25', 'A32', 'B11', 'B27', 'C18', 'D7', 'D25'],
    'Subject 6': ['A12', 'A20', 'B2', 'B15', 'B24', 'B27', 'C6', 'D32'],
    'Subject 7': ['A14', 'A22', 'A26', 'A29', 'B8', 'B15', 'C9', 'D32'],
    'Subject 8': ['A16', 'A27', 'B6', 'B11', 'C10', 'C31', 'D19', 'D32'],
    'Subject 9': ['A1', 'A20', 'B11', 'B17', 'C8', 'D6', 'D18', 'D23'],
    'Subject 10': ['A3', 'A14', 'A17', 'A27', 'B25', 'C9', 'D23', 'D32']
}

In [4]:
# Combine all channels into a single set to remove duplicates
unique_channels = set(channel for channels in selected_channels.values() for channel in channels)

# Convert to list if needed
unique_channels_list = list(unique_channels)

# Print the unique list of channels
print("Unique channels:", unique_channels_list)
print(len(unique_channels_list))

Unique channels: ['B10', 'C7', 'B16', 'D18', 'B7', 'D23', 'D6', 'B6', 'A11', 'B11', 'A26', 'A23', 'C15', 'A3', 'C3', 'D19', 'A6', 'A20', 'A15', 'A17', 'B27', 'B23', 'D32', 'A25', 'B15', 'C6', 'C8', 'D4', 'A29', 'A16', 'D10', 'C31', 'B25', 'C10', 'D24', 'A27', 'A12', 'C29', 'D25', 'A1', 'B2', 'B8', 'C5', 'D22', 'B24', 'A32', 'A22', 'C9', 'A14', 'D7', 'C18', 'D9', 'D27', 'B17']
54


In [5]:
# Find the indices of the selected channels
selected_indices = [Channels.index(ch) for ch in unique_channels_list]

# Select only the specified channels from the data
X_selected = X[:, selected_indices, :]

In [6]:
X_selected.shape

(512, 54, 2236)

## data preprocessing

In [7]:
X_preprocessed =  preprocess_data(X_selected)
print(X_preprocessed.shape)

(512, 54, 2236)


# Modeling

## MLP

In [8]:
X = np.transpose(X_preprocessed, (2, 0, 1))  
X = X.reshape(X_preprocessed.shape[2], -1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
# Initialize the RobustScaler
scaler = RobustScaler()
# Fit Data of trainset and transform testset
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)  

In [12]:
print("X_train_scaled shape:", X_train_scaled.shape)  
print("X_test_scaled shape:", X_test_scaled.shape) 

X_train_scaled shape: (1788, 27648)
X_test_scaled shape: (448, 27648)


In [13]:
# Define and train the MLP with anti-overfitting measures
mlp = MLPClassifier(
    hidden_layer_sizes=(128, 64),       #  neurons and layers
    activation='relu',                  # ReLU activation function
    solver='adam',                      # Adam optimizer
    alpha=0.01,                         # Increased L2 regularization (weight decay)
    learning_rate_init=0.001,           # Learning rate
    max_iter=300,                       # Set maximum iterations
    random_state=42,
    early_stopping=True,                # Early stopping to prevent overfitting
    validation_fraction=0.2,            # Use 20% of the training data for validation
    n_iter_no_change=10                 # Stop if no improvement for 10 iterations
)

In [14]:
mlp.fit(X_train_scaled, y_train)

In [15]:
evaluate_model(mlp, X_test_scaled, y_test, X_train_scaled, y_train)

Test Accuracy: 0.6272321428571429
Classification Report Test:
               precision    recall  f1-score   support

         0.0       0.64      0.63      0.63        97
         1.0       0.58      0.66      0.62       113
         2.0       0.64      0.61      0.62       119
         3.0       0.66      0.61      0.64       119

    accuracy                           0.63       448
   macro avg       0.63      0.63      0.63       448
weighted avg       0.63      0.63      0.63       448

Train Accuracy: 0.9194630872483222
Classification Report Train:
               precision    recall  f1-score   support

         0.0       0.93      0.91      0.92       462
         1.0       0.90      0.94      0.92       446
         2.0       0.93      0.92      0.92       440
         3.0       0.92      0.91      0.91       440

    accuracy                           0.92      1788
   macro avg       0.92      0.92      0.92      1788
weighted avg       0.92      0.92      0.92      1788



{'test_accuracy': 0.6272321428571429,
 'test_report': '              precision    recall  f1-score   support\n\n         0.0       0.64      0.63      0.63        97\n         1.0       0.58      0.66      0.62       113\n         2.0       0.64      0.61      0.62       119\n         3.0       0.66      0.61      0.64       119\n\n    accuracy                           0.63       448\n   macro avg       0.63      0.63      0.63       448\nweighted avg       0.63      0.63      0.63       448\n',
 'train_accuracy': 0.9194630872483222,
 'train_report': '              precision    recall  f1-score   support\n\n         0.0       0.93      0.91      0.92       462\n         1.0       0.90      0.94      0.92       446\n         2.0       0.93      0.92      0.92       440\n         3.0       0.92      0.91      0.91       440\n\n    accuracy                           0.92      1788\n   macro avg       0.92      0.92      0.92      1788\nweighted avg       0.92      0.92      0.92      178

### Hyper-parameters tuning

In [16]:
mlp_2 = MLPClassifier(
    hidden_layer_sizes=(128, 32),       # Fewer neurons to reduce complexity
    activation='relu',                 # ReLU activation function
    solver='adam',                     # Adam optimizer
    alpha=0.05,                        # Increased L2 regularization
    learning_rate_init=0.0001,         # Smaller learning rate
    max_iter=200,                      # Reduce max iterations
    random_state=42,
    early_stopping=True,               # Early stopping to prevent overfitting
    validation_fraction=0.2,           # Use 20% of the training data for validation
    n_iter_no_change=5                 # Stop if no improvement for fewer iterations
)

In [17]:
mlp_2.fit(X_train_scaled, y_train)

In [18]:
evaluate_model(mlp_2, X_test_scaled, y_test, X_train_scaled, y_train)

Test Accuracy: 0.7566964285714286
Classification Report Test:
               precision    recall  f1-score   support

         0.0       0.68      0.79      0.73        97
         1.0       0.78      0.73      0.76       113
         2.0       0.73      0.71      0.72       119
         3.0       0.83      0.79      0.81       119

    accuracy                           0.76       448
   macro avg       0.76      0.76      0.76       448
weighted avg       0.76      0.76      0.76       448

Train Accuracy: 0.9435123042505593
Classification Report Train:
               precision    recall  f1-score   support

         0.0       0.95      0.95      0.95       462
         1.0       0.96      0.93      0.95       446
         2.0       0.91      0.96      0.93       440
         3.0       0.96      0.93      0.94       440

    accuracy                           0.94      1788
   macro avg       0.94      0.94      0.94      1788
weighted avg       0.94      0.94      0.94      1788



{'test_accuracy': 0.7566964285714286,
 'test_report': '              precision    recall  f1-score   support\n\n         0.0       0.68      0.79      0.73        97\n         1.0       0.78      0.73      0.76       113\n         2.0       0.73      0.71      0.72       119\n         3.0       0.83      0.79      0.81       119\n\n    accuracy                           0.76       448\n   macro avg       0.76      0.76      0.76       448\nweighted avg       0.76      0.76      0.76       448\n',
 'train_accuracy': 0.9435123042505593,
 'train_report': '              precision    recall  f1-score   support\n\n         0.0       0.95      0.95      0.95       462\n         1.0       0.96      0.93      0.95       446\n         2.0       0.91      0.96      0.93       440\n         3.0       0.96      0.93      0.94       440\n\n    accuracy                           0.94      1788\n   macro avg       0.94      0.94      0.94      1788\nweighted avg       0.94      0.94      0.94      178

## CNN 
more complex model

In [22]:
X_preprocessed.shape

(512, 54, 2236)

In [23]:
X_cnn = X_preprocessed.transpose(2, 0, 1).reshape(X_preprocessed.shape[2], 512, X_preprocessed.shape[1], 1)

X_train, X_test, y_train, y_test = train_test_split(X_cnn, y, test_size=0.2, random_state=42)

In [24]:
# Reshape X_train and X_test to 2D for scaling
X_train_reshaped = X_train.reshape(-1, X_train.shape[2])
X_test_reshaped = X_test.reshape(-1, X_test.shape[2])

# Initialize and fit RobustScaler on X_train
scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train_reshaped)
X_test_scaled = scaler.transform(X_test_reshaped)

# Reshape back to the original shape
X_train_scaled = X_train_scaled.reshape(X_train.shape)
X_test_scaled = X_test_scaled.reshape(X_test.shape)

In [25]:
print("X_train_scaled shape:", X_train_scaled.shape)
print("X_test_scaled shape:", X_test_scaled.shape)

X_train_scaled shape: (1788, 512, 54, 1)
X_test_scaled shape: (448, 512, 54, 1)


In [26]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, SeparableConv2D, AveragePooling2D, Dropout, Flatten, Dense, BatchNormalization, Activation
from tensorflow.keras.optimizers import Nadam

# Define the EEGNet model
def create_eegnet_model(input_shape=(512, 54, 1), num_classes=4):
    model = Sequential()

    # First Conv2D block
    model.add(Conv2D(4, (64, 1), padding='same', input_shape=input_shape, use_bias=False))  # Reduced filters
    model.add(BatchNormalization())

    # Depthwise Conv2D block
    model.add(DepthwiseConv2D((1, 1), use_bias=False, depth_multiplier=2, padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('elu'))
    model.add(AveragePooling2D((1, 4)))  # Pooling along the width
    model.add(Dropout(0.2))

    # Separable Conv2D block
    model.add(SeparableConv2D(8, (1, 16), use_bias=False, padding='same'))  # Reduced filters
    model.add(BatchNormalization())
    model.add(Activation('elu'))

    # Adjust the pooling size here to avoid negative dimensions
    model.add(AveragePooling2D((1, 2)))  # Reduced from (1, 4) to (1, 2)
    model.add(Dropout(0.2))

    # Flatten and classification
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))

    return model

# Build and compile the model
model = create_eegnet_model()

# Define Nadam optimizer
optimizer = Nadam(learning_rate=0.001)

# Compile the model
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()


2024-11-10 15:18:09.508598: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-10 15:18:09.524239: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-10 15:18:09.528940: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-10 15:18:09.541871: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

In [27]:
from tensorflow.keras.callbacks import LearningRateScheduler
import numpy as np

# Cyclical learning rate function
def cyclical_lr(step_size, min_lr=1e-5, max_lr=5e-3):
    def clr(epoch):
        cycle = np.floor(1 + epoch / (2 * step_size))
        x = np.abs(epoch / step_size - 2 * cycle + 1)
        lr = min_lr + (max_lr - min_lr) * np.maximum(0, (1 - x))
        return lr
    return clr

# Set cyclical learning rate scheduler
clr_callback = LearningRateScheduler(cyclical_lr(step_size=200))

# Train the model
history = model.fit(X_train_scaled, y_train,
                    validation_data=(X_test_scaled, y_test),
                    epochs=50,  # Reduced epochs
                    batch_size=64,  # Reduced batch size
                    callbacks=[clr_callback])

Epoch 1/50


I0000 00:00:1731251919.020531   28505 service.cc:146] XLA service 0x79f4f8006d30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1731251919.020590   28505 service.cc:154]   StreamExecutor device (0): NVIDIA A16, Compute Capability 8.6
I0000 00:00:1731251919.020599   28505 service.cc:154]   StreamExecutor device (1): NVIDIA A16, Compute Capability 8.6
I0000 00:00:1731251919.020607   28505 service.cc:154]   StreamExecutor device (2): NVIDIA A16, Compute Capability 8.6
I0000 00:00:1731251919.020613   28505 service.cc:154]   StreamExecutor device (3): NVIDIA A16, Compute Capability 8.6
2024-11-10 15:18:39.280462: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-11-10 15:18:39.554644: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m 3/28[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 44ms/step - accuracy: 0.2700 - loss: 1.7325

I0000 00:00:1731251933.115452   28505 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 498ms/step - accuracy: 0.2526 - loss: 1.7538 - val_accuracy: 0.2344 - val_loss: 1.3922 - learning_rate: 1.0000e-05
Epoch 2/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.2521 - loss: 1.6443 - val_accuracy: 0.2634 - val_loss: 1.3797 - learning_rate: 3.4950e-05
Epoch 3/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step - accuracy: 0.3069 - loss: 1.4845 - val_accuracy: 0.3415 - val_loss: 1.3604 - learning_rate: 5.9900e-05
Epoch 4/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.4088 - loss: 1.3120 - val_accuracy: 0.3415 - val_loss: 1.3376 - learning_rate: 8.4850e-05
Epoch 5/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step - accuracy: 0.4377 - loss: 1.2485 - val_accuracy: 0.3973 - val_loss: 1.3137 - learning_rate: 1.0980e-04
Epoch 6/50
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 