In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import seaborn as sns

In [2]:
from sktime.transformations.panel.rocket import MiniRocket
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, balanced_accuracy_score
from sklearn.model_selection import LeaveOneOut, RandomizedSearchCV, LeaveOneGroupOut
from sklearn.preprocessing import LabelEncoder

## Transform data on Minirocket

#### 30 second

In [3]:
file_paths = glob.glob('eeg_label/*_eeg_label.csv')

data_list = []
labels_list = []
patient_ids = []

for file_path in file_paths:
    eeg_data = pd.read_csv(file_path)
    patient_id = file_path.split('/')[-1]
    
    # Segment the data into non-overlapping 30 second windows
    segment_size = 128 * 30
    num_segments = len(eeg_data) // segment_size
    
    for i in range(num_segments):
        start_idx = i * segment_size
        end_idx = start_idx + segment_size
        segment = eeg_data['EEG'].iloc[start_idx:end_idx].values
        label = eeg_data['Label'].iloc[start_idx:end_idx].mode()[0]
        
        data_list.append(segment)
        labels_list.append(label)
        patient_ids.append(patient_id)  # Repeat patient ID for each segment

# Convert lists to numpy arrays
X = np.array(data_list)
y = np.array(labels_list)
patient_ids = np.array(patient_ids)

In [4]:
# Reshape to perform minirocket
X_reshaped = X.reshape(X.shape[0], 1, X.shape[1])

In [5]:
# Angus Dempster, Daniel F Schmidt, Geoffrey I Webb

# MiniRocket: A Very Fast (Almost) Deterministic Transform for Time Series
# Classification

# https://arxiv.org/abs/2012.08791

from numba import njit, prange, vectorize
import numpy as np

@njit("float32[:](float32[:,:],int32[:],int32[:],float32[:])", fastmath = True, parallel = False, cache = True)
def _fit_biases(X, dilations, num_features_per_dilation, quantiles):

    num_examples, input_length = X.shape

    # equivalent to:
    # >>> from itertools import combinations
    # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32)
    indices = np.array((
        0,1,2,0,1,3,0,1,4,0,1,5,0,1,6,0,1,7,0,1,8,
        0,2,3,0,2,4,0,2,5,0,2,6,0,2,7,0,2,8,0,3,4,
        0,3,5,0,3,6,0,3,7,0,3,8,0,4,5,0,4,6,0,4,7,
        0,4,8,0,5,6,0,5,7,0,5,8,0,6,7,0,6,8,0,7,8,
        1,2,3,1,2,4,1,2,5,1,2,6,1,2,7,1,2,8,1,3,4,
        1,3,5,1,3,6,1,3,7,1,3,8,1,4,5,1,4,6,1,4,7,
        1,4,8,1,5,6,1,5,7,1,5,8,1,6,7,1,6,8,1,7,8,
        2,3,4,2,3,5,2,3,6,2,3,7,2,3,8,2,4,5,2,4,6,
        2,4,7,2,4,8,2,5,6,2,5,7,2,5,8,2,6,7,2,6,8,
        2,7,8,3,4,5,3,4,6,3,4,7,3,4,8,3,5,6,3,5,7,
        3,5,8,3,6,7,3,6,8,3,7,8,4,5,6,4,5,7,4,5,8,
        4,6,7,4,6,8,4,7,8,5,6,7,5,6,8,5,7,8,6,7,8
    ), dtype = np.int32).reshape(84, 3)

    num_kernels = len(indices)
    num_dilations = len(dilations)

    num_features = num_kernels * np.sum(num_features_per_dilation)

    biases = np.zeros(num_features, dtype = np.float32)

    feature_index_start = 0

    for dilation_index in range(num_dilations):

        dilation = dilations[dilation_index]
        padding = ((9 - 1) * dilation) // 2

        num_features_this_dilation = num_features_per_dilation[dilation_index]

        for kernel_index in range(num_kernels):

            feature_index_end = feature_index_start + num_features_this_dilation

            _X = X[np.random.randint(num_examples)]

            A = -_X          # A = alpha * X = -X
            G = _X + _X + _X # G = gamma * X = 3X

            C_alpha = np.zeros(input_length, dtype = np.float32)
            C_alpha[:] = A

            C_gamma = np.zeros((9, input_length), dtype = np.float32)
            C_gamma[9 // 2] = G

            start = dilation
            end = input_length - padding

            for gamma_index in range(9 // 2):

                C_alpha[-end:] = C_alpha[-end:] + A[:end]
                C_gamma[gamma_index, -end:] = G[:end]

                end += dilation

            for gamma_index in range(9 // 2 + 1, 9):

                C_alpha[:-start] = C_alpha[:-start] + A[start:]
                C_gamma[gamma_index, :-start] = G[start:]

                start += dilation

            index_0, index_1, index_2 = indices[kernel_index]

            C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2]

            biases[feature_index_start:feature_index_end] = np.quantile(C, quantiles[feature_index_start:feature_index_end])

            feature_index_start = feature_index_end

    return biases

def _fit_dilations(input_length, num_features, max_dilations_per_kernel):

    num_kernels = 84

    num_features_per_kernel = num_features // num_kernels
    true_max_dilations_per_kernel = min(num_features_per_kernel, max_dilations_per_kernel)
    multiplier = num_features_per_kernel / true_max_dilations_per_kernel

    max_exponent = np.log2((input_length - 1) / (9 - 1))
    dilations, num_features_per_dilation = \
    np.unique(np.logspace(0, max_exponent, true_max_dilations_per_kernel, base = 2).astype(np.int32), return_counts = True)
    num_features_per_dilation = (num_features_per_dilation * multiplier).astype(np.int32) # this is a vector

    remainder = num_features_per_kernel - np.sum(num_features_per_dilation)
    i = 0
    while remainder > 0:
        num_features_per_dilation[i] += 1
        remainder -= 1
        i = (i + 1) % len(num_features_per_dilation)

    return dilations, num_features_per_dilation

# low-discrepancy sequence to assign quantiles to kernel/dilation combinations
def _quantiles(n):
    return np.array([(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], dtype = np.float32)

def fit(X, num_features = 10_000, max_dilations_per_kernel = 32):

    _, input_length = X.shape

    num_kernels = 84

    dilations, num_features_per_dilation = _fit_dilations(input_length, num_features, max_dilations_per_kernel)

    num_features_per_kernel = np.sum(num_features_per_dilation)

    quantiles = _quantiles(num_kernels * num_features_per_kernel)

    biases = _fit_biases(X, dilations, num_features_per_dilation, quantiles)

    return dilations, num_features_per_dilation, biases

# _PPV(C, b).mean() returns PPV for vector C (convolution output) and scalar b (bias)
@vectorize("float32(float32,float32)", nopython = True, cache = True)
def _PPV(a, b):
    if a > b:
        return 1
    else:
        return 0

@njit("float32[:,:](float32[:,:],Tuple((int32[:],int32[:],float32[:])))", fastmath = True, parallel = True, cache = True)
def transform(X, parameters):

    num_examples, input_length = X.shape

    dilations, num_features_per_dilation, biases = parameters

    # equivalent to:
    # >>> from itertools import combinations
    # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32)
    indices = np.array((
        0,1,2,0,1,3,0,1,4,0,1,5,0,1,6,0,1,7,0,1,8,
        0,2,3,0,2,4,0,2,5,0,2,6,0,2,7,0,2,8,0,3,4,
        0,3,5,0,3,6,0,3,7,0,3,8,0,4,5,0,4,6,0,4,7,
        0,4,8,0,5,6,0,5,7,0,5,8,0,6,7,0,6,8,0,7,8,
        1,2,3,1,2,4,1,2,5,1,2,6,1,2,7,1,2,8,1,3,4,
        1,3,5,1,3,6,1,3,7,1,3,8,1,4,5,1,4,6,1,4,7,
        1,4,8,1,5,6,1,5,7,1,5,8,1,6,7,1,6,8,1,7,8,
        2,3,4,2,3,5,2,3,6,2,3,7,2,3,8,2,4,5,2,4,6,
        2,4,7,2,4,8,2,5,6,2,5,7,2,5,8,2,6,7,2,6,8,
        2,7,8,3,4,5,3,4,6,3,4,7,3,4,8,3,5,6,3,5,7,
        3,5,8,3,6,7,3,6,8,3,7,8,4,5,6,4,5,7,4,5,8,
        4,6,7,4,6,8,4,7,8,5,6,7,5,6,8,5,7,8,6,7,8
    ), dtype = np.int32).reshape(84, 3)

    num_kernels = len(indices)
    num_dilations = len(dilations)

    num_features = num_kernels * np.sum(num_features_per_dilation)

    features = np.zeros((num_examples, num_features), dtype = np.float32)

    for example_index in prange(num_examples):

        _X = X[example_index]

        A = -_X          # A = alpha * X = -X
        G = _X + _X + _X # G = gamma * X = 3X

        feature_index_start = 0

        for dilation_index in range(num_dilations):

            _padding0 = dilation_index % 2

            dilation = dilations[dilation_index]
            padding = ((9 - 1) * dilation) // 2

            num_features_this_dilation = num_features_per_dilation[dilation_index]

            C_alpha = np.zeros(input_length, dtype = np.float32)
            C_alpha[:] = A

            C_gamma = np.zeros((9, input_length), dtype = np.float32)
            C_gamma[9 // 2] = G

            start = dilation
            end = input_length - padding

            for gamma_index in range(9 // 2):

                C_alpha[-end:] = C_alpha[-end:] + A[:end]
                C_gamma[gamma_index, -end:] = G[:end]

                end += dilation

            for gamma_index in range(9 // 2 + 1, 9):

                C_alpha[:-start] = C_alpha[:-start] + A[start:]
                C_gamma[gamma_index, :-start] = G[start:]

                start += dilation

            for kernel_index in range(num_kernels):

                feature_index_end = feature_index_start + num_features_this_dilation

                _padding1 = (_padding0 + kernel_index) % 2

                index_0, index_1, index_2 = indices[kernel_index]

                C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2]

                if _padding1 == 0:
                    for feature_count in range(num_features_this_dilation):
                        features[example_index, feature_index_start + feature_count] = _PPV(C, biases[feature_index_start + feature_count]).mean()
                else:
                    for feature_count in range(num_features_this_dilation):
                        features[example_index, feature_index_start + feature_count] = _PPV(C[padding:-padding], biases[feature_index_start + feature_count]).mean()

                feature_index_start = feature_index_end

    return features

## XGBoost

In [26]:
# Encode the labels as integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Initialise variables to store results
all_preds = []
all_labels = []
all_patient_ids = np.unique(patient_ids)

# Convert data types to match what the functions expect
X_float32 = X.astype(np.float32)  # Ensure X is float32
parameters = fit(X_float32)  

# Transform the data
X_transformed = transform(X_float32, parameters)

# Initialise LOPO cross-validator
logo = LeaveOneGroupOut()

# Loop over each patient (Leave-One-Patient-Out)
for train_idx, test_idx in logo.split(X_transformed, y_encoded, groups=patient_ids):
    # Split the data based on patient IDs
    X_train, X_test = X_transformed[train_idx], X_transformed[test_idx]
    y_train, y_test = y_encoded[train_idx], y_encoded[test_idx]
    
    # Initialise the XGBoost classifier
    xgb_classifier = xgb.XGBClassifier(eval_metric='mlogloss', random_state=2)
    
    # Train the classifier
    xgb_classifier.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = xgb_classifier.predict(X_test)
    
    # Store the results
    all_preds.extend(y_pred)
    all_labels.extend(y_test)
    
    # Calculate accuracy for this patient
    accuracy = (y_pred == y_test).sum() / len(y_test)
    print(f'Patient ID: {patient_ids[test_idx[0]]}, Accuracy: {accuracy * 100:.2f}%')

Patient ID: L05200708_eeg_label.csv, Accuracy: 73.24%
Patient ID: L05211742_eeg_label.csv, Accuracy: 49.32%
Patient ID: L05250816_eeg_label.csv, Accuracy: 60.64%
Patient ID: L05250921_eeg_label.csv, Accuracy: 32.47%
Patient ID: L05271431_eeg_label.csv, Accuracy: 58.72%
Patient ID: L05281010_eeg_label.csv, Accuracy: 51.96%
Patient ID: L06101015_eeg_label.csv, Accuracy: 55.19%
Patient ID: L06181302_eeg_label.csv, Accuracy: 16.28%
Patient ID: L06181332_eeg_label.csv, Accuracy: 45.28%
Patient ID: L06221009_eeg_label.csv, Accuracy: 15.18%
Patient ID: L06221141_eeg_label.csv, Accuracy: 45.45%
Patient ID: L06221219_eeg_label.csv, Accuracy: 21.43%
Patient ID: L08181442_eeg_label.csv, Accuracy: 34.04%
Patient ID: L08190811_eeg_label.csv, Accuracy: 46.15%
Patient ID: L08190921_eeg_label.csv, Accuracy: 45.83%


In [36]:
accuracy = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average='macro')

print(f'Overall Accuracy: {accuracy * 100:.2f}%')
print(f'Overall F1 Score: {f1:.2f}')

Overall Accuracy: 73.24%
Overall F1 Score: 0.63


In [6]:
import optuna
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score, LeaveOneGroupOut
from sklearn.preprocessing import LabelEncoder

# Encode the labels as integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Convert data types to match what the functions expect
X_float32 = X.astype(np.float32)  # Ensure X is float32
parameters = fit(X_float32)  

# Transform the data
X_transformed = transform(X_float32, parameters)

# Initialise LOPO cross-validator
logo = LeaveOneGroupOut()

# Define the objective function for Optuna
def objective(trial):
    # Suggest values for hyperparameters (with rounding)
    learning_rate = round(trial.suggest_float('learning_rate', 0.01, 0.3), 2) 
    max_depth = trial.suggest_int('max_depth', 3, 10)
    n_estimators = trial.suggest_int('n_estimators', 100, 300)
    subsample = round(trial.suggest_float('subsample', 0.6, 1.0), 1)
    colsample_bytree = round(trial.suggest_float('colsample_bytree', 0.6, 1.0), 1)

    # Parameters for XGBoost
    param = {
        'learning_rate': learning_rate,
        'max_depth': max_depth,
        'n_estimators': n_estimators,
        'subsample': subsample,
        'colsample_bytree': colsample_bytree,
        'eval_metric': 'mlogloss',
        'random_state': 2
    }

    # Create the XGBoost classifier
    xgb_classifier = XGBClassifier(**param)

    # Perform cross-validation and return the average accuracy score (LOPO)
    accuracy = cross_val_score(xgb_classifier, X_transformed, y_encoded, cv=logo, groups=patient_ids, scoring='accuracy')
    
    # Return the mean accuracy over the cross-validation splits
    return accuracy.mean()

# Start the optimization process
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

# Output the best parameters
print("Best parameters:", study.best_params_)

[I 2024-09-18 00:01:49,245] A new study created in memory with name: no-name-00c0a590-f234-46c9-9506-48fffd7b367d
[I 2024-09-18 00:26:54,173] Trial 0 finished with value: 0.4643025347570714 and parameters: {'learning_rate': 0.1693489342438168, 'max_depth': 4, 'n_estimators': 256, 'subsample': 0.8265661936472543, 'colsample_bytree': 0.6150105512578169}. Best is trial 0 with value: 0.4643025347570714.
[I 2024-09-18 01:01:27,843] Trial 1 finished with value: 0.4526451381676521 and parameters: {'learning_rate': 0.04853493756638328, 'max_depth': 6, 'n_estimators': 148, 'subsample': 0.9069294818744005, 'colsample_bytree': 0.6876017005020187}. Best is trial 0 with value: 0.4643025347570714.
[I 2024-09-18 01:22:05,226] Trial 2 finished with value: 0.4465491947259446 and parameters: {'learning_rate': 0.2769729378226353, 'max_depth': 3, 'n_estimators': 265, 'subsample': 0.721927744764769, 'colsample_bytree': 0.8214973643761673}. Best is trial 0 with value: 0.4643025347570714.
[I 2024-09-18 01:52

AttributeError: 'Study' object has no attribute 'best_params_'

#### 2 second

In [7]:
file_paths = glob.glob('eeg_label/*_eeg_label.csv')

data_list = []
labels_list = []
patient_ids = []

for file_path in file_paths:
    eeg_data = pd.read_csv(file_path)
    patient_id = file_path.split('/')[-1]
    
    # Segment the data into non-overlapping 30 second windows
    segment_size = 128 * 2
    num_segments = len(eeg_data) // segment_size
    
    for i in range(num_segments):
        start_idx = i * segment_size
        end_idx = start_idx + segment_size
        segment = eeg_data['EEG'].iloc[start_idx:end_idx].values
        label = eeg_data['Label'].iloc[start_idx:end_idx].mode()[0]
        
        data_list.append(segment)
        labels_list.append(label)
        patient_ids.append(patient_id)  # Repeat patient ID for each segment

# Convert lists to numpy arrays
X = np.array(data_list)
y = np.array(labels_list)
patient_ids = np.array(patient_ids)

# Reshape to perform minirocket
X_reshaped = X.reshape(X.shape[0], 1, X.shape[1])

In [8]:
# Encode the labels as integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Initialise variables to store results
all_preds = []
all_labels = []
all_patient_ids = np.unique(patient_ids)

# Convert data types to match what the functions expect
X_float32 = X.astype(np.float32)  # Ensure X is float32
parameters = fit(X_float32)  

# Transform the data
X_transformed = transform(X_float32, parameters)

# Initialise LOPO cross-validator
logo = LeaveOneGroupOut()

# Loop over each patient (Leave-One-Patient-Out)
for train_idx, test_idx in logo.split(X_transformed, y_encoded, groups=patient_ids):
    # Split the data based on patient IDs
    X_train, X_test = X_transformed[train_idx], X_transformed[test_idx]
    y_train, y_test = y_encoded[train_idx], y_encoded[test_idx]
    
    # Initialise the XGBoost classifier
    xgb_classifier = xgb.XGBClassifier(eval_metric='mlogloss', random_state=2)
    
    # Train the classifier
    xgb_classifier.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = xgb_classifier.predict(X_test)
    
    # Store the results
    all_preds.extend(y_pred)
    all_labels.extend(y_test)
    
    # Calculate accuracy for this patient
    accuracy = (y_pred == y_test).sum() / len(y_test)
    print(f'Patient ID: {patient_ids[test_idx[0]]}, Accuracy: {accuracy * 100:.2f}%')

Patient ID: L05200708_eeg_label.csv, Accuracy: 56.84%
Patient ID: L05211742_eeg_label.csv, Accuracy: 46.49%
Patient ID: L05250816_eeg_label.csv, Accuracy: 50.39%
Patient ID: L05250921_eeg_label.csv, Accuracy: 44.39%
Patient ID: L05271431_eeg_label.csv, Accuracy: 47.97%
Patient ID: L05281010_eeg_label.csv, Accuracy: 47.75%
Patient ID: L06101015_eeg_label.csv, Accuracy: 48.38%
Patient ID: L06181302_eeg_label.csv, Accuracy: 32.77%
Patient ID: L06181332_eeg_label.csv, Accuracy: 35.74%
Patient ID: L06221009_eeg_label.csv, Accuracy: 29.47%
Patient ID: L06221141_eeg_label.csv, Accuracy: 44.74%
Patient ID: L06221219_eeg_label.csv, Accuracy: 24.10%
Patient ID: L08181442_eeg_label.csv, Accuracy: 34.46%
Patient ID: L08190811_eeg_label.csv, Accuracy: 51.05%
Patient ID: L08190921_eeg_label.csv, Accuracy: 54.00%


In [12]:
from sklearn.metrics import accuracy_score, f1_score

# Encode the labels as integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Initialise variables to store results
all_preds = []
all_labels = []
all_patient_ids = np.unique(patient_ids)

# Convert data types to match what the functions expect
X_float32 = X.astype(np.float32)  # Ensure X is float32
parameters = fit(X_float32)  

# Transform the data
X_transformed = transform(X_float32, parameters)

# Initialise LOPO cross-validator
logo = LeaveOneGroupOut()

# Loop over each patient (Leave-One-Patient-Out)
for train_idx, test_idx in logo.split(X_transformed, y_encoded, groups=patient_ids):
    # Split the data based on patient IDs
    X_train, X_test = X_transformed[train_idx], X_transformed[test_idx]
    y_train, y_test = y_encoded[train_idx], y_encoded[test_idx]
    
    # Initialise the XGBoost classifier
    xgb_classifier = xgb.XGBClassifier(eval_metric='mlogloss', random_state=2)
    
    # Train the classifier
    xgb_classifier.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = xgb_classifier.predict(X_test)
    
    # Store the results
    all_preds.extend(y_pred)
    all_labels.extend(y_test)

# Calculate overall accuracy
overall_accuracy = accuracy_score(all_labels, all_preds)
print(f'Overall Accuracy: {overall_accuracy * 100:.2f}%')

# Calculate F1 score (weighted)
overall_f1_score = f1_score(all_labels, all_preds, average='weighted')
print(f'Overall F1 Score (Weighted): {overall_f1_score:.2f}')

Overall Accuracy: 45.29%
Overall F1 Score (Weighted): 0.40


2 second window F1 and accuracy is lower than 30 second

## ConvTran

In [7]:
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import torch.nn as nn

In [8]:
import numpy as np
from torch import nn
from ConvTran.Models.AbsolutePositionalEncoding import tAPE, AbsolutePositionalEncoding, LearnablePositionalEncoding
from ConvTran.Models.Attention import Attention, Attention_Rel_Scl, Attention_Rel_Vec


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


class Permute(nn.Module):
    def forward(self, x):
        return x.permute(1, 0, 2)


def model_factory(config):
    if config['Net_Type'][0] == 'T':
        model = Transformer(config, num_classes=config['num_labels'])
    elif config['Net_Type'][0] == 'CC-T':
        model = CasualConvTran(config, num_classes=config['num_labels'])
    else:
        model = ConvTran(config, num_classes=config['num_labels'])
    return model


class Transformer(nn.Module):
    def __init__(self, config, num_classes):
        super().__init__()
        # Parameters Initialization -----------------------------------------------
        channel_size, seq_len = config['Data_shape'][1], config['Data_shape'][2]
        emb_size = config['emb_size']
        num_heads = config['num_heads']
        dim_ff = config['dim_ff']
        self.Fix_pos_encode = config['Fix_pos_encode']
        self.Rel_pos_encode = config['Rel_pos_encode']
        # Embedding Layer -----------------------------------------------------------
        self.embed_layer = nn.Sequential(
            nn.Linear(channel_size, emb_size),
            nn.LayerNorm(emb_size, eps=1e-5)
        )

        self.Fix_Position = tAPE(emb_size, dropout=config['dropout'], max_len=seq_len)

        self.LayerNorm1 = nn.LayerNorm(emb_size, eps=1e-5)
        self.LayerNorm2 = nn.LayerNorm(emb_size, eps=1e-5)
        if self.Rel_pos_encode == 'Scalar':
            self.attention_layer = Attention_Rel_Scl(emb_size, num_heads, seq_len, config['dropout'])
        elif self.Rel_pos_encode == 'Vector':
            self.attention_layer = Attention_Rel_Vec(emb_size, num_heads, seq_len, config['dropout'])
        else:
            self.attention_layer = Attention(emb_size, num_heads, config['dropout'])

        self.FeedForward = nn.Sequential(
            nn.Linear(emb_size, dim_ff),
            nn.ReLU(),
            nn.Dropout(config['dropout']),
            nn.Linear(dim_ff, emb_size),
            nn.Dropout(config['dropout']))

        self.gap = nn.AdaptiveAvgPool1d(1)
        self.flatten = nn.Flatten()
        self.out = nn.Linear(emb_size, num_classes)

    def forward(self, x):
        x_src = self.embed_layer(x.permute(0, 2, 1))
        if self.Fix_pos_encode != 'None':
            x_src = self.Fix_Position(x_src)
        att = x_src + self.attention_layer(x_src)
        att = self.LayerNorm1(att)
        out = att + self.FeedForward(att)
        out = self.LayerNorm2(out)

        out = out.permute(0, 2, 1)
        out = self.gap(out)
        out = self.flatten(out)
        out = self.out(out)
        # out = out.permute(1, 0, 2)
        # out = self.out(out[-1])
        return out


class ConvTran(nn.Module):
    def __init__(self, config, num_classes):
        super().__init__()
        # Parameters Initialization -----------------------------------------------
        channel_size, seq_len = config['Data_shape'][1], config['Data_shape'][2]
        emb_size = config['emb_size']
        num_heads = config['num_heads']
        dim_ff = config['dim_ff']
        self.Fix_pos_encode = config['Fix_pos_encode']
        self.Rel_pos_encode = config['Rel_pos_encode']
        # Embedding Layer -----------------------------------------------------------
        self.embed_layer = nn.Sequential(nn.Conv2d(1, emb_size*4, kernel_size=[1, 8], padding='same'),
                                         nn.BatchNorm2d(emb_size*4),
                                         nn.GELU())

        self.embed_layer2 = nn.Sequential(nn.Conv2d(emb_size*4, emb_size, kernel_size=[channel_size, 1], padding='valid'),
                                          nn.BatchNorm2d(emb_size),
                                          nn.GELU())

        if self.Fix_pos_encode == 'tAPE':
            self.Fix_Position = tAPE(emb_size, dropout=config['dropout'], max_len=seq_len)
        elif self.Fix_pos_encode == 'Sin':
            self.Fix_Position = AbsolutePositionalEncoding(emb_size, dropout=config['dropout'], max_len=seq_len)
        elif config['Fix_pos_encode'] == 'Learn':
            self.Fix_Position = LearnablePositionalEncoding(emb_size, dropout=config['dropout'], max_len=seq_len)

        if self.Rel_pos_encode == 'eRPE':
            self.attention_layer = Attention_Rel_Scl(emb_size, num_heads, seq_len, config['dropout'])
        elif self.Rel_pos_encode == 'Vector':
            self.attention_layer = Attention_Rel_Vec(emb_size, num_heads, seq_len, config['dropout'])
        else:
            self.attention_layer = Attention(emb_size, num_heads, config['dropout'])

        self.LayerNorm = nn.LayerNorm(emb_size, eps=1e-5)
        self.LayerNorm2 = nn.LayerNorm(emb_size, eps=1e-5)

        self.FeedForward = nn.Sequential(
            nn.Linear(emb_size, dim_ff),
            nn.ReLU(),
            nn.Dropout(config['dropout']),
            nn.Linear(dim_ff, emb_size),
            nn.Dropout(config['dropout']))

        self.gap = nn.AdaptiveAvgPool1d(1)
        self.flatten = nn.Flatten()
        self.out = nn.Linear(emb_size, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)
        x_src = self.embed_layer(x)
        x_src = self.embed_layer2(x_src).squeeze(2)
        x_src = x_src.permute(0, 2, 1)
        if self.Fix_pos_encode != 'None':
            x_src_pos = self.Fix_Position(x_src)
            att = x_src + self.attention_layer(x_src_pos)
        else:
            att = x_src + self.attention_layer(x_src)
        att = self.LayerNorm(att)
        out = att + self.FeedForward(att)
        out = self.LayerNorm2(out)
        out = out.permute(0, 2, 1)
        out = self.gap(out)
        out = self.flatten(out)
        out = self.out(out)
        return out


class CasualConvTran(nn.Module):
    def __init__(self, config, num_classes):
        super().__init__()
        # Parameters Initialization -----------------------------------------------
        channel_size, seq_len = config['Data_shape'][1], config['Data_shape'][2]
        emb_size = config['emb_size']
        num_heads = config['num_heads']
        dim_ff = config['dim_ff']
        self.Fix_pos_encode = config['Fix_pos_encode']
        self.Rel_pos_encode = config['Rel_pos_encode']
        # Embedding Layer -----------------------------------------------------------
        self.causal_Conv1 = nn.Sequential(CausalConv1d(channel_size, emb_size, kernel_size=8, stride=2, dilation=1),
                                          nn.BatchNorm1d(emb_size), nn.GELU())

        self.causal_Conv2 = nn.Sequential(CausalConv1d(emb_size, emb_size, kernel_size=5, stride=2, dilation=2),
                                          nn.BatchNorm1d(emb_size), nn.GELU())

        self.causal_Conv3 = nn.Sequential(CausalConv1d(emb_size, emb_size, kernel_size=3, stride=2, dilation=2),
                                          nn.BatchNorm1d(emb_size), nn.GELU())

        if self.Fix_pos_encode == 'tAPE':
            self.Fix_Position = tAPE(emb_size, dropout=config['dropout'], max_len=seq_len)
        elif self.Fix_pos_encode == 'Sin':
            self.Fix_Position = tAPE(emb_size, dropout=config['dropout'], max_len=seq_len)
        elif config['Fix_pos_encode'] == 'Learn':
            self.Fix_Position = LearnablePositionalEncoding(emb_size, dropout=config['dropout'], max_len=seq_len)

        if self.Rel_pos_encode == 'eRPE':
            self.attention_layer = Attention_Rel_Scl(emb_size, num_heads, seq_len, config['dropout'])
        elif self.Rel_pos_encode == 'Vector':
            self.attention_layer = Attention_Rel_Vec(emb_size, num_heads, seq_len, config['dropout'])
        else:
            self.attention_layer = Attention(emb_size, num_heads, config['dropout'])

        self.LayerNorm = nn.LayerNorm(emb_size, eps=1e-5)
        self.LayerNorm2 = nn.LayerNorm(emb_size, eps=1e-5)

        self.FeedForward = nn.Sequential(
            nn.Linear(emb_size, dim_ff),
            nn.ReLU(),
            nn.Dropout(config['dropout']),
            nn.Linear(dim_ff, emb_size),
            nn.Dropout(config['dropout']))

        self.gap = nn.AdaptiveAvgPool1d(1)
        self.flatten = nn.Flatten()
        self.out = nn.Linear(emb_size, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)
        x_src = self.embed_layer(x)
        x_src = self.embed_layer2(x_src).squeeze(2)
        x_src = x_src.permute(0, 2, 1)
        if self.Fix_pos_encode != 'None':
            x_src_pos = self.Fix_Position(x_src)
            att = x_src + self.attention_layer(x_src_pos)
        else:
            att = x_src + self.attention_layer(x_src)
        att = self.LayerNorm(att)
        out = att + self.FeedForward(att)
        out = self.LayerNorm2(out)
        out = out.permute(0, 2, 1)
        out = self.gap(out)
        out = self.flatten(out)
        out = self.out(out)
        return out


class CausalConv1d(nn.Conv1d):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 dilation=1,
                 groups=1,
                 bias=True):
        super(CausalConv1d, self).__init__(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=0,
            dilation=dilation,
            groups=groups,
            bias=bias)

        self.__padding = (kernel_size - 1) * dilation

    def forward(self, x):
        return super(CausalConv1d, self).forward(nn.functional.pad(x, (self.__padding, 0)))

In [9]:
class AbsolutePositionalEncoding(torch.nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(AbsolutePositionalEncoding, self).__init__()
        self.dropout = torch.nn.Dropout(p=dropout)

        # Create a matrix of [max_len, d_model] for positional encodings
        pe = torch.zeros(max_len, d_model)

        # Define the position vector, shape [max_len, 1]
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)

        # Calculate div_term to apply to the sin/cos functions
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        # Apply sine to even indices in the array (0::2)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)[:, :pe[:, 1::2].size(1)]

        pe = pe.unsqueeze(0)  # Add batch dimension
        self.register_buffer('pe', pe)

    def forward(self, x):
        # Add positional encoding to input x
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)


In [10]:
# Load data
file_paths = glob.glob('eeg_label/*_eeg_label.csv')

data_list = []
labels_list = []
patient_ids = []

for file_path in file_paths:
    eeg_data = pd.read_csv(file_path)
    patient_id = file_path.split('/')[-1]
    
    # Segment the data into non-overlapping 30-second windows
    segment_size = 128 * 30
    num_segments = len(eeg_data) // segment_size
    
    for i in range(num_segments):
        start_idx = i * segment_size
        end_idx = start_idx + segment_size
        segment = eeg_data['EEG'].iloc[start_idx:end_idx].values
        label = eeg_data['Label'].iloc[start_idx:end_idx].mode()[0]
        
        data_list.append(segment)
        labels_list.append(label)
        patient_ids.append(patient_id)  # Repeat patient ID for each segment

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(labels_list)

# Convert lists to numpy arrays and then to PyTorch tensors
X = torch.tensor(np.array(data_list), dtype=torch.float32) 
y = torch.tensor(y_encoded, dtype=torch.long) 
patient_ids = np.array(patient_ids)  # Keep patient IDs as numpy array for easier indexing

# Reshape X to include a channel dimension
X = X.unsqueeze(1)  # Shape: (num_samples, 1, sequence_length)

In [9]:
from sklearn.model_selection import LeaveOneGroupOut
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Define the number of epochs
num_epochs = 10
batch_size = 8

# Model configuration
config = {
    'Data_shape': (X.shape[0], 1, X.shape[2]),
    'emb_size': 32,  
    'num_heads': 4,  
    'dim_ff': 64,  
    'dropout': 0.1,
    'Fix_pos_encode': 'tAPE',
    'Rel_pos_encode': 'Scalar',
    'num_labels': 5
}


# Initialise the LOPO cross-validator
logo = LeaveOneGroupOut()

# Loop over each patient
for train_idx, test_idx in logo.split(X, y, groups=patient_ids):
    # Split the data into training and testing sets
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Convert to PyTorch dataset
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Initialize the ConvTran model
    model = ConvTran(config, num_classes=config['num_labels'])

    # Train the model
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    model.train()
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        all_preds = []
        for batch_X, _ in test_loader:
            outputs = model(batch_X)
            _, predicted = torch.max(outputs, 1)
            all_preds.append(predicted)
        all_preds = torch.cat(all_preds)
        accuracy = (all_preds == y_test).sum().item() / y_test.size(0)
        print(f'Patient ID: {patient_ids[test_idx[0]]}, Accuracy: {accuracy * 100:.2f}%')

  return F.conv2d(input, weight, bias, self.stride,


Patient ID: L05200708_eeg_label.csv, Accuracy: 59.15%
Patient ID: L05211742_eeg_label.csv, Accuracy: 67.87%
Patient ID: L05250816_eeg_label.csv, Accuracy: 64.89%
Patient ID: L05250921_eeg_label.csv, Accuracy: 37.66%
Patient ID: L05271431_eeg_label.csv, Accuracy: 46.33%
Patient ID: L05281010_eeg_label.csv, Accuracy: 46.37%
Patient ID: L06101015_eeg_label.csv, Accuracy: 68.83%
Patient ID: L06181302_eeg_label.csv, Accuracy: 34.88%
Patient ID: L06181332_eeg_label.csv, Accuracy: 39.62%
Patient ID: L06221009_eeg_label.csv, Accuracy: 32.14%
Patient ID: L06221141_eeg_label.csv, Accuracy: 63.64%
Patient ID: L06221219_eeg_label.csv, Accuracy: 19.05%
Patient ID: L08181442_eeg_label.csv, Accuracy: 32.98%
Patient ID: L08190811_eeg_label.csv, Accuracy: 53.85%
Patient ID: L08190921_eeg_label.csv, Accuracy: 68.06%


#### 2 sescond

In [14]:
# Load data
file_paths = glob.glob('eeg_label/*_eeg_label.csv')

data_list = []
labels_list = []
patient_ids = []

for file_path in file_paths:
    eeg_data = pd.read_csv(file_path)
    patient_id = file_path.split('/')[-1]
    
    # Segment the data into non-overlapping 30-second windows
    segment_size = 128 * 2
    num_segments = len(eeg_data) // segment_size
    
    for i in range(num_segments):
        start_idx = i * segment_size
        end_idx = start_idx + segment_size
        segment = eeg_data['EEG'].iloc[start_idx:end_idx].values
        label = eeg_data['Label'].iloc[start_idx:end_idx].mode()[0]
        
        data_list.append(segment)
        labels_list.append(label)
        patient_ids.append(patient_id)  # Repeat patient ID for each segment

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(labels_list)

# Convert lists to numpy arrays and then to PyTorch tensors
X = torch.tensor(np.array(data_list), dtype=torch.float32) 
y = torch.tensor(y_encoded, dtype=torch.long)  # Shape: (num_samples,)
patient_ids = np.array(patient_ids)  # Keep patient IDs as numpy array for easier indexing

# Reshape X to include a channel dimension
X = X.unsqueeze(1)  # Shape: (num_samples, 1, sequence_length)

In [17]:
all_preds = []
all_labels = []

# Define the number of epochs
num_epochs = 10
batch_size = 8

# Model configuration
config = {
    'Data_shape': (X.shape[0], 1, X.shape[2]),
    'emb_size': 32,  
    'num_heads': 4,  
    'dim_ff': 64,  
    'dropout': 0.1,
    'Fix_pos_encode': 'tAPE',
    'Rel_pos_encode': 'Scalar',
    'num_labels': 5
}

# Initialise the LOPO cross-validator
logo = LeaveOneGroupOut()

# Loop over each patient
for train_idx, test_idx in logo.split(X, y, groups=patient_ids):
    # Split the data into training and testing sets
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Convert to PyTorch dataset
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Initialize the ConvTran model
    model = ConvTran(config, num_classes=config['num_labels'])

    # Train the model
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    model.train()
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        test_preds = []
        for batch_X, _ in test_loader:
            outputs = model(batch_X)
            _, predicted = torch.max(outputs, 1)
            test_preds.append(predicted)
        test_preds = torch.cat(test_preds)

        # Collect predictions and true labels for overall metrics
        all_preds.extend(test_preds.cpu().numpy())  # Convert to numpy
        all_labels.extend(y_test.cpu().numpy())  # Convert to numpy

# Calculate overall accuracy
overall_accuracy = accuracy_score(all_labels, all_preds)
print(f'Overall Accuracy: {overall_accuracy * 100:.2f}%')

# Calculate overall F1 score (weighted)
overall_f1_score = f1_score(all_labels, all_preds, average='weighted')
print(f'Overall F1 Score (Weighted): {overall_f1_score:.2f}')

  return F.conv2d(input, weight, bias, self.stride,


Overall Accuracy: 47.55%
Overall F1 Score (Weighted): 0.42


In [7]:
# config = {
#     'Data_shape': (64, 1, 128),  # Example: (batch_size, channels, sequence_length)
#     'emb_size': 64,
#     'num_heads': 8,
#     'dim_ff': 128,
#     'dropout': 0.2,
#     'Fix_pos_encode': 'Learn',  # Can be 'Sin', 'Learn', or 'None'
#     'Rel_pos_encode': 'Scalar',  # Can be 'Scalar', 'Vector', or 'None'
#     'num_labels': 5,  # Number of classes
#     'Net_Type': 'C'  # 'T' for Transformer, 'CC-T' for CasualConvTran, 'C' for ConvTran
# }

'CC-T' for CasualConvTran, 'C' for ConvTran - these have higher accuracy then Transformer

number of input channels = 1 since EEG data is usually single-channel per electrode
30 seconds of EEG data at a sampling rate of 128 Hz

## XGB vs ConvTran

In [27]:
xgb_patient_ids = [
    'L05200708_eeg_label.csv', 'L05211742_eeg_label.csv', 'L05250816_eeg_label.csv', 
    'L05250921_eeg_label.csv', 'L05271431_eeg_label.csv', 'L05281010_eeg_label.csv', 
    'L06101015_eeg_label.csv', 'L06181302_eeg_label.csv', 'L06181332_eeg_label.csv', 
    'L06221009_eeg_label.csv', 'L06221141_eeg_label.csv', 'L06221219_eeg_label.csv', 
    'L08181442_eeg_label.csv', 'L08190811_eeg_label.csv', 'L08190921_eeg_label.csv'
]

xgb_accuracies = [
    73.24, 49.32, 60.64, 32.47, 58.72, 51.96,
    55.19, 16.28, 45.28, 15.18, 45.45, 21.43,
    34.04, 46.15, 45.83
]

convtran_accuracies = [
    59.15, 67.87, 64.89, 37.66, 46.33, 46.37, 
    68.83, 34.88, 39.62, 32.14, 63.64, 19.05, 
    32.98, 53.85, 68.06
]

df = pd.DataFrame({
    'Patient ID': xgb_patient_ids,
    'XGBoost Accuracy (%)': xgb_accuracies,
    'ConvTran Accuracy (%)': convtran_accuracies
})

print(df)

                 Patient ID  XGBoost Accuracy (%)  ConvTran Accuracy (%)
0   L05200708_eeg_label.csv                 73.24                  59.15
1   L05211742_eeg_label.csv                 49.32                  67.87
2   L05250816_eeg_label.csv                 60.64                  64.89
3   L05250921_eeg_label.csv                 32.47                  37.66
4   L05271431_eeg_label.csv                 58.72                  46.33
5   L05281010_eeg_label.csv                 51.96                  46.37
6   L06101015_eeg_label.csv                 55.19                  68.83
7   L06181302_eeg_label.csv                 16.28                  34.88
8   L06181332_eeg_label.csv                 45.28                  39.62
9   L06221009_eeg_label.csv                 15.18                  32.14
10  L06221141_eeg_label.csv                 45.45                  63.64
11  L06221219_eeg_label.csv                 21.43                  19.05
12  L08181442_eeg_label.csv                 34.04  

In [32]:
mean_accuracy_xgb = np.mean(xgb_accuracies)
print(f'XGB mean accuracy: {mean_accuracy_xgb:.2f}%')

mean_accuracy_ct = np.mean(convtran_accuracies)
print(f'ConvTran mean accuracy: {mean_accuracy_ct:.2f}%')

XGB mean accuracy: 43.41%
ConvTran mean accuracy: 49.02%


In [None]:
import optuna
from sklearn.model_selection import GroupKFold

def convtran_objective(trial):
    # Suggest hyperparameters for ConvTran
    emb_size = trial.suggest_int('emb_size', 16, 64, step=8)
    num_heads = trial.suggest_int('num_heads', 2, 4)
    dim_ff = trial.suggest_int('dim_ff', 64, 256, step=64)
    dropout = trial.suggest_float('dropout', 0.1, 0.5)

    # Model configuration
    config = {
        'Data_shape': (X.shape[0], 1, X.shape[2]),
        'emb_size': emb_size,
        'num_heads': num_heads,
        'dim_ff': dim_ff,
        'dropout': dropout,
        'Fix_pos_encode': 'tAPE',
        'Rel_pos_encode': 'Scalar',
        'num_labels': 5
    }

    model = ConvTran(config, num_classes=config['num_labels'])
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    # GroupKFold for cross-validation
    gkf = GroupKFold(n_splits=5)
    accuracies = []

    for train_idx, test_idx in gkf.split(X, y, groups=patient_ids):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Create DataLoader
        train_dataset = TensorDataset(X_train, y_train)
        train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

        # Training
        model.train()
        for epoch in range(10):
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test)
            _, predicted = torch.max(outputs, 1)
            accuracy = (predicted == y_test).sum().item() / y_test.size(0)
            accuracies.append(accuracy)

    # Average accuracy
    avg_accuracy = sum(accuracies) / len(accuracies)

    # Log the hyperparameters and accuracy
    print(f'Trial Parameters: emb_size={emb_size}, num_heads={num_heads}, dim_ff={dim_ff}, dropout={dropout}')
    print(f'Average Accuracy: {avg_accuracy * 100:.2f}%')

    return avg_accuracy

# Run the optimization
study = optuna.create_study(direction='maximize')
study.optimize(convtran_objective, n_trials=20)

# Output the best hyperparameters
print("Best Parameters: ", study.best_params)

[I 2024-09-17 00:39:03,579] A new study created in memory with name: no-name-67ed364c-d44d-4acc-b845-06483091619d
  return F.conv2d(input, weight, bias, self.stride,
[I 2024-09-17 01:42:37,808] Trial 0 finished with value: 0.5100581321808303 and parameters: {'emb_size': 40, 'num_heads': 2, 'dim_ff': 64, 'dropout': 0.4955238853208461}. Best is trial 0 with value: 0.5100581321808303.


Trial Parameters: emb_size=40, num_heads=2, dim_ff=64, dropout=0.4955238853208461
Average Accuracy: 51.01%


[I 2024-09-17 02:40:31,591] Trial 1 finished with value: 0.5119252947807207 and parameters: {'emb_size': 16, 'num_heads': 2, 'dim_ff': 128, 'dropout': 0.26148163976167516}. Best is trial 1 with value: 0.5119252947807207.


Trial Parameters: emb_size=16, num_heads=2, dim_ff=128, dropout=0.26148163976167516
Average Accuracy: 51.19%


In [None]:
import optuna
from sklearn.model_selection import GroupKFold

def convtran_objective(trial):
    # Suggest hyperparameters for ConvTran
    emb_size = trial.suggest_int('emb_size', 16, 64, step=8)
    num_heads = trial.suggest_int('num_heads', 2, 4)
    dim_ff = trial.suggest_int('dim_ff', 64, 256, step=64)
    dropout = trial.suggest_float('dropout', 0.1, 0.5)

    # Model configuration
    config = {
        'Data_shape': (X.shape[0], 1, X.shape[2]),
        'emb_size': emb_size,
        'num_heads': num_heads,
        'dim_ff': dim_ff,
        'dropout': dropout,
        'Fix_pos_encode': 'tAPE',
        'Rel_pos_encode': 'Scalar',
        'num_labels': 5
    }

    model = ConvTran(config, num_classes=config['num_labels'])
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    # GroupKFold for cross-validation
    gkf = GroupKFold(n_splits=5)
    accuracies = []

    for train_idx, test_idx in gkf.split(X, y, groups=patient_ids):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Create DataLoader
        train_dataset = TensorDataset(X_train, y_train)
        train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

        # Training
        model.train()
        for epoch in range(10):
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

        # Evaluation
        model.eval()
        with torch.no_grad():
            outputs = model(X_test)
            _, predicted = torch.max(outputs, 1)
            accuracy = (predicted == y_test).sum().item() / y_test.size(0)
            accuracies.append(accuracy)

    # Average accuracy
    avg_accuracy = sum(accuracies) / len(accuracies)

    # Log the hyperparameters and accuracy
    print(f'Trial Parameters: emb_size={emb_size}, num_heads={num_heads}, dim_ff={dim_ff}, dropout={dropout}')
    print(f'Average Accuracy: {avg_accuracy * 100:.2f}%')

    return avg_accuracy

# Run the optimization
study = optuna.create_study(direction='maximize')
study.optimize(convtran_objective, n_trials=10)

In [13]:
# Ensure emb_size is defined before the assertion
emb_size = 32  # or any other appropriate value
num_heads = 4  # Set this to match the attention heads you are using in your model

# Ensure seq_len * emb_size is divisible by num_heads
seq_len = X.shape[2]
assert (seq_len * emb_size) % num_heads == 0, "Incompatible shape for attention!"
