## EEG-Based Classification Between Alzheimers and FTD

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mne
from scipy import signal
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from mne.decoding import CSP
import json
import os
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
# Set plotting style
plt.style.use('ggplot')
sns.set(font_scale=1.2)
sns.set_style('whitegrid')

np.random.seed(42)

### Load the Data

In [2]:
def loadData(
    fold_name: str,
    kfold_dict: dict,
    data_path: str,
    set_suffix: str = "_task-eyesclosed_eeg.set"
):
    """
    Load MNE Raw objects for all training and validation subjects in one fold.
    
    Parameters
    ----------
    fold_name : str
        e.g. 'fold_1', 'fold_2', etc., as found in kfold_dict.
    kfold_dict : dict
        The JSON structure loaded from kfold_splits.json
    data_path : str
        Base directory where the sub-XXX folders are found.
        Typically something like '/path/to/dataset/derivatives'
        if .set files live in a derivatives folder.
    set_suffix : str
        The filename pattern for the EEGLAB .set file, 
        e.g. '_task-eyesclosed_eeg.set'. Adjust as needed.
    
    Returns
    -------
    train_raws : list
        List of mne.io.Raw objects for all training subjects in this fold.
    val_raws : list
        List of mne.io.Raw objects for all validation subjects in this fold.
    """

    fold_data = kfold_dict[fold_name]
    train_subjects = fold_data["train"]
    val_subjects   = fold_data["validation"]

    train_raws = []
    val_raws   = []

    for subj_id in train_subjects:
        subject_folder = os.path.join(data_path, subj_id, "eeg")
        set_filename   = f"{subj_id}{set_suffix}"
        set_filepath   = os.path.join(subject_folder, set_filename)

        print(f"[{fold_name}] Loading TRAIN subject: {subj_id} -> {set_filepath}")
        raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
        train_raws.append(raw_obj)

    for subj_id in val_subjects:
        subject_folder = os.path.join(data_path, subj_id, "eeg")
        set_filename   = f"{subj_id}{set_suffix}"
        set_filepath   = os.path.join(subject_folder, set_filename)

        print(f"[{fold_name}] Loading VAL subject: {subj_id} -> {set_filepath}")
        raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
        val_raws.append(raw_obj)

    return train_raws, val_raws

if __name__ == "__main__":
    with open("alzheimers_vs_ftd_splits.json", "r") as f:
        kfold_splits = json.load(f)

    data_dir = "dataset/derivatives"

    fold_name = "fold_1"
    train_raws, val_raws = loadData(
        fold_name=fold_name,
        kfold_dict=kfold_splits,
        data_path=data_dir,
        set_suffix="_task-eyesclosed_eeg.set"
    )

[fold_1] Loading TRAIN subject: sub-021 -> dataset/derivatives/sub-021/eeg/sub-021_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-026 -> dataset/derivatives/sub-026/eeg/sub-026_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-019 -> dataset/derivatives/sub-019/eeg/sub-019_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-010 -> dataset/derivatives/sub-010/eeg/sub-010_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-017 -> dataset/derivatives/sub-017/eeg/sub-017_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-028 -> dataset/derivatives/sub-028/eeg/sub-028_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-016 -> dataset/derivatives/sub-016/eeg/sub-016_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-029 -> dataset/derivatives/sub-029/eeg/sub-029_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-011 -> dataset/derivatives/sub-011/eeg/sub-011_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-027 -> dataset/derivatives/sub-027/eeg/sub-027_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-018 -> dataset/derivatives/sub-018/eeg/sub-018_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-020 -> dataset/derivatives/sub-020/eeg/sub-020_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-033 -> dataset/derivatives/sub-033/eeg/sub-033_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-034 -> dataset/derivatives/sub-034/eeg/sub-034_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-035 -> dataset/derivatives/sub-035/eeg/sub-035_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-004 -> dataset/derivatives/sub-004/eeg/sub-004_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-025 -> dataset/derivatives/sub-025/eeg/sub-025_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-014 -> dataset/derivatives/sub-014/eeg/sub-014_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-012 -> dataset/derivatives/sub-012/eeg/sub-012_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-015 -> dataset/derivatives/sub-015/eeg/sub-015_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-023 -> dataset/derivatives/sub-023/eeg/sub-023_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-024 -> dataset/derivatives/sub-024/eeg/sub-024_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-006 -> dataset/derivatives/sub-006/eeg/sub-006_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-001 -> dataset/derivatives/sub-001/eeg/sub-001_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-008 -> dataset/derivatives/sub-008/eeg/sub-008_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-031 -> dataset/derivatives/sub-031/eeg/sub-031_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-036 -> dataset/derivatives/sub-036/eeg/sub-036_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-007 -> dataset/derivatives/sub-007/eeg/sub-007_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-086 -> dataset/derivatives/sub-086/eeg/sub-086_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-072 -> dataset/derivatives/sub-072/eeg/sub-072_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-075 -> dataset/derivatives/sub-075/eeg/sub-075_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-081 -> dataset/derivatives/sub-081/eeg/sub-081_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-080 -> dataset/derivatives/sub-080/eeg/sub-080_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-074 -> dataset/derivatives/sub-074/eeg/sub-074_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-073 -> dataset/derivatives/sub-073/eeg/sub-073_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-087 -> dataset/derivatives/sub-087/eeg/sub-087_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-067 -> dataset/derivatives/sub-067/eeg/sub-067_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-069 -> dataset/derivatives/sub-069/eeg/sub-069_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-078 -> dataset/derivatives/sub-078/eeg/sub-078_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-076 -> dataset/derivatives/sub-076/eeg/sub-076_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-082 -> dataset/derivatives/sub-082/eeg/sub-082_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-085 -> dataset/derivatives/sub-085/eeg/sub-085_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-071 -> dataset/derivatives/sub-071/eeg/sub-071_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-084 -> dataset/derivatives/sub-084/eeg/sub-084_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-083 -> dataset/derivatives/sub-083/eeg/sub-083_task-eyesclosed_eeg.set
[fold_1] Loading TRAIN subject: sub-077 -> dataset/derivatives/sub-077/eeg/sub-077_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading TRAIN subject: sub-079 -> dataset/derivatives/sub-079/eeg/sub-079_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading VAL subject: sub-002 -> dataset/derivatives/sub-002/eeg/sub-002_task-eyesclosed_eeg.set
[fold_1] Loading VAL subject: sub-005 -> dataset/derivatives/sub-005/eeg/sub-005_task-eyesclosed_eeg.set
[fold_1] Loading VAL subject: sub-032 -> dataset/derivatives/sub-032/eeg/sub-032_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading VAL subject: sub-003 -> dataset/derivatives/sub-003/eeg/sub-003_task-eyesclosed_eeg.set
[fold_1] Loading VAL subject: sub-022 -> dataset/derivatives/sub-022/eeg/sub-022_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading VAL subject: sub-013 -> dataset/derivatives/sub-013/eeg/sub-013_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading VAL subject: sub-030 -> dataset/derivatives/sub-030/eeg/sub-030_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading VAL subject: sub-009 -> dataset/derivatives/sub-009/eeg/sub-009_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading VAL subject: sub-088 -> dataset/derivatives/sub-088/eeg/sub-088_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)
  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading VAL subject: sub-068 -> dataset/derivatives/sub-068/eeg/sub-068_task-eyesclosed_eeg.set
[fold_1] Loading VAL subject: sub-066 -> dataset/derivatives/sub-066/eeg/sub-066_task-eyesclosed_eeg.set


  raw_obj = mne.io.read_raw_eeglab(set_filepath, preload=True)


[fold_1] Loading VAL subject: sub-070 -> dataset/derivatives/sub-070/eeg/sub-070_task-eyesclosed_eeg.set


In [3]:
train_raws
sfreq = train_raws[0].info['sfreq']
print("Sampling frequency:", sfreq)

Sampling frequency: 500.0


### Clean the Data

In [4]:
def extract_epochs_from_raw(raw, duration=1.0, tmin=0.0):
    """
    Extract epochs from a continuous Raw object.
    If no valid events (other than 'boundary') are found,
    fixed-length epochs are created.
    
    Parameters
    ----------
    raw : mne.io.Raw object
        The continuous EEG data.
    duration : float
        Duration (in seconds) of each epoch.
    tmin : float
        Start time relative to the epoch (default is 0).
    
    Returns
    -------
    epochs : mne.Epochs object
        The extracted epochs.
    """
    events, event_id = mne.events_from_annotations(raw, verbose=False)
    
    if events.size == 0:
        print("No valid events found (only boundaries). Creating fixed-length epochs.")
        epochs = mne.make_fixed_length_epochs(raw, duration=duration, overlap=0, preload=True)
    else:
        tmax = tmin + duration
        epochs = mne.Epochs(raw, events, event_id, tmin=tmin, tmax=tmax,
                            baseline=None, preload=True, verbose=False)
    return epochs

def extract_epochs_from_all(raw_list, duration=1.0):
    """
    Process a list of Raw objects, extract epochs from each, crop them to have a consistent
    number of time points, and concatenate them.
    
    Parameters
    ----------
    raw_list : list of mne.io.Raw objects
        The list of subjects' EEG data.
    duration : float
        Duration (in seconds) of each epoch.
    
    Returns
    -------
    epochs_all : mne.Epochs object
        Concatenated epochs from all subjects with a consistent time dimension.
    """
    epochs_list = []
    for raw in raw_list:
        epochs = extract_epochs_from_raw(raw, duration=duration)
        sfreq = raw.info['sfreq']
        n_samples_expected = int(sfreq * duration)
        # Crop the data of each epoch to ensure consistent length
        data = epochs.get_data()[:, :, :n_samples_expected]
        # Create a new Epochs object from the cropped data
        new_epochs = mne.EpochsArray(data, epochs.info, events=epochs.events, event_id=epochs.event_id)
        epochs_list.append(new_epochs)
    
    epochs_all = mne.concatenate_epochs(epochs_list)
    return epochs_all

epochs_train = extract_epochs_from_all(train_raws, duration=1.0)

X_train = epochs_train.get_data()      
y_train = epochs_train.events[:, -1]   

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

Not setting metadata
2 matching events found
No baseline correction applied
0 projection items activated
Not setting metadata
14 matching events found
No baseline correction applied
0 projection items activated
Not setting metadata
1 matching events found
No baseline correction applied
0 projection items activated
Not setting metadata
7 matching events found
No baseline correction applied
0 projection items activated
Not setting metadata
2 matching events found
No baseline correction applied
0 projection items activated
Not setting metadata
7 matching events found
No baseline correction applied
0 projection items activated
Not setting metadata
9 matching events found
No baseline correction applied
0 projection items activated
Not setting metadata
3 matching events found
No baseline correction applied
0 projection items activated
Not setting metadata
1 matching events found
No baseline correction applied
0 projection items activated
Not setting metadata
3 matching events found
No baseli

In [5]:
print("Unique labels in y_train:", np.unique(y_train))

Unique labels in y_train: [1]


In [6]:
participants = pd.read_csv("dataset/participants.tsv", sep="\t")
print(participants.head())

group_dict = dict(zip(participants['participant_id'], participants['Group']))
print("Group mapping:", group_dict)

unique_groups = participants['Group'].unique()
print("Unique groups:", unique_groups)
label_map = {group: i for i, group in enumerate(unique_groups)}
print("Label map:", label_map)

train_subject_ids = [f"sub-{i:03d}" for i in range(1, len(train_raws)+1)]

def extract_epochs_from_raw(raw, duration=1.0, tmin=0.0):
    """
    Extract epochs from a Raw object. If no valid events (other than 'boundary') are found,
    create fixed-length epochs.
    """
    events, event_id = mne.events_from_annotations(raw, verbose=False)
    if events.size == 0:
        print(f"No valid events for subject {raw.info.get('subject_id', 'unknown')}. Creating fixed-length epochs.")
        epochs = mne.make_fixed_length_epochs(raw, duration=duration, overlap=0, preload=True)
    else:
        tmax = tmin + duration
        epochs = mne.Epochs(raw, events, event_id, tmin=tmin, tmax=tmax,
                            baseline=None, preload=True, verbose=False)
    return epochs

all_epochs_list = []
all_labels_list = []

for i, raw in enumerate(train_raws):
    subject_id = train_subject_ids[i]
    if subject_id not in group_dict:
        raise ValueError(f"Group for subject {subject_id} not found in participants.tsv")
    
    group = group_dict[subject_id]
    label = label_map[group]
    
    epochs = extract_epochs_from_raw(raw, duration=1.0)

    sfreq = raw.info['sfreq']
    n_samples_expected = int(sfreq * 1.0)
    data = epochs.get_data()[:, :, :n_samples_expected]
    new_epochs = mne.EpochsArray(data, epochs.info, events=epochs.events, event_id=epochs.event_id)
    
    all_epochs_list.append(new_epochs)
    subject_labels = np.full(new_epochs.get_data().shape[0], label)
    all_labels_list.append(subject_labels)

epochs_all = mne.concatenate_epochs(all_epochs_list)
X_train = epochs_all.get_data()   
y_train = np.concatenate(all_labels_list)
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("Unique labels in y_train:", np.unique(y_train))

  participant_id Gender  Age Group  MMSE
0        sub-001      F   57     A    16
1        sub-002      F   78     A    22
2        sub-003      M   70     A    14
3        sub-004      F   67     A    20
4        sub-005      M   70     A    22
Group mapping: {'sub-001': 'A', 'sub-002': 'A', 'sub-003': 'A', 'sub-004': 'A', 'sub-005': 'A', 'sub-006': 'A', 'sub-007': 'A', 'sub-008': 'A', 'sub-009': 'A', 'sub-010': 'A', 'sub-011': 'A', 'sub-012': 'A', 'sub-013': 'A', 'sub-014': 'A', 'sub-015': 'A', 'sub-016': 'A', 'sub-017': 'A', 'sub-018': 'A', 'sub-019': 'A', 'sub-020': 'A', 'sub-021': 'A', 'sub-022': 'A', 'sub-023': 'A', 'sub-024': 'A', 'sub-025': 'A', 'sub-026': 'A', 'sub-027': 'A', 'sub-028': 'A', 'sub-029': 'A', 'sub-030': 'A', 'sub-031': 'A', 'sub-032': 'A', 'sub-033': 'A', 'sub-034': 'A', 'sub-035': 'A', 'sub-036': 'A', 'sub-037': 'C', 'sub-038': 'C', 'sub-039': 'C', 'sub-040': 'C', 'sub-041': 'C', 'sub-042': 'C', 'sub-043': 'C', 'sub-044': 'C', 'sub-045': 'C', 'sub-046': 'C', 's

In [7]:
participants = pd.read_csv("dataset/participants.tsv", sep="\t")
print(participants.head())

group_dict = dict(zip(participants['participant_id'], participants['Group']))
print("Group mapping:", group_dict)

unique_groups = participants['Group'].unique()
print("Unique groups:", unique_groups)
label_map = {group: i for i, group in enumerate(unique_groups)}
print("Label map:", label_map)

train_subject_ids = [f"sub-{i:03d}" for i in range(1, len(train_raws)+1)]
val_subject_ids = [f"sub-{i:03d}" for i in range(1, len(val_raws)+1)]

def extract_epochs_from_raw(raw, duration=1.0, tmin=0.0):
    """
    Extract epochs from a Raw object. If no valid events (other than 'boundary') are found,
    create fixed-length epochs.
    """
    events, event_id = mne.events_from_annotations(raw, verbose=False)
    if events.size == 0:
        print(f"No valid events for subject {raw.info.get('subject_id', 'unknown')}. Creating fixed-length epochs.")
        epochs = mne.make_fixed_length_epochs(raw, duration=duration, overlap=0, preload=True)
    else:
        tmax = tmin + duration
        epochs = mne.Epochs(raw, events, event_id, tmin=tmin, tmax=tmax,
                            baseline=None, preload=True, verbose=False)
    return epochs

all_epochs_list = []
all_labels_list = []

for i, raw in enumerate(train_raws):
    subject_id = train_subject_ids[i]
    if subject_id not in group_dict:
        raise ValueError(f"Group for subject {subject_id} not found in participants.tsv")
    
    group = group_dict[subject_id]
    label = label_map[group]

    epochs = extract_epochs_from_raw(raw, duration=1.0)

    sfreq = raw.info['sfreq']
    n_samples_expected = int(sfreq * 1.0)
    data = epochs.get_data()[:, :, :n_samples_expected]
    new_epochs = mne.EpochsArray(data, epochs.info, events=epochs.events, event_id=epochs.event_id)
    
    all_epochs_list.append(new_epochs)
    subject_labels = np.full(new_epochs.get_data().shape[0], label)
    all_labels_list.append(subject_labels)

epochs_all = mne.concatenate_epochs(all_epochs_list)
X_train = epochs_all.get_data()       
y_train = np.concatenate(all_labels_list)
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("Unique labels in y_train:", np.unique(y_train))

for i, raw in enumerate(val_raws):
    subject_id = val_subject_ids[i]
    if subject_id not in group_dict:
        raise ValueError(f"Group for subject {subject_id} not found in participants.tsv")
    
    group = group_dict[subject_id]
    label = label_map[group]
    
    epochs = extract_epochs_from_raw(raw, duration=1.0)

    sfreq = raw.info['sfreq']
    n_samples_expected = int(sfreq * 1.0)
    data = epochs.get_data()[:, :, :n_samples_expected]
    new_epochs = mne.EpochsArray(data, epochs.info, events=epochs.events, event_id=epochs.event_id)
    
    all_epochs_list.append(new_epochs)
    subject_labels = np.full(new_epochs.get_data().shape[0], label)
    all_labels_list.append(subject_labels)

epochs_all = mne.concatenate_epochs(all_epochs_list)
X_val = epochs_all.get_data()      
y_val = np.concatenate(all_labels_list)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)
print("Unique labels in y_val:", np.unique(y_val))

  participant_id Gender  Age Group  MMSE
0        sub-001      F   57     A    16
1        sub-002      F   78     A    22
2        sub-003      M   70     A    14
3        sub-004      F   67     A    20
4        sub-005      M   70     A    22
Group mapping: {'sub-001': 'A', 'sub-002': 'A', 'sub-003': 'A', 'sub-004': 'A', 'sub-005': 'A', 'sub-006': 'A', 'sub-007': 'A', 'sub-008': 'A', 'sub-009': 'A', 'sub-010': 'A', 'sub-011': 'A', 'sub-012': 'A', 'sub-013': 'A', 'sub-014': 'A', 'sub-015': 'A', 'sub-016': 'A', 'sub-017': 'A', 'sub-018': 'A', 'sub-019': 'A', 'sub-020': 'A', 'sub-021': 'A', 'sub-022': 'A', 'sub-023': 'A', 'sub-024': 'A', 'sub-025': 'A', 'sub-026': 'A', 'sub-027': 'A', 'sub-028': 'A', 'sub-029': 'A', 'sub-030': 'A', 'sub-031': 'A', 'sub-032': 'A', 'sub-033': 'A', 'sub-034': 'A', 'sub-035': 'A', 'sub-036': 'A', 'sub-037': 'C', 'sub-038': 'C', 'sub-039': 'C', 'sub-040': 'C', 'sub-041': 'C', 'sub-042': 'C', 'sub-043': 'C', 'sub-044': 'C', 'sub-045': 'C', 'sub-046': 'C', 's

### SVM Model

In [8]:
selected_features = [
    "ch0_beta", "ch0_gamma", "ch2_beta", "ch2_gamma", "ch8_alpha", 
    "ch9_alpha", "ch12_beta", "ch12_gamma", "ch13_beta", "ch13_gamma",
    "ch14_alpha", "ch15_alpha", "ch8_alpha_beta_ratio", 
    "ch9_alpha_beta_ratio", "ch14_alpha_beta_ratio", 
    "ch15_alpha_beta_ratio", "CSP_0", "CSP_1", "CSP_2", "CSP_3"
]

X_train_flat = X_train.reshape(X_train.shape[0], -1)

selected_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 
X_train_selected = X_train_flat[:, selected_indices]


In [9]:
from sklearn.model_selection import train_test_split

X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train_selected, y_train, test_size=0.2, random_state=42
)

In [20]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

svm_model = SVC(kernel='rbf', C=10, gamma='scale', random_state=42)
svm_model.fit(X_train_split, y_train_split)

y_val_pred = svm_model.predict(X_val_split)

print("Validation Accuracy:", accuracy_score(y_val_split, y_val_pred))
print("Classification Report:\n", classification_report(y_val, y_pred))

Validation Accuracy: 0.6733031674208145
Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.03      0.07      3825
           1       0.50      0.98      0.67      3823

    accuracy                           0.51      7648
   macro avg       0.60      0.51      0.37      7648
weighted avg       0.60      0.51      0.37      7648



In [11]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.01, 0.001],
    'kernel': ['rbf', 'linear']
}

grid_search = GridSearchCV(SVC(random_state=42), param_grid, cv=5)
grid_search.fit(X_train_selected, y_train)

print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Score:", grid_search.best_score_)

Best Parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best Cross-Validation Score: 0.6925593481539772


In [14]:
X_val_flat = X_val.reshape(X_val.shape[0], -1)

selected_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
X_val_selected = X_val_flat[:, selected_indices]
y_pred = svm_model.predict(X_val_selected)

In [15]:
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

Validation Accuracy: 0.5022228033472803


### Random Forest Classifier Model

In [19]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_selected, y_train)

y_val_pred_rf = rf_model.predict(X_val_selected)
print("Validation Accuracy:", accuracy_score(y_val, y_val_pred_rf))
print("Classification Report:\n", classification_report(y_val, y_pred))

Validation Accuracy: 0.758760460251046
Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.03      0.07      3825
           1       0.50      0.98      0.67      3823

    accuracy                           0.51      7648
   macro avg       0.60      0.51      0.37      7648
weighted avg       0.60      0.51      0.37      7648



### Decision Tree Classifier Model

In [17]:
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_selected, y_train)

y_pred = dt_model.predict(X_val_selected)

print("Validation Accuracy:", accuracy_score(y_val, y_pred))
print("Classification Report:\n", classification_report(y_val, y_pred))

Validation Accuracy: 0.8143305439330544
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.63      0.77      3825
           1       0.73      1.00      0.84      3823

    accuracy                           0.81      7648
   macro avg       0.86      0.81      0.81      7648
weighted avg       0.86      0.81      0.81      7648

