## Hybrid approaches (Egan)
This notebook presents the hybrid approaches to Egan's decoding. It starts by combining alpha-band power and P300 features. This approach follows a simple pipeline.

In [73]:
def extracting_ERP(X, y, z, V, fs, epoch_size, step_size, time_moment):
    """
    Extract ERP features at a specific time point from all channels.

    Only trials in which a target occurred on side 0 or side 1 (any non-zero
    entry in ``z[i, :, 0]`` or ``z[i, :, 1]``) are kept; all other trials are
    dropped from both returned arrays.

    Parameters
    ----------
    X : np.ndarray
        EEG data, shape (trials, channels, samples)
    y : np.ndarray
        Labels for each trial, shape (trials,)
    z : np.ndarray
        Target presence info, shape (trials, epochs, sides)
    V : np.ndarray
        Code cycle template (unused here, but required for compatibility)
    fs : float
        Sampling frequency
    epoch_size : float
        Duration of one epoch (unused here)
    step_size : float
        Step size between epochs (unused here)
    time_moment : float
        Time (in seconds) at which to extract the ERP feature

    Returns
    -------
    X_erp : np.ndarray
        ERP feature matrix, shape (n_selected_trials, n_channels). The shape
        is (0, n_channels) when no trial is selected.
    y_erp : np.ndarray
        Corresponding labels, shape (n_selected_trials,)

    Raises
    ------
    IndexError
        If ``time_moment`` maps to a sample index outside the trial
        (negative, or at/after the last sample).
    """
    X = np.asarray(X)
    y = np.asarray(y)
    z = np.asarray(z)

    time_sample = int(round(time_moment * fs))  # Convert time to sample index
    n_samples = X.shape[2]
    # Reject negative indices explicitly: plain indexing would silently wrap
    # around and read a sample counted from the end of the trial.
    if not 0 <= time_sample < n_samples:
        raise IndexError(
            f"time_moment={time_moment} s maps to sample {time_sample}, "
            f"which is outside the valid range [0, {n_samples})"
        )

    # Keep a trial when a target is present on side 0 or side 1 in any epoch
    has_target = np.any(z[:, :, 0], axis=1) | np.any(z[:, :, 1], axis=1)

    # Slice the time point first (a view), then copy only the selected trials.
    # This keeps the (n_selected_trials, n_channels) shape even when empty.
    X_erp = X[:, :, time_sample][has_target]
    y_erp = y[has_target]

    return X_erp, y_erp

from scipy.signal import welch

def extracting_alpha(X, fs, fmin=8, fmax=12, nperseg=None):
    """
    Extract alpha band power features using Welch's PSD method for all channels.

    The PSD is estimated for every trial and channel in a single vectorised
    ``welch`` call along the sample axis, then averaged over the frequency
    bins that fall inside ``[fmin, fmax]`` (both bounds inclusive).

    Parameters
    ----------
    X : np.ndarray
        EEG data, shape (trials, channels, samples)
    fs : float
        Sampling frequency
    fmin : float
        Lower frequency bound of the alpha band (default: 8 Hz)
    fmax : float
        Upper frequency bound of the alpha band (default: 12 Hz)
    nperseg : int or None
        Length of each segment for Welch's method (default: fs, i.e. a
        1-second window). Values longer than the trial are clamped to the
        number of samples.

    Returns
    -------
    X_alpha : np.ndarray
        Alpha power features, shape (trials, n_channels)

    Raises
    ------
    ValueError
        If the trials contain no samples, or if no PSD frequency bin lies
        inside ``[fmin, fmax]`` (the band mean would otherwise be NaN).
    """
    X = np.asarray(X)
    n_trials, n_channels, n_samples = X.shape

    # Nothing to estimate: keep the documented output shape
    if n_trials == 0 or n_channels == 0:
        return np.zeros((n_trials, n_channels))
    if n_samples == 0:
        raise ValueError("X contains no samples along the last axis")

    if nperseg is None:
        nperseg = int(fs)  # 1-second window by default
    # welch would clamp an over-long segment itself, but warns on every call
    nperseg = min(nperseg, n_samples)

    # One call over the last axis replaces the per-trial / per-channel loop
    f, psd = welch(X, fs=fs, nperseg=nperseg, axis=-1)

    alpha_band = (f >= fmin) & (f <= fmax)
    if not alpha_band.any():
        raise ValueError(
            f"No PSD frequency bin falls inside [{fmin}, {fmax}] Hz "
            f"(fs={fs}, nperseg={nperseg}); widen the band or increase nperseg"
        )

    X_alpha = np.asarray(psd[..., alpha_band].mean(axis=-1), dtype=float)
    return X_alpha



In [74]:
# Imports (MNE log verbosity is reduced right after this import block)
import mne
import os
from os.path import join
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import pyntbci
from sklearn.preprocessing import StandardScaler


# Limit MNE logging to messages at warning level or above
mne.set_log_level('warning')

# Directory containing the preprocessed data
file_dir = '/Users/juliette/Desktop/thesis/preprocessing/hybrid_preprocessing'
decoding_results_dir = '/Users/juliette/Desktop/thesis/results/alpha+p300'

# Initialize results storage
results = []
fold_pr_auc = []
fold_correct_trials = []

biosemi_layout = mne.channels.make_standard_montage("biosemi64")
channel_names = biosemi_layout.ch_names

# NOTE(review): channel_indices_alpha and channel_indices_p300 are computed here but
# never applied below -- both feature extractors receive every channel stored in X.
# Confirm whether X is already reduced to these channels or should be sliced first.

# Selecting the channels for alpha
desired_channels_alpha = ['F2', 'FT8', 'FC3', 'FC1']
channel_indices_alpha = [channel_names.index(ch) for ch in desired_channels_alpha]

# Selecting the channels for p300
desired_channels_p300 = ['AF3', 'F1', 'CP1', 'P1']
channel_indices_p300 = [channel_names.index(ch) for ch in desired_channels_p300]

# Parameters for P300 extraction
time_moment = 0.53  # Time in seconds for ERP extraction
epoch_size = 0.8  # seconds
step_size = 0.1   # seconds

# List of subjects
subjects = ["VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij",
            "VPpdik", "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit",
            "VPpdiu", "VPpdiv", "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"]

# Load Data for each subject
for subject in subjects:
    print(f"Processing {subject}...")
    # Load preprocessed data
    fn = os.path.join(file_dir, f"sub-{subject}_task-covert_c-VEP+P300_ICA.npz")
    # The context manager closes the .npz archive once the arrays are read,
    # so file handles do not pile up over the subject loop.
    with np.load(fn) as tmp:
        X = tmp["X"]  # EEG data matrix (trials, channels, samples)
        y = tmp["y"]  # Labels
        z = tmp["z"]  # Target presence (trials, epochs, sides)
        V = tmp["V"]  # One code cycle (classes, samples)
        fs = tmp["fs"].flatten()[0]

    # Cross-validation setup
    fold_accuracies = []
    fold_roc_auc = []
    n_folds = 4
    n_trials = X.shape[0] // n_folds
    # Contiguous fold labels, one per trial. Identical to
    # np.repeat(np.arange(n_folds), n_trials) when the trial count is divisible
    # by n_folds, but still one label per trial when it is not (np.repeat would
    # produce a shorter vector and the boolean indexing below would fail).
    folds = np.arange(X.shape[0]) * n_folds // X.shape[0]

    for i_fold in range(n_folds):
        print(f"  Fold {i_fold + 1}/{n_folds}")

        # Split train and test data
        X_trn, y_trn, z_trn = X[folds != i_fold, :, :], y[folds != i_fold], z[folds != i_fold, :, :]
        X_tst, y_tst, z_tst = X[folds == i_fold, :, :], y[folds == i_fold], z[folds == i_fold, :, :]

        # Extracting ERP features
        X_trn_ERP, y_trn_ERP = extracting_ERP(X_trn, y_trn, z_trn, V, fs, epoch_size, step_size, time_moment)
        X_tst_ERP, y_tst_ERP = extracting_ERP(X_tst, y_tst, z_tst, V, fs, epoch_size, step_size, time_moment)

        # extracting_ERP drops trials without a target on side 0 or 1. Apply the
        # same selection before computing alpha power so both feature sets
        # describe the same trials and can be concatenated row by row.
        has_target_trn = np.any(z_trn[:, :, 0], axis=1) | np.any(z_trn[:, :, 1], axis=1)
        has_target_tst = np.any(z_tst[:, :, 0], axis=1) | np.any(z_tst[:, :, 1], axis=1)

        # Extracting alpha features
        X_trn_alpha = extracting_alpha(X_trn[has_target_trn], fs)
        X_tst_alpha = extracting_alpha(X_tst[has_target_tst], fs)

        # Concatenate ERP and alpha features
        X_trn_combined = np.concatenate([X_trn_ERP, X_trn_alpha], axis=1)
        X_tst_combined = np.concatenate([X_tst_ERP, X_tst_alpha], axis=1)

        # Normalize (z-score using training set statistics)
        scaler = StandardScaler()
        X_trn_combined = scaler.fit_transform(X_trn_combined)
        X_tst_combined = scaler.transform(X_tst_combined)

        # Train LDA classifier
        clf = LinearDiscriminantAnalysis()
        clf.fit(X_trn_combined, y_trn_ERP)

        # Predict (column 1 of predict_proba is the score of the second class,
        # so this assumes a two-class problem)
        y_pred = clf.predict(X_tst_combined)
        y_prob = clf.predict_proba(X_tst_combined)[:, 1]

        # Compute metrics
        acc = accuracy_score(y_tst_ERP, y_pred)
        auc = roc_auc_score(y_tst_ERP, y_prob)

        fold_accuracies.append(acc)
        fold_roc_auc.append(auc)
        print(f"    Accuracy: {acc:.3f}, AUC: {auc:.3f}")

    print(f"  Mean accuracy: {np.mean(fold_accuracies):.3f}, Mean AUC: {np.mean(fold_roc_auc):.3f}")
    results.append({
        "subject": subject,
        "accuracy": np.mean(fold_accuracies),
        "auc": np.mean(fold_roc_auc),
    })

# Compute grand average across subjects
all_accuracies = [r["accuracy"] for r in results]
all_aucs = [r["auc"] for r in results]

grand_avg_accuracy = np.mean(all_accuracies)
grand_avg_auc = np.mean(all_aucs)

print("\n===== Grand Average Results =====")
print(f"Grand Average Accuracy: {grand_avg_accuracy:.3f}")
print(f"Grand Average AUC: {grand_avg_auc:.3f}")

Processing VPpdia...
  Fold 1/4
    Accuracy: 0.600, AUC: 0.660
  Fold 2/4
    Accuracy: 0.600, AUC: 0.510
  Fold 3/4
    Accuracy: 0.700, AUC: 0.670
  Fold 4/4
    Accuracy: 0.600, AUC: 0.730
  Mean accuracy: 0.625, Mean AUC: 0.642
Processing VPpdib...
  Fold 1/4
    Accuracy: 0.600, AUC: 0.750
  Fold 2/4
    Accuracy: 0.700, AUC: 0.690
  Fold 3/4
    Accuracy: 0.800, AUC: 0.910
  Fold 4/4


KeyboardInterrupt: 

In [21]:
# Grand average over the per-subject means collected in `results`
all_accuracies = [entry["accuracy"] for entry in results]
all_aucs = [entry["auc"] for entry in results]

grand_avg_accuracy, grand_avg_auc = np.mean(all_accuracies), np.mean(all_aucs)

# Single print call; the newline separator reproduces the three-line report
print(
    "\n===== Grand Average Results =====",
    f"Grand Average Accuracy: {grand_avg_accuracy:.3f}",
    f"Grand Average AUC: {grand_avg_auc:.3f}",
    sep="\n",
)




===== Grand Average Results =====
Grand Average Accuracy: 0.712
Grand Average AUC: 0.770


In [47]:
# Debug check: show the shape of the ERP training feature matrix. X_trn_ERP is
# whatever the most recently executed fold of the decoding loop left in the kernel.
print(X_trn_ERP.shape)


(120, 4)
