## c-VEP

In [4]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Extract c-VEP RCCA features per trial per subject and task.
@author: Adapted from Jordy Thielen
"""

import os
import numpy as np
import joblib
import pyntbci

# Paths
data_dir = '/Users/juliette/Desktop/thesis/preprocessing/c-VEP_preprocessing/c-VEP_ICA'
save_dir = '/Users/juliette/Desktop/thesis/features/c-VEP'
os.makedirs(save_dir, exist_ok=True)

# Subject and task lists
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
tasks = ["overt", "covert"]

# RCCA settings
event = "dur"
onset_event = True
encoding_length = 0.3
ensemble = True

# Loop participants
for subject in subjects:
    print(f"{subject}", end="\t")

    for task in tasks:
        print(f"{task}: ", end="")

        # Load data
        fn = os.path.join(data_dir, f"sub-{subject}_task-{task}_ICA.npz")
        tmp = np.load(fn)
        fs = int(tmp["fs"])
        print("Before feature extraction:")
        X = tmp["X"]
        y = tmp["y"]
        V = tmp["V"]
        print("Shape of V:", V.shape)
        print("Shape of X:", X.shape)
        print("Shape of y:", y.shape)

        # Fit RCCA model
        rcca = pyntbci.classifiers.rCCA(stimulus=V, fs=fs, event=event,
                                        encoding_length=encoding_length,
                                        onset_event=onset_event, ensemble=ensemble)
        rcca.fit(X, y)

        # Extract features (project trials into canonical space)
        features = rcca.decision_function(X)
        features = features.reshape(features.shape[0], -1)  # shape: (n_trials, n_classes * n_components)
        print("n_components:", rcca.n_components)

        print("After feature extraction:")
        print("Shape of features:", features.shape)
#         save_path = os.path.join(save_dir, f"sub-{subject}_task-{task}_features.npz")
#         np.savez(save_path, features=features, y=y)
#         print(f"saved to {save_path}, shape: {features.shape}")


VPpdia	overt: Before feature extraction:
Shape of V: (2, 252)
Shape of X: (20, 62, 2400)
Shape of y: (20,)
n_components: 1
After feature extraction:
Shape of features: (20, 2)
covert: Before feature extraction:
Shape of V: (2, 252)
Shape of X: (80, 62, 2400)
Shape of y: (80,)
n_components: 1
After feature extraction:
Shape of features: (80, 2)
VPpdib	overt: Before feature extraction:
Shape of V: (2, 252)
Shape of X: (20, 61, 2400)
Shape of y: (20,)
n_components: 1
After feature extraction:
Shape of features: (20, 2)
covert: Before feature extraction:
Shape of V: (2, 252)
Shape of X: (80, 61, 2400)
Shape of y: (80,)
n_components: 1
After feature extraction:
Shape of features: (80, 2)
VPpdic	overt: Before feature extraction:
Shape of V: (2, 252)
Shape of X: (20, 63, 2400)
Shape of y: (20,)
n_components: 1
After feature extraction:
Shape of features: (20, 2)
covert: Before feature extraction:
Shape of V: (2, 252)
Shape of X: (80, 63, 2400)
Shape of y: (80,)
n_components: 1
After feature e

KeyboardInterrupt: 

## Alpha

In [55]:
import os
import numpy as np
from scipy.signal import butter, sosfilt, hilbert
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.covariance import LedoitWolf
from mne.decoding import CSP
import warnings
warnings.filterwarnings("ignore")

# Bandpass filter function
def bandpass_filter(data, lowcut, highcut, fs, order=4):
    """
    Apply a bandpass filter to the data.
    """
    sos = butter(order, [lowcut, highcut], btype='band', fs=fs, output='sos')
    return sosfilt(sos, data, axis=-1)

# Hilbert transform feature function
def compute_average_hilbert_amplitude(data):
    """
    Compute log-mean amplitude using Hilbert transform.
    """
    analytic = hilbert(data, axis=2)
    amplitude = np.abs(analytic)
    mean_amplitude = amplitude.mean(axis=2)
    return np.log(mean_amplitude)

# Parameters
task = "covert"
n_comp = 4  # number of CSP components
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
decoding_results_dir = '/Users/juliette/Desktop/thesis/features/alpha'

# Loop through subjects
for subject in subjects:
    print(f"Processing subject {subject}")
    
    # File paths
    file_dir = '/Users/juliette/Desktop/thesis/preprocessing/alpha_preprocessing/alpha_ICA'
    file_path = os.path.join(file_dir, f"sub-{subject}_task-{task}_alpha_ICA.npz")

    # Check file exists
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue

    # Load data
    npz_data = np.load(file_path)
    X = npz_data['X']  # EEG data: trials x channels x samples
    y = npz_data['y']  # Labels: trials
    fs = npz_data['fs'].flatten()[0]  # Sampling frequency as integer

    print("Shape of X:", X.shape)

    # Preprocessing
    X = bandpass_filter(X, 8, 12, fs=fs)  # Alpha band filtering
    X = X[:, :, 120:-120]  # Remove edge artifacts

    # CSP and feature extraction
    csp = CSP(n_components=n_comp, reg=0.01, log=None, transform_into='csp_space')
    X_csp = csp.fit_transform(X, y)  # Apply CSP
    features = compute_average_hilbert_amplitude(X_csp)  # Extract features

    print(f"Extracted features shape for subject {subject}: {features.shape}")

    # Save features
    if not os.path.exists(decoding_results_dir):
        os.makedirs(decoding_results_dir)

    out_path = os.path.join(decoding_results_dir, f"sub-{subject}_task-{task}_alpha_features.npz")
    np.savez(out_path, features=features, labels=y)

    print(f"Saved features for subject {subject} to {out_path}")


Processing subject VPpdia
Shape of X: (80, 62, 10239)
Computing rank from data with rank=None
    Using tolerance 0.00013 (2.2e-16 eps * 62 dim * 9.3e+09  max singular value)
    Estimated rank (mag): 62
    MAG: rank 62 computed from 62 data channels with 0 projectors
Reducing data rank from 62 -> 62
Estimating covariance using SHRINKAGE
Done.
Computing rank from data with rank=None
    Using tolerance 0.00013 (2.2e-16 eps * 62 dim * 9.5e+09  max singular value)
    Estimated rank (mag): 62
    MAG: rank 62 computed from 62 data channels with 0 projectors
Reducing data rank from 62 -> 62
Estimating covariance using SHRINKAGE
Done.
Extracted features shape for subject VPpdia: (80, 4)
Saved features for subject VPpdia to /Users/juliette/Desktop/thesis/features/alpha/sub-VPpdia_task-covert_alpha_features.npz
Processing subject VPpdib
Shape of X: (80, 61, 10239)
Computing rank from data with rank=None
    Using tolerance 0.00051 (2.2e-16 eps * 61 dim * 3.8e+10  max singular value)
    Est

Shape of X: (80, 63, 10239)
Computing rank from data with rank=None
    Using tolerance 0.00013 (2.2e-16 eps * 63 dim * 9.6e+09  max singular value)
    Estimated rank (mag): 63
    MAG: rank 63 computed from 63 data channels with 0 projectors
Reducing data rank from 63 -> 63
Estimating covariance using SHRINKAGE
Done.
Computing rank from data with rank=None
    Using tolerance 0.00013 (2.2e-16 eps * 63 dim * 9.5e+09  max singular value)
    Estimated rank (mag): 63
    MAG: rank 63 computed from 63 data channels with 0 projectors
Reducing data rank from 63 -> 63
Estimating covariance using SHRINKAGE
Done.
Extracted features shape for subject VPpdik: (80, 4)
Saved features for subject VPpdik to /Users/juliette/Desktop/thesis/features/alpha/sub-VPpdik_task-covert_alpha_features.npz
Processing subject VPpdil
Shape of X: (80, 63, 10239)
Computing rank from data with rank=None
    Using tolerance 0.00016 (2.2e-16 eps * 63 dim * 1.2e+10  max singular value)
    Estimated rank (mag): 63
    

Computing rank from data with rank=None
    Using tolerance 0.00011 (2.2e-16 eps * 63 dim * 8e+09  max singular value)
    Estimated rank (mag): 63
    MAG: rank 63 computed from 63 data channels with 0 projectors
Reducing data rank from 63 -> 63
Estimating covariance using SHRINKAGE
Done.
Computing rank from data with rank=None
    Using tolerance 0.00011 (2.2e-16 eps * 63 dim * 8.1e+09  max singular value)
    Estimated rank (mag): 63
    MAG: rank 63 computed from 63 data channels with 0 projectors
Reducing data rank from 63 -> 63
Estimating covariance using SHRINKAGE
Done.
Extracted features shape for subject VPpdiu: (80, 4)
Saved features for subject VPpdiu to /Users/juliette/Desktop/thesis/features/alpha/sub-VPpdiu_task-covert_alpha_features.npz
Processing subject VPpdiv
Shape of X: (80, 60, 10239)
Computing rank from data with rank=None
    Using tolerance 0.00035 (2.2e-16 eps * 60 dim * 2.6e+10  max singular value)
    Estimated rank (mag): 60
    MAG: rank 60 computed from 60 

## P300
In a different script, the features were already extracted. Therefore, these can be loaded in directly.

In [56]:
def balance_classes(X, y, ratio_0_to_1=1.0):
    
    """
    Sub-select X and y based on a specified ratio of 0s to 1s, keeping the original order.

    Parameters:
    X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).
    y (numpy.ndarray): Label vector of shape (n_samples,).
    ratio_0_to_1 (float): The desired ratio of 0s to 1s in the balanced dataset.

    Returns:
    X_balanced, y_balanced: Sub-selected feature matrix and label vector.
    """
    # Step 1: Identify indices of 0s and 1s
    indices_0 = np.where(y == 0)[0]
    indices_1 = np.where(y == 1)[0]
    
    # Step 2: Calculate the number of samples to select for each class
    num_1s = len(indices_1)
    num_0s = min(len(indices_0), int(num_1s * ratio_0_to_1))
    
    # Step 3: Randomly sample the desired number of 0s and 1s
    selected_indices_0 = np.random.choice(indices_0, num_0s, replace=False)
    selected_indices_1 = np.random.choice(indices_1, num_1s, replace=False)
    
    # Step 4: Combine selected indices and sort to preserve original order
    balanced_indices = np.sort(np.concatenate([selected_indices_0, selected_indices_1]))
    
    # Step 5: Sub-select X and y based on the balanced indices
    X_balanced = X[balanced_indices]
    y_balanced = y[balanced_indices]
    
    return X_balanced, y_balanced

def filter_valid_epochs(X, y, z=None, return_mask=False):
    """
    Filters out epochs where either the features in X or the labels in y contain NaN values.
    Optionally, if a z array is provided, it is filtered similarly.
    
    Parameters:
        X (np.ndarray): A 2D numpy array with shape (n_epochs, n_features).
        y (np.ndarray): A 1D numpy array with shape (n_epochs,).
        z (np.ndarray, optional): An array that will be filtered using the same mask.
        return_mask (bool, optional): If True, the boolean mask used for filtering is returned.
    
    Returns:
        filtered_X (np.ndarray): X with only rows that have no NaN values.
        filtered_y (np.ndarray): y with only entries corresponding to valid epochs.
        filtered_z (np.ndarray or None): Filtered z array (if provided) or None.
        mask (np.ndarray, optional): The boolean mask of valid epochs; only returned if return_mask=True.
    """
    # Create a mask for valid labels and features
    valid_label_mask = ~np.isnan(y)
    valid_feature_mask = ~np.isnan(X).any(axis=1)
    combined_mask = valid_label_mask & valid_feature_mask

    # Apply the mask to X and y
    filtered_X = X[combined_mask]
    filtered_y = y[combined_mask]
    
    if z is not None:
        filtered_z = z[combined_mask]
    else:
        filtered_z = None

    if return_mask:
        return filtered_X, filtered_y, filtered_z, combined_mask
    else:
        return filtered_X, filtered_y, filtered_z

## Feature concatenation
In the two cells below, the three features are concatenated. However, I faced a few issues because of the dimensions of the features, the features have the following dimensions:

    X_alpha: (trials, features)
    X_cvep: (trials, features)
    X_p300: (trials, epochs, channels, features)
    
The dimensions of X_p300 do not match those of X_alpha and X_cvep, so I tried a couple of different approaches. Each approach corresponds to the cells below, in order.

1. This approach loads all the separate data, and then first preprocessed X_p300 in the following way. It first reshapes the data into (trials * epochs, channels * features). X_p300 is filtered to have only valid entries. Then, it finds the minimum amount of trials in all three variables, for X_alpha and X_p300 this is the same, as they both have 80 trials. Because X_p300 exceeds this, it only selects the first 80 trials (which should correspond to only the trials, and ignore the epochs).

        This approach has a grand average accuracy of 0.9125.
        
2. The second approach loads the data, and then takes the average of the data across all epochs for each trial, so that each trial will only have one value per channel instead of multiple values per epoch. The shape changes from (n_trials, n_epochs, n_channels, n_features) to (n_trials, n_channels, n_features). In the next step, the data is flattened to (n_trials, n_channels * n_features). In the cross-validation, the NaN values are set to 0 to ensure valid processing (instead of calling the filter_valid_epochs function).

        This approach has a grand average accuracy of 0.9125.
        
3. The third approach is more similar, it calculates the average of the data across all epochs for each trial, the shape becomes (n_trials, n_channels, n_features). Each trial now has a single value per channel for the features, rather than one value per epoch. Then, it calculates the average across all channels for each trial. The shape becomes (n_trials, n_features). Each trial now has one single feature vector, where the values for each feature are the average of all the channels

        This approach has a grand average accuracy of 0.862.

Furthermore, in the cross-validation y_cvep is used, however, y_alpha can also be used since they are the same anyways. On the other hand, y_p300 can also be used, but then more preprocessing steps have to be done because of the differences in dimensionality. Therefore, a more simpler approach such as using y_cvep is chosen.

In [3]:
import os
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.covariance import LedoitWolf

# Paths
cvep_features_dir = '/Users/juliette/Desktop/thesis/features/c-VEP'  # where c-VEP features are stored
alpha_features_dir = '/Users/juliette/Desktop/thesis/features/alpha'  # where alpha features are stored
p300_features_dir = '/Users/juliette/Desktop/thesis/preprocessing/features/with_ICA'  # where P300 features are stored

# Subject and task lists
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
tasks = ["covert"]

# Loop over subjects and tasks
for subject in subjects:
    for task in tasks:
        print(f"Loading features for {subject}, task={task}")

        # Load c-VEP features
        cvep_file_path = os.path.join(cvep_features_dir, f"sub-{subject}_task-{task}_features.npz")
        if os.path.exists(cvep_file_path):
            cvep_data = np.load(cvep_file_path)
            X_cvep = cvep_data['features']  # Features for c-VEP task (trials x features)
            y_cvep = cvep_data['y']  # Labels for c-VEP task
            print(f"Loaded c-VEP features: {X_cvep.shape}")
        else:
            print(f"Warning: c-VEP features file not found for {subject}, task={task}")

        # Load alpha features
        alpha_file_path = os.path.join(alpha_features_dir, f"sub-{subject}_task-{task}_alpha_features.npz")
        if os.path.exists(alpha_file_path):
            alpha_data = np.load(alpha_file_path)
            X_alpha = alpha_data['features']  # Features for alpha task (trials x features)
            y_alpha = alpha_data['labels']  # Labels for alpha task
            print(f"Loaded alpha features: {X_alpha.shape}")
        else:
            print(f"Warning: alpha features file not found for {subject}, task={task}")

        # Load P300 features
        file_path_p300 = os.path.join(p300_features_dir, f"sub-{subject}", f"sub-{subject}_task-{task}_p300_features_ICA.npz")
        if os.path.exists(file_path_p300):
            p300_features = np.load(file_path_p300)

            X_p300 = p300_features['X']  # Shape: trials x epochs x channels x features
            y_p300 = p300_features['y']  # Labels indicating cued side: trials
            z_p300 = p300_features['z']  # Left and right targets: trials x epochs x sides
            fs_p300 = p300_features['fs']
            print(f"Loaded p300 features: {X_p300.shape}")

        # Reshape P300 data to (trials * epochs, channels * features)
        X_p300 = X_p300.reshape(-1, X_p300.shape[2] * X_p300.shape[3])
        print()

        # Extract labels for training epochs using z and y 
        trial_indices_trn = np.arange(len(y_p300))  # indices for trials
        y_p300 = z_p300[trial_indices_trn, :, y_p300].reshape(-1)
        
        # Filter training epochs
        X_p300, y_p300, _ = filter_valid_epochs(X_p300, y_p300)        

        # Check shapes of each feature matrix
        print(f"X_p300 shape: {X_p300.shape}")
        print(f"X_alpha shape: {X_alpha.shape}")
        print(f"X_cvep shape: {X_cvep.shape}")
        
        # Ensure all feature matrices have the same number of trials
        min_trials = min(X_p300.shape[0], X_alpha.shape[0], X_cvep.shape[0])
        
        # Trim data to have the same number of trials (min_trials)
        X_p300 = X_p300[:min_trials, :]
        
        print("Shapes after trimming:")
        print(f"X_p300 shape: {X_p300.shape}")
        print(f"X_alpha shape: {X_alpha.shape}")
        print(f"X_cvep shape: {X_cvep.shape}")

        # Concatenate features
        X_combined = np.concatenate([X_p300, X_alpha, X_cvep], axis=1)
        print("Shape of X_combined:", X_combined.shape)
        
        # Cross-validation setup
        fold_accuracies = []
        fold_roc_auc = []
        n_folds = 4
        n_trials = X_combined.shape[0] // n_folds
        folds = np.repeat(np.arange(n_folds), n_trials)

        for i_fold in range(n_folds):
            print(f"  Fold {i_fold + 1}/{n_folds}")

            # Split train and test data
            X_trn, y_trn = X_combined[folds != i_fold, :], y_cvep[folds != i_fold]
            X_tst, y_tst = X_combined[folds == i_fold, :], y_cvep[folds == i_fold]

            # LDA classifier
            lda = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=LedoitWolf())
            lda.fit(X_trn, y_trn)

            # Make predictions
            y_pred = lda.predict(X_tst)

            # Compute performance metrics
            accuracy = accuracy_score(y_tst, y_pred)
            print(f"Fold {i_fold+1} accuracy: {accuracy}")

            fold_accuracies.append(accuracy)

        # Calculate average accuracy over all folds
        average_accuracy = np.mean(fold_accuracies)
        print(f"Average accuracy over all folds: {average_accuracy}")


Loading features for VPpdia, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 62, 6)

X_p300 shape: (6252, 372)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shapes after trimming:
X_p300 shape: (80, 372)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shape of X_combined: (80, 378)
  Fold 1/4
Fold 1 accuracy: 0.75
  Fold 2/4
Fold 2 accuracy: 0.85
  Fold 3/4
Fold 3 accuracy: 0.6
  Fold 4/4
Fold 4 accuracy: 0.85
Average accuracy over all folds: 0.7625000000000001
Loading features for VPpdib, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 61, 6)

X_p300 shape: (5325, 366)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shapes after trimming:
X_p300 shape: (80, 366)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shape of X_combined: (80, 372)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy 

Fold 2 accuracy: 0.95
  Fold 3/4
Fold 3 accuracy: 0.95
  Fold 4/4
Fold 4 accuracy: 0.95
Average accuracy over all folds: 0.9375
Loading features for VPpdir, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 64, 6)

X_p300 shape: (5786, 384)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shapes after trimming:
X_p300 shape: (80, 384)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shape of X_combined: (80, 390)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 0.95
  Fold 3/4
Fold 3 accuracy: 0.95
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 0.975
Loading features for VPpdis, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 64, 6)

X_p300 shape: (5939, 384)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shapes after trimming:
X_p300 shape: (80, 384)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shape of X_combined: (80, 390)
  Fold 1/4
Fold 1 accuracy: 1.0

In [155]:
import os
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.covariance import LedoitWolf

# Paths
cvep_features_dir = '/Users/juliette/Desktop/thesis/features/c-VEP'  # where c-VEP features are stored
alpha_features_dir = '/Users/juliette/Desktop/thesis/features/alpha'  # where alpha features are stored
p300_features_dir = '/Users/juliette/Desktop/thesis/preprocessing/features/with_ICA'  # where P300 features are stored

# Subject and task lists
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
tasks = ["covert"]

# Loop over subjects and tasks
for subject in subjects:
    for task in tasks:
        print(f"Loading features for {subject}, task={task}")

        # Load c-VEP features
        cvep_file_path = os.path.join(cvep_features_dir, f"sub-{subject}_task-{task}_features.npz")
        if os.path.exists(cvep_file_path):
            cvep_data = np.load(cvep_file_path)
            X_cvep = cvep_data['features']  # Features for c-VEP task (trials x features)
            y_cvep = cvep_data['y']  # Labels for c-VEP task
            print(f"Loaded c-VEP features: {X_cvep.shape}")
        else:
            print(f"Warning: c-VEP features file not found for {subject}, task={task}")

        # Load alpha features
        alpha_file_path = os.path.join(alpha_features_dir, f"sub-{subject}_task-{task}_alpha_features.npz")
        if os.path.exists(alpha_file_path):
            alpha_data = np.load(alpha_file_path)
            X_alpha = alpha_data['features']  # Features for alpha task (trials x features)
            y_alpha = alpha_data['labels']  # Labels for alpha task
            print(f"Loaded alpha features: {X_alpha.shape}")
        else:
            print(f"Warning: alpha features file not found for {subject}, task={task}")

        # Load P300 features
        file_path_p300 = os.path.join(p300_features_dir, f"sub-{subject}", f"sub-{subject}_task-{task}_p300_features_ICA.npz")
        if os.path.exists(file_path_p300):
            p300_features = np.load(file_path_p300)

            X_p300 = p300_features['X']  # Shape: trials x epochs x channels x features
            y_p300 = p300_features['y']  # Labels indicating cued side: trials
            z_p300 = p300_features['z']  # Left and right targets: trials x epochs x sides
            fs_p300 = p300_features['fs']
            print(f"Loaded p300 features: {X_p300.shape}")

        # Reshape P300 data
        # Average over epochs, this changes the shape to (n_trials, n_channels, n_features)
        X_p300 = np.mean(X_p300, axis=1)  # Average over the epochs (axis=1)
        
        # Reshape P300 data into (n_trials, channel * features)
        X_p300 = X_p300.reshape(X_p300.shape[0], -1)  # Flatten the channels and features
    

        # Check shapes of each feature matrix
        print(f"X_p300 shape: {X_p300.shape}")
        print(f"X_alpha shape: {X_alpha.shape}")
        print(f"X_cvep shape: {X_cvep.shape}")

        # Concatenate features
        X_combined = np.concatenate([X_p300, X_alpha, X_cvep], axis=1)
        print("Shape of X_combined:", X_combined.shape)
        
        # Cross-validation setup
        fold_accuracies = []
        fold_roc_auc = []
        n_folds = 4
        n_trials = X_combined.shape[0] // n_folds
        folds = np.repeat(np.arange(n_folds), n_trials)

        for i_fold in range(n_folds):
            print(f"  Fold {i_fold + 1}/{n_folds}")

            # Split train and test data
            X_trn, y_trn = X_combined[folds != i_fold, :], y_cvep[folds != i_fold]
            X_tst, y_tst = X_combined[folds == i_fold, :], y_cvep[folds == i_fold]
            
            # Replace NaN values with 0
            X_trn = np.nan_to_num(X_trn, nan=0)
            X_tst = np.nan_to_num(X_tst, nan=0)

            # LDA classifier
            lda = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=LedoitWolf())
            lda.fit(X_trn, y_trn)

            # Make predictions
            y_pred = lda.predict(X_tst)

            # Compute performance metrics
            accuracy = accuracy_score(y_tst, y_pred)
            print(f"Fold {i_fold+1} accuracy: {accuracy}")

            fold_accuracies.append(accuracy)

        # Calculate average accuracy over all folds
        average_accuracy = np.mean(fold_accuracies)
        print(f"Average accuracy over all folds: {average_accuracy}")


Loading features for VPpdia, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 62, 6)
X_p300 shape: (80, 372)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shape of X_combined: (80, 378)
  Fold 1/4
Fold 1 accuracy: 0.75
  Fold 2/4
Fold 2 accuracy: 0.85
  Fold 3/4
Fold 3 accuracy: 0.6
  Fold 4/4
Fold 4 accuracy: 0.85
Average accuracy over all folds: 0.7625000000000001
Loading features for VPpdib, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 61, 6)
X_p300 shape: (80, 366)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shape of X_combined: (80, 372)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 1.0
Loading features for VPpdic, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 63, 6)
X_p300 shape: (80, 378)
X_alp

Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 0.9
  Fold 3/4
Fold 3 accuracy: 0.85
  Fold 4/4
Fold 4 accuracy: 0.95
Average accuracy over all folds: 0.925
Loading features for VPpdiv, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 60, 6)
X_p300 shape: (80, 360)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shape of X_combined: (80, 366)
  Fold 1/4
Fold 1 accuracy: 0.95
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 0.9875
Loading features for VPpdiw, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 59, 6)
X_p300 shape: (80, 354)
X_alpha shape: (80, 4)
X_cvep shape: (80, 2)
Shape of X_combined: (80, 360)
  Fold 1/4
Fold 1 accuracy: 0.95
  Fold 2/4
Fold 2 accuracy: 0.9
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 0.95
Average accuracy over all folds: 0.95
Loading features for VP

In [160]:
import os
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.covariance import LedoitWolf

# Paths
cvep_features_dir = '/Users/juliette/Desktop/thesis/features/c-VEP'  # where c-VEP features are stored
alpha_features_dir = '/Users/juliette/Desktop/thesis/features/alpha'  # where alpha features are stored
p300_features_dir = '/Users/juliette/Desktop/thesis/preprocessing/features/with_ICA'  # where P300 features are stored

# Subject and task lists
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
tasks = ["covert"]

# Loop over subjects and tasks
for subject in subjects:
    for task in tasks:
        print(f"Loading features for {subject}, task={task}")

        # Load c-VEP features
        cvep_file_path = os.path.join(cvep_features_dir, f"sub-{subject}_task-{task}_features.npz")
        if os.path.exists(cvep_file_path):
            cvep_data = np.load(cvep_file_path)
            X_cvep = cvep_data['features']  # Features for c-VEP task (trials x features)
            y_cvep = cvep_data['y']  # Labels for c-VEP task
            print(f"Loaded c-VEP features: {X_cvep.shape}")
        else:
            print(f"Warning: c-VEP features file not found for {subject}, task={task}")

        # Load alpha features
        alpha_file_path = os.path.join(alpha_features_dir, f"sub-{subject}_task-{task}_alpha_features.npz")
        if os.path.exists(alpha_file_path):
            alpha_data = np.load(alpha_file_path)
            X_alpha = alpha_data['features']  # Features for alpha task (trials x features)
            y_alpha = alpha_data['labels']  # Labels for alpha task
            print(f"Loaded alpha features: {X_alpha.shape}")
        else:
            print(f"Warning: alpha features file not found for {subject}, task={task}")

        # Load P300 features
        file_path_p300 = os.path.join(p300_features_dir, f"sub-{subject}", f"sub-{subject}_task-{task}_p300_features_ICA.npz")
        if os.path.exists(file_path_p300):
            p300_features = np.load(file_path_p300)

            X_p300 = p300_features['X']  # Shape: trials x epochs x channels x features
            y_p300 = p300_features['y']  # Labels indicating cued side: trials
            z_p300 = p300_features['z']  # Left and right targets: trials x epochs x sides
            fs_p300 = p300_features['fs']
            print(f"Loaded p300 features: {X_p300.shape}")

            # Step 1: Average across the epochs for each trial
            X_p300_epochs_avg = np.mean(X_p300, axis=1)  # Shape will be (trials, channels, features)
            
            # Step 2: Average across the channels for each trial
            X_p300_final = np.mean(X_p300_epochs_avg, axis=1)  # Shape will be (trials, features)
            
            print(f"Reshaped p300 features: {X_p300_final.shape}")

        # Concatenate features from c-VEP, alpha, and P300
        X_combined = np.concatenate([X_p300_final, X_alpha, X_cvep], axis=1)
        print("Shape of X_combined:", X_combined.shape)

        # Cross-validation setup
        fold_accuracies = []
        fold_roc_auc = []
        n_folds = 4
        n_trials = X_combined.shape[0] // n_folds
        folds = np.repeat(np.arange(n_folds), n_trials)

        for i_fold in range(n_folds):
            print(f"  Fold {i_fold + 1}/{n_folds}")

            # Split train and test data
            X_trn, y_trn = X_combined[folds != i_fold, :], y_cvep[folds != i_fold]
            X_tst, y_tst = X_combined[folds == i_fold, :], y_cvep[folds == i_fold]
            
            # Replace NaN values with 0
            X_trn = np.nan_to_num(X_trn, nan=0)
            X_tst = np.nan_to_num(X_tst, nan=0)

            # LDA classifier
            lda = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=LedoitWolf())
            lda.fit(X_trn, y_trn)

            # Make predictions
            y_pred = lda.predict(X_tst)

            # Compute performance metrics
            accuracy = accuracy_score(y_tst, y_pred)
            print(f"Fold {i_fold+1} accuracy: {accuracy}")

            fold_accuracies.append(accuracy)

        # Calculate average accuracy over all folds
        average_accuracy = np.mean(fold_accuracies)
        print(f"Average accuracy over all folds: {average_accuracy}")


Loading features for VPpdia, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 62, 6)
Reshaped p300 features: (80, 6)
Shape of X_combined: (80, 12)
  Fold 1/4
Fold 1 accuracy: 0.7
  Fold 2/4
Fold 2 accuracy: 0.85
  Fold 3/4
Fold 3 accuracy: 0.55
  Fold 4/4
Fold 4 accuracy: 0.6
Average accuracy over all folds: 0.6749999999999999
Loading features for VPpdib, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 61, 6)
Reshaped p300 features: (80, 6)
Shape of X_combined: (80, 12)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 1.0
Loading features for VPpdic, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 63, 6)
Reshaped p300 features: (80, 6)
Shape of X_combined: (80, 12)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
F

Loaded p300 features: (80, 80, 59, 6)
Reshaped p300 features: (80, 6)
Shape of X_combined: (80, 12)
  Fold 1/4
Fold 1 accuracy: 0.95
  Fold 2/4
Fold 2 accuracy: 0.9
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 0.95
Average accuracy over all folds: 0.95
Loading features for VPpdix, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 64, 6)
Reshaped p300 features: (80, 6)
Shape of X_combined: (80, 12)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 1.0
Loading features for VPpdiy, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 64, 6)
Reshaped p300 features: (80, 6)
Shape of X_combined: (80, 12)
  Fold 1/4
Fold 1 accuracy: 0.95
  Fold 2/4
Fold 2 accuracy: 0.95
  Fold 3/4
Fold 3 accuracy: 0.95
  Fold 4/4
Fold 4 accuracy: 0.95
Average accuracy over all fol

# Below is the one I obtained after the meeting with jordy

In [51]:
import os
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.covariance import LedoitWolf

# Paths
cvep_features_dir = '/Users/juliette/Desktop/thesis/features/c-VEP'  # where c-VEP features are stored
alpha_features_dir = '/Users/juliette/Desktop/thesis/features/alpha'  # where alpha features are stored
p300_features_dir = '/Users/juliette/Desktop/thesis/preprocessing/features/with_ICA'  # where P300 features are stored
save_dir = '/Users/juliette/Desktop/thesis/results/hybrid_simple/alpha+p300+c-vep'

# Subject and task lists
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
tasks = ["covert"]
subject_accuracies = []
subject_std = []



# Loop over subjects and tasks
for subject in subjects:
    for task in tasks:
        print(f"Loading features for {subject}, task={task}")

        # Load c-VEP features
        cvep_file_path = os.path.join(cvep_features_dir, f"sub-{subject}_task-{task}_features.npz")
        if os.path.exists(cvep_file_path):
            cvep_data = np.load(cvep_file_path)
            X_cvep = cvep_data['features']  # Features for c-VEP task (trials x features)
            y_cvep = cvep_data['y']  # Labels for c-VEP task
            print(f"Loaded c-VEP features: {X_cvep.shape}")
        else:
            print(f"Warning: c-VEP features file not found for {subject}, task={task}")

        # Load alpha features
        alpha_file_path = os.path.join(alpha_features_dir, f"sub-{subject}_task-{task}_alpha_features.npz")
        if os.path.exists(alpha_file_path):
            alpha_data = np.load(alpha_file_path)
            X_alpha = alpha_data['features']  # Features for alpha task (trials x features)
            y_alpha = alpha_data['labels']  # Labels for alpha task
            print(f"Loaded alpha features: {X_alpha.shape}")
        else:
            print(f"Warning: alpha features file not found for {subject}, task={task}")

        # Load P300 features
        file_path_p300 = os.path.join(p300_features_dir, f"sub-{subject}", f"sub-{subject}_task-{task}_p300_features_ICA.npz")
        if os.path.exists(file_path_p300):
            p300_features = np.load(file_path_p300)

            X_p300 = p300_features['X']  # Shape: trials x epochs x channels x features
            y_p300 = p300_features['y']  # Labels indicating cued side: trials
            z_p300 = p300_features['z']  # Left and right targets: trials x epochs x sides
            fs_p300 = p300_features['fs']
            print(f"Loaded p300 features: {X_p300.shape}")

        # Flatten z_p300 to (trials * epochs, sides)
        z_p300_flat = z_p300.reshape(-1, z_p300.shape[2])  # Shape: (trials * epochs, sides)

        # Find the number of epochs per trial
        epochs_per_trial = X_p300.shape[1]  # Number of epochs per trial

        # Initialize lists to store averaged epochs for each trial
        left_target_averaged_trials = []
        right_target_averaged_trials = []

        # Loop over each trial
        for i_trial in range(X_p300.shape[0]):
            # Extract the epochs for the current trial (shape: epochs x channels x features)
            trial_epochs = X_p300[i_trial]

            # Extract the target labels for the current trial (shape: epochs x 2)
            trial_targets = z_p300_flat[i_trial * epochs_per_trial: (i_trial + 1) * epochs_per_trial]

            # Average epochs for left target (assuming 1 = left target)
            left_target_epochs = trial_epochs[trial_targets[:, 0] == 1]
            if len(left_target_epochs) > 0:
                left_target_averaged = np.mean(left_target_epochs, axis=0)  # Average across epochs (axis=0)
            else:
                left_target_averaged = np.zeros(trial_epochs.shape[1:])  # If no left target, set to zeros

            # Average epochs for right target (assuming 1 = right target)
            right_target_epochs = trial_epochs[trial_targets[:, 1] == 1]
            if len(right_target_epochs) > 0:
                right_target_averaged = np.mean(right_target_epochs, axis=0)  # Average across epochs (axis=0)
            else:
                right_target_averaged = np.zeros(trial_epochs.shape[1:])  # If no right target, set to zeros

            # Store the averaged epochs for the current trial
            left_target_averaged_trials.append(left_target_averaged)
            right_target_averaged_trials.append(right_target_averaged)

        # Convert lists to numpy arrays
        left_target_averaged_trials = np.array(left_target_averaged_trials)
        right_target_averaged_trials = np.array(right_target_averaged_trials)
        print("SHAPE OF left_target_averaged_trials:", left_target_averaged_trials.shape)

        # Concatenate the averaged features from both groups (left and right)
        # Now it is 2 * channels, but you could also do 2 * features
        X_p300_averaged = np.concatenate([left_target_averaged_trials, right_target_averaged_trials], axis=2)

        print("SHAPE OF X_p300_averaged:", X_p300_averaged.shape)
        # Flatten the last two dimensions (channels and features)
        X_p300_averaged_flat = X_p300_averaged.reshape(X_p300_averaged.shape[0], -1)
        print("SHAPE of X_p300_averaged_flat:", X_p300_averaged_flat.shape)
        # Concatenate the features of the P300, alpha and c-VEP
        X_combined = np.concatenate([X_p300_averaged_flat, X_alpha, X_cvep], axis=1)

        # Cross-validation setup
        fold_accuracies = []
        fold_roc_auc = []
        n_folds = 4
        n_trials = X_combined.shape[0] // n_folds
        folds = np.repeat(np.arange(n_folds), n_trials)

        for i_fold in range(n_folds):
            print(f"  Fold {i_fold + 1}/{n_folds}")

            # Split train and test data
            # Assert dat alle y gelijk zijn
            X_trn, y_trn = X_combined[folds != i_fold, :], y_cvep[folds != i_fold]
            X_tst, y_tst = X_combined[folds == i_fold, :], y_cvep[folds == i_fold]

            # LDA classifier
            lda = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=LedoitWolf())
            lda.fit(X_trn, y_trn)

            # Make predictions
            y_pred = lda.predict(X_tst)

            # Compute performance metrics
            accuracy = accuracy_score(y_tst, y_pred)
            print(f"Fold {i_fold+1} accuracy: {accuracy}")

            fold_accuracies.append(accuracy)

        # Calculate average accuracy over all folds
        average_accuracy = np.mean(fold_accuracies)
        subject_accuracies.append(fold_accuracies)
        subject_std.append(np.std(fold_accuracies))  # compute std over folds
        print(f"Average accuracy over all folds: {average_accuracy}\n STD: {np.std(fold_accuracies):.2f}")

grand_average_accuracy = np.mean(subject_accuracies)
print(f"\nGrand average accuracy across all subjects: {grand_average_accuracy:.4f}")

# Convert list to array
subject_accuracies_array = np.array(subject_accuracies)
subject_std_array = np.array(subject_std)

# Save the results
save_path = os.path.join(save_dir, 'cvep_p300_alpha_hybrid_accuracy_results.npz')
np.savez(save_path,
         accuracy=subject_accuracies_array,
         std=subject_std_array,
         subjects=subjects,
         tasks=tasks,
         n_folds=n_folds,
         method='hybrid')

print(f"\nSaved results to: {save_path}")




Loading features for VPpdia, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 62, 6)
SHAPE OF left_target_averaged_trials: (80, 62, 6)
SHAPE OF X_p300_averaged: (80, 62, 12)
SHAPE of X_p300_averaged_flat: (80, 744)
  Fold 1/4
Fold 1 accuracy: 0.75
  Fold 2/4
Fold 2 accuracy: 0.8
  Fold 3/4
Fold 3 accuracy: 0.6
  Fold 4/4
Fold 4 accuracy: 0.85
Average accuracy over all folds: 0.75
 STD: 0.09
Loading features for VPpdib, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 61, 6)
SHAPE OF left_target_averaged_trials: (80, 61, 6)
SHAPE OF X_p300_averaged: (80, 61, 12)
SHAPE of X_p300_averaged_flat: (80, 732)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 1.0
 STD: 0.00
Loading features for VPpdic, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)


Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 0.9
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 0.975
 STD: 0.04
Loading features for VPpdit, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 64, 6)
SHAPE OF left_target_averaged_trials: (80, 64, 6)
SHAPE OF X_p300_averaged: (80, 64, 12)
SHAPE of X_p300_averaged_flat: (80, 768)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 1.0
 STD: 0.00
Loading features for VPpdiu, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 63, 6)
SHAPE OF left_target_averaged_trials: (80, 63, 6)
SHAPE OF X_p300_averaged: (80, 63, 12)
SHAPE of X_p300_averaged_flat: (80, 756)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 0.9
  Fold 3/4
Fold 3 accuracy: 0.85
  Fold 4/4
Fold 4 

In [14]:
print(subject_accuracies)

[0.75, 1.0]


In [23]:
# -- c-VEP AND ALPHA

# Paths
cvep_features_dir = '/Users/juliette/Desktop/thesis/features/c-VEP'  # where c-VEP features are stored
alpha_features_dir = '/Users/juliette/Desktop/thesis/features/alpha'  # where alpha features are stored
p300_features_dir = '/Users/juliette/Desktop/thesis/preprocessing/features/with_ICA'  # where P300 features are stored
save_dir = '/Users/juliette/Desktop/thesis/results/hybrid_simple/alpha+c-vep'


# Subject and task lists
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
tasks = ["covert"]
subject_accuracies = []
subject_std = []


# Loop over subjects and tasks
for subject in subjects:
    for task in tasks:
        print(f"Loading features for {subject}, task={task}")

        # Load c-VEP features
        cvep_file_path = os.path.join(cvep_features_dir, f"sub-{subject}_task-{task}_features.npz")
        if os.path.exists(cvep_file_path):
            cvep_data = np.load(cvep_file_path)
            X_cvep = cvep_data['features']  # Features for c-VEP task (trials x features)
            y_cvep = cvep_data['y']  # Labels for c-VEP task
            print(f"Loaded c-VEP features: {X_cvep.shape}")
        else:
            print(f"Warning: c-VEP features file not found for {subject}, task={task}")

        # Load alpha features
        alpha_file_path = os.path.join(alpha_features_dir, f"sub-{subject}_task-{task}_alpha_features.npz")
        if os.path.exists(alpha_file_path):
            alpha_data = np.load(alpha_file_path)
            X_alpha = alpha_data['features']  # Features for alpha task (trials x features)
            y_alpha = alpha_data['labels']  # Labels for alpha task
            print(f"Loaded alpha features: {X_alpha.shape}")
        else:
            print(f"Warning: alpha features file not found for {subject}, task={task}")

        # Concatenate the features of the P300, alpha and c-VEP
        X_combined = np.concatenate([X_alpha, X_cvep], axis=1)

        # Cross-validation setup
        fold_accuracies = []
        fold_roc_auc = []
        n_folds = 4
        n_trials = X_combined.shape[0] // n_folds
        folds = np.repeat(np.arange(n_folds), n_trials)

        for i_fold in range(n_folds):
            print(f"  Fold {i_fold + 1}/{n_folds}")

            # Split train and test data
            X_trn, y_trn = X_combined[folds != i_fold, :], y_cvep[folds != i_fold]
            X_tst, y_tst = X_combined[folds == i_fold, :], y_cvep[folds == i_fold]

            # LDA classifier
            lda = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=LedoitWolf())
            lda.fit(X_trn, y_trn)

            # Make predictions
            y_pred = lda.predict(X_tst)

            # Compute performance metrics
            accuracy = accuracy_score(y_tst, y_pred)
            print(f"Fold {i_fold+1} accuracy: {accuracy}")

            fold_accuracies.append(accuracy)

        # Calculate average accuracy over all folds
        average_accuracy = np.mean(fold_accuracies)
        subject_accuracies.append(fold_accuracies)
        subject_std.append(np.std(fold_accuracies))  # compute std over folds
        print(f"Average accuracy over all folds: {average_accuracy}\n STD: {np.std(fold_accuracies):.2f}")

grand_average_accuracy = np.mean(subject_accuracies)
print(f"\nGrand average accuracy across all subjects: {grand_average_accuracy:.4f}")

# Convert list to array
subject_accuracies_array = np.array(subject_accuracies)
subject_std_array = np.array(subject_std)

# Save the results
save_path = os.path.join(save_dir, 'cvep_alpha_hybrid_accuracy_results.npz')
np.savez(save_path,
         accuracy=subject_accuracies_array,
         std=subject_std_array,
         subjects=subjects,
         tasks=tasks,
         n_folds=n_folds,
         method='hybrid')

print(f"\nSaved results to: {save_path}")




Loading features for VPpdia, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
  Fold 1/4
Fold 1 accuracy: 0.6
  Fold 2/4
Fold 2 accuracy: 0.7
  Fold 3/4
Fold 3 accuracy: 0.55
  Fold 4/4
Fold 4 accuracy: 0.5
Average accuracy over all folds: 0.5874999999999999
 STD: 0.07
Loading features for VPpdib, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 1.0
 STD: 0.00
Loading features for VPpdic, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 0.85
  Fold 4/4
Fold 4 accuracy: 0.95
Average accuracy over all folds: 0.95
 STD: 0.06
Loading features for VPpdid, task=covert
Loaded c-VEP features: (80, 2)
Loaded alpha features: (80, 4)
  Fold 1/4
Fold 1 accuracy: 0.9
  Fold 2/

In [52]:
# -- ALPHA AND P300

# Paths
cvep_features_dir = '/Users/juliette/Desktop/thesis/features/c-VEP'  # where c-VEP features are stored
alpha_features_dir = '/Users/juliette/Desktop/thesis/features/alpha'  # where alpha features are stored
p300_features_dir = '/Users/juliette/Desktop/thesis/preprocessing/features/with_ICA'  # where P300 features are stored
save_dir = '/Users/juliette/Desktop/thesis/results/hybrid_simple/alpha+p300'


# Subject and task lists
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
tasks = ["covert"]
subject_accuracies = []
subject_std = []

# Loop over subjects and tasks
for subject in subjects:
    for task in tasks:
        print(f"Loading features for {subject}, task={task}")

        # Load alpha features
        alpha_file_path = os.path.join(alpha_features_dir, f"sub-{subject}_task-{task}_alpha_features.npz")
        if os.path.exists(alpha_file_path):
            alpha_data = np.load(alpha_file_path)
            X_alpha = alpha_data['features']  # Features for alpha task (trials x features)
            y_alpha = alpha_data['labels']  # Labels for alpha task
            print(f"Loaded alpha features: {X_alpha.shape}")
        else:
            print(f"Warning: alpha features file not found for {subject}, task={task}")

        # Load P300 features
        file_path_p300 = os.path.join(p300_features_dir, f"sub-{subject}", f"sub-{subject}_task-{task}_p300_features_ICA.npz")
        if os.path.exists(file_path_p300):
            p300_features = np.load(file_path_p300)

            X_p300 = p300_features['X']  # Shape: trials x epochs x channels x features
            y_p300 = p300_features['y']  # Labels indicating cued side: trials
            z_p300 = p300_features['z']  # Left and right targets: trials x epochs x sides
            fs_p300 = p300_features['fs']
            print(f"Loaded p300 features: {X_p300.shape}")

        # Flatten z_p300 to (trials * epochs, sides)
        z_p300_flat = z_p300.reshape(-1, z_p300.shape[2])  # Shape: (trials * epochs, sides)

        # Find the number of epochs per trial
        epochs_per_trial = X_p300.shape[1]  # Number of epochs per trial

        # Initialize lists to store averaged epochs for each trial
        left_target_averaged_trials = []
        right_target_averaged_trials = []

        # Loop over each trial
        for i_trial in range(X_p300.shape[0]):
            # Extract the epochs for the current trial (shape: epochs x channels x features)
            trial_epochs = X_p300[i_trial]

            # Extract the target labels for the current trial (shape: epochs x 2)
            trial_targets = z_p300_flat[i_trial * epochs_per_trial: (i_trial + 1) * epochs_per_trial]

            # Average epochs for left target (assuming 1 = left target)
            left_target_epochs = trial_epochs[trial_targets[:, 0] == 1]
            if len(left_target_epochs) > 0:
                left_target_averaged = np.mean(left_target_epochs, axis=0)  # Average across epochs (axis=0)
            else:
                left_target_averaged = np.zeros(trial_epochs.shape[1:])  # If no left target, set to zeros

            # Average epochs for right target (assuming 1 = right target)
            right_target_epochs = trial_epochs[trial_targets[:, 1] == 1]
            if len(right_target_epochs) > 0:
                right_target_averaged = np.mean(right_target_epochs, axis=0)  # Average across epochs (axis=0)
            else:
                right_target_averaged = np.zeros(trial_epochs.shape[1:])  # If no right target, set to zeros

            # Store the averaged epochs for the current trial
            left_target_averaged_trials.append(left_target_averaged)
            right_target_averaged_trials.append(right_target_averaged)

        # Convert lists to numpy arrays
        left_target_averaged_trials = np.array(left_target_averaged_trials)
        right_target_averaged_trials = np.array(right_target_averaged_trials)

        # Concatenate the averaged features from both groups (left and right)
        X_p300_averaged = np.concatenate([left_target_averaged_trials, right_target_averaged_trials], axis=2)

        # Flatten the last two dimensions (channels and features)
        X_p300_averaged_flat = X_p300_averaged.reshape(X_p300_averaged.shape[0], -1)
        
        # Concatenate the features of the P300, alpha and c-VEP
        X_combined = np.concatenate([X_p300_averaged_flat, X_alpha], axis=1)

        # Cross-validation setup
        fold_accuracies = []
        fold_roc_auc = []
        n_folds = 4
        n_trials = X_combined.shape[0] // n_folds
        folds = np.repeat(np.arange(n_folds), n_trials)

        for i_fold in range(n_folds):
            print(f"  Fold {i_fold + 1}/{n_folds}")

            # Split train and test data
            X_trn, y_trn = X_combined[folds != i_fold, :], y_p300[folds != i_fold]
            X_tst, y_tst = X_combined[folds == i_fold, :], y_p300[folds == i_fold]

            # LDA classifier
            lda = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=LedoitWolf())
            lda.fit(X_trn, y_trn)

            # Make predictions
            y_pred = lda.predict(X_tst)

            # Compute performance metrics
            accuracy = accuracy_score(y_tst, y_pred)
            print(f"Fold {i_fold+1} accuracy: {accuracy}")

            fold_accuracies.append(accuracy)

        # Calculate average accuracy over all folds
        average_accuracy = np.mean(fold_accuracies)
        subject_accuracies.append(fold_accuracies)
        subject_std.append(np.std(fold_accuracies))  # compute std over folds
        print(f"Average accuracy over all folds: {average_accuracy}\n STD: {np.std(fold_accuracies):.2f}")

grand_average_accuracy = np.mean(subject_accuracies)
print(f"\nGrand average accuracy across all subjects: {grand_average_accuracy:.4f}")

# Convert list to array
subject_accuracies_array = np.array(subject_accuracies)
subject_std_array = np.array(subject_std)

# Save the results
save_path = os.path.join(save_dir, 'p300_alpha_hybrid_accuracy_results.npz')
np.savez(save_path,
         accuracy=subject_accuracies_array,
         std=subject_std_array,
         subjects=subjects,
         tasks=tasks,
         n_folds=n_folds,
         method='hybrid')

print(f"\nSaved results to: {save_path}")


Loading features for VPpdia, task=covert
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 62, 6)
  Fold 1/4
Fold 1 accuracy: 0.55
  Fold 2/4
Fold 2 accuracy: 0.7
  Fold 3/4
Fold 3 accuracy: 0.6
  Fold 4/4
Fold 4 accuracy: 0.5
Average accuracy over all folds: 0.5875
 STD: 0.07
Loading features for VPpdib, task=covert
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 61, 6)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 1.0
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 1.0
 STD: 0.00
Loading features for VPpdic, task=covert
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 63, 6)
  Fold 1/4
Fold 1 accuracy: 1.0
  Fold 2/4
Fold 2 accuracy: 1.0
  Fold 3/4
Fold 3 accuracy: 0.85
  Fold 4/4
Fold 4 accuracy: 1.0
Average accuracy over all folds: 0.9625
 STD: 0.06
Loading features for VPpdid, task=covert
Loaded alpha features: (80, 4)
Loaded p300 features: (80, 80, 63, 6)
  Fold 1/4
Fold 1 accura

Fold 1 accuracy: 0.7
  Fold 2/4
Fold 2 accuracy: 0.9
  Fold 3/4
Fold 3 accuracy: 0.9
  Fold 4/4
Fold 4 accuracy: 0.7
Average accuracy over all folds: 0.8
 STD: 0.10

Grand average accuracy across all subjects: 0.9220

Saved results to: /Users/juliette/Desktop/thesis/results/hybrid_simple/alpha+p300/p300_alpha_hybrid_accuracy_results.npz


In [53]:
# -- c-VEP AND P300

# Paths
cvep_features_dir = '/Users/juliette/Desktop/thesis/features/c-VEP'  # where c-VEP features are stored
alpha_features_dir = '/Users/juliette/Desktop/thesis/features/alpha'  # where alpha features are stored
p300_features_dir = '/Users/juliette/Desktop/thesis/preprocessing/features/with_ICA'  # where P300 features are stored
save_dir = '/Users/juliette/Desktop/thesis/results/hybrid_simple/p300+cvep'
subject_std = []

# Subject and task lists
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
tasks = ["covert"]
subject_accuracies = []

# Loop over subjects and tasks
for subject in subjects:
    for task in tasks:
        print(f"Loading features for {subject}, task={task}")

        # Load c-VEP features
        cvep_file_path = os.path.join(cvep_features_dir, f"sub-{subject}_task-{task}_features.npz")
        if os.path.exists(cvep_file_path):
            cvep_data = np.load(cvep_file_path)
            X_cvep = cvep_data['features']  # Features for c-VEP task (trials x features)
            y_cvep = cvep_data['y']  # Labels for c-VEP task
            print(f"Loaded c-VEP features: {X_cvep.shape}")
        else:
            print(f"Warning: c-VEP features file not found for {subject}, task={task}")

        # Load P300 features
        file_path_p300 = os.path.join(p300_features_dir, f"sub-{subject}", f"sub-{subject}_task-{task}_p300_features_ICA.npz")
        if os.path.exists(file_path_p300):
            p300_features = np.load(file_path_p300)

            X_p300 = p300_features['X']  # Shape: trials x epochs x channels x features
            y_p300 = p300_features['y']  # Labels indicating cued side: trials
            z_p300 = p300_features['z']  # Left and right targets: trials x epochs x sides
            fs_p300 = p300_features['fs']
            print(f"Loaded p300 features: {X_p300.shape}")

        # Flatten z_p300 to (trials * epochs, sides)
        z_p300_flat = z_p300.reshape(-1, z_p300.shape[2])  # Shape: (trials * epochs, sides)

        # Find the number of epochs per trial
        epochs_per_trial = X_p300.shape[1]  # Number of epochs per trial

        # Initialize lists to store averaged epochs for each trial
        left_target_averaged_trials = []
        right_target_averaged_trials = []

        # Loop over each trial
        for i_trial in range(X_p300.shape[0]):
            # Extract the epochs for the current trial (shape: epochs x channels x features)
            trial_epochs = X_p300[i_trial]

            # Extract the target labels for the current trial (shape: epochs x 2)
            trial_targets = z_p300_flat[i_trial * epochs_per_trial: (i_trial + 1) * epochs_per_trial]

            # Average epochs for left target
            left_target_epochs = trial_epochs[trial_targets[:, 0] == 1]
            if len(left_target_epochs) > 0:
                left_target_averaged = np.mean(left_target_epochs, axis=0)  # Average across epochs (axis=0)
            else:
                left_target_averaged = np.zeros(trial_epochs.shape[1:])  # If no left target, set to zeros

            # Average epochs for right target
            right_target_epochs = trial_epochs[trial_targets[:, 1] == 1]
            if len(right_target_epochs) > 0:
                right_target_averaged = np.mean(right_target_epochs, axis=0)  # Average across epochs (axis=0)
            else:
                right_target_averaged = np.zeros(trial_epochs.shape[1:])  # If no right target, set to zeros

            # Store the averaged epochs for the current trial
            left_target_averaged_trials.append(left_target_averaged)
            right_target_averaged_trials.append(right_target_averaged)

        # Convert lists to numpy arrays
        left_target_averaged_trials = np.array(left_target_averaged_trials)
        right_target_averaged_trials = np.array(right_target_averaged_trials)

        # Concatenate the averaged features from both groups (left and right)
        X_p300_averaged = np.concatenate([left_target_averaged_trials, right_target_averaged_trials], axis=2)

        
        # Flatten the last two dimensions (channels and features)
        X_p300_averaged_flat = X_p300_averaged.reshape(X_p300_averaged.shape[0], -1)
        
        # Concatenate the features of the P300, alpha and c-VEP
        X_combined = np.concatenate([X_p300_averaged_flat, X_alpha, X_cvep], axis=1)

        # Cross-validation setup
        fold_accuracies = []
        fold_roc_auc = []
        n_folds = 4
        n_trials = X_combined.shape[0] // n_folds
        folds = np.repeat(np.arange(n_folds), n_trials)

        for i_fold in range(n_folds):
            print(f"  Fold {i_fold + 1}/{n_folds}")

            # Split train and test data
            X_trn, y_trn = X_combined[folds != i_fold, :], y_cvep[folds != i_fold]
            X_tst, y_tst = X_combined[folds == i_fold, :], y_cvep[folds == i_fold]

            # LDA classifier
            lda = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=LedoitWolf())
            lda.fit(X_trn, y_trn)

            # Make predictions
            y_pred = lda.predict(X_tst)

            # Compute performance metrics
            accuracy = accuracy_score(y_tst, y_pred)
            print(f"Fold {i_fold+1} accuracy: {accuracy}")

            fold_accuracies.append(accuracy)

        # Calculate average accuracy over all folds
        average_accuracy = np.mean(fold_accuracies)
        subject_accuracies.append(fold_accuracies)
        subject_std.append(np.std(fold_accuracies))  # compute std over folds
        print(f"Average accuracy over all folds: {average_accuracy}\n STD: {np.std(fold_accuracies):.2f}")

grand_average_accuracy = np.mean(subject_accuracies)
print(f"\nGrand average accuracy across all subjects: {grand_average_accuracy:.4f}")

# Convert list to array
subject_accuracies_array = np.array(subject_accuracies)
subject_std_array = np.array(subject_std)

# Save the results
save_path = os.path.join(save_dir, 'p300_cvep_hybrid_accuracy_results.npz')
np.savez(save_path,
         accuracy=subject_accuracies_array,
         std=subject_std_array,
         subjects=subjects,
         tasks=tasks,
         n_folds=n_folds,
         method='hybrid')

print(f"\nSaved results to: {save_path}")


Loading features for VPpdia, task=covert
Loaded c-VEP features: (80, 2)
Loaded p300 features: (80, 80, 62, 6)
  Fold 1/4
Fold 1 accuracy: 0.55
  Fold 2/4
Fold 2 accuracy: 0.7
  Fold 3/4
Fold 3 accuracy: 0.6
  Fold 4/4
Fold 4 accuracy: 0.8
Average accuracy over all folds: 0.6625000000000001
 STD: 0.10
Loading features for VPpdib, task=covert
Loaded c-VEP features: (80, 2)
Loaded p300 features: (80, 80, 61, 6)
  Fold 1/4
Fold 1 accuracy: 0.85
  Fold 2/4
Fold 2 accuracy: 0.7
  Fold 3/4
Fold 3 accuracy: 0.8
  Fold 4/4
Fold 4 accuracy: 0.65
Average accuracy over all folds: 0.7499999999999999
 STD: 0.08
Loading features for VPpdic, task=covert
Loaded c-VEP features: (80, 2)
Loaded p300 features: (80, 80, 63, 6)
  Fold 1/4
Fold 1 accuracy: 0.6
  Fold 2/4
Fold 2 accuracy: 0.55
  Fold 3/4
Fold 3 accuracy: 0.7
  Fold 4/4
Fold 4 accuracy: 0.75
Average accuracy over all folds: 0.6499999999999999
 STD: 0.08
Loading features for VPpdid, task=covert
Loaded c-VEP features: (80, 2)
Loaded p300 features

Fold 4 accuracy: 0.7
Average accuracy over all folds: 0.7749999999999999
 STD: 0.06
Loading features for VPpdizc, task=covert
Loaded c-VEP features: (80, 2)
Loaded p300 features: (80, 80, 58, 6)
  Fold 1/4
Fold 1 accuracy: 0.85
  Fold 2/4
Fold 2 accuracy: 0.95
  Fold 3/4
Fold 3 accuracy: 0.95
  Fold 4/4
Fold 4 accuracy: 0.9
Average accuracy over all folds: 0.9125
 STD: 0.04

Grand average accuracy across all subjects: 0.8168

Saved results to: /Users/juliette/Desktop/thesis/results/hybrid_simple/p300+cvep/p300_cvep_hybrid_accuracy_results.npz


These averaged responses for the left and right conditions are appended to separate lists, allowing further analysis such as classification or visualization based on condition-specific averaged brain activity.


In [31]:
# Load the .npz file
results = np.load('/Users/juliette/Desktop/thesis/results/hybrid_simple/p300+cvep/p300_cvep_hybrid_accuracy_results.npz')

# Access the arrays inside the file
accuracy_array = results['accuracy']  # Shape: (n_subjects, n_folds)
std_array = results['std']            # Shape: (n_subjects,)
print(std_array)

[0.09601432 0.07905694 0.07905694 0.02165064 0.06123724 0.025
 0.07905694 0.06495191 0.08196798 0.075      0.12930101 0.07905694
 0.07905694 0.04330127 0.11388042 0.05448624 0.073951   0.05448624
 0.12930101 0.073951   0.09601432 0.06495191 0.05448624 0.08926786
 0.02165064 0.12990381 0.125      0.0559017  0.04145781]
