# Pairwise combination: alpha + P300

The following structures will be incorporated into the code:
1. PSD to extract the alpha and ERP features and shrinkage LDA (sLDA) for classification.
2. PSD to extract the alpha and ERP features, and block-Toeplitz LDA for classification.
3. PSD to extract the alpha and ERP features, and RDA for classification.
4. CSP to extract the alpha and ERP features, and sLDA for classification.
5. CSP to extract the alpha and ERP features, and block-Toeplitz LDA for classification.
6. CSP to extract the alpha and ERP features, and RDA for classification.

Start with the focus on 1 and 4. Then do 2 and 5, and finally 3 and 6.

### Alpha (PSD) + P300
Below is the working code with an average of 75% accuracy and a standard deviation of 0.03.

In [21]:
import numpy as np
import os
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from mne.time_frequency import psd_array_multitaper
import warnings
import mne

# Suppress all warnings
mne.set_log_level('warning')

# Directory containing the preprocessed data
file_dir = '/Users/juliette/Desktop/thesis/preprocessing/hybrid_preprocessing'
decoding_results_dir = '/Users/juliette/Desktop/thesis/results/alpha+p300'

# Define the alpha range for PSD calculation
min_bin = 8
max_bin = 12

# Initialize results storage
results = []

# List of subjects
subjects = ["VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij",
            "VPpdik", "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit",
            "VPpdiu", "VPpdiv", "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"]

subjects = ["VPpdik", "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit"]
# Load Data for each subject
for subject in subjects:
    print(f"Processing {subject}...")

    try:
        # Load preprocessed data
        fn = os.path.join(file_dir, f"sub-{subject}_task-covert_c-VEP+P300_ICA.npz")
        tmp = np.load(fn)
        

        X = tmp["X"]  # EEG data matrix (trials, channels, samples)
        print(X.shape)
        
        print(X.shape)
        y = tmp["y"]  # Labels
        z = tmp["z"]  # Target presence (trials, epochs, sides)
        V = tmp["V"]  # One code cycle (classes, samples)
        fs = tmp["fs"].flatten()[0]

    except Exception as e:
        print(f"Error loading data for subject {subject}: {e}")
        continue

    # Cross-validation
    fold_accuracies = []
    n_folds = 4
    n_trials = X.shape[0] // n_folds
    folds = np.repeat(np.arange(n_folds), n_trials)

    new_feature_vectors = []
    lda_1 = LDA(solver="svd")

    for i_fold in range(n_folds):
        print(f"  Fold {i_fold + 1}/{n_folds}")

        # Split train and test data
        X_trn, y_trn, z_trn = X[folds != i_fold, :, :], y[folds != i_fold], z[folds != i_fold, :, :]
        X_tst, y_tst, z_tst = X[folds == i_fold, :, :], y[folds == i_fold], z[folds == i_fold, :, :]

        # Reshape X to (trials, channels x samples) for LDA
        X_trn_reshaped = X_trn.reshape(X_trn.shape[0], -1)
        X_tst_reshaped = X_tst.reshape(X_tst.shape[0], -1)

        # --- P300 ---

        # Extract stimulus condition correctly
        print("z_tst.shape:", z_tst.shape)
        side_tst = z_tst[:, 2, :]
        z_left = side_tst[:, 0] == 1
        z_right = side_tst[:, 1] == 1

        # Train LDA (for extracting correlation of P300)
        print("Fitting LDA for P300.")
        lda_1.fit(X_trn_reshaped, y_trn)

        # Predict on test data
        print("Predicting P300!")
        y_hat = lda_1.predict(X_tst_reshaped)

        # Compute correlation, [0,1] makes sure to get the off-diagonal elements that represent the correlation
        rho_left = np.corrcoef(y_hat[z_left], y_tst[z_left])[0, 1]
        rho_right = np.corrcoef(y_hat[z_right], y_tst[z_right])[0, 1]

        print(f"    Correlation (Left Stimulus): {rho_left:.3f}")
        print(f"    Correlation (Right Stimulus): {rho_right:.3f}")

        # --- Alpha PSD ---

        # Extract alpha features. For all trials, average over frequency bin (8-12 Hz) per channel
        psd_features_trn = np.array([
            psd_array_multitaper(trial, sfreq=fs, fmin=min_bin, fmax=max_bin, bandwidth=1)[0].mean(axis=1)
            for trial in X_trn])

        psd_features_tst = np.array([
            psd_array_multitaper(trial, sfreq=fs, fmin=min_bin, fmax=max_bin, bandwidth=1)[0].mean(axis=1)
            for trial in X_tst])

        print("Shape of psd_features_trn:", psd_features_trn.shape)

        # --- Concatenation ---
        # Concatenate the P300 correlation and the alpha PSD features
        combined_features_trn = np.column_stack([rho_left * np.ones(X_trn.shape[0]),  # Left stimulus correlation
                                                 rho_right * np.ones(X_trn.shape[0]),  # Right stimulus correlation
                                                 psd_features_trn])  # Alpha PSD features

        combined_features_tst = np.column_stack([rho_left * np.ones(X_tst.shape[0]),  # Left stimulus correlation
                                                 rho_right * np.ones(X_tst.shape[0]),  # Right stimulus correlation
                                                 psd_features_tst])  # Alpha PSD features

        # Replace NaN values with 0
        combined_features_trn = np.nan_to_num(combined_features_trn, nan=0)
        combined_features_tst = np.nan_to_num(combined_features_tst, nan=0)

        print(f"Shape of combined_features_trn: {combined_features_trn.shape}")
        print(f"Shape of combined_features_tst: {combined_features_tst.shape}")

        # --- Ensemble ---
        # Train final LDA on new feature vectors (combined features)
        lda2 = LDA(solver="svd")
        lda2.fit(combined_features_trn, y_trn)

        # Final prediction
        y_final = lda2.predict(combined_features_tst)

        # Calculate accuracy
        accuracy = np.mean(y_final == y_tst)
        fold_accuracies.append(accuracy)

        print(f"Fold {i_fold + 1} Accuracy: {accuracy:.3f}")

    # Compute subject-level results
    accuracy = np.round(np.mean(fold_accuracies), 2)
    se = np.round(np.std(fold_accuracies) / np.sqrt(n_folds), 2)
    results.append((subject, accuracy, se))

    # Print average accuracy per subject
    print(f"Average Accuracy for {subject}: {accuracy:.3f}")

# # Save results
# if not os.path.exists(decoding_results_dir):
#         os.makedirs(decoding_results_dir)
# results_save_path = join(decoding_results_dir, f"covert_alpha_p300_results_concat.npy")     
# np.save(results_save_path, results_array)    

# Convert results to a structured numpy array
results_array = np.array(
    results, dtype=[('subject', 'U10'), ('accuracy', 'f4'), ('standard_error', 'f4')]
)

# Overall results
overall_accuracy = np.round(results_array['accuracy'].mean(), 2)
overall_se = np.round(results_array['standard_error'].mean(), 2)
print(f"Overall LDA accuracy with PSD: {overall_accuracy:.2f} ± {overall_se:.2f}")

Processing VPpdik...
(80, 63, 2400)
(80, 63, 2400)
  Fold 1/4
z_tst.shape: (20, 80, 2)
Fitting LDA for P300.
Predicting P300!
    Correlation (Left Stimulus): -0.500
    Correlation (Right Stimulus): 0.500


KeyboardInterrupt: 

## Most similar to Egan

In [24]:
import numpy as np
import os
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from mne.time_frequency import psd_array_multitaper
import warnings
import mne

# Suppress warnings
mne.set_log_level('warning')

# Directory setup
file_dir = '/Users/juliette/Desktop/thesis/preprocessing/hybrid_preprocessing'
decoding_results_dir = '/Users/juliette/Desktop/thesis/results/alpha+p300'

# Parameters
min_bin, max_bin = 8, 12  # Alpha range
p300_window = (0.3, 0.5)  # P300 time window in seconds, after stimulus
baseline_window = (-0.25, 0)  # Baseline window
fs = 250  # Sampling rate, adjust if different
erp_window = (0, 0.75)  # ERP time window (0 to 0.75 seconds) in seconds
baseline_window = (-0.25, 0)  # Baseline period (0.25 s before target)
window_size = 2 # Size for the sliding window to extract alpha
step_size = 0.5

# Initialize results storage
results = []

# List of subjects
subjects = ["VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij",
            "VPpdik", "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit",
            "VPpdiu", "VPpdiv", "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"]

# Main processing loop
for subject in subjects:
    print(f"Processing {subject}...")

    # Load preprocessed data
    fn = os.path.join(file_dir, f"sub-{subject}_task-covert_alpha+p300.npz")
    tmp = np.load(fn)

    X = tmp["X"]  # EEG data matrix (trials, channels, samples)
    y = tmp["y"]  # Labels (cued side)
    z = tmp["z"]  # Target presence (trials, epochs, sides)
    V = tmp["V"]  # One code cycle (classes, samples)
    fs = tmp["fs"].flatten()[0]
       
    # Calculates trial length in seconds
    trial_length = X.shape[2] / fs
    
    # Create time vector for P300 analysis
    trial_times = np.linspace(0, trial_length, X.shape[2])

    # Cross-validation
    fold_accuracies = []
    n_folds = 4
    n_trials = X.shape[0] // n_folds
    folds = np.repeat(np.arange(n_folds), n_trials)

    for i_fold in range(n_folds):
        print(f"  Fold {i_fold + 1}/{n_folds}")

        # Split train and test data
        X_trn, y_trn, z_trn = X[folds != i_fold, :, :], y[folds != i_fold], z[folds != i_fold, :, :]
        X_tst, y_tst, z_tst = X[folds == i_fold, :, :], y[folds == i_fold], z[folds == i_fold, :, :]

        
        # --- Alpha Feature Extraction ---
        
        # Get dataset shapes
        num_trials_trn, num_channels_trn, num_samples_trn = X_trn.shape
        num_trials_tst, num_channels_tst, num_samples_tst = X_tst.shape

        window_samples = int(window_size * fs)
        step_samples = int(step_size * fs)

        # Compute number of windows per trial
        num_windows_trn = (num_samples_trn - window_samples) // step_samples + 1
        num_windows_tst = (num_samples_tst - window_samples) // step_samples + 1

        # Initialize storage for alpha power
        alpha_power_trn = np.zeros((num_trials_trn, num_channels_trn, num_windows_trn))
        alpha_power_tst = np.zeros((num_trials_tst, num_channels_tst, num_windows_tst))

        # Process training data
        for trial in range(num_trials_trn):
            for i in range(num_windows_trn):
                
                # Start and end of the current window to extract this window
                start = i * step_samples
                end = start + window_samples
                window_data = X_trn[trial, :, start:end]  # (channels, window_samples)

                # Compute PSD using multitaper method for every window
                psd, freqs = psd_array_multitaper(window_data, sfreq=fs, fmin=min_bin, fmax=max_bin, bandwidth=1)

                # Average power in alpha band over the frequency range, store it with current trial, all channels and this window
                alpha_power_trn[trial, :, i] = psd.mean(axis=1)  # (channels,)

        # Average across windows for final feature extraction --> final alpha power feature per trial and channel
        alpha_features_trn = alpha_power_trn.mean(axis=2)  # Shape (trials, channels)

        # Process testing data
        for trial in range(num_trials_tst):
            for i in range(num_windows_tst):
                
                # Start and end of the current window to extract this window
                start = i * step_samples
                end = start + window_samples
                window_data = X_tst[trial, :, start:end]  # (channels, window_samples)

                # Compute PSD using multitaper method for every window
                psd, freqs = psd_array_multitaper(window_data, sfreq=fs, fmin=min_bin, fmax=max_bin, bandwidth=1)

                # Average power in alpha band over the frequency range, store it with current trial, all channels and this window
                alpha_power_tst[trial, :, i] = psd.mean(axis=1)  # (channels,)

        # Average across windows for final feature extraction --> final alpha power feature per trial and channel.
        alpha_features_tst = alpha_power_tst.mean(axis=2)  # Shape (trials, channels)

        
        # --- ERP Feature Extraction (P300) ---
        
        # Convert time windows to sample indices
        erp_samples = (int(erp_window[0] * fs), int(erp_window[1] * fs))
        baseline_samples = (int(baseline_window[0] * fs), int(baseline_window[1] * fs))
        print("erp_samples:", erp_samples)

        # Loop over test trials and assign attended and unattended trials based on cued_side
        attended_trials = []
        unattended_trials = []

        for t_idx in range(len(y_tst)):
            cued_side = y_tst[t_idx]  # Get cued side (0 or 1)

            # Create event vectors & ground truth for cued side
            left_targets = z_tst[t_idx][:, 0]  # Left targets
            right_targets = z_tst[t_idx][:, 1]  # Right targets

            # Attended trials are those where cued_side matches the target
            if cued_side == 0:  # If cued side is left
                attended_trials.append(left_targets)  # Target is left
                unattended_trials.append(right_targets)  # Non-target is right
            else:  # If cued side is right
                attended_trials.append(right_targets)  # Target is right
                unattended_trials.append(left_targets)  # Non-target is left

        # Compute ERP by averaging over target-locked epochs
        attended_erp = np.mean(X_tst[attended_trials, :, erp_samples[0]:erp_samples[1]], axis=1) # Only the attended trials, all channels, and ERP window
        unattended_erp = np.mean(X_tst[unattended_trials, :, erp_samples[0]:erp_samples[1]], axis=1)
        print("attended_erp:",attended_erp)    
        
        # Baseline correction: subtract mean of baseline period
        attended_erp -= np.mean(attended_erp[:, baseline_samples[0]:baseline_samples[1]], axis=1, keepdims=True)
        unattended_erp -= np.mean(unattended_erp[:, baseline_samples[0]:baseline_samples[1]], axis=1, keepdims=True)
        
        
        # Reshape ERP features to ensure they match the number of trials
        # For training set
        attended_erp_flat_trn = attended_erp.flatten()
        unattended_erp_flat_trn = unattended_erp.flatten()

        # For testing set
        attended_erp_flat_tst = attended_erp.flatten()
        unattended_erp_flat_tst = unattended_erp.flatten()

        # Ensure that the feature matrices are consistently sized (trials, features)
        attended_features_trn = np.tile(attended_erp_flat_trn, (X_trn.shape[0], 1))
        unattended_features_trn = np.tile(unattended_erp_flat_trn, (X_trn.shape[0], 1))

        attended_features_tst = np.tile(attended_erp_flat_tst, (X_tst.shape[0], 1))
        unattended_features_tst = np.tile(unattended_erp_flat_tst, (X_tst.shape[0], 1))

        # Concatenate ERP features (attended, unattended) and alpha features
        combined_features_trn = np.column_stack([attended_features_trn, unattended_features_trn, alpha_features_trn])
        combined_features_tst = np.column_stack([attended_features_tst, unattended_features_tst, alpha_features_tst])

        # Ensure no NaNs are present
        combined_features_trn = np.nan_to_num(combined_features_trn, nan=0)
        combined_features_tst = np.nan_to_num(combined_features_tst, nan=0)

        
        # --- Ensemble ---
        
        # Train LDA
        lda = LDA(solver="svd")
        lda.fit(combined_features_trn, y_trn)

        # Make predictions on the test set
        y_pred = lda.predict(combined_features_tst)

        # Calculate accuracy
        accuracy = np.mean(y_pred == y_tst)
        fold_accuracies.append(accuracy)
        print(f"Fold {i_fold + 1} Accuracy: {accuracy:.3f}")

    # Compute subject-level results
    accuracy = np.round(np.mean(fold_accuracies), 2)
    se = np.round(np.std(fold_accuracies) / np.sqrt(n_folds), 2)
    results.append((subject, accuracy, se))
    print(f"Average Accuracy for {subject}: {accuracy:.3f}")

# Save and report results
results_array = np.array(
    results, dtype=[('subject', 'U10'), ('accuracy', 'f4'), ('standard_error', 'f4')]
)

overall_accuracy = np.round(results_array['accuracy'].mean(), 2)
overall_se = np.round(results_array['standard_error'].mean(), 2)
print(f"Overall Hybrid BCI Accuracy: {overall_accuracy:.2f} ± {overall_se:.2f}")

# Save results
if not os.path.exists(decoding_results_dir):
    os.makedirs(decoding_results_dir)
results_save_path = os.path.join(decoding_results_dir, "covert_hybrid_alpha_p300_egan_results.npy")     
np.save(results_save_path, results_array)


Processing VPpdia...
  Fold 1/4
erp_samples: (0, 90)
attended_erp: [[[ 3.9218653e-06  4.2053962e-06  4.0387440e-06 ...  4.1708104e-06
    2.2054160e-06  2.1079245e-06]
  [ 6.0769526e-06  4.5894117e-06  4.2751431e-06 ...  6.0202051e-06
    2.9386622e-06 -1.4478107e-07]
  [ 8.2181268e-06  7.9081819e-06  8.6731179e-06 ...  2.9184735e-06
    1.5519291e-07 -3.3862907e-06]
  ...
  [ 1.3906446e-06  3.2693933e-06  8.8202239e-07 ...  4.5312927e-06
    4.1484313e-06  4.0014415e-06]
  [-4.2869842e-06 -3.1689840e-06 -2.8112627e-06 ...  5.4742368e-06
    6.3795828e-06  6.5977010e-06]
  [ 1.6592248e-06  6.2399145e-06  3.5307785e-06 ...  8.4167741e-06
    6.6407802e-06  4.9112109e-06]]

 [[ 3.8277090e-06  4.1218982e-06  3.9311481e-06 ...  4.2021084e-06
    2.1673259e-06  2.0388147e-06]
  [ 6.1622750e-06  4.6331961e-06  4.2708389e-06 ...  6.0472776e-06
    2.8698973e-06 -2.7882385e-07]
  [ 8.4122858e-06  8.0509681e-06  8.7668577e-06 ...  2.8534996e-06
   -4.7664345e-10 -3.5943783e-06]
  ...
  [ 1.2198

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.800
  Fold 2/4
erp_samples: (0, 90)
attended_erp: [[[-1.6965621e-06 -6.7491155e-06 -1.0080703e-05 ...  1.2815756e-06
   -5.7821410e-07  6.8893549e-08]
  [-3.2843054e-06 -5.6061567e-06 -6.9012472e-06 ...  3.2993116e-06
    1.3089486e-06  2.5899176e-06]
  [-3.1750897e-06 -5.0578969e-06 -4.5438337e-06 ...  4.9471083e-07
   -1.3067681e-06  4.0531580e-07]
  ...
  [ 2.9199182e-06  1.0470972e-06 -1.8531028e-07 ... -7.3012661e-06
   -7.6920478e-06 -7.8729990e-06]
  [ 2.0602615e-06 -2.4828779e-07 -5.4900759e-08 ... -4.9756486e-06
   -5.0534477e-06 -3.2249204e-06]
  [ 4.3082769e-06  3.6849603e-06  2.6390835e-06 ... -1.1182621e-05
   -1.0480317e-05 -1.0223211e-05]]

 [[-1.6965625e-06 -6.7491155e-06 -1.0080703e-05 ...  1.2815756e-06
   -5.7821416e-07  6.8893577e-08]
  [-3.2843050e-06 -5.6061567e-06 -6.9012481e-06 ...  3.2993116e-06
    1.3089488e-06  2.5899176e-06]
  [-3.1750901e-06 -5.0578969e-06 -4.5438342e-06 ...  4.9471072e-07
   -1.3067680e-06  4.0531580e-07]
  ...
  [ 2.91

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.650
  Fold 3/4
erp_samples: (0, 90)
attended_erp: [[[ 1.94234344e-05  2.37893328e-05  1.33808135e-05 ... -1.43253828e-05
   -1.75093301e-05 -1.13399865e-05]
  [ 4.42669176e-07  5.79992943e-08  2.21346863e-06 ... -4.73630280e-06
   -4.72193960e-06 -3.96746782e-06]
  [-3.13614066e-07 -4.68031021e-06  6.30917043e-07 ... -4.29707916e-06
   -4.96522716e-06 -5.56514397e-06]
  ...
  [ 1.54077020e-06 -1.46033483e-06 -7.08538346e-06 ...  1.03560706e-06
    4.33236073e-06  4.40745407e-06]
  [ 4.92417757e-06  3.01258433e-06 -2.21010328e-06 ...  1.35307391e-06
    4.21539062e-06  4.06210711e-06]
  [-4.15042723e-06 -9.38164612e-06 -1.49736934e-05 ...  2.84481507e-06
    4.80112158e-06  3.70736848e-06]]

 [[ 1.91168947e-05  2.34468971e-05  1.31862589e-05 ... -1.41271103e-05
   -1.72775635e-05 -1.11452991e-05]
  [ 3.10143520e-07 -2.67750231e-08  2.14405259e-06 ... -4.65486119e-06
   -4.65049197e-06 -3.91046888e-06]
  [-4.19351039e-07 -4.70102486e-06  5.74305318e-07 ... -4.19292064e

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.500
  Fold 4/4


KeyboardInterrupt: 

## Implement Welch's PSD
The first cell is what I initially came up with, the second cell is more similar to Egan's paper. These two cells implement Welch's PSD instead of anothet multitaper method.

In [81]:
import numpy as np
import os
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from mne.time_frequency import psd_array_multitaper
import warnings
import mne

from scipy.signal import welch
# Suppress all warnings
mne.set_log_level('warning')

# Directory containing the preprocessed data
file_dir = '/Users/juliette/Desktop/thesis/preprocessing/hybrid_preprocessing'
decoding_results_dir = '/Users/juliette/Desktop/thesis/results/alpha+p300'

# Define the alpha range for PSD calculation
min_bin = 8
max_bin = 12

# Initialize results storage
results = []

# List of subjects
subjects = ["VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij",
            "VPpdik", "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit",
            "VPpdiu", "VPpdiv", "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"]

# Load Data for each subject
for subject in subjects:
    print(f"Processing {subject}...")

    try:
        # Load preprocessed data
        fn = os.path.join(file_dir, f"sub-{subject}_task-covert_alpha+p300.npz")
        tmp = np.load(fn)

        X = tmp["X"]  # EEG data matrix (trials, channels, samples)
        y = tmp["y"]  # Labels
        z = tmp["z"]  # Target presence (trials, epochs, sides)
        V = tmp["V"]  # One code cycle (classes, samples)
        fs = tmp["fs"].flatten()[0]

    except Exception as e:
        print(f"Error loading data for subject {subject}: {e}")
        continue

    # Cross-validation
    fold_accuracies = []
    n_folds = 4
    n_trials = X.shape[0] // n_folds
    folds = np.repeat(np.arange(n_folds), n_trials)

    new_feature_vectors = []
    lda_1 = LDA(solver="svd")

    for i_fold in range(n_folds):
        print(f"  Fold {i_fold + 1}/{n_folds}")

        # Split train and test data
        X_trn, y_trn, z_trn = X[folds != i_fold, :, :], y[folds != i_fold], z[folds != i_fold, :, :]
        X_tst, y_tst, z_tst = X[folds == i_fold, :, :], y[folds == i_fold], z[folds == i_fold, :, :]

        # Reshape X to (trials, channels x samples) for LDA
        X_trn_reshaped = X_trn.reshape(X_trn.shape[0], -1)
        X_tst_reshaped = X_tst.reshape(X_tst.shape[0], -1)

        # --- P300 ---

        # Extract stimulus condition correctly
        side_tst = z_tst[:, 2, :]
        z_left = side_tst[:, 0] == 1
        z_right = side_tst[:, 1] == 1

        # Train LDA (for extracting correlation of P300)
        lda_1.fit(X_trn_reshaped, y_trn)

        # Predict on test data
        y_hat = lda_1.predict(X_tst_reshaped)

        # Compute correlation, [0,1] makes sure to get the off-diagonal elements that represent the correlation
        rho_left = np.corrcoef(y_hat[z_left], y_tst[z_left])[0, 1]
        rho_right = np.corrcoef(y_hat[z_right], y_tst[z_right])[0, 1]
        # --- Alpha PSD ---

        nyquist_freq = fs //2
        
        # Compute PSD using Welch's method
        psd_features_trn = np.array([
            welch(trial, fs=fs, nperseg=nyquist_freq, scaling='density')[1] # Set number of data points in each segment to the Nyquist frequency
            [:, (min_bin <= freqs) & (freqs <= max_bin)].mean(axis=1) # Selects only the frequencies between min_bin and max_bin and averages over all channels
            for trial, freqs in [(X_trn[i], welch(X_trn[i][0], fs=fs, nperseg=fs//2)[0]) 
            for i in range(X_trn.shape[0])] # For each trial, pair its EEG data with frequency bins computed from the first channel's Welch PSD) to prepare for bandpower analysis
        ])
        
        print("psd_features_trn:", psd_features_trn)
        print("psd_features_trn.shape:", psd_features_trn.shape)
        psd_features_tst = np.array([
            welch(trial, fs=fs, nperseg=nyquist_freq, scaling='density')[1] # Set number of data points in each segment to the Nyquist frequency
            [:, (min_bin <= freqs) & (freqs <= max_bin)].mean(axis=1) # Selects only the frequencies between min_bin and max_bin and averages over all channels
            for trial, freqs in [(X_tst[i], welch(X_tst[i][0], fs=fs, nperseg=fs//2)[0])
            for i in range(X_tst.shape[0])] # For each trial, pair its EEG data with frequency bins computed from the first channel's Welch PSD) to prepare for bandpower analysis
        ])


        # --- Concatenation ---
        # Concatenate the P300 correlation and the alpha PSD features
        combined_features_trn = np.column_stack([rho_left * np.ones(X_trn.shape[0]),  # Left stimulus correlation
                                                 rho_right * np.ones(X_trn.shape[0]),  # Right stimulus correlation
                                                 psd_features_trn])  # Alpha PSD features

        combined_features_tst = np.column_stack([rho_left * np.ones(X_tst.shape[0]),  # Left stimulus correlation
                                                 rho_right * np.ones(X_tst.shape[0]),  # Right stimulus correlation
                                                 psd_features_tst])  # Alpha PSD features

        # Replace NaN values with 0
        combined_features_trn = np.nan_to_num(combined_features_trn, nan=0)
        combined_features_tst = np.nan_to_num(combined_features_tst, nan=0)

        print(f"Shape of combined_features_trn: {combined_features_trn.shape}")
        print(f"Shape of combined_features_tst: {combined_features_tst.shape}")

        # --- Ensemble ---
        # Train final LDA on new feature vectors (combined features)
        lda2 = LDA(solver="svd")
        lda2.fit(combined_features_trn, y_trn)

        # Final prediction
        y_final = lda2.predict(combined_features_tst)

        # Calculate accuracy
        accuracy = np.mean(y_final == y_tst)
        fold_accuracies.append(accuracy)

        print(f"Fold {i_fold + 1} Accuracy: {accuracy:.3f}")

    # Compute subject-level results
    accuracy = np.round(np.mean(fold_accuracies), 2)
    se = np.round(np.std(fold_accuracies) / np.sqrt(n_folds), 2)
    results.append((subject, accuracy, se))

    # Print average accuracy per subject
    print(f"Average Accuracy for {subject}: {accuracy:.3f}")

# # Save results
# if not os.path.exists(decoding_results_dir):
#         os.makedirs(decoding_results_dir)
# results_save_path = join(decoding_results_dir, f"covert_alpha_p300_results_concat.npy")     
# np.save(results_save_path, results_array)    

# Convert results to a structured numpy array
results_array = np.array(
    results, dtype=[('subject', 'U10'), ('accuracy', 'f4'), ('standard_error', 'f4')]
)

# Overall results
overall_accuracy = np.round(results_array['accuracy'].mean(), 2)
overall_se = np.round(results_array['standard_error'].mean(), 2)
print(f"Overall LDA accuracy with PSD: {overall_accuracy:.2f} ± {overall_se:.2f}")

Processing VPpdia...
  Fold 1/4


  c /= stddev[:, None]
  c /= stddev[None, :]


psd_features_trn: [[1.5200778e-12 1.5078374e-12 1.6363573e-12 ... 6.6661222e-13
  4.7474909e-13 1.0875865e-12]
 [1.3305238e-12 1.3502245e-12 1.4642459e-12 ... 1.3430532e-12
  6.9845432e-13 2.1823869e-12]
 [1.3662575e-12 1.7124673e-12 1.9843777e-12 ... 1.0917072e-12
  7.2049967e-13 1.4873388e-12]
 ...
 [2.2817956e-12 1.8661704e-12 1.8002434e-12 ... 8.8336386e-13
  4.9529868e-13 1.8749767e-12]
 [1.6871048e-12 1.6333869e-12 1.8336556e-12 ... 1.6168470e-12
  1.1001625e-12 2.9306880e-12]
 [1.8652794e-12 1.8726925e-12 2.0177473e-12 ... 8.4892493e-13
  6.1278809e-13 1.3958179e-12]]
psd_features_trn.shape: (60, 28)
Shape of combined_features_trn: (60, 30)
Shape of combined_features_tst: (20, 30)
Fold 1 Accuracy: 0.700
  Fold 2/4


  c /= stddev[:, None]
  c /= stddev[None, :]


psd_features_trn: [[1.2020385e-12 1.2736791e-12 1.4772843e-12 ... 8.4635277e-13
  5.3293736e-13 1.2218689e-12]
 [1.8516237e-12 1.9922104e-12 2.1090627e-12 ... 1.5057360e-12
  1.0001363e-12 2.0644474e-12]
 [1.4402949e-12 1.4874179e-12 1.6348157e-12 ... 1.4110116e-12
  9.7150142e-13 1.7438970e-12]
 ...
 [2.2817956e-12 1.8661704e-12 1.8002434e-12 ... 8.8336386e-13
  4.9529868e-13 1.8749767e-12]
 [1.6871048e-12 1.6333869e-12 1.8336556e-12 ... 1.6168470e-12
  1.1001625e-12 2.9306880e-12]
 [1.8652794e-12 1.8726925e-12 2.0177473e-12 ... 8.4892493e-13
  6.1278809e-13 1.3958179e-12]]
psd_features_trn.shape: (60, 28)
Shape of combined_features_trn: (60, 30)
Shape of combined_features_tst: (20, 30)
Fold 2 Accuracy: 0.600
  Fold 3/4
psd_features_trn: [[1.2020385e-12 1.2736791e-12 1.4772843e-12 ... 8.4635277e-13
  5.3293736e-13 1.2218689e-12]
 [1.8516237e-12 1.9922104e-12 2.1090627e-12 ... 1.5057360e-12
  1.0001363e-12 2.0644474e-12]
 [1.4402949e-12 1.4874179e-12 1.6348157e-12 ... 1.4110116e-12
  9

  c /= stddev[:, None]
  c /= stddev[None, :]


psd_features_trn: [[1.2020385e-12 1.2736791e-12 1.4772843e-12 ... 8.4635277e-13
  5.3293736e-13 1.2218689e-12]
 [1.8516237e-12 1.9922104e-12 2.1090627e-12 ... 1.5057360e-12
  1.0001363e-12 2.0644474e-12]
 [1.4402949e-12 1.4874179e-12 1.6348157e-12 ... 1.4110116e-12
  9.7150142e-13 1.7438970e-12]
 ...
 [2.0523197e-12 2.1100524e-12 2.3106848e-12 ... 8.0133063e-13
  6.1696508e-13 1.2585398e-12]
 [1.6849586e-12 1.8340050e-12 2.0245071e-12 ... 7.5332736e-13
  6.3192594e-13 1.1941779e-12]
 [1.9134538e-12 2.1424368e-12 2.3507947e-12 ... 7.2386677e-13
  4.2587136e-13 1.1848161e-12]]
psd_features_trn.shape: (60, 28)
Shape of combined_features_trn: (60, 30)
Shape of combined_features_tst: (20, 30)
Fold 4 Accuracy: 0.800
Average Accuracy for VPpdia: 0.680
Processing VPpdib...
  Fold 1/4


  c /= stddev[:, None]
  c /= stddev[None, :]


psd_features_trn: [[8.47471659e-12 8.50534573e-12 1.07885497e-11 ... 2.16964966e-12
  2.89518735e-12 6.20265297e-12]
 [1.21018794e-11 1.21333429e-11 1.45627278e-11 ... 3.64520176e-12
  4.28087990e-12 6.76829773e-12]
 [1.18954326e-11 1.21184130e-11 1.43433113e-11 ... 3.50758723e-12
  3.67971669e-12 9.59165646e-12]
 ...
 [1.60618827e-11 1.75737220e-11 1.98604744e-11 ... 6.79969579e-12
  6.45497197e-12 1.35854149e-11]
 [1.77654332e-11 1.78825166e-11 2.08521620e-11 ... 5.49745492e-12
  5.76982082e-12 1.24206817e-11]
 [1.92606157e-11 1.96062749e-11 2.31391833e-11 ... 5.08803590e-12
  6.86808900e-12 1.05352385e-11]]
psd_features_trn.shape: (60, 26)
Shape of combined_features_trn: (60, 28)
Shape of combined_features_tst: (20, 28)
Fold 1 Accuracy: 0.950
  Fold 2/4


  c /= stddev[:, None]
  c /= stddev[None, :]


psd_features_trn: [[6.82129180e-12 7.07239649e-12 8.43316042e-12 ... 2.52211555e-12
  2.68847553e-12 6.10302477e-12]
 [1.01811016e-11 9.38213136e-12 1.12533984e-11 ... 2.97523312e-12
  3.18333489e-12 5.85760390e-12]
 [1.40588713e-11 1.49193088e-11 1.74146825e-11 ... 3.54086704e-12
  4.27588824e-12 9.86704902e-12]
 ...
 [1.60618827e-11 1.75737220e-11 1.98604744e-11 ... 6.79969579e-12
  6.45497197e-12 1.35854149e-11]
 [1.77654332e-11 1.78825166e-11 2.08521620e-11 ... 5.49745492e-12
  5.76982082e-12 1.24206817e-11]
 [1.92606157e-11 1.96062749e-11 2.31391833e-11 ... 5.08803590e-12
  6.86808900e-12 1.05352385e-11]]
psd_features_trn.shape: (60, 26)
Shape of combined_features_trn: (60, 28)
Shape of combined_features_tst: (20, 28)
Fold 2 Accuracy: 0.800
  Fold 3/4
psd_features_trn: [[6.82129180e-12 7.07239649e-12 8.43316042e-12 ... 2.52211555e-12
  2.68847553e-12 6.10302477e-12]
 [1.01811016e-11 9.38213136e-12 1.12533984e-11 ... 2.97523312e-12
  3.18333489e-12 5.85760390e-12]
 [1.40588713e-11 

KeyboardInterrupt: 

In [4]:
import numpy as np
import os
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from mne.time_frequency import psd_array_multitaper
import warnings
import mne

# Suppress warnings
mne.set_log_level('warning')

# Directory setup
file_dir = '/Users/juliette/Desktop/thesis/preprocessing/hybrid_preprocessing'
decoding_results_dir = '/Users/juliette/Desktop/thesis/results/alpha+p300'

# Parameters
min_bin, max_bin = 8, 12  # Alpha range
p300_window = (0.3, 0.5)  # P300 time window in seconds, after stimulus
baseline_window = (-0.25, 0)  # Baseline window
fs = 250  # Sampling rate, adjust if different
erp_window = (0, 0.75)  # ERP time window (0 to 0.75 seconds) in seconds
baseline_window = (-0.25, 0)  # Baseline period (0.25 s before target)
window_size = 2 # Size for the sliding window to extract alpha
step_size = 0.5

# Initialize results storage
results = []

# List of subjects
subjects = ["VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij",
            "VPpdik", "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit",
            "VPpdiu", "VPpdiv", "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"]

# Main processing loop
for subject in subjects:
    print(f"Processing {subject}...")

    # Load preprocessed data
    fn = os.path.join(file_dir, f"sub-{subject}_task-covert_alpha+p300.npz")
    tmp = np.load(fn)

    X = tmp["X"]  # EEG data matrix (trials, channels, samples)
    y = tmp["y"]  # Labels (cued side)
    z = tmp["z"]  # Target presence (trials, epochs, sides)
    V = tmp["V"]  # One code cycle (classes, samples)
    fs = tmp["fs"].flatten()[0]
       
    # Calculates trial length in seconds
    trial_length = X.shape[2] / fs
    
    # Create time vector for P300 analysis
    trial_times = np.linspace(0, trial_length, X.shape[2])

    # Cross-validation
    fold_accuracies = []
    n_folds = 4
    n_trials = X.shape[0] // n_folds
    folds = np.repeat(np.arange(n_folds), n_trials)

    for i_fold in range(n_folds):
        print(f"  Fold {i_fold + 1}/{n_folds}")

        # Split train and test data
        X_trn, y_trn, z_trn = X[folds != i_fold, :, :], y[folds != i_fold], z[folds != i_fold, :, :]
        X_tst, y_tst, z_tst = X[folds == i_fold, :, :], y[folds == i_fold], z[folds == i_fold, :, :]

        
       # --- Alpha Feature Extraction using Welch's method ---

        # Get dataset shapes
        num_trials_trn, num_channels_trn, num_samples_trn = X_trn.shape
        num_trials_tst, num_channels_tst, num_samples_tst = X_tst.shape

        window_samples = int(window_size * fs)
        step_samples = int(step_size * fs)

        # Compute number of windows per trial
        num_windows_trn = (num_samples_trn - window_samples) // step_samples + 1
        num_windows_tst = (num_samples_tst - window_samples) // step_samples + 1

        # Initialize storage for alpha power
        alpha_power_trn = np.zeros((num_trials_trn, num_channels_trn, num_windows_trn))
        alpha_power_tst = np.zeros((num_trials_tst, num_channels_tst, num_windows_tst))

        # Training data
        for trial in range(num_trials_trn):
            for i in range(num_windows_trn):
                start = i * step_samples
                end = start + window_samples
                window_data = X_trn[trial, :, start:end]  # (channels, samples)

                for ch in range(num_channels_trn):
                    freqs, psd = welch(window_data[ch], fs=fs, nperseg=window_samples, scaling='density')
                    # Select alpha band and average power
                    alpha_mask = (freqs >= min_bin) & (freqs <= max_bin)
                    alpha_power_trn[trial, ch, i] = psd[alpha_mask].mean()

        alpha_features_trn = alpha_power_trn.mean(axis=2)  # (trials, channels)

        # Testing data
        for trial in range(num_trials_tst):
            for i in range(num_windows_tst):
                start = i * step_samples
                end = start + window_samples
                window_data = X_tst[trial, :, start:end]  # (channels, samples)

                for ch in range(num_channels_tst):
                    freqs, psd = welch(window_data[ch], fs=fs, nperseg=window_samples, scaling='density')
                    # Select alpha band and average power
                    alpha_mask = (freqs >= min_bin) & (freqs <= max_bin)
                    alpha_power_tst[trial, ch, i] = psd[alpha_mask].mean()

        alpha_features_tst = alpha_power_tst.mean(axis=2)  # (trials, channels)


        
        # --- ERP Feature Extraction (P300) ---
        
        # Convert time windows to sample indices
        erp_samples = (int(erp_window[0] * fs), int(erp_window[1] * fs))
        baseline_samples = (int(baseline_window[0] * fs), int(baseline_window[1] * fs))

        # Loop over test trials and assign attended and unattended trials based on cued_side
        attended_trials = []
        unattended_trials = []

        for t_idx in range(len(y_tst)):
            cued_side = y_tst[t_idx]  # Get cued side (0 or 1)

            # Create event vectors & ground truth for cued side
            left_targets = z_tst[t_idx][:, 0]  # Left targets
            right_targets = z_tst[t_idx][:, 1]  # Right targets

            # Attended trials are those where cued_side matches the target
            if cued_side == 0:  # If cued side is left
                attended_trials.append(left_targets)  # Target is left
                unattended_trials.append(right_targets)  # Non-target is right
            else:  # If cued side is right
                attended_trials.append(right_targets)  # Target is right
                unattended_trials.append(left_targets)  # Non-target is left

        # Compute ERP by averaging over target-locked epochs
        attended_erp = np.mean(X_tst[attended_trials, :, erp_samples[0]:erp_samples[1]], axis=1) # Only the attended trials, all channels, and ERP window
        unattended_erp = np.mean(X_tst[unattended_trials, :, erp_samples[0]:erp_samples[1]], axis=1)
                
        # Baseline correction: subtract mean of baseline period
        attended_erp -= np.mean(attended_erp[:, baseline_samples[0]:baseline_samples[1]], axis=1, keepdims=True)
        unattended_erp -= np.mean(unattended_erp[:, baseline_samples[0]:baseline_samples[1]], axis=1, keepdims=True)
        
        # Reshape ERP features to ensure they match the number of trials
        # For training set
        attended_erp_flat_trn = attended_erp.flatten()
        unattended_erp_flat_trn = unattended_erp.flatten()

        # For testing set
        attended_erp_flat_tst = attended_erp.flatten()
        unattended_erp_flat_tst = unattended_erp.flatten()

        # Ensure that the feature matrices are consistently sized (trials, features)
        attended_features_trn = np.tile(attended_erp_flat_trn, (X_trn.shape[0], 1))
        unattended_features_trn = np.tile(unattended_erp_flat_trn, (X_trn.shape[0], 1))

        attended_features_tst = np.tile(attended_erp_flat_tst, (X_tst.shape[0], 1))
        unattended_features_tst = np.tile(unattended_erp_flat_tst, (X_tst.shape[0], 1))

        # Concatenate ERP features (attended, unattended) and alpha features
        combined_features_trn = np.column_stack([attended_features_trn, unattended_features_trn, alpha_features_trn])
        combined_features_tst = np.column_stack([attended_features_tst, unattended_features_tst, alpha_features_tst])

        # Ensure no NaNs are present
        combined_features_trn = np.nan_to_num(combined_features_trn, nan=0)
        combined_features_tst = np.nan_to_num(combined_features_tst, nan=0)

        
        # --- Ensemble ---
        
        # Train LDA
        lda = LDA(solver="svd")
        lda.fit(combined_features_trn, y_trn)

        # Make predictions on the test set
        y_pred = lda.predict(combined_features_tst)

        # Calculate accuracy
        accuracy = np.mean(y_pred == y_tst)
        fold_accuracies.append(accuracy)
        print(f"Fold {i_fold + 1} Accuracy: {accuracy:.3f}")

    # Compute subject-level results
    accuracy = np.round(np.mean(fold_accuracies), 2)
    se = np.round(np.std(fold_accuracies) / np.sqrt(n_folds), 2)
    results.append((subject, accuracy, se))
    print(f"Average Accuracy for {subject}: {accuracy:.3f}")

# Save and report results
results_array = np.array(
    results, dtype=[('subject', 'U10'), ('accuracy', 'f4'), ('standard_error', 'f4')]
)

overall_accuracy = np.round(results_array['accuracy'].mean(), 2)
overall_se = np.round(results_array['standard_error'].mean(), 2)
print(f"Overall Hybrid BCI Accuracy: {overall_accuracy:.2f} ± {overall_se:.2f}")

# Save results
if not os.path.exists(decoding_results_dir):
    os.makedirs(decoding_results_dir)
results_save_path = os.path.join(decoding_results_dir, "covert_hybrid_alpha_p300_egan_results.npy")     
np.save(results_save_path, results_array)


Processing VPpdia...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.750
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.650
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.450
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.700
Average Accuracy for VPpdia: 0.640
Processing VPpdib...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.900
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.750
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.750
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.950
Average Accuracy for VPpdib: 0.840
Processing VPpdic...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.600
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.550
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.750
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.750
Average Accuracy for VPpdic: 0.660
Processing VPpdid...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.400
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.650
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.550
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.550
Average Accuracy for VPpdid: 0.540
Processing VPpdie...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.700
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.700
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.650
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.750
Average Accuracy for VPpdie: 0.700
Processing VPpdif...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.800
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.800
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.900
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.950
Average Accuracy for VPpdif: 0.860
Processing VPpdig...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.700
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.700
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.900
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.850
Average Accuracy for VPpdig: 0.790
Processing VPpdih...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.900
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.900
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.850
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.850
Average Accuracy for VPpdih: 0.880
Processing VPpdii...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.800
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.800
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.800
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.800
Average Accuracy for VPpdii: 0.800
Processing VPpdij...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.450
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.650
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.600
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.800
Average Accuracy for VPpdij: 0.620
Processing VPpdik...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.700
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.700
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.600
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.550
Average Accuracy for VPpdik: 0.640
Processing VPpdil...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.850
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 1.000
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.950
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.950
Average Accuracy for VPpdil: 0.940
Processing VPpdim...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.850
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.850
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.850
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.950
Average Accuracy for VPpdim: 0.880
Processing VPpdin...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.750
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.650
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.700
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.750
Average Accuracy for VPpdin: 0.710
Processing VPpdio...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.800
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.800
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.650
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.850
Average Accuracy for VPpdio: 0.780
Processing VPpdip...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.650
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.500
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.700
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.600
Average Accuracy for VPpdip: 0.610
Processing VPpdiq...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.900
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.900
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.650
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.900
Average Accuracy for VPpdiq: 0.840
Processing VPpdir...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.800
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.850
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.750
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.700
Average Accuracy for VPpdir: 0.770
Processing VPpdis...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.800
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.850
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.900
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.950
Average Accuracy for VPpdis: 0.880
Processing VPpdit...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.900
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.950
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 1.000
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 1.000
Average Accuracy for VPpdit: 0.960
Processing VPpdiu...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.800
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.500
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.450
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.300
Average Accuracy for VPpdiu: 0.510
Processing VPpdiv...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.850
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.800
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.900
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.950
Average Accuracy for VPpdiv: 0.880
Processing VPpdiw...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.750
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.750
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.850
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.750
Average Accuracy for VPpdiw: 0.780
Processing VPpdix...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.900
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.850
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.800
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.550
Average Accuracy for VPpdix: 0.770
Processing VPpdiy...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.650
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.800
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.700
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.700
Average Accuracy for VPpdiy: 0.710
Processing VPpdiz...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.600
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.800
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.500
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.650
Average Accuracy for VPpdiz: 0.640
Processing VPpdiza...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.750
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.800
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.850
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.900
Average Accuracy for VPpdiza: 0.820
Processing VPpdizb...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.850
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.700
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.950
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.750
Average Accuracy for VPpdizb: 0.810
Processing VPpdizc...
  Fold 1/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 1 Accuracy: 0.650
  Fold 2/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 2 Accuracy: 0.650
  Fold 3/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 3 Accuracy: 0.850
  Fold 4/4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(


Fold 4 Accuracy: 0.550
Average Accuracy for VPpdizc: 0.680
Overall Hybrid BCI Accuracy: 0.76 ± 0.04


## Fixing LDA solver efficiency
Fixes the epoching

In [3]:
def extract_epochs(X, start_idx=120, end_idx=2520, step_size=30, 
                   start_sample_offset=-24, end_sample_offset=84, 
                   amplitude_threshold=40e-6):
    """
    Function to extract epochs from time-series data for ERP features, 
    baseline-correct each epoch, and identify bad epochs based on amplitude threshold.
    
    Parameters:
    - X: Input data array of shape (n_trials, n_channels, n_samples)
    - start_idx: The starting sample index for the first epoch (default=120)
    - end_idx: The last sample index where the final epoch starts (default=2520)
    - step_size: Step size in samples, corresponding to the sliding window (default=30)
    - start_sample_offset: The offset for the start of the time window (default=-24, corresponds to -200 ms)
    - end_sample_offset: The offset for the end of the time window (default=84, corresponds to 700 ms)
    - amplitude_threshold: Threshold for identifying bad epochs based on amplitude range (default=100)
    
    Returns:
    - output_matrix: A 4D array of extracted and baseline-corrected epochs of shape 
                     (n_trials, n_epochs, n_channels, window_size)
    - bad_epochs_idx: List of indices of bad epochs for each trial and channel 
                      where amplitude range exceeds the threshold.
    """
    # Check input dimensions
    if X.ndim != 3:
        raise ValueError(f"Input X must have 3 dimensions (n_trials, n_channels, n_samples), but got {X.ndim} dimensions.")
    
    n_trials, n_channels, n_samples = X.shape
    window_size = end_sample_offset + np.abs(start_sample_offset)  # 108 samples
    epoch_timestamps = np.arange(start_idx, end_idx, step_size)    # (80,)
    n_epochs = len(epoch_timestamps)
    
    # Initialize the output matrix for the epochs and a list for bad epoch indices
    output_matrix = np.zeros((n_trials, n_epochs, n_channels, window_size))
    bad_epochs_idx = []  # To store (trial, epoch, channel) indices of bad epochs
    
    # Loop over trials, channels, and epochs to extract and baseline-correct the windows
    for i_trial in range(n_trials):
        for i_channel in range(n_channels):
            data = X[i_trial, i_channel, :]

            for i_epoch, t in enumerate(epoch_timestamps):
                epoch_start_idx = t + start_sample_offset  # Start at t - 24 samples (-200 ms)
                epoch_end_idx = t + end_sample_offset      # End at t + 84 samples (700 ms)
                
                # Ensure the window stays within bounds
                if epoch_start_idx >= 0 and epoch_end_idx <= n_samples:
                    epoch_data = data[epoch_start_idx:epoch_end_idx]
                    
                    # Baseline correction
                    baseline_mean = np.mean(epoch_data[:25])
                    epoch_data = epoch_data - baseline_mean
                    
                    # Store the epoch in the output matrix
                    output_matrix[i_trial, i_epoch, i_channel, :] = epoch_data
                    
                    # Check amplitude range after baseline subtraction
                    min_amp, max_amp = np.min(epoch_data), np.max(epoch_data)
                    amplitude_range = max_amp - min_amp
                    
                    # Log bad epochs if amplitude range exceeds threshold
                    if amplitude_range > amplitude_threshold:
                        bad_epochs_idx.append((i_trial, i_epoch, i_channel))
    
    # Return the 4D output matrix and the indices of bad epochs
    return output_matrix, bad_epochs_idx

def mark_bad_epochs(X, z, bad_idx):
    """
    Marks bad epochs in both EEG data (X) and labels (z) by setting them to NaN (or another sentinel, ie -1).

    Parameters
    ----------
    X : ndarray
        4D array of shape (n_trials, n_epochs, n_channels, n_timepoints).
    z : ndarray
        3D array of shape (n_trials, n_epochs, label_dim).
    bad_idx : list of tuples
        List of (trial, epoch, channel) indices indicating bad epochs.

    Returns
    -------
    X_marked : ndarray
        Same shape as X, with bad epochs set to NaN (or a chosen sentinel).
    z_marked : ndarray
        Same shape as z, with bad epochs set to NaN (or a chosen sentinel).
    """
    # Convert list of (trial, epoch, channel) to a set of (trial, epoch) pairs
    bad_trial_epoch_pairs = set((trial, epoch) for trial, epoch, _ in bad_idx)

    # Make copies so we don't overwrite the original arrays
    X_marked = np.copy(X)
    z_marked = np.copy(z).astype(np.float64)

    # Mark each bad epoch in both X and z
    for trial_idx, epoch_idx in bad_trial_epoch_pairs:
        X_marked[trial_idx, epoch_idx, :, :] = np.nan 
        z_marked[trial_idx, epoch_idx, :]    = np.nan 

    return X_marked, z_marked

def balance_classes(X, y, ratio_0_to_1=1.0):
    
    """
    Sub-select X and y based on a specified ratio of 0s to 1s, keeping the original order.

    Parameters:
    X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).
    y (numpy.ndarray): Label vector of shape (n_samples,).
    ratio_0_to_1 (float): The desired ratio of 0s to 1s in the balanced dataset.

    Returns:
    X_balanced, y_balanced: Sub-selected feature matrix and label vector.
    """
    # Step 1: Identify indices of 0s and 1s
    indices_0 = np.where(y == 0)[0]
    indices_1 = np.where(y == 1)[0]
    
    # Step 2: Calculate the number of samples to select for each class
    num_1s = len(indices_1)
    num_0s = min(len(indices_0), int(num_1s * ratio_0_to_1))
    
    # Step 3: Randomly sample the desired number of 0s and 1s
    selected_indices_0 = np.random.choice(indices_0, num_0s, replace=False)
    selected_indices_1 = np.random.choice(indices_1, num_1s, replace=False)
    
    # Step 4: Combine selected indices and sort to preserve original order
    balanced_indices = np.sort(np.concatenate([selected_indices_0, selected_indices_1]))
    
    # Step 5: Sub-select X and y based on the balanced indices
    X_balanced = X[balanced_indices]
    y_balanced = y[balanced_indices]
    
    return X_balanced, y_balanced

def filter_valid_epochs(X, y, z=None, return_mask=False):
    """
    Filters out epochs where either the features in X or the labels in y contain NaN values.
    Optionally, if a z array is provided, it is filtered similarly.
    
    Parameters:
        X (np.ndarray): A 2D numpy array with shape (n_epochs, n_features).
        y (np.ndarray): A 1D numpy array with shape (n_epochs,).
        z (np.ndarray, optional): An array that will be filtered using the same mask.
        return_mask (bool, optional): If True, the boolean mask used for filtering is returned.
    
    Returns:
        filtered_X (np.ndarray): X with only rows that have no NaN values.
        filtered_y (np.ndarray): y with only entries corresponding to valid epochs.
        filtered_z (np.ndarray or None): Filtered z array (if provided) or None.
        mask (np.ndarray, optional): The boolean mask of valid epochs; only returned if return_mask=True.
    """
    # Create a mask for valid labels and features
    valid_label_mask = ~np.isnan(y)
    valid_feature_mask = ~np.isnan(X).any(axis=1)
    combined_mask = valid_label_mask & valid_feature_mask

    # Apply the mask to X and y
    filtered_X = X[combined_mask]
    filtered_y = y[combined_mask]
    
    if z is not None:
        filtered_z = z[combined_mask]
    else:
        filtered_z = None

    if return_mask:
        return filtered_X, filtered_y, filtered_z, combined_mask
    else:
        return filtered_X, filtered_y, filtered_z
    
def extract_features_from_X(X_matrix, ToI = None):
    """
    Extracts the maximum amplitudes from specified time ranges for each trial, epoch, and channel in the input data.

    Parameters:
    - X_matrix: A 4D numpy array of shape (n_trials, n_epochs, n_channels, n_samples) representing the input data.
    - ToI: A list of tuples, where each tuple contains the start and end indices of a time range of interest.

    Returns:
    - feature_matrix: A 4D numpy array of shape (n_trials, n_epochs, n_channels, len(ToI)) containing the maximum
                      values from the specified time ranges for each trial, epoch, and channel.
    """
    # Extract the shape of the input matrix
    n_trials, n_epochs, n_channels, n_samples = X_matrix.shape 
    
    # Initialize the feature matrix to store maximum values for each time range
    feature_matrix = np.zeros((n_trials, n_epochs, n_channels, len(ToI)))

    # Loop over the time ranges (ToI) and extract the max value for each range
    for i_range, (start, end) in enumerate(ToI):
        # For each time range, find the maximum values along the last axis (time samples) in the specified range
        feature_matrix[ :, :, :, i_range] = np.mean((X_matrix[ :, :, :, start:end]), axis=-1)

    # Return the feature matrix
    return feature_matrix

# Function to apply baseline correction
def apply_baseline_correction(epochs, baseline_start_sample, baseline_end_sample):
    baseline = np.mean(epochs[:, :, baseline_start_sample:baseline_end_sample], axis=2, keepdims=True)
    return epochs - baseline  # Subtract baseline from all samples in the epoch

In [12]:
import numpy as np
import os
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from mne.time_frequency import psd_array_multitaper
import warnings
from mne.time_frequency import psd_array_welch

import mne

# Suppress warnings
mne.set_log_level('warning')

# Directory setup
file_dir = '/Users/juliette/Desktop/thesis/preprocessing/hybrid_preprocessing'
decoding_results_dir = '/Users/juliette/Desktop/thesis/results/alpha+p300'


# Parameters
min_bin, max_bin = 8, 13  # Alpha range
p300_window = (0.3, 0.5)  # P300 time window in seconds, after stimulus
baseline_window = (-0.5, 0)
erp_window = (0, 0.75)  # ERP time window
window_size = 2  # Alpha window size in seconds
step_size = 0.5
rejection_threshold = 60e-6



# Initialize results storage
results = []

# List of subjects
subjects = ["VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij",
            "VPpdik", "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit",
            "VPpdiu", "VPpdiv", "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"]

# Main processing loop
for subject in subjects:
    print(f"Processing {subject}...")

    fn = os.path.join(file_dir, f"sub-{subject}_task-covert_c-VEP+P300_ICA.npz")
    tmp = np.load(fn)

    X = tmp["X"]
    print("X:", X.shape)
    y = tmp["y"]
    z = tmp["z"]
    V = tmp["V"]
    fs = tmp["fs"].flatten()[0]

    trial_length = X.shape[2] / fs
    trial_times = np.linspace(0, trial_length, X.shape[2])

    fold_accuracies = []
    n_folds = 4
    n_trials = X.shape[0] // n_folds
    folds = np.repeat(np.arange(n_folds), n_trials)
    lda = LDA()

    for i_fold in range(n_folds):
        print(f"  Fold {i_fold + 1}/{n_folds}")

        # Split into train and test set
        X_trn, y_trn, z_trn = X[folds != i_fold], y[folds != i_fold], z[folds != i_fold]
        X_tst, y_tst, z_tst = X[folds == i_fold], y[folds == i_fold], z[folds == i_fold]

        
        # --- Alpha Feature Extraction ---
        
        # Get dataset shapes
        num_trials_trn, num_channels_trn, num_samples_trn = X_trn.shape
        num_trials_tst, num_channels_tst, num_samples_tst = X_tst.shape

        # Define the window steps for the samples
        window_samples = int(window_size * fs) # How many samples there are in a 2 second sliding window
        step_samples = int(step_size * fs) # # Moves the window forward by 0.5 seconds

        # Compute number of windows per trial
        num_windows_trn = (num_samples_trn - window_samples) // step_samples + 1
        num_windows_tst = (num_samples_tst - window_samples) // step_samples + 1

        # Initialize storage for alpha band-pass power
        alpha_power_trn = np.zeros((num_trials_trn, num_channels_trn, num_windows_trn))
        alpha_power_tst = np.zeros((num_trials_tst, num_channels_tst, num_windows_tst))

        # Process training data
        # Loop over all trials
        for trial in range(num_trials_trn):
            for i in range(num_windows_trn):
                # Start and end of the current window to extract this window
                start = i * step_samples
                end = start + window_samples
                window_data = X_trn[trial, :, start:end]  # For this trial, all channels and selected time window. Shape: (channels, window_samples)
                
                # Compute PSD using multitaper method for every window
                psd, freqs = psd_array_welch(window_data, sfreq=fs, fmin=min_bin, fmax=max_bin, n_fft=window_samples, n_overlap=0, average='mean')

                # Average power in alpha band over the frequency range, store it with current trial, all channels and this window
                alpha_power_trn[trial, :, i] = psd.mean(axis=1)  # (channels,)

        # Average across windows for final feature extraction: final alpha power feature per trial and channel
        alpha_features_trn = alpha_power_trn.mean(axis=2)  # Shape (trials, channels)

        # Process testing data
        for trial in range(num_trials_tst):
            for i in range(num_windows_tst):
                
                # Start and end of the current window to extract this window
                start = i * step_samples
                end = start + window_samples
                window_data = X_tst[trial, :, start:end]  # (channels, window_samples)

                # Compute PSD using multitaper method for every window
                psd, freqs = psd_array_welch(window_data, sfreq=fs, fmin=min_bin, fmax=max_bin, n_fft=window_samples, n_overlap=0, average='mean')

                # Average power in alpha band over the frequency range, store it with current trial, all channels and this window
                alpha_power_tst[trial, :, i] = psd.mean(axis=1)  # (channels,)

        # Average across windows for final feature extraction --> final alpha power feature per trial and channel.
        alpha_features_tst = alpha_power_tst.mean(axis=2)  # Shape: (trials, channels)
        
        print("Shape of alpha_features_trn:", alpha_features_trn.shape)
        print("Shape of alpha_features_tst:", alpha_features_tst.shape)


        # --- P300 ---
        
        # - Obtaining target-locked epochs - 
        # Extract epochs
        epochs_trn, bad_epochs_trn = extract_epochs(X_trn, amplitude_threshold=rejection_threshold)
        epochs_tst, bad_epochs_tst = extract_epochs(X_tst, amplitude_threshold=rejection_threshold)
        print("Shape of epochs_trn:", epochs_trn.shape)
        print("Shape of epochs_tst:", epochs_tst.shape)

        # Mark bad epochs in both EEG data and labels
        X_clean_trn, z_clean_trn = mark_bad_epochs(epochs_trn, z_trn, bad_epochs_trn)
        X_clean_tst, z_clean_tst = mark_bad_epochs(epochs_tst, z_tst, bad_epochs_tst)
        print("Shape of X_clean_trn:", X_clean_trn.shape)
        print("Shape of X_clean_tst:", X_clean_tst.shape)
        
        # Extract features from valid epochs. Shape: (trials, epochs, channels, samples)
        ToI = [(30, 38), (38, 48), (48, 57), (57, 69), (69, 87), (87, 108)]
        features_trn = extract_features_from_X(X_clean_trn, ToI)
        features_tst = extract_features_from_X(X_clean_tst, ToI)
        print("Shape of features_trn:", features_trn.shape)
        print("Shape of features_tst:", features_tst.shape)
        
        # - Decoding -

        # Separate attended and unattended trials for training and testing sets
        attended_trials_trn = []
        unattended_trials_trn = []
        attended_trials_tst = []
        unattended_trials_tst = []
        
        # Flatten training trials into epochs: shape becomes [n_trials * epochs, channel * features]       
        X_trn_epochs = features_trn.reshape(-1, features_trn.shape[2] * features_trn.shape[3])
        X_tst_epochs = features_tst.reshape(-1, features_tst.shape[2] * features_tst.shape[3])

        # Now filter the epochs with NaN values and bad epochs
        X_filtered_trn, y_filtered_trn, z_filtered_trn, combined_mask_trn = filter_valid_epochs(X_trn_epochs, y_trn, z_clean_trn)
        X_filtered_tst, y_filtered_tst, z_filtered_tst, combined_mask_tst = filter_valid_epochs(X_tst_epochs, y_tst, z_clean_tst)

        # Extract labels for training epochs using z and y 
        trial_indices_trn = np.arange(len(y_trn))  # indices for trials
        y_trn_epochs = z_trn[trial_indices_trn, :, y_trn].reshape(-1) # Shape becomes 
        
        print("Shape of y_trn_epochs:", y_trn_epochs.shape)

        # Testing trials
        for t_idx in range(len(y_tst)):
            cued_side = y_tst[t_idx]  # Get cued side (0 or 1)

            # Create event vectors & ground truth for cued side
            left_targets = z_tst[t_idx][:, 0]  # Left targets
            right_targets = z_tst[t_idx][:, 1]  # Right targets

            # Attended trials are those where cued_side matches the target
            if cued_side == 0:  # If cued side is left
                attended_trials_tst.append(left_targets)  # Target is left
                unattended_trials_tst.append(right_targets)  # Non-target is right
            else:  # If cued side is right
                attended_trials_tst.append(right_targets)  # Target is right
                unattended_trials_tst.append(left_targets)  # Non-target is left

        # Average target-locked epochs for attended and unattended targets (training set)
        avg_attended_epochs_trn = np.mean(attended_trials_trn, axis=0)
        avg_unattended_epochs_trn = np.mean(unattended_trials_trn, axis=0)

        # Average target-locked epochs for attended and unattended targets (test set)
        avg_attended_epochs_tst = np.mean(attended_trials_tst, axis=0)
        avg_unattended_epochs_tst = np.mean(unattended_trials_tst, axis=0)

#         # Apply baseline correction to the averaged epochs (after averaging trials)
#         baseline_samples = (int(baseline_window[0] * fs), int(baseline_window[1] * fs))

#         # Ensure avg_attended_epochs_trn is 2D before applying baseline correction
#         if avg_attended_epochs_trn.ndim == 1:
#             avg_attended_epochs_trn = avg_attended_epochs_trn.reshape(1, -1)  # Reshape to 2D if it's 1D

#         if avg_unattended_epochs_trn.ndim == 1:
#             avg_unattended_epochs_trn = avg_unattended_epochs_trn.reshape(1, -1)  # Reshape to 2D if it's 1D

#         # Apply baseline correction
#         avg_attended_epochs_trn -= np.mean(avg_attended_epochs_trn[:, baseline_samples[0]:baseline_samples[1]], axis=1, keepdims=True)
#         avg_unattended_epochs_trn -= np.mean(avg_unattended_epochs_trn[:, baseline_samples[0]:baseline_samples[1]], axis=1, keepdims=True)

#         # Apply the same to the test set
#         if avg_attended_epochs_tst.ndim == 1:
#             avg_attended_epochs_tst = avg_attended_epochs_tst.reshape(1, -1)  # Reshape to 2D if it's 1D

#         if avg_unattended_epochs_tst.ndim == 1:
#             avg_unattended_epochs_tst = avg_unattended_epochs_tst.reshape(1, -1)  # Reshape to 2D if it's 1D

#         # Apply baseline correction
#         avg_attended_epochs_tst -= np.mean(avg_attended_epochs_tst[:, baseline_samples[0]:baseline_samples[1]], axis=1, keepdims=True)
#         avg_unattended_epochs_tst -= np.mean(avg_unattended_epochs_tst[:, baseline_samples[0]:baseline_samples[1]], axis=1, keepdims=True)


        # --- Combine Decoding ---
        # Concatenate alpha and P300 features for training and testing
        print("avg_attended_epochs_trn.shape", avg_attended_epochs_trn.shape)
        print("avg_unattended_epochs_trn.shape", avg_unattended_epochs_trn.shape)
        print("alpha_features_trn.shape", alpha_features_trn.shape)
        features_trn = features_trn.reshape(features_trn.shape[0], -1)
        features_tst = features_tst.reshape(features_tst.shape[0], -1)

        # Flatten the ERP features if they are 3D (trials, channels, features)
        if features_trn.ndim == 3:
            features_trn = features_trn.reshape(features_trn.shape[0], -1)
            features_tst = features_tst.reshape(features_tst.shape[0], -1)

        # Combine alpha and ERP features
        X_train_combined = np.concatenate((alpha_features_trn, features_trn), axis=1)
        X_test_combined = np.concatenate((alpha_features_tst, features_tst), axis=1)

        # Use the cued side labels as target labels (0 = left, 1 = right)

        # Remove trials with NaN values in alpha or P300 features
        # Ensure no NaNs are present
        combined_features_trn = np.nan_to_num(X_train_combined, nan=0)
        combined_features_tst = np.nan_to_num(X_test_combined, nan=0)
        
        # --- Train LDA classifier ---
        lda.fit(combined_features_trn, y_trn)

        # --- Testing ---
        y_pred = lda.predict(combined_features_tst)
        
        # Compute accuracy for this fold
        accuracy = np.mean(y_pred == y_tst)
        fold_accuracies.append(accuracy)
        print(f"    Fold accuracy: {accuracy:.2f}")
        
    # Save subject-level results
    acc = np.round(np.mean(fold_accuracies), 2)
    se = np.round(np.std(fold_accuracies) / np.sqrt(n_folds), 2)
    results.append((subject, acc, se))
    print(f"{subject}: Accuracy = {acc:.2f}, SE = {se:.2f}")

# Save all results
results = np.array(results, dtype=object)
os.makedirs(decoding_results_dir, exist_ok=True)
np.save(os.path.join(decoding_results_dir, "hybrid_accuracy.npy"), results)
print("All done!")


Processing VPpdia...
X: (80, 62, 2400)
  Fold 1/4
Shape of alpha_features_trn: (60, 62)
Shape of alpha_features_tst: (20, 62)
Shape of epochs_trn: (60, 80, 62, 108)
Shape of epochs_tst: (20, 80, 62, 108)
Shape of X_clean_trn: (60, 80, 62, 108)
Shape of X_clean_tst: (20, 80, 62, 108)
Shape of features_trn: (60, 80, 62, 6)
Shape of features_tst: (20, 80, 62, 6)


ValueError: operands could not be broadcast together with shapes (60,) (4800,) 

# Steps from Egan's paper

1. Preprocessing
The paper also proposes preprocessing steps, however I follow the preprocessing steps that we already did.

2. Epoching
Create trial-length epochs for alpha, these span the entire trial (20 seconds), they start 0.5 seconds before the trial until 0.5 seconds after the trial. For the P300 aalysis, create target-locked epochs, starting 0.25 seconds before the target and ending 1 second after the target.

3. Frequency Analysis for Alpha Band
Perform frequency analysis using a 2-second rectangular sliding window across each trial-length epoch in steps of 0.5 seconds.

For each window, apply a Fast Fourier Transform (FFT) to the data from each electrode.

Extract the average amplitude between 8–13 Hz (alpha band) for each step. Average the features within a selected set of electrodes to obtain the  alpha values that will be used for classification.

4. ERP Feature Extraction for P300
Separate target-locked epochs for attended and unattended stimuli. Average the target-locked epochs across occurrences of attended and unattended targets. Baseline correct the ERP by subtracting the average of the 0.5 seconds preceding the appearance of the target from the remaining data (0–0.75 s).

5. Feature Fusion
Combine the frequency domain features from the alpha-band analysis with the ERP features from the P300 analysis.

Ensure that the features are concatenated in a way that is suitable for classification, typically forming a combined feature vector for each trial.

6. Classification
Train an LDA classifier using the hybrid feature set (alpha-band + P300 ERP features), possibly with ledoit wolf regularization.

Use cross-validation to evaluate classification performance and avoid overfitting.

7. Evaluation
Evaluate the classification performance using appropriate metrics such as accuracy, precision, recall, F1 score, or ROC curves.

In [9]:
import numpy as np
import os
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from mne.time_frequency import psd_array_multitaper
import warnings
import mne
from sklearn.covariance import LedoitWolf
from scipy.signal import welch
from sklearn.metrics import precision_recall_curve, auc
from scipy.stats import pearsonr



# Suppress all warnings
mne.set_log_level('warning')

# Directory containing the preprocessed data
file_dir = '/Users/juliette/Desktop/thesis/preprocessing/hybrid_preprocessing'
decoding_results_dir = '/Users/juliette/Desktop/thesis/results/alpha+p300'

# Define the alpha range for PSD calculation
min_bin = 8
max_bin = 12
rejection_threshold = 60e-6
discard_threshold = 20


# Initialize results storage
results = []
fold_pr_auc = []
fold_correct_trials = []

# List of subjects
subjects = ["VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij",
            "VPpdik", "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit",
            "VPpdiu", "VPpdiv", "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"]

subjects = ["VPpdik", "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit"]
# Load Data for each subject
for subject in subjects:
    print(f"Processing {subject}...")
    # Load preprocessed data
    fn = os.path.join(file_dir, f"sub-{subject}_task-covert_c-VEP+P300_ICA.npz")
    tmp = np.load(fn)

    X = tmp["X"]  # EEG data matrix (trials, channels, samples)
    y = tmp["y"]  # Labels
    z = tmp["z"]  # Target presence (trials, epochs, sides)
    V = tmp["V"]  # One code cycle (classes, samples)
    fs = tmp["fs"].flatten()[0]
    nyquist_freq = fs // 2

    # Cross-validation
    fold_accuracies = []
    n_folds = 4
    n_trials = X.shape[0] // n_folds
    folds = np.repeat(np.arange(n_folds), n_trials)

    new_feature_vectors = []

    for i_fold in range(n_folds):
        print(f"  Fold {i_fold + 1}/{n_folds}")

        # Split train and test data
        X_trn, y_trn, z_trn = X[folds != i_fold, :, :], y[folds != i_fold], z[folds != i_fold, :, :]
        X_tst, y_tst, z_tst = X[folds == i_fold, :, :], y[folds == i_fold], z[folds == i_fold, :, :]

        
        # --- P300 ---
        # - Obtaining target-locked epochs - 
        
        # Extract epochs
        epochs_trn, bad_epochs_trn = extract_epochs(X_trn, amplitude_threshold=rejection_threshold)
        epochs_tst, bad_epochs_tst = extract_epochs(X_tst, amplitude_threshold=rejection_threshold)
        print("Shape of epochs_trn:", epochs_trn.shape)
        print("Shape of epochs_tst:", epochs_tst.shape)

        # Mark bad epochs in both EEG data and labels
        X_clean_trn, z_clean_trn = mark_bad_epochs(epochs_trn, z_trn, bad_epochs_trn)
        X_clean_tst, z_clean_tst = mark_bad_epochs(epochs_tst, z_tst, bad_epochs_tst)
        print("Shape of X_clean_trn:", X_clean_trn.shape)
        print("Shape of X_clean_tst:", X_clean_tst.shape)
        
        # Extract features from valid epochs. Shape: (trials, epochs, channels, samples)
        ToI = [(30, 38), (38, 48), (48, 57), (57, 69), (69, 87), (87, 108)]
        features_trn = extract_features_from_X(X_clean_trn, ToI)
        features_tst = extract_features_from_X(X_clean_tst, ToI)
        print("Shape of features_trn:", features_trn.shape)
        print("Shape of features_tst:", features_tst.shape)
        
        # - Training -
        # Flatten training trials into epochs: shape becomes (n_trials * epochs, channel * features)
        features_trn = features_trn.reshape(-1, features_trn.shape[2] * features_trn.shape[3])
        
        # Extract labels for training epochs using z and y 
        trial_indices_trn = np.arange(len(y_trn))  # indices for trials
        y_trn_epochs = z_trn[trial_indices_trn, :, y_trn].reshape(-1)
        
        X_trn_epochs = np.nan_to_num(features_trn, nan=0)
        y_trn_epochs = np.nan_to_num(y_trn_epochs, nan=0)
        
        print("Shape of X_trn_epochs (this is fed into LDA):", X_trn_epochs.shape)
        print("Shape of y_trn_epochs (this is fed into LDA):", y_trn_epochs.shape)
        
        # Fit LDA
        lda = LDA(solver="lsqr", covariance_estimator=LedoitWolf())
        lda.fit(X_trn_epochs, y_trn_epochs)  # Dimensionality bust be 2

        # - Testing -
        # Flatten testing trials into epochs
        X_tst_epochs = features_tst.reshape(-1, features_tst.shape[2] * features_tst.shape[3])
        
        # Extract labels for testing epochs (again, using z and y for indexing)
        trial_indices_tst = np.arange(len(y_tst))
        y_tst_epochs = z_tst[trial_indices_tst, :, y_tst].reshape(-1)
        
        # Reshape z_tst_trials into epochs.
        z_tst_epochs = z_tst.reshape(len(y_tst) * z_tst.shape[1], 2)
        
        # Filter testing epochs and also retrieve the original mask so we can count epochs per trial
        X_tst_epochs = np.nan_to_num(X_tst_epochs, nan=0)
        y_tst_epochs = np.nan_to_num(y_tst_epochs, nan=0)
        
        # Filter testing epochs and also retrieve the original mask so we can count epochs per trial
        X_tst_epochs, y_tst_epochs, z_tst_epochs, combined_mask_tst = filter_valid_epochs(
            X_tst_epochs, y_tst_epochs, z=z_tst_epochs, return_mask=True
        )
        
        # Calculate the number of preserved epochs per trial for testing.
        # Here, combined_mask_tst still has the original shape before filtering.
        # Reshape it to [n_trials, epochs_per_trial] and sum True values per trial.
        epoch_counts = combined_mask_tst.reshape(len(y_tst), -1)
        num_epochs = np.sum(epoch_counts, axis=1)
        
        # Rebuild trial structure for testing data based on num_epochs
        nested_X_tst_trials = []
        nested_z_tst_epochs = []
        start_idx = 0
        for trial_idx, n_ep in enumerate(num_epochs):
            end_idx = start_idx + n_ep
            nested_X_tst_trials.append(X_tst_epochs[start_idx:end_idx])
            nested_z_tst_epochs.append(z_tst_epochs[start_idx:end_idx])
            start_idx = end_idx
        

        # Evaluate model on test data
        correct_trials = 0
        
        for t_idx in range(len(y_tst)):

            num_preserved_epochs = num_epochs[t_idx]
            if num_preserved_epochs < discard_threshold:
                discarded_trial_counter +=1
                continue
            
            # Log cued side informed by y_tst
            cued_side = y_tst[t_idx]
            # create event vectors & ground truth
            left_targets = nested_z_tst_epochs[t_idx][:, 0]
            right_targets = nested_z_tst_epochs[t_idx][:, 1]
            cued_targets = nested_z_tst_epochs[t_idx] [:, cued_side]
   
            # Compute LDA scores for epochs
            epoch_scores = lda.decision_function(nested_X_tst_trials[t_idx])
            
            # Log performance per fold
            precision, recall, _ = precision_recall_curve(cued_targets, epoch_scores)
            pr_auc_score = auc(recall, precision)
            fold_pr_auc.append(pr_auc_score)

            # Correlation-based decision
            corr_left, _ = pearsonr(epoch_scores, left_targets)
            corr_right, _ = pearsonr(epoch_scores, right_targets)

            # Trial-level decision rule based on correlation
            decision = 0 if corr_left > corr_right else 1
            if decision == cued_side:
                correct_trials += 1


        # --- Alpha Extraction ---

        # Extract alpha features. For all trials, average over frequency bin (8-12 Hz) per channel using Welch's method
        psd_features_trn = np.array([
            welch(trial, fs=fs, nperseg=nyquist_freq, scaling='density')[1] # Set number of data points in each segment to the Nyquist frequency
            [:, (min_bin <= freqs) & (freqs <= max_bin)].mean(axis=1) # Selects only the frequencies between min_bin and max_bin and averages over all channels
            for trial, freqs in [(X_trn[i], welch(X_trn[i][0], fs=fs, nperseg=fs//2)[0]) 
            for i in range(X_trn.shape[0])] # For each trial, pair its EEG data with frequency bins computed from the first channel's Welch PSD) to prepare for bandpower analysis
        ])
        
        psd_features_tst = np.array([
            welch(trial, fs=fs, nperseg=nyquist_freq, scaling='density')[1] # Set number of data points in each segment to the Nyquist frequency
            [:, (min_bin <= freqs) & (freqs <= max_bin)].mean(axis=1) # Selects only the frequencies between min_bin and max_bin and averages over all channels
            for trial, freqs in [(X_tst[i], welch(X_tst[i][0], fs=fs, nperseg=fs//2)[0])
            for i in range(X_tst.shape[0])] # For each trial, pair its EEG data with frequency bins computed from the first channel's Welch PSD) to prepare for bandpower analysis
        ])


        # --- Concatenation ---
        # Concatenate the P300 correlation and the alpha PSD features
        combined_features_trn = np.column_stack([corr_left * np.ones(X_trn.shape[0]),  # Left stimulus correlation
                                                 corr_right * np.ones(X_trn.shape[0]),  # Right stimulus correlation
                                                 psd_features_trn])  # Alpha PSD features

        combined_features_tst = np.column_stack([corr_left * np.ones(X_tst.shape[0]),  # Left stimulus correlation
                                                 corr_right * np.ones(X_tst.shape[0]),  # Right stimulus correlation
                                                 psd_features_tst])  # Alpha PSD features

        # Replace NaN values with 0
        combined_features_trn = np.nan_to_num(combined_features_trn, nan=0)
        combined_features_tst = np.nan_to_num(combined_features_tst, nan=0)

        print(f"Shape of combined_features_trn: {combined_features_trn.shape}")
        print(f"Shape of combined_features_tst: {combined_features_tst.shape}")

        # --- Ensemble ---
        # Train final LDA on new feature vectors (combined features)
        lda_combine = LDA(solver="lsqr", covariance_estimator=LedoitWolf())
        lda_combine.fit(combined_features_trn, y_trn)

        # Final prediction
        y_final = lda_combine.predict(combined_features_tst)

        # Calculate accuracy
        accuracy = np.mean(y_final == y_tst)
        fold_accuracies.append(accuracy)

        print(f"Fold {i_fold + 1} Accuracy: {accuracy:.3f}")

    # Compute subject-level results
    accuracy = np.round(np.mean(fold_accuracies), 2)
    se = np.round(np.std(fold_accuracies) / np.sqrt(n_folds), 2)
    results.append((subject, accuracy, se))

    # Print average accuracy per subject
    print(f"Average Accuracy for {subject}: {accuracy:.3f}")

# # Save results
# if not os.path.exists(decoding_results_dir):
#         os.makedirs(decoding_results_dir)
# results_save_path = join(decoding_results_dir, f"covert_alpha_p300_results_concat.npy")     
# np.save(results_save_path, results_array)    

# Convert results to a structured numpy array
results_array = np.array(
    results, dtype=[('subject', 'U10'), ('accuracy', 'f4'), ('standard_error', 'f4')]
)

# Overall results
overall_accuracy = np.round(results_array['accuracy'].mean(), 2)
overall_se = np.round(results_array['standard_error'].mean(), 2)
print(f"Overall LDA accuracy with PSD: {overall_accuracy:.2f} ± {overall_se:.2f}")

Processing VPpdik...
  Fold 1/4
Shape of epochs_trn: (60, 80, 63, 108)
Shape of epochs_tst: (20, 80, 63, 108)
Shape of X_clean_trn: (60, 80, 63, 108)
Shape of X_clean_tst: (20, 80, 63, 108)
Shape of features_trn: (60, 80, 63, 6)
Shape of features_tst: (20, 80, 63, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 378)
Shape of y_trn_epochs (this is fed into LDA): (4800,)
Shape of combined_features_trn: (60, 65)
Shape of combined_features_tst: (20, 65)
Fold 1 Accuracy: 0.500
  Fold 2/4
Shape of epochs_trn: (60, 80, 63, 108)
Shape of epochs_tst: (20, 80, 63, 108)
Shape of X_clean_trn: (60, 80, 63, 108)
Shape of X_clean_tst: (20, 80, 63, 108)
Shape of features_trn: (60, 80, 63, 6)
Shape of features_tst: (20, 80, 63, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 378)
Shape of y_trn_epochs (this is fed into LDA): (4800,)
Shape of combined_features_trn: (60, 65)
Shape of combined_features_tst: (20, 65)
Fold 2 Accuracy: 0.650
  Fold 3/4
Shape of epochs_trn: (60, 80, 63, 108)
S

  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)


Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 2 Accuracy: 0.500
  Fold 3/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)


  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)


Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 3 Accuracy: 0.950
  Fold 4/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)


  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)


Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 4 Accuracy: 0.500
Average Accuracy for VPpdim: 0.660
Processing VPpdin...
  Fold 1/4
Shape of epochs_trn: (60, 80, 62, 108)
Shape of epochs_tst: (20, 80, 62, 108)
Shape of X_clean_trn: (60, 80, 62, 108)
Shape of X_clean_tst: (20, 80, 62, 108)
Shape of features_trn: (60, 80, 62, 6)
Shape of features_tst: (20, 80, 62, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 372)
Shape of y_trn_epochs (this is fed into LDA): (4800,)
Shape of combined_features_trn: (60, 64)
Shape of combined_features_tst: (20, 64)
Fold 1 Accuracy: 0.500
  Fold 2/4
Shape of epochs_trn: (60, 80, 62, 108)
Shape of epochs_tst: (20, 80, 62, 108)
Shape of X_clean_trn: (60, 80, 62, 108)
Shape of X_clean_tst: (20, 80, 62, 108)
Shape of features_trn: (60, 80, 62, 6)
Shape of features_tst: (20, 80, 62, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 372)
Shape of y_trn_epochs (this is fed into LDA): (4800,)
Shape of combine

  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_lef

Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 1 Accuracy: 0.950
  Fold 2/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)


  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_lef

Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 2 Accuracy: 0.950
  Fold 3/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)


  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_lef

Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 3 Accuracy: 0.800
  Fold 4/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)


  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_lef

Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 4 Accuracy: 0.750
Average Accuracy for VPpdio: 0.860
Processing VPpdip...
  Fold 1/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)
Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 1 Accuracy: 0.500
  Fold 2/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)
Shape of combine

  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)


Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 1 Accuracy: 0.800
  Fold 2/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)


  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_lef

Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 2 Accuracy: 0.850
  Fold 3/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)


  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_lef

Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 3 Accuracy: 0.750
  Fold 4/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)


  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_left, _ = pearsonr(epoch_scores, left_targets)
  corr_right, _ = pearsonr(epoch_scores, right_targets)
  corr_lef

Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 4 Accuracy: 0.850
Average Accuracy for VPpdir: 0.810
Processing VPpdis...
  Fold 1/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)
Shape of combined_features_trn: (60, 66)
Shape of combined_features_tst: (20, 66)
Fold 1 Accuracy: 0.500
  Fold 2/4
Shape of epochs_trn: (60, 80, 64, 108)
Shape of epochs_tst: (20, 80, 64, 108)
Shape of X_clean_trn: (60, 80, 64, 108)
Shape of X_clean_tst: (20, 80, 64, 108)
Shape of features_trn: (60, 80, 64, 6)
Shape of features_tst: (20, 80, 64, 6)
Shape of X_trn_epochs (this is fed into LDA): (4800, 384)
Shape of y_trn_epochs (this is fed into LDA): (4800,)
Shape of combine

In [20]:
side_tst = z_tst[:, 2, :]
z_left = side_tst[:, 0] == 1
z_right = side_tst[:, 1] == 1
print(z_right)
print(z_left)
print(side_tst)

[False False False False  True False False False  True False False False
 False False False False  True False False False]
[False False False False False False  True False False False False False
 False False  True False False False  True False]
[[0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [1 0]
 [0 0]]
