## Decoding pipeline c-VEP
Below is the decoding pipeline used for c-VEP. The following pre-processing has already been done:
1. Bandpass filtering (1-40 Hz).
2. Epoching around stimulus events. The trials have a duration of 20 seconds, epoching is done 5 seconds before stimulus onset until 25 seconds after stimulus onset.
3. Resampling to 120 Hz. This reduces data size and speeds up processing without losing relevant frequency information

### Results
Only using rCCA and no ICA leads to an average of 99% for the overt paradigm and 60.5% for covert.

In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Jordy Thielen (jordy.thielen@donders.ru.nl)
"""

import os
import numpy as np
import joblib
import mne
import pandas as pd
import pyntbci

data_dir = '/Users/juliette/Desktop/thesis/preprocessing/c-VEP_preprocessing'
save_dir = '/Users/juliette/Desktop/thesis/results/c-VEP'
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]

tasks = ["overt", "covert"]

event = "dur"
onset_event = True
encoding_length = 0.3
ensemble = True
n_folds = 4

# Define performance arrays
accuracy = np.zeros((len(subjects), len(tasks), n_folds))
accuracy_se = np.zeros((len(subjects), len(tasks)))
accuracy_mean = np.zeros((len(subjects), len(tasks)))

# Loop participants
for i_subject, subject in enumerate(subjects):
    print(f"{subject}", end="\t")

    # Loop tasks
    for i_task, task in enumerate(tasks):
        print(f"{task}: ", end="")

        # Load data
        fn = os.path.join(data_dir, f"sub-{subject}_task-{task}.npz")
        tmp = np.load(fn)
        fs = int(tmp["fs"])
        X = tmp["X"]
        N = X.shape[0]  # number of trials
        D = X.shape[1] * X.shape[2]  # channels × time points
        ratio = N / D
        print(f"Samples (N): {N}, Features (D): {D}, Ratio: {ratio:.6f}")



        
        y = tmp["y"]
        print("Shape of y:", y.shape)
        V = tmp["V"]

        # Cross-validation
        folds = np.repeat(np.arange(n_folds), int(X.shape[0] / n_folds))
        for i_fold in range(n_folds):
            # Split data to train and test set
            X_trn, y_trn = X[folds != i_fold, :, :], y[folds != i_fold]
            X_tst, y_tst = X[folds == i_fold, :, :], y[folds == i_fold]
            print()

            # Train classifier
            rcca = pyntbci.classifiers.rCCA(stimulus=V, fs=fs, event=event, encoding_length=encoding_length,
                                            onset_event=onset_event, ensemble=ensemble)
            print("Shape of X_trn:", X_trn.shape)
            print("Shape of y_trn:", y_trn.shape)
            rcca.fit(X_trn, y_trn)

            # Apply classifier
            yh_tst = rcca.predict(X_tst)
            print("shape of X_tst:", X_tst.shape)

            # Compute accuracy
            accuracy[i_subject, i_task, i_fold] = np.mean(yh_tst == y_tst)
            
        # Compute mean and standard error
        fold_accuracies = accuracy[i_subject, i_task, :]
        accuracy_mean[i_subject, i_task] = fold_accuracies.mean()
        accuracy_se[i_subject, i_task] = np.round(fold_accuracies.std() / np.sqrt(n_folds), 2)
        
        print(f"{accuracy[i_subject, i_task, :].mean():.3f}", end="\t")
    print()

print(f"Average:\tovert: {accuracy[:, 0, :].mean():.3f}\tcovert: {accuracy[:, 1, :].mean():.3f}")

# np.savez(os.path.join(save_dir, "c-VEP_rcca.npz"), accuracy=accuracy, accuracy_mean=accuracy_mean, accuracy_se=accuracy_se)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


VPpdia	overt: Samples (N): 20, Features (D): 148800, Ratio: 0.000134
Shape of y: (20,)

Shape of X_trn: (15, 62, 2400)
Shape of y_trn: (15,)
shape of X_tst: (5, 62, 2400)

Shape of X_trn: (15, 62, 2400)
Shape of y_trn: (15,)
shape of X_tst: (5, 62, 2400)

Shape of X_trn: (15, 62, 2400)
Shape of y_trn: (15,)
shape of X_tst: (5, 62, 2400)

Shape of X_trn: (15, 62, 2400)
Shape of y_trn: (15,)
shape of X_tst: (5, 62, 2400)
1.000	covert: Samples (N): 80, Features (D): 148800, Ratio: 0.000538
Shape of y: (80,)

Shape of X_trn: (60, 62, 2400)
Shape of y_trn: (60,)
shape of X_tst: (20, 62, 2400)

Shape of X_trn: (60, 62, 2400)
Shape of y_trn: (60,)
shape of X_tst: (20, 62, 2400)

Shape of X_trn: (60, 62, 2400)
Shape of y_trn: (60,)
shape of X_tst: (20, 62, 2400)

Shape of X_trn: (60, 62, 2400)
Shape of y_trn: (60,)
shape of X_tst: (20, 62, 2400)
0.588	
VPpdib	overt: Samples (N): 20, Features (D): 146400, Ratio: 0.000137
Shape of y: (20,)

Shape of X_trn: (15, 61, 2400)
Shape of y_trn: (15,)
sh

KeyboardInterrupt: 

In [4]:
import numpy as np
import os

data_dir = '/Users/juliette/Desktop/thesis/preprocessing/c-VEP_preprocessing'
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]
tasks = ["overt", "covert"]

all_X = []
all_y = []

# Example if you have channel names per file
all_channel_names = []

for subject in subjects:
    for task in tasks:
        fn = os.path.join(data_dir, f"sub-{subject}_task-{task}.npz")
        if os.path.exists(fn):
            tmp = np.load(fn)
            channel_names = tmp["channels"]  # adjust if channel names stored under a different key
            all_channel_names.append(set(channel_names))

common_channels = set.intersection(*all_channel_names)
print(f"Number of common channels: {len(common_channels)}")

            
            

# Concatenate all data along trials axis (axis=0)
X_all = np.concatenate(all_X, axis=0)
y_all = np.concatenate(all_y, axis=0)

# Calculate total samples and features
n_samples = X_all.shape[0]  # total trials/samples
n_features = X_all.shape[1] * X_all.shape[2]  # channels × timepoints

ratio = n_samples / n_features

print(f"Total samples (trials): {n_samples}")
print(f"Total features (channels × timepoints): {n_features}")
print(f"Sample to feature ratio: {ratio:.6f}")


KeyError: 'channels is not a file in the archive'

# Applying ICA

In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Jordy Thielen (jordy.thielen@donders.ru.nl)
"""

import os
import numpy as np
import joblib
import mne
import pandas as pd
import pyntbci

data_dir = '/Users/juliette/Desktop/thesis/preprocessing/c-VEP_preprocessing/c-VEP_ICA'
save_dir = '/Users/juliette/Desktop/thesis/results/c-VEP/c-VEP_ICA'
subjects = [
    "VPpdia", "VPpdib", "VPpdic", "VPpdid", "VPpdie", "VPpdif", "VPpdig", "VPpdih", "VPpdii", "VPpdij", "VPpdik",
    "VPpdil", "VPpdim", "VPpdin", "VPpdio", "VPpdip", "VPpdiq", "VPpdir", "VPpdis", "VPpdit", "VPpdiu", "VPpdiv",
    "VPpdiw", "VPpdix", "VPpdiy", "VPpdiz", "VPpdiza", "VPpdizb", "VPpdizc"
]

tasks = ["overt", "covert"]

event = "dur"
onset_event = True
encoding_length = 0.3
ensemble = True
n_folds = 4

# Define performance arrays
accuracy = np.zeros((len(subjects), len(tasks), n_folds))
accuracy_se = np.zeros((len(subjects), len(tasks)))
accuracy_mean = np.zeros((len(subjects), len(tasks)))

# Loop participants
for i_subject, subject in enumerate(subjects):
    print(f"{subject}", end="\t")

    # Loop tasks
    for i_task, task in enumerate(tasks):
        print(f"{task}: ", end="")

        # Load data
        fn = os.path.join(data_dir, f"sub-{subject}_task-{task}_ICA.npz")
        tmp = np.load(fn)
        fs = int(tmp["fs"])
        X = tmp["X"]
        y = tmp["y"]
        V = tmp["V"]

        # Cross-validation
        folds = np.repeat(np.arange(n_folds), int(X.shape[0] / n_folds))
        for i_fold in range(n_folds):
            # Split data to train and test set
            X_trn, y_trn = X[folds != i_fold, :, :], y[folds != i_fold]
            X_tst, y_tst = X[folds == i_fold, :, :], y[folds == i_fold]

            # Train classifier
            rcca = pyntbci.classifiers.rCCA(stimulus=V, fs=fs, event=event, encoding_length=encoding_length,
                                            onset_event=onset_event, ensemble=ensemble)
            rcca.fit(X_trn, y_trn)

            # Apply classifier
            yh_tst = rcca.predict(X_tst)

            # Compute accuracy
            accuracy[i_subject, i_task, i_fold] = np.mean(yh_tst == y_tst)
            
        # Compute mean and standard error
        fold_accuracies = accuracy[i_subject, i_task, :]
        accuracy_mean[i_subject, i_task] = fold_accuracies.mean()
        accuracy_se[i_subject, i_task] = np.round(fold_accuracies.std() / np.sqrt(n_folds), 2)
        
        print(f"{accuracy[i_subject, i_task, :].mean():.3f}", end="\t")
    print()

print(f"Average:\tovert: {accuracy[:, 0, :].mean():.3f}\tcovert: {accuracy[:, 1, :].mean():.3f}")

np.savez(os.path.join(save_dir, "c-VEP_rcca_ICA.npz"), accuracy=accuracy, accuracy_mean=accuracy_mean, accuracy_se=accuracy_se)


VPpdia	overt: 1.000	covert: 0.562	
VPpdib	overt: 1.000	covert: 0.600	
VPpdic	overt: 1.000	covert: 0.588	
VPpdid	overt: 1.000	covert: 0.613	
VPpdie	overt: 1.000	covert: 0.637	
VPpdif	overt: 1.000	covert: 0.713	
VPpdig	overt: 1.000	covert: 0.725	
VPpdih	overt: 1.000	covert: 0.725	
VPpdii	overt: 1.000	covert: 0.688	
VPpdij	overt: 1.000	covert: 0.662	
VPpdik	overt: 1.000	covert: 0.725	
VPpdil	overt: 1.000	covert: 0.588	
VPpdim	overt: 1.000	covert: 0.600	
VPpdin	overt: 0.950	covert: 0.850	
VPpdio	overt: 1.000	covert: 0.675	
VPpdip	overt: 1.000	covert: 0.862	
VPpdiq	overt: 1.000	covert: 0.600	
VPpdir	overt: 1.000	covert: 0.800	
VPpdis	overt: 0.950	covert: 0.575	
VPpdit	overt: 1.000	covert: 0.775	
VPpdiu	overt: 1.000	covert: 0.700	
VPpdiv	overt: 1.000	covert: 0.800	
VPpdiw	overt: 0.950	covert: 0.550	
VPpdix	overt: 1.000	covert: 0.662	
VPpdiy	overt: 1.000	covert: 0.488	
VPpdiz	overt: 1.000	covert: 0.575	
VPpdiza	overt: 1.000	covert: 0.588	
VPpdizb	overt: 1.000	covert: 0.600	
VPpdizc	overt: 1.0