# BigP3BCI Demo
This notebook demonstrates a P300 classification pipeline using the BigP3BCI dataset.

## Setup
Load libraries and display the MNE version.

In [None]:
import os
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import mne
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import confusion_matrix

from neurohub import (
    load_raw,
    bandpass,
    decimate,
    make_epochs,
    extract_features,
    lda_cv,
    plot_erp,
)

# Print the installed MNE version so the notebook output records which release produced it.
print("MNE", mne.__version__)

## Data location
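Set the dataset location with the `NEURO_DATA_ROOT` environment variable; it should point to the directory that contains `bigP3BCI-data`. If the variable is unset, the notebook falls back to `../data/bigp3bci-an-open-diverse-and-machine-learning-ready-p300-based-brain-computer-interface-dataset-1.0.0`, resolved relative to the current working directory.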

In [None]:
# Resolve the dataset root: an explicit NEURO_DATA_ROOT wins, otherwise use the
# PhysioNet release directory under ../data (relative to the working directory).
root_env = os.environ.get("NEURO_DATA_ROOT")
if not root_env:
    repo_base = Path.cwd().parent / "data"
    data_root = repo_base.joinpath(
        "bigp3bci-an-open-diverse-and-machine-learning-ready-p300-based-brain-computer-interface-dataset-1.0.0"
    )
else:
    data_root = Path(root_env).expanduser()

# One subject/session of Study A: a calibration ("Train") run and a "Test" run.
subject_dir = data_root.joinpath("bigP3BCI-data", "StudyA", "A_01", "SE001")
train_path = subject_dir.joinpath("Train", "CB", "A_01_SE001_CB_Train01.edf")
test_path = subject_dir.joinpath("Test", "CB", "A_01_SE001_CB_Test06.edf")

print("Train file", train_path)
print("Test file", test_path)

## Load raw EEG
We read one calibration run and one test run from the dataset.

In [None]:
# Read both EDF recordings through the project loader, calibration run first.
raw_train, raw_test = map(load_raw, (train_path, test_path))
print(raw_train)

## Extract stimulus events
Events are stored in the `StimulusBegin` channel. The `StimulusType` channel encodes whether the flash contained the target (1) or not (0).

In [None]:
def annotate_events(raw):
    """Attach stimulus annotations to ``raw`` and remove the marker channels.

    A stimulus onset is a *rising edge* of the ``StimulusBegin`` channel: a
    sample whose value is > 0 while the preceding sample's is not (a positive
    first sample also counts). Using edges rather than every positive sample
    means a marker that stays high for several consecutive samples still
    yields exactly one event per stimulus.

    Each onset is labelled ``"target"`` when ``StimulusType`` (cast to int) is
    > 0 at the onset sample and ``"non"`` otherwise.

    Parameters
    ----------
    raw : MNE Raw-like object
        Must contain ``StimulusBegin`` and ``StimulusType`` channels.
        **Modified in place**: its annotations are replaced and the
        stimulus/state channels listed below are dropped, so this function
        cannot be called a second time on the same object.

    Returns
    -------
    numpy.ndarray of str
        One ``"target"`` / ``"non"`` label per detected onset, in time order.
    """
    stim_begin = raw.get_data(picks=["StimulusBegin"])[0]
    stim_type = raw.get_data(picks=["StimulusType"])[0].astype(int)

    # Keep only samples where the marker switches on (low -> high transition).
    active = stim_begin > 0
    rising = active.copy()
    rising[1:] &= ~active[:-1]
    onset_samples = np.flatnonzero(rising)

    # Sample index -> seconds from the start of the recording.
    onsets = onset_samples / raw.info["sfreq"]
    desc = np.where(stim_type[onset_samples] > 0, "target", "non")
    # Zero-duration annotations: each one marks an instant.
    raw.set_annotations(mne.Annotations(onsets, [0] * len(onsets), desc))

    # Marker/state channels to remove once the events have been extracted.
    drop = [
        "StimulusBegin",
        "StimulusType",
        "StimulusCode",
        "CurrentTarget",
        "FakeFeedback",
        "DisplayResults",
        "SelectedTarget",
        "SelectedRow",
        "SelectedColumn",
        "PhaseInSequence",
    ]
    # Only drop the ones that are actually present in this recording.
    raw.drop_channels([ch for ch in drop if ch in raw.ch_names])
    return desc


# Annotate both recordings (calibration run first) and keep the per-event labels.
train_desc, test_desc = (annotate_events(rec) for rec in (raw_train, raw_test))

# Report how many events of each label the calibration run contains.
train_label_counts = np.unique(train_desc, return_counts=True)
print("Train events", train_label_counts)

## Preprocessing
We band-pass filter from 0.1–30 Hz and resample to 128 Hz.

In [None]:
# Filter both runs before any downsampling. Return values are not captured,
# so this relies on the helpers modifying the recordings in place.
# NOTE(review): confirm in-place behaviour against the neurohub helpers.
for run in (raw_train, raw_test):
    bandpass(run)

# Kept as bare calls so the final one remains the cell's last expression.
decimate(raw_train)
decimate(raw_test)

## Epoch extraction
We extract epochs from −0.2 s to 0.8 s relative to each stimulus onset and apply baseline correction using the pre-stimulus period.

In [None]:
# Epoch window bounds handed to make_epochs for both runs.
tmin = -0.2
tmax = 0.8
train_epochs, test_epochs = (
    make_epochs(recording, tmin, tmax) for recording in (raw_train, raw_test)
)
# Bare trailing expression: shows the calibration epochs as the cell output.
train_epochs

## ERP grand average
Plot the average waveform for target and non-target trials.

In [None]:
# Build the ERP figure from the calibration epochs using the project helper.
fig = plot_erp(train_epochs)
# Bare trailing expression so the returned object is rendered as the cell output.
fig

## Feature extraction and classification
We vectorize the 250–450 ms window and train an LDA on calibration data, then evaluate on the test run.

In [None]:
# Convert each epoch set into a (features, labels) pair, calibration run first.
(X_train, y_train), (X_test, y_test) = map(
    extract_features, (train_epochs, test_epochs)
)

# lda_cv is evaluated on the calibration data before the final fit below.
cv_acc = lda_cv(X_train, y_train)

# Fit on the full calibration run, then score on the held-out test run.
clf = LinearDiscriminantAnalysis()
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print(f"CV accuracy: {cv_acc:.3f}, Test accuracy: {score:.3f}")

## Confusion matrix
Examine classifier performance on the test run.

In [None]:
# Predict labels for the held-out test run with the fitted classifier.
y_pred = clf.predict(X_test)

# Pin rows/columns to the classes the classifier was trained on. Without
# `labels`, the matrix shape and order depend on which classes happen to occur
# in y_test / y_pred, which could disagree with the fixed tick labels below.
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)
print(cm)

fig, ax = plt.subplots()
im = ax.imshow(cm, cmap="Blues")
ax.set_title("Confusion matrix (test run)")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
# NOTE(review): tick names assume clf.classes_ is ordered (non-target, target)
# — confirm against the label encoding used by extract_features.
ax.set_xticks([0, 1])
ax.set_xticklabels(["Non-target", "Target"])
ax.set_yticks([0, 1])
ax.set_yticklabels(["Non-target", "Target"])

# Write each count into its cell; use white text on the darker cells so the
# numbers stay readable against the "Blues" colormap.
half_scale = cm.max() / 2
for (i, j), v in np.ndenumerate(cm):
    ax.text(
        j,
        i,
        str(v),
        ha="center",
        va="center",
        color="white" if v > half_scale else "black",
    )
fig.colorbar(im, ax=ax)
plt.show()