01 - Data Exploration

BCI Competition IV Dataset 2a - Motor Imagery

This notebook explores the EEG data structure and basic signal characteristics.

Contents
1. Load and inspect raw data
2. Visualize EEG signals
3. Event structure analysis
4. Power spectral density
5. Compare classes

In [None]:
# Standard imports
import sys
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import mne

# Add the project's src/ directory (sibling of the notebook's working directory)
# to the import path. The membership check keeps the cell idempotent:
# re-running it does not stack duplicate entries on sys.path.
src_dir = str(Path.cwd().parent / 'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

# Project imports
from preprocessing import load_raw_gdf, extract_events, CHANNEL_NAMES, EVENT_CODES
from visualization import set_style, CLASS_NAMES

# Paths and subject selection (tune these to explore another recording)
DATA_DIR = Path('../data/raw')
SUBJECT = 1  # Start with subject 1

# Plotting / logging configuration: apply the project-defined plot style
# and only surface MNE messages at WARNING level or above.
set_style()
mne.set_log_level('WARNING')

# Record the library version next to the results for reproducibility
print(f"MNE version: {mne.__version__}")

The dataset contains GDF files with:
- 22 EEG channels (10-20 system)
- 250 Hz sampling rate
- Training (T) and Evaluation (E) sessions

In [None]:
# Load the training-session ("T") recording for the configured subject.
# The :02d format spec zero-pads the subject number (1 -> "A01T.gdf").
gdf_path = DATA_DIR / f'A{SUBJECT:02d}T.gdf'

# Fail fast when the file is missing: merely printing a hint would leave `raw`
# undefined and make every later cell die with an unrelated NameError.
if not gdf_path.exists():
    raise FileNotFoundError(
        f"Data file not found: {gdf_path}\n"
        "Run: python scripts/download_data.py"
    )

raw = load_raw_gdf(gdf_path)
print(raw.info)

In [None]:
# Summarize the recording: channel count, sampling rate, length, channel labels
summary_lines = [
    f"Channels: {len(raw.ch_names)}",
    f"Sampling rate: {raw.info['sfreq']} Hz",
    f"Duration: {raw.times[-1]:.1f} seconds",  # timestamp of the last sample
    f"\nChannel names: {raw.ch_names}",
]
print("\n".join(summary_lines))

In [None]:
# Browse a 10-second window of the recording, 22 channel rows at a time,
# letting MNE choose the amplitude scaling automatically.
browser_kwargs = dict(
    duration=10,
    n_channels=22,
    scalings='auto',
    title=f'Subject {SUBJECT} - Raw EEG',
)
raw.plot(**browser_kwargs);

In [None]:
# Draw the sensor layout with every electrode labelled by its channel name
sensor_kwargs = {'show_names': True, 'title': 'EEG Channel Locations'}
raw.plot_sensors(**sensor_kwargs);

- Cue onset defines t=0 for each trial (all times below are relative to the cue)
- Motor imagery period: 0-4 s after the cue
- Inter-trial interval: variable

In [None]:
# Pull the trial events and the class-name -> event-code mapping from the recording
events, event_id = extract_events(raw)

print(f"Total motor imagery trials: {len(events)}")
print(f"\nEvent mapping: {event_id}")

# Tally trials per class; the event code lives in the third column of `events`
event_codes = events[:, 2]
for name in event_id:
    count = np.count_nonzero(event_codes == event_id[name])
    print(f"  {name}: {count} trials")

In [None]:
# Visualize event timing: one row of tick marks per class, positioned at the
# event onsets converted from samples to seconds.
fig, ax = plt.subplots(figsize=(14, 3))

colors = ['blue', 'red', 'green', 'orange']
class_keys = list(event_id)        # row order == iteration order of event_id
sfreq = raw.info['sfreq']
event_codes = events[:, 2]

for row, name in enumerate(class_keys):
    onsets_s = events[event_codes == event_id[name], 0] / sfreq
    ax.eventplot(
        [onsets_s],
        colors=[colors[row % len(colors)]],  # wrap around if there are more classes than colors
        lineoffsets=row,
        linelengths=0.8,
        label=name,
    )

ax.set_xlabel('Time (s)')
ax.set_ylabel('Class')
# Ticks and labels are derived from the same mapping that positions the rows,
# so a label can never end up next to another class's events.
ax.set_yticks(range(len(class_keys)))
ax.set_yticklabels([name.replace('_', ' ').title() for name in class_keys])
ax.set_title('Motor Imagery Trial Timeline')
ax.legend(loc='upper right')
plt.tight_layout();

Key bands for motor imagery:
- Mu rhythm (8-12 Hz): Sensorimotor cortex
- Beta rhythm (13-30 Hz): Motor planning/execution

In [None]:
# Compute the power spectral density between 1 and 50 Hz and plot the
# average across EEG channels.
spectrum = raw.compute_psd(fmin=1, fmax=50)

# show=False keeps the figure open so it can be annotated; the returned figure
# is used directly instead of relying on pyplot's "current axes" state.
fig = spectrum.plot(average=True, picks='eeg', xscale='linear', show=False)

# Shade the frequency bands of interest for motor imagery
ax = fig.axes[0]
ax.axvspan(8, 12, alpha=0.2, color='blue', label='Mu (8-12 Hz)')
ax.axvspan(13, 30, alpha=0.2, color='red', label='Beta (13-30 Hz)')
ax.legend();

In [None]:
# Scalp maps of normalized power, one panel per band (mu and beta)
topo_bands = {
    'Mu (8-12 Hz)': (8, 12),
    'Beta (13-30 Hz)': (13, 30),
}
fig = spectrum.plot_topomap(bands=topo_bands, normalize=True);

In [None]:
# Band-limit a copy of the recording to 8-30 Hz so the original `raw` stays untouched
raw_filt = raw.copy()
raw_filt.filter(l_freq=8, h_freq=30, verbose=False)

# Cut one epoch per event, from 0.5 s before to 4.0 s after the event,
# with no baseline correction and the data loaded into memory.
epoch_kwargs = dict(
    events=events,
    event_id=event_id,
    tmin=-0.5,
    tmax=4.0,
    baseline=None,
    preload=True,
    verbose=False,
)
epochs = mne.Epochs(raw_filt, **epoch_kwargs)

print(epochs)

In [None]:
# Plot the trial average of each class at C3 and C4 (motor cortex),
# one panel per class on a 2x2 grid filled row by row.
fig, axes = plt.subplots(2, 2, figsize=(14, 8))

for ax, class_name in zip(axes.flat, event_id):
    evoked = epochs[class_name].average()
    evoked.plot(picks=['C3', 'C4'], axes=ax, show=False, spatial_colors=True)
    # Replace the plot's default title with a readable class name
    ax.set_title(f'{class_name.replace("_", " ").title()}')
    ax.axvline(0, color='k', linestyle='--', alpha=0.5, label='Cue onset')
    # Without a legend the 'Cue onset' label above would never be displayed
    ax.legend(loc='upper right')

# Lay out the panels first, then place the overall title just above them
plt.tight_layout();
fig.suptitle('Average ERP at Motor Cortex (C3, C4)', y=1.02, fontsize=14);

In [None]:
# Time-frequency analysis for left vs right hand imagery using Morlet wavelets.
# Frequencies span 8-30 Hz in 1 Hz steps; the number of cycles grows with
# frequency (freq / 2).
freqs = np.arange(8, 31, 1)
n_cycles = freqs / 2

# Settings shared by both conditions: power only (no ITC), averaged over trials
tfr_kwargs = dict(
    freqs=freqs, n_cycles=n_cycles,
    return_itc=False, average=True, verbose=False,
)

# Left hand imagery at C4 (right hemisphere, contralateral to the left hand)
power_left = mne.time_frequency.tfr_morlet(
    epochs['left_hand'], picks='C4', **tfr_kwargs
)

# Right hand imagery at C3 (left hemisphere, contralateral to the right hand)
power_right = mne.time_frequency.tfr_morlet(
    epochs['right_hand'], picks='C3', **tfr_kwargs
)

fig, axes = plt.subplots(1, 2, figsize=(14, 4))
# Titles are set per axes: the plot method's `title` argument applies to the
# whole figure, so on a shared figure the second call would replace the first.
power_left.plot([0], axes=axes[0], show=False)
axes[0].set_title('Left Hand Imagery at C4')
power_right.plot([0], axes=axes[1], show=False)
axes[1].set_title('Right Hand Imagery at C3')
plt.tight_layout();

Key observations:
1. Data contains 22 EEG channels at 250 Hz
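2. 288 trials per session according to the dataset description (72 per class, balanced across 4 classes) — confirm against the per-class counts printed in the event structure section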
3. Mu and beta rhythms visible in sensorimotor regions
4. Contralateral differences visible in time-frequency