In [None]:
pip install mne

Collecting mne
  Downloading mne-1.9.0-py3-none-any.whl.metadata (20 kB)
Downloading mne-1.9.0-py3-none-any.whl (7.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mne
Successfully installed mne-1.9.0


In [None]:
pip install pyedflib

Collecting pyedflib
  Downloading pyedflib-0.1.40.tar.gz (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: pyedflib
  Building wheel for pyedflib (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pyedflib: filename=pyedflib-0.1.40-cp311-cp311-linux_x86_64.whl size=2734979 sha256=23574bd687b3ef03c149421e88c2a489a7d7db798568f0e23172e4ba56d50485
  Stored in directory: /root/.cache/pip/wheels/8d/df/d6/88ce619bde055ebffebae5380645802eca490817853b60b45b
Successfully built pyedflib
Installing collected packages: pyedflib
Successfully installed pyedflib-0.1.40


In [None]:
import mne
import numpy as np
import torch
import os
import pyedflib
from google.colab import drive
from scipy.signal import butter, filtfilt
from sklearn.preprocessing import StandardScaler
# Mount Google Drive at /content/drive; the dataset paths used in later cells live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Dataset root on the mounted Drive; recordings sit in per-subject sub-folders (e.g. S001/S001R01.edf).
base_dir = "/content/drive/My Drive/4106 Project/eeg-motor-movementimagery-dataset-1.0.0/files"

In [None]:
def bandpass_filter(data, sfreq, low=0.5, high=40.0, order=4):
    """Apply a zero-phase Butterworth band-pass filter along the last axis of ``data``.

    The filter is designed as second-order sections (SOS) rather than as (b, a)
    polynomial coefficients: with a pass-band edge very close to 0 relative to
    the Nyquist frequency, the polynomial form is numerically sensitive, and
    SciPy recommends SOS for filtering.

    Args:
        data: Array-like signal; filtering runs along its last axis.
        sfreq: Sampling frequency of ``data`` in Hz.
        low: Lower pass-band edge in Hz.
        high: Upper pass-band edge in Hz.
        order: Butterworth order passed to ``scipy.signal.butter``.

    Returns:
        Array with the same shape as ``data`` containing the filtered signal.

    Raises:
        ValueError: Propagated from SciPy, e.g. when the normalised cut-offs do
            not lie strictly between 0 and 1 or the signal is too short for
            the forward-backward padding.
    """
    # Local import: the notebook's import cell only brings in butter/filtfilt.
    from scipy.signal import sosfiltfilt

    nyq = 0.5 * sfreq
    # butter() expects cut-offs as fractions of the Nyquist frequency.
    band = [low / nyq, high / nyq]
    sos = butter(order, band, btype='band', output='sos')
    # Forward-backward filtering cancels the phase delay of a single pass.
    return sosfiltfilt(sos, data)

def load_events(event_file):
    """Parse a whitespace-delimited event file into ``(timestamp, label)`` pairs.

    Each line is split on whitespace. Lines with fewer than two fields are
    ignored silently. For the remaining lines the first field is converted to
    ``float`` and paired with the second field (kept as a string); any further
    fields are ignored. A line whose first field cannot be converted is
    reported on stdout and skipped.

    Args:
        event_file: Path of the text file to read (decoded as latin-1).

    Returns:
        List of ``(float, str)`` tuples in file order.
    """
    parsed = []
    # latin-1 maps every byte to a character, so decoding itself never fails.
    with open(event_file, 'r', encoding='latin-1') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split()
            if len(fields) < 2:
                continue
            try:
                onset = float(fields[0])
            except Exception as e:
                print(f"Skipping line due to error: {e}")
            else:
                parsed.append((onset, fields[1]))
    return parsed

In [None]:
# Sanity check on a single recording: which channels and annotations does an EDF file carry?
# The path is built from base_dir (set in an earlier cell) so the dataset location
# is not repeated here.
edf_path = os.path.join(base_dir, "S001", "S001R01.edf")
raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)

# Print channel names
print("Channels:", raw.ch_names)

# Print annotations (if present)
print("Annotations:", raw.annotations)

# Convert annotations to an events array plus the description -> integer id mapping
events, event_id = mne.events_from_annotations(raw)
print("Event dictionary:", event_id)
print("Events array:\n", events)

Channels: ['Fc5.', 'Fc3.', 'Fc1.', 'Fcz.', 'Fc2.', 'Fc4.', 'Fc6.', 'C5..', 'C3..', 'C1..', 'Cz..', 'C2..', 'C4..', 'C6..', 'Cp5.', 'Cp3.', 'Cp1.', 'Cpz.', 'Cp2.', 'Cp4.', 'Cp6.', 'Fp1.', 'Fpz.', 'Fp2.', 'Af7.', 'Af3.', 'Afz.', 'Af4.', 'Af8.', 'F7..', 'F5..', 'F3..', 'F1..', 'Fz..', 'F2..', 'F4..', 'F6..', 'F8..', 'Ft7.', 'Ft8.', 'T7..', 'T8..', 'T9..', 'T10.', 'Tp7.', 'Tp8.', 'P7..', 'P5..', 'P3..', 'P1..', 'Pz..', 'P2..', 'P4..', 'P6..', 'P8..', 'Po7.', 'Po3.', 'Poz.', 'Po4.', 'Po8.', 'O1..', 'Oz..', 'O2..', 'Iz..']
Annotations: <Annotations | 1 segment: T0 (1)>
Used Annotations descriptions: [np.str_('T0')]
Event dictionary: {np.str_('T0'): 1}
Events array:
 [[0 0 1]]


In [None]:
import os
import mne
import numpy as np

# Base folder
base_path = "/content/drive/My Drive/4106 Project/eeg-motor-movementimagery-dataset-1.0.0/files"

# Runs to include (all other runs are skipped).
# NOTE(review): per the PhysioNet dataset description, R04/R08/R12 are imagined
# fist movements while R03/R07/R11 are executed ones - confirm that mixing
# both kinds under the same labels is intended.
valid_runs = ['R03', 'R04', 'R07', 'R08', 'R11', 'R12']

# Fixed annotation -> label mapping, so a label id means the same thing in every
# file even when a recording lacks one of the annotation types. It matches the
# ids MNE assigns automatically when T0, T1 and T2 are all present.
annotation_ids = {'T0': 1, 'T1': 2, 'T2': 3}

# Common sampling rate for every recording. Zero-padding or truncating epochs
# cannot make recordings with different sampling rates comparable, so any
# recording at another rate is resampled before epoching.
# NOTE(review): 160 Hz is the rate given in the dataset description - confirm.
target_sfreq = 160.0

# Initialize data containers
X = []
y = []

# MNE parameters
tmin, tmax = 0.0, 2.0  # seconds for each epoch
# Number of time points per epoch; fixed by the first processed recording
n_times = None

# Loop over all subject folders (sorted so the dataset order is reproducible)
for subject_folder in sorted(os.listdir(base_path)):
    subject_path = os.path.join(base_path, subject_folder)
    if not os.path.isdir(subject_path):
        continue

    for edf_file in sorted(os.listdir(subject_path)):
        if not edf_file.endswith(".edf"):
            continue

        if not any(run in edf_file for run in valid_runs):
            continue  # Skip rest or irrelevant runs

        edf_path = os.path.join(subject_path, edf_file)
        try:
            raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)

            # Resample BEFORE extracting events: event positions are sample
            # indices and would no longer line up if computed at the old rate.
            if raw.info['sfreq'] != target_sfreq:
                raw.resample(target_sfreq, verbose=False)

            events, event_id = mne.events_from_annotations(
                raw, event_id=annotation_ids, verbose=False)

            if len(events) == 0:
                print(f"No events found in {edf_file}")
                continue

            # Pick EEG channels only
            raw.pick("eeg")

            # Create epochs (2-second window from each event). The mapping
            # returned above only lists annotations present in this file,
            # which is what Epochs expects.
            epochs = mne.Epochs(raw, events, event_id=event_id,
                                tmin=tmin, tmax=tmax, baseline=None,
                                preload=True, verbose=False)

            labels = epochs.events[:, -1]  # Extract label IDs
            data = epochs.get_data()       # Shape: (n_epochs, n_channels, n_times)

            # Safety net: after resampling every recording should already
            # yield the same epoch length; align any remaining mismatch.
            if n_times is None:
                n_times = data.shape[2]  # Set initial value
            elif data.shape[2] > n_times:
                data = data[:, :, :n_times]  # Truncate
            elif data.shape[2] < n_times:
                pad_width = ((0, 0), (0, 0), (0, n_times - data.shape[2]))
                data = np.pad(data, pad_width, mode='constant')  # Pad with zeros

            X.append(data)
            y.append(labels)

            print(f"Processed {edf_file}: {data.shape[0]} samples")

        except Exception as e:
            # Best effort: report the failing file and keep going with the rest.
            print(f"Failed to process {edf_file}: {e}")

# Combine all data
if X:
    X = np.concatenate(X, axis=0)
    y = np.concatenate(y, axis=0)
    np.savez_compressed("/content/drive/My Drive/4106 Project/eeg_dataset.npz", X=X, y=y)
    print(f"\n✅ Compiled dataset: X shape = {X.shape}, y shape = {y.shape}")
else:
    print("⚠️ No valid samples were found.")

Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S107R11.edf: 30 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S107R07.edf: 30 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S107R03.edf: 30 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S107R12.edf: 30 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S107R04.edf: 30 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1')

  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


Processed S100R08.edf: 24 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


Processed S100R07.edf: 24 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


Processed S100R04.edf: 24 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


Processed S100R12.edf: 24 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


Processed S100R11.edf: 24 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


  raw = mne.io.read_raw_edf(edf_path, preload=True, verbose=False)


Processed S100R03.edf: 24 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S106R12.edf: 30 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S106R08.edf: 30 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S106R07.edf: 30 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S106R11.edf: 30 samples
Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Processed S106R03.edf: 30 samples
Used Annotations descripti