# EEG Motor Movement/Imagery Classification Using Random Forest and Convolutional Neural Networks

### Imports

In [11]:
import os
import numpy as np
import mne
import pandas as pd
from dotenv import load_dotenv
from pathlib import Path
from scipy.signal import detrend

### Getting .env variables

In [12]:
# Pull settings from a local .env file (if present) into the process environment.
load_dotenv()

# Where the raw dataset lives and where results should be written.
INPUT_DIR = os.getenv("INPUT_DIR", "./data/raw")
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./result")

# SFREQ is in samples per second and WINDOW_SEC in seconds: their product is
# used below as the number of samples per window.
SFREQ = int(os.getenv("SFREQ", 160))
WINDOW_SEC = float(os.getenv("WINDOW_SEC", 2))
# NOTE(review): presumably the fractional overlap between consecutive windows;
# it is not used in the cells visible here -- confirm against later cells.
OVERLAP = float(os.getenv("OVERLAP", 0.5))

# Environment values are strings, and bool() of ANY non-empty string is True,
# so DEBUG=0 / DEBUG=false previously left debugging ON. Parse the flag
# explicitly instead; the default (variable unset) remains ON.
DEBUG = os.getenv("DEBUG", "1").strip().lower() in {"1", "true", "yes", "on"}
print(f"DEBUGGING is {'ON' if DEBUG else 'OFF'}")

DEBUGGING is ON


In [13]:
# Index file listing every recording; it sits directly inside INPUT_DIR.
records_file = os.path.join(INPUT_DIR, "RECORDS")

# Samples per window = window length (s) x sampling rate, truncated to int.
window_samples = int(WINDOW_SEC * SFREQ)

print(f"Window samples: {window_samples}")
print(f"Records file: {records_file}")

Window samples: 320
Records file: ./data/raw/RECORDS


## 1. Data Preparation

### 1.1. Loading RECORDS file and verifying EDF files' existence

In [14]:
# RECORDS holds one EDF path per line, relative to INPUT_DIR; blank lines are skipped.
with open(records_file, "r") as f:
    records = [line.strip() for line in f if line.strip()]

print(f"Number of RECORDS entries: {len(records)}")
for r in records[:4]:
    print(" ", r)
# Show the "... last entry" tail only when the list is longer than the head
# preview. This avoids an IndexError on records[-1] when RECORDS is empty and
# avoids repeating an entry that the preview already printed.
if len(records) > 4:
    print("  ...\n ", records[-1])

# Split the listed recordings into those present on disk and those that are not.
edf_paths = []
missing = []

for rel in records:
    p = os.path.join(INPUT_DIR, rel)
    if os.path.exists(p):
        edf_paths.append(p)
    else:
        missing.append(p)

print(f"\nResolved EDF files: {len(edf_paths)}")
print(f"Missing EDF files: {len(missing)}")

if missing:
    print("Example missing path:", missing[0])

Number of RECORDS entries: 1526
  S001/S001R01.edf
  S001/S001R02.edf
  S001/S001R03.edf
  S001/S001R04.edf
  ...
  S109/S109R14.edf

Resolved EDF files: 1526
Missing EDF files: 0


### 1.2. Testing loading of the first EDF file (metadata sanity check)

In [None]:
if DEBUG:
    # Smoke-test a single recording (the first resolved path) before going further.
    test_edf = edf_paths[0]
    print("Testing EDF:", test_edf)

    # preload=False is requested, so the signal data is not asked to be loaded here.
    raw = mne.io.read_raw_edf(test_edf, verbose=False, preload=False)
    channel_names = raw.ch_names
    file_sfreq = raw.info["sfreq"]

    print("\n--- EDF INFO ---")
    print("Channels:", len(channel_names))
    print("Sampling freq:", file_sfreq)
    print("Duration (sec):", raw.times[-1])
    print("First 10 channels:", channel_names[:10])

    # The file must expose at least 64 channels and match the configured SFREQ.
    assert not len(channel_names) < 64, "Expected ~64 EEG channels"
    assert abs(file_sfreq - SFREQ) < 1e-3, "Sampling frequency mismatch"

Testing EDF: ./data/raw/S001/S001R01.edf

--- EDF INFO ---
Channels: 64
Sampling freq: 160.0
Duration (sec): 60.99375
First 10 channels: ['Fc5.', 'Fc3.', 'Fc1.', 'Fcz.', 'Fc2.', 'Fc4.', 'Fc6.', 'C5..', 'C3..', 'C1..']


### 1.3. Preprocessing All EDF Files and Saving Processed Data

In [16]:
def load_and_summarize(edf_path: "Path | str"):
    """Open one EDF recording, print a short summary, and extract its events.

    Args:
        edf_path: Location of the EDF file. A ``Path`` or a plain string is
            accepted; it is converted to ``Path`` internally.

    Returns:
        A tuple ``(raw, events, event_id)``: ``raw`` is the object returned by
        ``mne.io.read_raw_edf`` (opened with ``preload=False``); ``events`` and
        ``event_id`` come from ``mne.events_from_annotations``, with the
        ``event_id`` keys converted to ``str``.
    """
    # The notebook keeps paths as strings; coerce so ``.name`` below always works.
    edf_path = Path(edf_path)
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)

    # Handle annotations in one place so the count and the description list
    # agree (previously only the description list was guarded against None).
    annotations = raw.annotations
    n_annotations = 0 if annotations is None else len(annotations)
    descs = sorted({str(d) for d in annotations.description}) if n_annotations else []

    print("\n===", edf_path.name, "===")
    print("sfreq:", raw.info["sfreq"], "| duration:", raw.times[-1], "sec | ch:", len(raw.ch_names))
    print("Annotations count:", n_annotations)
    print("Unique descriptions:", descs)

    events, event_id = mne.events_from_annotations(raw, verbose=False)
    # Convert the keys to built-in str so the mapping prints and compares plainly.
    event_id = {str(k): v for k, v in event_id.items()}
    print("event_id:", event_id)
    print("n_events:", len(events))
    print("first events:", events[:10])
    return raw, events, event_id

if not DEBUG:
    # Full run: summarize every resolved recording. raw / events / event_id
    # are rebound on each iteration, so only the last file's values remain.
    for edf in edf_paths:
        raw, events, event_id = load_and_summarize(Path(edf))
else:
    # Debug run: summarize only the recording chosen in the DEBUG check cell.
    raw, events, event_id = load_and_summarize(Path(test_edf))


=== S001R01.edf ===
sfreq: 160.0 | duration: 60.99375 sec | ch: 64
Annotations count: 1
Unique descriptions: ['T0']
event_id: {'T0': 1}
n_events: 1
first events: [[0 0 1]]
