# EEG Artifact Detection


##### **Core Required Packages**

| Package | Purpose | Installation Command |
|--------|---------|----------------------|
| `mne` | Core EEG processing framework | `pip install mne` |
| `mne-icalabel` | ICLabel classifier for automatic component labeling | `pip install mne-icalabel` |
| `numpy` | Numerical operations | `pip install numpy` |
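| `scikit-learn` | Needed by MNE for the FastICA solver (optional when Picard is used) | `pip install scikit-learn` |
| `torch` or `onnxruntime` | Inference backend required by ICLabel | `pip install torch` or `pip install onnxruntime` |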
| `picard` | Recommended ICA solver (faster/more robust than FastICA) | `pip install python-picard` |



In [None]:

# ────────────────────────────────────────────────
# IMPORT LIBRARIES
# ────────────────────────────────────────────────

import mne
from mne.preprocessing import ICA
from mne_icalabel import label_components
import numpy as np
from warnings import catch_warnings, filterwarnings
import os

# ────────────────────────────────────────────────
# CONFIGURATION PARAMETERS
# ────────────────────────────────────────────────
# All tunable settings for the cleaning pipeline live here; the sections
# below only read these names.
DATA_PATH = "trial001_eeg.fif"          # Path to your EEG file
LOW_PASS_FREQ = 100.0               # Hz; upper edge of the band-pass filter
NOTCH_FREQ = 60.0                  # Hz (use 50.0 for EU datasets)
# Per-label probability cut-offs: a component is excluded when ICLabel assigns
# it one of these labels with at least the listed probability. Labels that are
# not listed here are never excluded.
ICLABEL_THRESHOLDS = {
    'eye blink': 0.70,
    'heart beat': 0.70,
    'muscle artifact': 0.70,
    'line noise': 0.70,
    'channel noise': 0.70
}
N_COMPONENTS = 0.95                # Retain components explaining 95% of variance
RANDOM_STATE = 99                  # Seed passed to ICA as random_state
INTERPOLATE_EXISTING_BADS = True   # Interpolate channels already marked bad in dataset

# ────────────────────────────────────────────────
# 1. LOAD AND PREPROCESS DATA
# ────────────────────────────────────────────────
print("Loading EEG data...")
# The FIF reader is used here. For EGI .mff recordings, swap in
# mne.io.read_raw_egi(DATA_PATH, preload=True) instead.
raw = mne.io.read_raw_fif(DATA_PATH, preload=True)

# Snapshot the channels that the file header already flags as bad, before any
# processing touches the recording.
original_bads = list(raw.info['bads'])
n_original_bads = len(original_bads)
print(f"Found {n_original_bads} pre-marked bad channels: {original_bads}")

# Band-pass (1 Hz high-pass, LOW_PASS_FREQ low-pass) followed by a notch at the
# mains frequency; these are the filter settings chosen for ICLabel.
print(f"Applying filters: HP=1.0 Hz, LP={LOW_PASS_FREQ} Hz, Notch={NOTCH_FREQ} Hz")
bandpass_options = dict(
    l_freq=1.0,
    h_freq=LOW_PASS_FREQ,
    picks='eeg',
    n_jobs=1,
    verbose=False,
)
raw.filter(**bandpass_options)

notch_options = dict(
    freqs=NOTCH_FREQ,
    picks='eeg',
    method='spectrum_fit',
    filter_length='auto',
    mt_bandwidth=1.0,
    p_value=0.05,
    n_jobs=1,
    verbose=False,
)
raw.notch_filter(**notch_options)

# Switch to a common-average reference before the ICA decomposition.
raw.set_eeg_reference(ref_channels='average', verbose=False)

# ────────────────────────────────────────────────
# 2. FIT ICA AND CLASSIFY COMPONENTS
# ────────────────────────────────────────────────
print("Fitting ICA decomposition...")
# Picard with ortho=False / extended=True; N_COMPONENTS and RANDOM_STATE come
# from the configuration section.
ica_settings = {
    'n_components': N_COMPONENTS,
    'method': 'picard',
    'fit_params': {'ortho': False, 'extended': True},
    'random_state': RANDOM_STATE,
    'max_iter': 'auto',
}
# RuntimeWarnings raised while building and fitting the decomposition are
# silenced only inside this context.
with catch_warnings():
    filterwarnings('ignore', category=RuntimeWarning)
    ica = ICA(**ica_settings)
    ica.fit(raw)
print(f"ICA fitted with {ica.n_components_} components")

print("Classifying components with ICLabel...")
# ICLabel is run on an EEG-only copy so the working recording is left untouched.
raw_eeg = raw.copy().pick('eeg')
labels_dict = label_components(raw_eeg, ica, method='iclabel')

component_labels = labels_dict['labels']
component_probs = labels_dict['y_pred_proba']

# A component is rejected when its (normalised) label has a configured
# threshold and its probability reaches it. Labels without a threshold fall
# back to +inf, which no probability can reach.
exclude_idx = [
    comp
    for comp, (name, probs) in enumerate(zip(component_labels, component_probs))
    if np.max(probs) >= ICLABEL_THRESHOLDS.get(name.lower().strip(), np.inf)
]

ica.exclude = sorted(set(exclude_idx))

# Summarise what is about to be removed.
print(f"Excluding {len(ica.exclude)} components based on ICLabel:")
for comp in ica.exclude:
    comp_label = component_labels[comp]
    comp_prob = np.max(component_probs[comp])
    print(f"  Component {comp:02d}: {comp_label:<18} (probability: {comp_prob:.2f})")

# ────────────────────────────────────────────────
# 3. APPLY ICA AND HANDLE BAD CHANNELS
# ────────────────────────────────────────────────
print("Applying ICA to reconstruct cleaned data...")
# NOTE: ICA.apply operates on the instance it is given and returns it, so
# `cleaned` and `raw` refer to the same object from here on.
cleaned = ica.apply(raw)

# Honour INTERPOLATE_EXISTING_BADS: channels flagged as bad in the dataset are
# rebuilt from their neighbours and then removed from the bad list. This needs
# channel positions in the recording; set the flag to False to skip the step.
bad_channels = list(cleaned.info['bads'])
if INTERPOLATE_EXISTING_BADS and bad_channels:
    print(f"Interpolating {len(bad_channels)} bad channels: {bad_channels}")
    cleaned.interpolate_bads(reset_bads=True, verbose=False)

# ────────────────────────────────────────────────
# 4. SAVE RESULTS
# ────────────────────────────────────────────────
# The output sits next to the input: same stem, with "_cleaned.fif" appended.
# NOTE(review): MNE's documented naming convention for raw files expects
# endings such as raw.fif or _eeg.fif, so this name may trigger a naming
# warning on save — confirm whether the suffix should change.
input_stem, _input_ext = os.path.splitext(DATA_PATH)
output_path = f"{input_stem}_cleaned.fif"
cleaned.save(output_path, overwrite=True)

# Final summary; an empty exclusion list is reported as 'None'.
excluded_summary = ica.exclude or 'None'
print("\n✓ Cleaning completed successfully")
print(f"✓ Cleaned data saved to: {output_path}")
print(f"✓ Components excluded: {excluded_summary}")

Loading EEG data...
Opening raw data file /mnt/movement/users/jaizor/xtra/derivatives/eeg/crop/bima_off/sub-001/bima_off_sub-001_trial001_eeg.fif...
    Range : 126209 ... 146461 =    252.418 ...   292.922 secs
Ready.
Reading 0 ... 20252  =      0.000 ...    40.504 secs...
Found 0 pre-marked bad channels: []
Applying filters: HP=1.0 Hz, LP=100.0 Hz, Notch=60.0 Hz
Fitting ICA decomposition...
Fitting ICA to data using 281 channels (please be patient, this may take a while)
Selecting by explained variance: 20 components
Fitting ICA took 3.5s.
ICA fitted with 20 components
Classifying components with ICLabel...
Excluding 7 components based on ICLabel:
  Component 03: heart beat         (probability: 0.91)
  Component 07: muscle artifact    (probability: 0.98)
  Component 09: heart beat         (probability: 0.79)
  Component 10: heart beat         (probability: 0.99)
  Component 11: heart beat         (probability: 0.94)
  Component 12: heart beat         (probability: 0.94)
  Component 1

  cleaned.save(output_path, overwrite=True)


In [5]:
# Display the measurement info of the cleaned recording as the cell result.
cleaned.info

Unnamed: 0,General,General.1
,MNE object type,Info
,Measurement date,2025-01-03 at 20:10:05 UTC
,Participant,Unknown
,Experimenter,Unknown
,Acquisition,Acquisition
,Sampling frequency,500.00 Hz
,Channels,Channels
,EEG,281
,Stimulus,10
,Head & sensor digitization,284 points
