# 02B Automated ICA Batch Processing

This notebook automates ICLabel-based ICA cleaning for the nine non-manual subjects so that downstream rheumatoid arthritis-focused ERP analyses can build on a unified preprocessing pipeline.



In [11]:
import sys
import logging
from pathlib import Path
from datetime import datetime
import json

import yaml
import pandas as pd
import numpy as np
import mne

# The notebook is expected to be launched from a directory directly below the
# project root, so the parent of the working directory is the first guess.
NOTEBOOK_DIR = Path.cwd().resolve()
PROJECT_ROOT = NOTEBOOK_DIR.parent

# Ensure both project root and src directory are on sys.path for imports
for path_candidate in (PROJECT_ROOT, PROJECT_ROOT / "src"):
    path_str = str(path_candidate)
    if path_str not in sys.path:
        sys.path.insert(0, path_str)

# Project-local import: only resolvable after the sys.path manipulation above.
from src.utils.pathing import find_project_root, ensure_src_on_path

project_root = find_project_root(PROJECT_ROOT)
ensure_src_on_path()

# mne-icalabel is optional at import time: record availability (and the import
# error, if any) so later cells can fail with a clear message.
try:
    import mne_icalabel  # noqa: F401
    from mne_icalabel import label_components
    ICLABEL_AVAILABLE = True
except ModuleNotFoundError as err:
    ICLABEL_AVAILABLE = False
    LABEL_COMPONENTS_IMPORT_ERROR = err
else:
    LABEL_COMPONENTS_IMPORT_ERROR = None

# ICAProcessor is only imported when mne-icalabel is present; otherwise the
# name is bound to None so references to it do not raise NameError.
if ICLABEL_AVAILABLE:
    from src.preprocessing.ica_pipeline import ICAProcessor
else:
    ICAProcessor = None

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger("automated_ica")
logger.info("Project root resolved to %s", project_root)

if not ICLABEL_AVAILABLE:
    logger.warning(
        "mne-icalabel is not installed. Install it with `pip install mne-icalabel` "
        "before running the automated ICA cells."
    )



2025-11-18 06:33:54,655 [INFO] Project root resolved to /Users/leeyelim/Documents/EEG


### 1.2 Load configuration & subject sets


In [12]:
# Locate and parse the analysis configuration; abort early when it is absent.
config_path = project_root / "config" / "analysis_config.yaml"
if not config_path.exists():
    raise FileNotFoundError(f"Config file not found at {config_path}")

with config_path.open("r") as stream:
    config = yaml.safe_load(stream)

# Every selected subject except the manually cleaned one goes through the
# automated ICA route.
manual_subject = config["subjects"]["manual_ica_subject"]
selected_subjects = config["subjects"]["selected"]
automated_subjects = list(filter(lambda sub: sub != manual_subject, selected_subjects))

print(
    f"Manual ICA subject        : {manual_subject}",
    f"Automated ICA subject pool: {automated_subjects}",
    f"Total automated subjects  : {len(automated_subjects)}",
    sep="\n",
)

# The subject overview is printed first; only then is the hard dependency on
# mne-icalabel enforced.
if not ICLABEL_AVAILABLE:
    raise ModuleNotFoundError(
        "mne-icalabel is required for automated ICA. Install it with `pip install mne-icalabel` "
        "and rerun the notebook."
    )



Manual ICA subject        : sub-003
Automated ICA subject pool: ['sub-015', 'sub-006', 'sub-010', 'sub-012', 'sub-007', 'sub-002', 'sub-011', 'sub-004', 'sub-014']
Total automated subjects  : 9


### 1.3 Load preprocessing summary (runs)


### 2.1 ICLabel/ARTIST parameters & helpers


In [None]:
# Read the per-run preprocessing summary and derive the run table for the
# automated subjects, ordered by subject, session and numeric run index.
summary_path = project_root / "data" / "preprocessed" / "preprocessing_summary.csv"
if not summary_path.exists():
    raise FileNotFoundError(f"Preprocessing summary not found at {summary_path}")

# Identifier columns are forced to strings so comparisons against config
# values behave the same regardless of how pandas inferred the CSV dtypes.
summary_df = pd.read_csv(summary_path).astype({"subject": str, "session": str, "run": str})

auto_runs_df = (
    summary_df.loc[summary_df["subject"].isin(automated_subjects)]
    .assign(
        # Runs whose label has no "run-<digits>" token get index 0.
        run_index=lambda frame: frame["run"]
        .str.extract(r"run-(\d+)", expand=False)
        .astype(float)
        .fillna(0)
        .astype(int)
    )
    .sort_values(["subject", "session", "run_index"])
)


### 2.2 Session-level ICA processing (merge → pre-annotate → ICA → re-ref → post-annotate → save)


### 2.3 Inventory ICA-cleaned outputs


### Session selection

Choose whether to process every eligible session or focus on a single subject/session pair. Set the config cell below before running the automation section.

### 3.2 Save automated ICA summary


In [None]:
# --- Session selection -------------------------------------------------------
# RUN_MODE "all" processes every automated subject/session; "single" restricts
# the run table to TARGET_SUBJECT / TARGET_SESSION.
RUN_MODE = "all"  # Options: "all" or "single"
TARGET_SUBJECT = "sub-004"
TARGET_SESSION = "ses-01"

RUN_MODE = RUN_MODE.lower().strip()
if RUN_MODE not in {"all", "single"}:
    raise ValueError("RUN_MODE must be 'all' or 'single'")

if RUN_MODE == "single":
    target_subject = TARGET_SUBJECT.strip()
    target_session = TARGET_SESSION.strip()
    if not target_subject or not target_session:
        raise ValueError("TARGET_SUBJECT and TARGET_SESSION must be set when RUN_MODE='single'")
    if target_subject == manual_subject:
        # Report the configured manual subject instead of a hard-coded ID so
        # the message stays correct when the config changes.
        raise ValueError(
            f"{manual_subject} is reserved for manual ICA; choose a different subject."
        )
    mask = (auto_runs_df["subject"] == target_subject) & (auto_runs_df["session"] == target_session)
    # NOTE: this narrows auto_runs_df in place; re-run the summary-loading cell
    # before switching back to RUN_MODE="all".
    auto_runs_df = auto_runs_df[mask]
    if auto_runs_df.empty:
        raise ValueError(f"No runs found for {target_subject} {target_session} in preprocessing summary.")
else:
    print(f"Processing all automated subjects (excluding manual subject {manual_subject}).")

if auto_runs_df.empty:
    raise RuntimeError("No runs left to process after applying the selection mask.")

# One (subject, session, runs-frame) triple per session; each frame gets a
# fresh 0..n-1 index.
automated_session_groups = [
    (subject, session, group.reset_index(drop=True))
    for (subject, session), group in auto_runs_df.groupby(["subject", "session"])
]

print(f"Total automated sessions in scope: {len(automated_session_groups)}")
print("Runs per subject/session:")
display(
    auto_runs_df.groupby(["subject", "session"])["run"].count().rename("n_runs")
)

auto_runs_df.head()


### 4.1 Cohen's d per session (time-resolved)


In [None]:
# Class names used as the default column order when mapping ICLabel
# probability rows to labels (see summarize_component_probabilities).
ICLABEL_CLASSES = [
    "brain",
    "muscle",
    "eye",
    "heart",
    "line_noise",
    "channel_noise",
    "other",
]

# Classes configured for rejection, normalised to lower_snake_case.
REJECT_TARGETS = {
    (label.lower().strip().replace(" ", "_"))
    for label in config["preprocessing"]["iclabel"]["reject_classes"]
}
# "brain" is removed from the rejection set even if the config lists it.
REJECT_TARGETS.discard("brain")

THRESHOLD = config["preprocessing"]["iclabel"]["threshold"]
# Output locations are created eagerly so later cells can write without checks.
OUTPUT_ROOT = project_root / "data" / "preprocessed" / "after_ica"
OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)
RESULTS_DIR = project_root / "results"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Optional "artist" section of the preprocessing config; both lookups fall
# back to empty dicts when it is missing.
ARTIST_CFG = config["preprocessing"].get("artist", {})
REJECT_CRITERIA = ARTIST_CFG.get("reject_criteria", {})


def _to_float(value, default):
    if value is None:
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


# The configured "eeg" rejection value is multiplied by 1e6 here and scaled
# back by 1e-6 inside annotate_high_amplitude before comparison with the data;
# the default of 150e-6 therefore becomes 150 in this constant.
AMPLITUDE_THRESHOLD_UV = _to_float(REJECT_CRITERIA.get("eeg", 150e-6), 150e-6) * 1e6
# Minimum length (seconds) a supra-threshold stretch needs before it is annotated.
MIN_BAD_DURATION_SEC = _to_float(ARTIST_CFG.get("min_duration_sec", 0.5), 0.5)


def canonicalize(label: str) -> str:
    """Normalise a class label to lower case with underscores.

    Falsy input (``None`` or an empty string) maps to ``"unknown"``;
    otherwise the label is lower-cased, stripped of surrounding whitespace
    and has its remaining spaces replaced by underscores.
    """
    return label.lower().strip().replace(" ", "_") if label else "unknown"


def build_session_output_paths(subject: str, session: str) -> dict:
    """Create the session output folder and return the artefact paths.

    The directory ``OUTPUT_ROOT/<subject>/<session>`` is created if needed.
    The returned dict maps ``"cleaned"``, ``"ica_model"``, ``"results"`` and
    ``"rejection"`` to file paths inside it, all sharing the stem
    ``<subject>_<session>_preprocessed_ica``.
    """
    session_dir = OUTPUT_ROOT / subject / session
    session_dir.mkdir(parents=True, exist_ok=True)

    stem = f"{subject}_{session}_preprocessed_ica"
    endings = (
        ("cleaned", "_cleaned.fif"),
        ("ica_model", "_model.fif"),
        ("results", "_results.json"),
        ("rejection", "_component_rejection.json"),
    )
    return {key: session_dir / f"{stem}{ending}" for key, ending in endings}


def merge_session_runs(session_df: pd.DataFrame):
    """Load every run of a session and concatenate them in run order.

    Rows are visited in ascending ``run_index``; each row's ``reref_path`` is
    read as a preloaded FIF recording.

    Returns:
        ``(merged, paths)``: the single loaded recording (one run) or the
        result of ``mne.concatenate_raws`` (several runs), plus the list of
        source paths in load order.

    Raises:
        FileNotFoundError: a listed ``reref_path`` does not exist.
        RuntimeError: ``session_df`` contains no rows.
    """
    loaded_runs, source_paths = [], []
    for _, run_row in session_df.sort_values("run_index").iterrows():
        source = Path(run_row["reref_path"])
        if not source.exists():
            raise FileNotFoundError(f"Re-referenced file missing: {source}")
        run_raw = mne.io.read_raw_fif(source, preload=True, verbose="ERROR")
        run_raw.load_data()
        loaded_runs.append(run_raw)
        source_paths.append(source)

    if not loaded_runs:
        raise RuntimeError("No runs available for session merge")

    merged = (
        loaded_runs[0]
        if len(loaded_runs) == 1
        else mne.concatenate_raws(loaded_runs, preload=True, verbose=False)
    )
    return merged, source_paths


def _mask_to_segments(mask: np.ndarray, sfreq: float, min_samples: int):
    if min_samples <= 0:
        min_samples = 1
    padded = np.concatenate(([0], mask.astype(int), [0]))
    changes = np.diff(padded)
    starts = np.where(changes == 1)[0]
    ends = np.where(changes == -1)[0]
    for start, end in zip(starts, ends):
        if end - start >= min_samples:
            onset = start / sfreq
            duration = (end - start) / sfreq
            yield onset, duration


def annotate_high_amplitude(
    raw: mne.io.Raw,
    *,
    label: str,
    threshold_uv: float = AMPLITUDE_THRESHOLD_UV,
    min_duration_sec: float = MIN_BAD_DURATION_SEC,
) -> dict:
    """Annotate stretches where any EEG channel exceeds an amplitude threshold.

    The per-sample maximum absolute value across EEG channels is compared with
    ``threshold_uv * 1e-6``; supra-threshold runs lasting at least
    ``min_duration_sec`` are added to ``raw`` as annotations named ``label``.
    ``raw`` is modified in place.

    Args:
        raw: Recording to inspect and annotate.
        label: Description given to every new annotation.
        threshold_uv: Amplitude threshold; non-positive disables annotation.
        min_duration_sec: Minimum run length for a segment to be annotated.

    Returns:
        Dict with ``label``, ``n_segments``, ``total_duration``,
        ``threshold_uv`` and ``min_duration_s``. The same keys are present on
        every return path, including the early exits.
    """
    summary = {
        "label": label,
        "n_segments": 0,
        "total_duration": 0.0,
        "threshold_uv": threshold_uv,
        "min_duration_s": min_duration_sec,
    }
    if threshold_uv <= 0:
        return summary

    picks = mne.pick_types(raw.info, eeg=True, exclude=[])
    if len(picks) == 0:
        return summary

    sfreq = raw.info["sfreq"]
    data = raw.get_data(picks=picks)
    envelope = np.max(np.abs(data), axis=0)
    mask = envelope > (threshold_uv * 1e-6)
    min_samples = int(max(1, min_duration_sec * sfreq))
    segments = list(_mask_to_segments(mask, sfreq, min_samples))

    if segments:
        existing = raw.annotations
        # Annotations can only be added when both share the same orig_time.
        # Reuse the recording's orig_time; when it is set, onsets are measured
        # from that timestamp, so shift them by the offset of the first sample.
        orig_time = existing.orig_time if existing is not None else None
        onset_shift = raw.first_time if orig_time is not None else 0.0
        new_annotations = mne.Annotations(
            onset=[seg[0] + onset_shift for seg in segments],
            duration=[seg[1] for seg in segments],
            description=[label] * len(segments),
            orig_time=orig_time,
        )
        if existing is None or len(existing) == 0:
            raw.set_annotations(new_annotations)
        else:
            raw.set_annotations(existing + new_annotations)

    summary["n_segments"] = len(segments)
    summary["total_duration"] = float(sum(seg[1] for seg in segments))
    return summary


def summarize_component_probabilities(scores_row, class_order=ICLABEL_CLASSES) -> dict:
    """Map one row of class scores onto class names.

    ``scores_row`` is flattened and paired positionally with ``class_order``;
    surplus entries on either side are ignored. An empty row yields ``{}`` and
    a single score is attributed to the first class.
    """
    flat = np.asarray(scores_row).ravel()
    if flat.size == 1:
        return {class_order[0]: float(flat[0])}
    return {name: float(score) for name, score in zip(class_order, flat)}


In [None]:
# Session-level automated ICA: for every (subject, session) the runs are
# merged, high-amplitude stretches annotated, ICA fitted and components
# rejected through ICAProcessor, and the cleaned data plus JSON logs written
# to disk. A failure in one session is recorded and the loop continues.
mne.set_log_level("WARNING")
ica_processor = ICAProcessor(config)

records = []   # one summary row per successfully processed session
failures = []  # one entry per session that raised

for subject, session, session_df in automated_session_groups:
    session_label = f"{subject}_{session}"
    logger.info("Processing %s (%d runs)", session_label, len(session_df))

    try:
        raw, run_paths = merge_session_runs(session_df)
        raw.load_data()

        # Annotate before fitting; ICA is then run on a copy of the annotated recording.
        pre_annotation = annotate_high_amplitude(raw, label="BAD_PRE_ICA")
        ica = ica_processor.run_ica(raw.copy(), subject=session_label)

        reject_components, classification_results = ica_processor.automated_ica_rejection(
            raw, ica, session_label
        )

        raw_cleaned = ica_processor.apply_ica_rejection(raw, ica, reject_components)
        raw_cleaned.load_data()
        # Re-reference to the average after component removal, then annotate
        # whatever high-amplitude stretches remain.
        raw_cleaned.set_eeg_reference("average", projection=False)
        post_annotation = annotate_high_amplitude(raw_cleaned, label="BAD_POST_ICA")

        output_paths = build_session_output_paths(subject, session)
        raw_cleaned.save(output_paths["cleaned"], overwrite=True, verbose="ERROR")
        ica.save(output_paths["ica_model"], overwrite=True)

        # Prefer values reported by the processor; fall back to notebook-level defaults.
        class_order = classification_results.get("classes", ICLABEL_CLASSES)
        decision_threshold = classification_results.get("rejection_threshold", THRESHOLD)

        # Shallow copy of the processor's result dict, extended with provenance.
        classification_payload = classification_results.copy()
        classification_payload.update(
            {
                "subject": subject,
                "session": session,
                "run": "merged_session",
                "run_label": session_label,
                "n_runs_merged": len(session_df),
                "timestamp_utc": datetime.utcnow().isoformat() + "Z",
                "class_order": class_order,
                "threshold": decision_threshold,
                "reject_targets": classification_results.get(
                    "rejected_classes", sorted(REJECT_TARGETS)
                ),
                "annotation_summary": {
                    "pre_ica": pre_annotation,
                    "post_ica": post_annotation,
                },
                "source_runs": [str(path) for path in run_paths],
            }
        )

        with open(output_paths["results"], "w") as f:
            json.dump(classification_payload, f, indent=2)

        rejection_payload = {
            "subject": subject,
            "session": session,
            "run": "merged_session",
            "n_components": int(ica.n_components_),
            "rejected_components": list(reject_components),
            "n_rejected": len(reject_components),
            "method": "iclabel_automated",
            "threshold": decision_threshold,
            "n_runs_merged": len(session_df),
        }

        with open(output_paths["rejection"], "w") as f:
            json.dump(rejection_payload, f, indent=2)

        # Flat row for the summary table written by a later cell.
        records.append(
            {
                "subject": subject,
                "session": session,
                "run": "merged_session",
                "n_runs_merged": len(session_df),
                "n_components": int(ica.n_components_),
                "n_rejected": len(reject_components),
                "threshold": decision_threshold,
                "cleaned_path": str(output_paths["cleaned"]),
                "ica_model_path": str(output_paths["ica_model"]),
                "classification_path": str(output_paths["results"]),
                "rejection_log_path": str(output_paths["rejection"]),
                "timestamp_utc": classification_payload["timestamp_utc"],
                "pre_bad_segments": pre_annotation["n_segments"],
                "pre_bad_seconds": pre_annotation["total_duration"],
                "post_bad_segments": post_annotation["n_segments"],
                "post_bad_seconds": post_annotation["total_duration"],
            }
        )

        logger.info(
            "Completed %s → %d/%d components rejected",
            session_label,
            len(reject_components),
            int(ica.n_components_),
        )

    # Deliberately broad: the traceback is logged and the batch moves on to
    # the next session.
    except Exception as exc:  # pylint: disable=broad-except
        logger.exception("Failed to process %s", session_label)
        failures.append(
            {
                "session_label": session_label,
                "reason": type(exc).__name__,
                "message": str(exc),
            }
        )

# Report the batch outcome. A bare expression inside an ``if`` block is never
# rendered by the notebook, so the failure table is shown explicitly.
if failures:
    logger.warning("Encountered %d failures during automated ICA", len(failures))
    display(pd.DataFrame(failures))
else:
    logger.info("Automated ICA completed for all sessions")


### 4.2 Group-level ERP metrics (SNR, SME, peak d)


In [13]:
# 4.2a. Group-level SME by condition with ROI-specific window override
import numpy as np
import pandas as pd
import mne
from pathlib import Path
from typing import Tuple, Optional

# Stage directories compared by this cell, plus the dataset folder that holds
# the per-run events TSV files.
pre_root = project_root / 'data' / 'preprocessed' / 'after_rereferencing'
after_root = project_root / 'data' / 'preprocessed' / 'after_ica'
raw_events_root = project_root / 'ds002680'
# Multiplier applied to the SME value before it is reported.
VOLTS_TO_UV = 1e6

# Defaults from config
erp_cfg = config.get('erp_analysis', {})
baseline = tuple(erp_cfg.get('baseline', [-0.1, 0.0]))
post_window_default = tuple(erp_cfg.get('post_window', [0.3, 0.5]))

# Requested channels per ROI; roi_cfg (from the config) is the fallback used
# when none of the requested channels exist in a recording.
roi_map = {
    'Frontal ROI': ['FP1', 'FP2'],
    'Parietal ROI': ['P3', 'P3"', 'P4', 'P4"', 'PZ', 'PZ"', 'CZ'],
}
roi_cfg = erp_cfg.get('roi', {})

# Event labels grouped into the two conditions: targets are coded as
# "familiar", distractors as "new".
familiar_labels = {'animal_target','nonanimal_target','easy_target','difficult_target'}
new_labels = {'animal_distractor','nonanimal_distractor','easy_distractor','difficult_distractor'}
valid_labels = familiar_labels | new_labels


def _window_for_roi(roi_label: str) -> Tuple[float, float]:
    return (0.4, 0.5) if 'Parieto' in roi_label else post_window_default


def _load_run_events(subject: str, session: str, run_token: str, sfreq: float) -> np.ndarray:
    """Read one run's events TSV and return an ``(n, 3)`` integer event array.

    Only rows whose ``value`` is in ``valid_labels`` are kept. Column 0 is the
    onset multiplied by ``sfreq`` and rounded, column 1 is zero, and column 2
    is 1 for labels in ``familiar_labels`` and 2 otherwise. A missing file or
    a file with no valid rows gives an empty ``(0, 3)`` array.
    """
    events_tsv = (
        raw_events_root / subject / session / 'eeg'
        / f"{subject}_{session}_task-gonogo_{run_token}_events.tsv"
    )
    if not events_tsv.exists():
        return np.empty((0,3), dtype=int)

    table = pd.read_csv(events_tsv, sep='\t')
    stimuli = table[table['value'].isin(valid_labels)]
    if stimuli.empty:
        return np.empty((0,3), dtype=int)

    onset_samples = (stimuli['onset'].values * sfreq).round().astype(int)
    condition_codes = np.array(
        [1 if label in familiar_labels else 2 for label in stimuli['value']], dtype=int
    )
    filler = np.zeros(len(onset_samples), dtype=int)
    return np.column_stack([onset_samples, filler, condition_codes])


def _run_number_key(path: Path):
    """Sort key ordering run files by numeric run index (run-2 before run-10)."""
    import re  # local import: keeps this cell independent of later cells

    match = re.search(r'run-(\d+)', path.name)
    # Files without a numeric run token sort last, by name.
    return (int(match.group(1)) if match else float('inf'), path.name)


def _merge_stage(subject: str, session: str, stage_root: Path, suffix: str) -> Optional[mne.io.Raw]:
    """Load a session's recording for one preprocessing stage.

    For ``suffix == 'ica_cleaned'`` a session-level
    ``<subject>_<session>_preprocessed_ica*_cleaned.fif`` file is preferred
    when present. Otherwise the per-run files for ``suffix`` are loaded and
    concatenated in numeric run order. Plain lexicographic sorting would place
    run-10 before run-2 and disagree with the order ``merge_session_runs``
    uses when it builds the session-level ICA file.

    Returns:
        The loaded (possibly concatenated) recording, or ``None`` when the
        session directory or the run files are missing.
    """
    ses_dir = stage_root/subject/session
    if not ses_dir.exists():
        return None
    if suffix == 'ica_cleaned':
        cand = sorted(ses_dir.glob(f"{subject}_{session}_preprocessed_ica*_cleaned.fif"))
        if cand:
            return mne.io.read_raw_fif(str(cand[0]), preload=True, verbose='ERROR')
    run_files = sorted(
        ses_dir.glob(f"{subject}_{session}_run-*_preprocessed_{suffix}.fif"),
        key=_run_number_key,
    )
    if not run_files:
        return None
    raws = [mne.io.read_raw_fif(str(p), preload=True, verbose='ERROR') for p in run_files]
    return raws[0] if len(raws)==1 else mne.concatenate_raws(raws, preload=True, verbose=False)


def _build_session_events(subject: str, session: str, sfreq: float) -> np.ndarray:
    """Build the session-wide event array matching the concatenated recording.

    Runs are visited in the same numeric order used by ``_merge_stage``; each
    run's event samples are shifted by the total number of samples in the
    preceding runs. Each run's own sampling frequency is used to convert
    onsets, so the ``sfreq`` argument is accepted but not consulted.

    Returns:
        ``(n, 3)`` integer events sorted by sample, or an empty ``(0, 3)``
        array when no run files or no valid events exist.
    """
    ses_dir = pre_root/subject/session
    run_files = sorted(
        ses_dir.glob(f"{subject}_{session}_run-*_preprocessed_after_rereferencing.fif"),
        key=_run_number_key,
    )
    if not run_files:
        return np.empty((0,3), dtype=int)
    evs, off = [], 0
    for p in run_files:
        # Header-only read: just sfreq and n_times are needed here.
        raw_tmp = mne.io.read_raw_fif(str(p), preload=False, verbose='ERROR')
        run_tok = p.stem.split('run-')[-1].split('_')[0]
        e = _load_run_events(subject, session, f"run-{run_tok}", raw_tmp.info['sfreq'])
        if e.size:
            e[:,0] += off
            evs.append(e)
        off += raw_tmp.n_times
    if not evs:
        return np.empty((0,3), dtype=int)
    e = np.vstack(evs)
    return e[np.argsort(e[:,0])]


def _pick_roi(raw: mne.io.Raw, roi_label: str):
    """Resolve an ROI label to channel indices and names present in ``raw``.

    Channels listed in ``roi_map`` are used when at least one exists in the
    recording; otherwise the config list under ``'parietal'`` or ``'frontal'``
    is tried. The parietal test is case-insensitive on the stem "pariet". The
    previous ``'Parieto'`` test never matched "Parietal ROI", which sent the
    parietal ROI to the frontal fallback list.

    Returns:
        ``(picks, names)``: index array and matching channel names; an empty
        integer array and empty list when nothing matches.
    """
    desired = roi_map.get(roi_label, [])
    present = [ch for ch in desired if ch in raw.ch_names]
    if not present:
        key = 'parietal' if 'pariet' in roi_label.lower() else 'frontal'
        present = [ch for ch in roi_cfg.get(key, []) if ch in raw.ch_names]
    if not present:
        return np.array([], dtype=int), []
    return mne.pick_channels(raw.ch_names, present, ordered=True), present


def _epochs(raw: mne.io.Raw, events: np.ndarray) -> Optional[mne.Epochs]:
    """Epoch ``raw`` around ``events`` (-0.2 s to 0.6 s, two conditions).

    Returns ``None`` when ``events`` is empty or when epoch construction
    raises for any reason (best-effort; the error is not reported).
    """
    if events.size == 0:
        return None
    try:
        # Keyword set built inside the try so any failure, including while
        # resolving the notebook-level ``baseline``, yields None as before.
        epoch_kwargs = dict(
            event_id={'familiar':1,'new':2},
            tmin=-0.2,
            tmax=0.6,
            baseline=baseline,
            preload=True,
            verbose='ERROR',
            event_repeated='drop',
        )
        return mne.Epochs(raw, events, **epoch_kwargs)
    except Exception:
        return None


def _sme_condition(epochs: Optional[mne.Epochs], picks: np.ndarray, window: Tuple[float,float], condition: str) -> float:
    """Compute the SME for one condition as std / sqrt(n) of per-trial means.

    Each trial is reduced to the mean over the picked channels and over the
    samples with ``window[0] <= t <= window[1]``. The sample standard deviation
    of those values (``ddof=1`` when more than one trial) divided by
    ``sqrt(n_trials)`` is returned, multiplied by ``VOLTS_TO_UV``.

    Returns NaN when there are no epochs or picks, when the condition cannot
    be selected or is empty, or when no sample falls inside the window.
    """
    if epochs is None or len(picks)==0:
        return float('nan')
    try:
        subset = epochs[condition]
    except Exception:
        return float('nan')
    if len(subset)==0:
        return float('nan')

    in_window = (subset.times >= window[0]) & (subset.times <= window[1])
    if not np.any(in_window):
        return float('nan')

    roi_trace = subset.get_data()[:, picks, :].mean(axis=1)
    per_trial = roi_trace[:, in_window].mean(axis=1)
    n_trials = max(len(per_trial), 1)
    spread = np.std(per_trial, ddof=1 if n_trials>1 else 0)
    return float((spread / np.sqrt(n_trials)) * VOLTS_TO_UV)

# Build sessions list
# Prefer the preprocessing summary when it is already in the kernel; otherwise
# discover sessions from the after_rereferencing directory tree.
if 'summary_df' in globals():
    sessions = sorted({(r['subject'], r['session']) for _, r in summary_df.iterrows()})
else:
    sessions = []
    for subj_dir in sorted((pre_root).glob('sub-*')):
        for ses_dir in sorted(subj_dir.glob('ses-*')):
            sessions.append((subj_dir.name, ses_dir.name))

# One record per (session, ROI) holding before/after-ICA SME for each condition.
cond_records = []
for subject, session in sessions:
    raw_b = _merge_stage(subject, session, pre_root, 'after_rereferencing')
    raw_a = _merge_stage(subject, session, after_root, 'ica_cleaned')
    # Sessions lacking either stage are skipped silently.
    if raw_b is None or raw_a is None:
        continue
    ev = _build_session_events(subject, session, raw_b.info['sfreq'])
    if ev.size == 0:
        continue
    # The same event array is applied to both stages.
    ep_b = _epochs(raw_b, ev)
    ep_a = _epochs(raw_a, ev)
    for roi_label in roi_map:
        win = _window_for_roi(roi_label)
        pb, used_b = _pick_roi(raw_b, roi_label)
        pa, used_a = _pick_roi(raw_a, roi_label)
        if len(pb)==0 or len(pa)==0:
            continue
        sme_f_b = _sme_condition(ep_b, pb, win, 'familiar')
        sme_n_b = _sme_condition(ep_b, pb, win, 'new')
        sme_f_a = _sme_condition(ep_a, pa, win, 'familiar')
        sme_n_a = _sme_condition(ep_a, pa, win, 'new')
        # Deltas are after minus before; NaN when either side is not finite.
        cond_records.append({
            'subject': subject,
            'session': session,
            'ROI': roi_label,
            'SME_familiar_before_uV': sme_f_b,
            'SME_familiar_after_uV': sme_f_a,
            'Delta_SME_familiar_uV': (sme_f_a - sme_f_b) if (np.isfinite(sme_f_a) and np.isfinite(sme_f_b)) else np.nan,
            'SME_new_before_uV': sme_n_b,
            'SME_new_after_uV': sme_n_a,
            'Delta_SME_new_uV': (sme_n_a - sme_n_b) if (np.isfinite(sme_n_a) and np.isfinite(sme_n_b)) else np.nan,
        })

cond_df = pd.DataFrame(cond_records)
out_path = RESULTS_DIR / 'group_erp_sme_by_condition.csv'
cond_df.to_csv(out_path, index=False)
print(f'Saved group-condition SME → {out_path} (rows={len(cond_df)})')

# Per-ROI means across all sessions, written only when there is data.
if not cond_df.empty:
    summ = cond_df.groupby('ROI')[[
        'SME_familiar_before_uV','SME_familiar_after_uV','Delta_SME_familiar_uV',
        'SME_new_before_uV','SME_new_after_uV','Delta_SME_new_uV']].mean().reset_index()
    summ_path = RESULTS_DIR / 'group_erp_sme_by_condition_summary.csv'
    summ.to_csv(summ_path, index=False)
    print(f'Saved group-condition SME summary → {summ_path}')

cond_df.head()


Saved group-condition SME → /Users/leeyelim/Documents/EEG/results/group_erp_sme_by_condition.csv (rows=40)
Saved group-condition SME summary → /Users/leeyelim/Documents/EEG/results/group_erp_sme_by_condition_summary.csv


Unnamed: 0,subject,session,ROI,SME_familiar_before_uV,SME_familiar_after_uV,Delta_SME_familiar_uV,SME_new_before_uV,SME_new_after_uV,Delta_SME_new_uV
0,sub-002,ses-01,Frontal ROI,0.944103,0.353415,-0.590687,0.987072,0.283027,-0.704044
1,sub-002,ses-01,Parietal ROI,0.213074,0.18025,-0.032824,0.201076,0.166987,-0.034089
2,sub-002,ses-02,Frontal ROI,0.998758,0.372097,-0.626661,1.045773,0.291014,-0.754759
3,sub-002,ses-02,Parietal ROI,0.226218,0.189009,-0.037208,0.212971,0.175413,-0.037557
4,sub-003,ses-01,Frontal ROI,1.233615,0.885281,-0.348334,2.306136,0.920543,-1.385593


In [14]:
import re
from collections import defaultdict

# Walk OUTPUT_ROOT/<sub-*>/<ses-*> and list every ICA-cleaned FIF file found.
inventory_records = []
for subject_dir in sorted((OUTPUT_ROOT).glob('sub-*')):
    subject = subject_dir.name
    for session_dir in sorted(subject_dir.glob('ses-*')):
        session = session_dir.name
        # The pattern accepts both per-run and session-level cleaned files.
        cleaned_files = sorted(session_dir.glob(f"{subject}_{session}_*preprocessed_ica*_cleaned*.fif"))
        for path in cleaned_files:
            name = path.stem
            # Files without a "run-<digits>" token are labelled as merged sessions.
            match = re.search(r'run-(\d+)', name)
            run_label = f"run-{match.group(1)}" if match else 'merged_session'
            inventory_records.append(
                {
                    'subject': subject,
                    'session': session,
                    'run': run_label,
                    'cleaned_path': str(path),
                }
            )

inventory_df = pd.DataFrame(inventory_records)
if inventory_df.empty:
    raise RuntimeError('No ICA-cleaned files found under data/preprocessed/after_ica')

inventory_output = RESULTS_DIR / 'ica_cleaned_inventory.csv'
inventory_df.to_csv(inventory_output, index=False)
print(f"Saved ICA cleaned inventory → {inventory_output}")
inventory_df.head()


Saved ICA cleaned inventory → /Users/leeyelim/Documents/EEG/results/ica_cleaned_inventory.csv


Unnamed: 0,subject,session,run,cleaned_path
0,sub-002,ses-01,run-10,/Users/leeyelim/Documents/EEG/data/preprocesse...
1,sub-002,ses-01,run-11,/Users/leeyelim/Documents/EEG/data/preprocesse...
2,sub-002,ses-01,run-12,/Users/leeyelim/Documents/EEG/data/preprocesse...
3,sub-002,ses-01,run-13,/Users/leeyelim/Documents/EEG/data/preprocesse...
4,sub-002,ses-01,run-1,/Users/leeyelim/Documents/EEG/data/preprocesse...


## Before/After ICA SNR Validation

The plan requires logging the SNR (in dB) before and after ICA for every subject. The cell below uses `ICAValidator` to compute the ERP-based SNR metric on the re-referenced data and the matching ICA-cleaned outputs. Results are stored under `results/ica_validation/ica_snr_summary.csv` for downstream analysis.


In [15]:
from src.preprocessing.ica_validation import ICAValidator

# Ask ICAValidator for the before/after-ICA SNR table of the automated
# subjects; the result is requested to be saved under results/ica_validation/.
# NOTE(review): `summary_path` is assigned only in the preprocessing-summary
# cell. Running this cell without that one raises NameError, as the captured
# output below shows.
validator = ICAValidator(config)
ica_snr_summary = validator.compute_snr_summary(
    subjects=automated_subjects,
    summary_csv=summary_path,
    save_path=project_root / "results" / "ica_validation" / "ica_snr_summary.csv"
)

ica_snr_summary.head()


2025-11-18 06:35:56,773 [INFO] EEGDataLoader initialized
  Project root: /Users/leeyelim/Documents/EEG
  Config: /Users/leeyelim/Documents/EEG/config/analysis_config.yaml
  Raw dir: /Users/leeyelim/Documents/EEG/ds002680 (exists=True)
  Preprocessed dir: /Users/leeyelim/Documents/EEG/data/preprocessed (exists=True)
  Derivatives dir: /Users/leeyelim/Documents/EEG/data/derivatives (exists=True)


NameError: name 'summary_path' is not defined

In [16]:
# Persist the per-session ICA summary collected by the processing loop.
# NOTE: `records` is created by the session-level ICA cell; run that cell first.
if records:
    automated_summary = pd.DataFrame(records)
    summary_output = RESULTS_DIR / "ica_automated_summary.csv"
    automated_summary.to_csv(summary_output, index=False)
    print(f"Saved automated ICA summary → {summary_output}")
    # A bare expression inside an ``if`` block is not rendered by the
    # notebook, so the table is displayed explicitly.
    display(automated_summary)
else:
    print("No automated ICA runs completed. Check the failure log above for details.")



NameError: name 'records' is not defined

In [17]:
import mne
from pathlib import Path
import numpy as np

# Requested channels per ROI; config_roi provides the fallback lists.
requested_roi = {
    'Frontal ROI': ['FP1', 'FP2'],
    'Parietal ROI': ['P3', 'P3"', 'P4', 'P4"', 'PZ', 'PZ"', 'CZ'],
}
config_roi = config['erp_analysis']['roi']

# Event labels grouped into the two compared conditions.
familiar_labels = {
    'animal_target', 'nonanimal_target', 'easy_target', 'difficult_target'
}
new_labels = {
    'animal_distractor', 'nonanimal_distractor', 'easy_distractor', 'difficult_distractor'
}
valid_labels = familiar_labels | new_labels
raw_events_root = project_root / 'ds002680'
pre_root = project_root / 'data' / 'preprocessed' / 'after_rereferencing'
after_root = project_root / 'data' / 'preprocessed' / 'after_ica'

# Number of summary rows per (subject, session).
# NOTE(review): requires `summary_df` from the preprocessing-summary cell; the
# captured output shows a NameError when that cell has not been run.
summary_session_counts = (
    summary_df.groupby(['subject', 'session']).size().rename('n_runs').to_dict()
)


def load_run_events(subject: str, session: str, run_token: str, sfreq: float) -> np.ndarray:
    """Read one run's events TSV and return an ``(n, 3)`` integer event array.

    Rows whose ``value`` is not in ``valid_labels`` are discarded. Column 0 is
    the onset multiplied by ``sfreq`` and rounded, column 1 is zero, and
    column 2 is 1 for labels in ``familiar_labels`` and 2 otherwise.

    Raises:
        FileNotFoundError: the events TSV for this run does not exist.
    """
    events_file = (
        raw_events_root
        / subject
        / session
        / 'eeg'
        / f"{subject}_{session}_task-gonogo_{run_token}_events.tsv"
    )
    if not events_file.exists():
        raise FileNotFoundError(f"Events TSV missing: {events_file}")
    # Escaped separator rather than a literal tab character, which editors and
    # formatters can silently turn into spaces.
    df = pd.read_csv(events_file, sep='\t')
    stim_df = df[df['value'].isin(valid_labels)].copy()
    if stim_df.empty:
        return np.empty((0, 3), dtype=int)
    samples = (stim_df['onset'].values * sfreq).round().astype(int)
    codes = np.array([1 if val in familiar_labels else 2 for val in stim_df['value']], dtype=int)
    events = np.column_stack([samples, np.zeros(len(samples), dtype=int), codes])
    return events


def run_number_key(path: Path):
    """Sort key ordering run files by numeric run index (run-2 before run-10)."""
    import re  # local import: keeps this cell independent of other cells

    match = re.search(r'run-(\d+)', path.name)
    # Files without a numeric run token sort last, by name.
    return (int(match.group(1)) if match else float('inf'), path.name)


def merge_stage(subject: str, session: str, stage_root: Path, suffix: str) -> mne.io.Raw:
    """Load a session's recording for one preprocessing stage.

    For ``suffix == 'ica_cleaned'`` a session-level
    ``<subject>_<session>_preprocessed_ica*_cleaned.fif`` file is preferred
    when present. Otherwise the per-run files for ``suffix`` are loaded and
    concatenated in numeric run order. Lexicographic sorting would put run-10
    before run-2 and disagree with the order ``merge_session_runs`` uses when
    it builds the session-level ICA file.

    Raises:
        FileNotFoundError: the session directory or the run files are missing.
    """
    session_dir = stage_root / subject / session
    if not session_dir.exists():
        raise FileNotFoundError(f"Missing stage directory: {session_dir}")

    if suffix == 'ica_cleaned':
        candidates = sorted(session_dir.glob(f"{subject}_{session}_preprocessed_ica*_cleaned.fif"))
        if candidates:
            return mne.io.read_raw_fif(str(candidates[0]), preload=True, verbose='ERROR')

    pattern = f"{subject}_{session}_run-*_preprocessed_{suffix}.fif"
    run_files = sorted(session_dir.glob(pattern), key=run_number_key)
    if not run_files:
        raise FileNotFoundError(f"No run files found for {subject} {session} at {session_dir}")

    raws = [mne.io.read_raw_fif(str(path), preload=True, verbose='ERROR') for path in run_files]
    if len(raws) == 1:
        return raws[0]
    return mne.concatenate_raws(raws, preload=True, verbose=False)


def build_session_events(subject: str, session: str, sfreq: float) -> np.ndarray:
    """Build the session-wide event array matching the concatenated recording.

    Runs are visited in the same numeric order used by ``merge_stage``; each
    run's event samples are shifted by the total number of samples in the
    preceding runs. Each run's own sampling frequency is used to convert
    onsets, so the ``sfreq`` argument is accepted but not consulted.

    Returns:
        ``(n, 3)`` integer events sorted by sample, or an empty ``(0, 3)``
        array when no run contains valid events.

    Raises:
        FileNotFoundError: no after_rereferencing run files exist, or (via
            ``load_run_events``) a run's events TSV is missing.
    """
    session_dir = pre_root / subject / session
    run_files = sorted(
        session_dir.glob(f"{subject}_{session}_run-*_preprocessed_after_rereferencing.fif"),
        key=run_number_key,
    )
    if not run_files:
        raise FileNotFoundError(f"No after_rereferencing runs for {subject} {session}")

    events_list = []
    sample_offset = 0
    for path in run_files:
        # Header-only read: just sfreq and n_times are needed here.
        raw = mne.io.read_raw_fif(str(path), preload=False, verbose='ERROR')
        run_token = path.stem.split('run-')[-1].split('_')[0]
        run_events = load_run_events(subject, session, f"run-{run_token}", raw.info['sfreq'])
        if run_events.size > 0:
            run_events[:, 0] += sample_offset
            events_list.append(run_events)
        sample_offset += raw.n_times
    if not events_list:
        return np.empty((0, 3), dtype=int)
    events = np.vstack(events_list)
    order = np.argsort(events[:, 0])
    return events[order]


def pick_roi(raw: mne.io.Raw, roi_label: str):
    """Resolve an ROI label to channel indices and the channel names used.

    Requested channels from ``requested_roi`` are filtered to those present in
    ``raw``. When none is present, the config list under ``'parietal'`` or
    ``'frontal'`` is tried instead. The parietal test is case-insensitive on
    the stem "pariet"; the previous ``'Parieto'`` test never matched
    "Parietal ROI".

    Returns:
        ``(picks, used)``: channel indices and the names of the channels that
        were actually found in ``raw``, not merely requested.
    """
    desired = requested_roi.get(roi_label, [])
    used = [ch for ch in desired if ch in raw.ch_names]
    if not used:
        key = 'parietal' if 'pariet' in roi_label.lower() else 'frontal'
        used = [ch for ch in config_roi.get(key, []) if ch in raw.ch_names]
    picks = mne.pick_channels(raw.ch_names, include=used)
    return picks, used


def compute_cohens_d(raw: mne.io.Raw, events: np.ndarray, roi_picks, mask):
    """Time-resolved Cohen's d between the 'familiar' and 'new' conditions.

    Epochs span -0.2 s to 0.6 s with a (-0.1, 0.0) baseline. Each trial is
    averaged over ``roi_picks`` and restricted to the samples selected by
    ``mask`` (indexed along the epoch time axis). The effect size is the mean
    difference divided by the pooled standard deviation; time points with a
    pooled SD of exactly zero yield NaN.
    """
    epochs = mne.Epochs(
        raw,
        events,
        event_id={'familiar': 1, 'new': 2},
        tmin=-0.2,
        tmax=0.6,
        baseline=(-0.1, 0.0),
        preload=True,
        detrend=None,
        event_repeated='drop',
        verbose='ERROR'
    )

    def _roi_trials(condition):
        # (n_trials, n_selected_times): ROI-averaged signal per trial.
        return epochs[condition].get_data()[:, roi_picks, :].mean(axis=1)[:, mask]

    fam = _roi_trials('familiar')
    new = _roi_trials('new')
    n_fam, n_new = fam.shape[0], new.shape[0]

    var_fam = fam.std(axis=0, ddof=1)**2
    var_new = new.std(axis=0, ddof=1)**2
    pooled_sd = np.sqrt(((n_fam - 1) * var_fam + (n_new - 1) * var_new) / max(n_fam + n_new - 2, 1))
    pooled_sd = np.where(pooled_sd == 0, np.nan, pooled_sd)

    return (fam.mean(axis=0) - new.mean(axis=0)) / pooled_sd

# Epoch window used inside compute_cohens_d; the time axis built below must
# describe those epochs, not the continuous recording.
EPOCH_TMIN, EPOCH_TMAX = -0.2, 0.6
COHENS_D_COLUMN = "Cohen's d"

cohens_records = []
subjects_for_d = sorted(set(summary_df['subject']) | {manual_subject})
for subject in subjects_for_d:
    for session in sorted(summary_df[summary_df['subject'] == subject]['session'].unique()):
        try:
            raw_before = merge_stage(subject, session, pre_root, 'after_rereferencing')
        except FileNotFoundError:
            continue
        raw_before.load_data()
        events = build_session_events(subject, session, raw_before.info['sfreq'])
        if events.size == 0:
            continue
        try:
            raw_after = merge_stage(subject, session, after_root, 'ica_cleaned')
        except FileNotFoundError:
            continue
        raw_after.load_data()

        # Reconstruct the epoch time axis (sample indices from round(tmin*sfreq)
        # to round(tmax*sfreq), divided by sfreq). Masking with the raw
        # recording's time axis has the wrong length, so every Cohen's d call
        # used to fail and be skipped.
        sfreq = raw_before.info['sfreq']
        first_idx = int(round(EPOCH_TMIN * sfreq))
        last_idx = int(round(EPOCH_TMAX * sfreq))
        times = np.arange(first_idx, last_idx + 1) / sfreq
        mask = times >= 0
        times_ms = (times[mask] * 1000).round(1)

        for stage_label, raw_stage in [('Before ICA', raw_before), ('After ICA', raw_after)]:
            for roi_label in requested_roi:
                picks, used_channels = pick_roi(raw_stage, roi_label)
                if len(picks) == 0:
                    continue
                try:
                    d_vals = compute_cohens_d(raw_stage, events, picks, mask)
                except Exception as exc:
                    # Still best-effort, but no longer silent.
                    logger.warning(
                        "Cohen's d failed for %s %s (%s, %s): %s",
                        subject, session, stage_label, roi_label, exc,
                    )
                    continue
                if len(d_vals) != len(times_ms):
                    logger.warning(
                        "Cohen's d length mismatch for %s %s (%s, %s): %d values vs %d time points",
                        subject, session, stage_label, roi_label, len(d_vals), len(times_ms),
                    )
                    continue
                for t, d in zip(times_ms, d_vals):
                    cohens_records.append(
                        {
                            'subject': subject,
                            'session': session,
                            'Stage': stage_label,
                            'ROI': roi_label,
                            'Channels Used': ','.join(used_channels),
                            'Time (ms)': float(t),
                            COHENS_D_COLUMN: float(d),
                        }
                    )

cohens_df = pd.DataFrame(cohens_records)
if cohens_df.empty:
    raise RuntimeError('Cohen’s d computation produced no results.')

cohens_output = RESULTS_DIR / 'cohens_d_summary.csv'
cohens_df.to_csv(cohens_output, index=False)
print(f"Saved Cohen's d timecourse summary → {cohens_output}")

# Peak = row with the largest |d| per subject/session/stage/ROI. The column key
# uses the same straight apostrophe as the records above (the original lookup
# used a curly quote and raised KeyError). NaN rows are dropped first so
# all-NaN groups cannot break idxmax.
finite_df = cohens_df.dropna(subset=[COHENS_D_COLUMN])
idx = finite_df.groupby(['subject', 'session', 'Stage', 'ROI'])[COHENS_D_COLUMN].apply(
    lambda s: s.abs().idxmax()
)
peak_df = cohens_df.loc[idx]
peak_output = RESULTS_DIR / 'cohens_d_peaks.csv'
peak_df.to_csv(peak_output, index=False)
print(f"Saved Cohen's d peak summary → {peak_output}")
peak_df.head()


NameError: name 'summary_df' is not defined

In [18]:
import mne
from pathlib import Path
import numpy as np

# Channels requested for each ROI label; pick_roi falls back to config_roi when
# none of the requested channels exist in a recording.
requested_roi = {
    'Frontal ROI': ['FP1', 'FP2'],
    'Parietal ROI': ['P3', 'P3"', 'P4', 'P4"', 'PZ', 'PZ"', 'CZ'],
}
config_roi = config['erp_analysis']['roi']

# Event labels from the events TSV, grouped into the two contrasted conditions.
familiar_labels = {
    'animal_target', 'nonanimal_target', 'easy_target', 'difficult_target'
}
new_labels = {
    'animal_distractor', 'nonanimal_distractor', 'easy_distractor', 'difficult_distractor'
}
valid_labels = familiar_labels | new_labels

# Input locations: raw dataset (event TSVs) and the two preprocessing stages compared.
raw_events_root = project_root / 'ds002680'
pre_root = project_root / 'data' / 'preprocessed' / 'after_rereferencing'
after_root = project_root / 'data' / 'preprocessed' / 'after_ica'

# This cell depends on `summary_df` created by an earlier cell. Fail with an
# actionable message (same exception type as before) instead of a bare NameError.
if 'summary_df' not in globals():
    raise NameError(
        "name 'summary_df' is not defined: run the cell that builds the summary "
        "table (it must provide 'subject' and 'session' columns) before this cell."
    )

# Number of summary rows per (subject, session) pair.
summary_session_counts = (
    summary_df.groupby(['subject', 'session']).size().rename('n_runs').to_dict()
)


def load_run_events(subject: str, session: str, run_token: str, sfreq: float) -> np.ndarray:
    """Build an MNE-style events array for one run from its events TSV.

    Parameters
    ----------
    subject, session : str
        Directory / filename components used to locate the TSV under
        ``raw_events_root``.
    run_token : str
        Run part of the filename, e.g. ``"run-1"``.
    sfreq : float
        Sampling frequency used to convert the ``onset`` column to sample indices
        (assumes onsets are in seconds — TODO confirm against the dataset).

    Returns
    -------
    np.ndarray
        Integer array of shape (n_events, 3): sample index, 0, condition code
        (1 for labels in ``familiar_labels``, 2 for the other valid labels).
        Shape (0, 3) when the file has no rows with a valid label.

    Raises
    ------
    FileNotFoundError
        If the events TSV does not exist.
    """
    events_file = (
        raw_events_root
        / subject
        / session
        / 'eeg'
        / f"{subject}_{session}_task-gonogo_{run_token}_events.tsv"
    )
    if not events_file.exists():
        raise FileNotFoundError(f"Events TSV missing: {events_file}")

    # Use the explicit escape rather than a literal tab character in the source:
    # a literal tab is invisible and is silently turned into spaces by many
    # editors/formatters, which would break the parsing of the file.
    df = pd.read_csv(events_file, sep='\t')
    stim_df = df[df['value'].isin(valid_labels)]
    if stim_df.empty:
        return np.empty((0, 3), dtype=int)

    samples = (stim_df['onset'].to_numpy() * sfreq).round().astype(int)
    # Every remaining row carries a valid label, so "not familiar" means "new".
    codes = np.where(stim_df['value'].isin(familiar_labels), 1, 2).astype(int)
    events = np.column_stack([samples, np.zeros(len(samples), dtype=int), codes])
    return events


def merge_stage(subject: str, session: str, stage_root: Path, suffix: str) -> mne.io.Raw:
    """Load one preprocessing stage of a session as a single preloaded Raw object.

    For ``suffix == 'ica_cleaned'`` a session-level file matching
    ``{subject}_{session}_preprocessed_ica*_cleaned.fif`` is preferred (the first
    match in sorted order is used). Otherwise, or when no such file exists, all
    ``{subject}_{session}_run-*_preprocessed_{suffix}.fif`` files are read in
    sorted order and concatenated.

    Raises
    ------
    FileNotFoundError
        If the session directory is missing or contains no matching run files.
    """
    session_dir = stage_root / subject / session
    if not session_dir.exists():
        raise FileNotFoundError(f"Missing stage directory: {session_dir}")

    # Only look for the merged ICA output when the caller asked for that stage.
    if suffix == 'ica_cleaned':
        candidates = sorted(session_dir.glob(f"{subject}_{session}_preprocessed_ica*_cleaned.fif"))
        if candidates:
            # preload=True already reads the data; no extra load_data() call needed.
            return mne.io.read_raw_fif(str(candidates[0]), preload=True, verbose='ERROR')

    pattern = f"{subject}_{session}_run-*_preprocessed_{suffix}.fif"
    run_files = sorted(session_dir.glob(pattern))
    if not run_files:
        raise FileNotFoundError(f"No run files found for {subject} {session} at {session_dir}")

    raws = [
        mne.io.read_raw_fif(str(path), preload=True, verbose='ERROR')
        for path in run_files
    ]

    if len(raws) == 1:
        return raws[0]
    return mne.concatenate_raws(raws, preload=True, verbose=False)


def build_session_events(subject: str, session: str, sfreq: float) -> np.ndarray:
    """Assemble the events of all runs of a session on a concatenated sample axis.

    Runs are taken from the ``after_rereferencing`` stage in sorted filename order.
    The events of each run are shifted by the total number of samples of the runs
    before it, so the result lines up with the run files concatenated in that order.

    Parameters
    ----------
    subject, session : str
        Identify the session directory under ``pre_root``.
    sfreq : float
        Unused; kept for interface compatibility. Each run's own
        ``raw.info['sfreq']`` is used for the onset-to-sample conversion.

    Returns
    -------
    np.ndarray
        Integer events array of shape (n_events, 3) sorted by sample index, or
        shape (0, 3) when no run has any valid event.

    Raises
    ------
    FileNotFoundError
        If no run files exist (or, via load_run_events, an events TSV is missing).
    """
    session_dir = pre_root / subject / session
    run_files = sorted(session_dir.glob(f"{subject}_{session}_run-*_preprocessed_after_rereferencing.fif"))
    if not run_files:
        raise FileNotFoundError(f"No after_rereferencing runs for {subject} {session}")

    events_list = []
    sample_offset = 0
    for path in run_files:
        # Only the sampling rate and the sample count are needed here, so the
        # signal data is not loaded into memory.
        raw = mne.io.read_raw_fif(str(path), preload=False, verbose='ERROR')
        # "sub_ses_run-3_preprocessed_..." -> "3"
        run_token = path.stem.split('run-')[-1].split('_')[0]
        run_events = load_run_events(subject, session, f"run-{run_token}", raw.info['sfreq'])
        if run_events.size > 0:
            run_events[:, 0] += sample_offset
            events_list.append(run_events)
        # Advance the offset even for runs without events to stay aligned.
        sample_offset += raw.n_times
    if not events_list:
        return np.empty((0, 3), dtype=int)
    events = np.vstack(events_list)
    order = np.argsort(events[:, 0])
    return events[order]


def pick_roi(raw: mne.io.Raw, roi_label: str):
    """Resolve an ROI label to channel indices present in ``raw``.

    The channels listed for ``roi_label`` in ``requested_roi`` are tried first.
    If none of them exist in the recording, the matching entry of ``config_roi``
    (``'parietal'`` or ``'frontal'``) is used as a fallback.

    Returns
    -------
    tuple
        ``(picks, channel_names)``: channel indices and the names they refer to.
        ``(empty int array, [])`` when neither source yields a channel.
    """
    ch_names = raw.ch_names

    requested = [ch for ch in requested_roi.get(roi_label, []) if ch in ch_names]
    if requested:
        return mne.pick_channels(ch_names, requested, ordered=True), requested

    # Match on the common stem so that 'Parietal ROI' (and any 'Parieto-...'
    # label) maps to the parietal config entry. The previous test for 'Parieto'
    # never matched 'Parietal ROI', which sent the parietal ROI to frontal channels.
    key = 'parietal' if 'pariet' in roi_label.lower() else 'frontal'
    fallback = [ch for ch in config_roi.get(key, []) if ch in ch_names]
    if fallback:
        return mne.pick_channels(ch_names, fallback, ordered=True), fallback

    return np.array([], dtype=int), []


def compute_cohens_d(raw: mne.io.Raw, events: np.ndarray, roi_picks, mask=None):
    """Compute a Cohen's d time course (familiar minus new) for an ROI.

    Epochs from -0.2 s to 0.6 s are cut around ``events`` with a (-0.1, 0.0) s
    baseline. The ROI channels are averaged per epoch, and for every retained
    time sample d is the difference of the condition means divided by the
    pooled standard deviation.

    Parameters
    ----------
    raw : mne.io.Raw
        Continuous recording to epoch.
    events : np.ndarray
        Events array with codes 1 (familiar) and 2 (new).
    roi_picks : array-like of int
        Channel indices to average.
    mask : array-like or None
        Selector applied along the epoch time axis. ``None`` selects the
        post-stimulus samples (``epochs.times >= 0``). A boolean mask whose
        length differs from the epoch time axis (e.g. one derived from the
        continuous recording's time vector) cannot index the epoch data, so the
        post-stimulus selection is used in that case as well.

    Returns
    -------
    np.ndarray
        One d value per retained time sample; NaN where the pooled SD is 0.
    """
    event_id = {'familiar': 1, 'new': 2}
    epochs = mne.Epochs(
        raw,
        events,
        event_id=event_id,
        tmin=-0.2,
        tmax=0.6,
        baseline=(-0.1, 0.0),
        preload=True,
        detrend=None,
        event_repeated='drop',
        verbose='ERROR'
    )

    post_stimulus = epochs.times >= 0
    if mask is None:
        mask = post_stimulus
    else:
        mask = np.asarray(mask)
        if mask.dtype == bool and mask.shape != post_stimulus.shape:
            # Wrong time axis: indexing would raise IndexError, so fall back.
            mask = post_stimulus

    def _roi_timecourses(condition):
        # (n_epochs, n_channels, n_times) -> ROI mean -> (n_epochs, n_selected_times)
        data = epochs[condition].get_data()
        return data[:, roi_picks, :].mean(axis=1)[:, mask]

    fam = _roi_timecourses('familiar')
    new = _roi_timecourses('new')
    n_fam, n_new = fam.shape[0], new.shape[0]

    mean_diff = fam.mean(axis=0) - new.mean(axis=0)
    std_fam = fam.std(axis=0, ddof=1)
    std_new = new.std(axis=0, ddof=1)
    # max(..., 1) guards the degrees of freedom against zero/negative values.
    pooled_sd = np.sqrt(((n_fam - 1) * std_fam**2 + (n_new - 1) * std_new**2) / max(n_fam + n_new - 2, 1))
    # Avoid division by zero: report NaN instead of inf for flat signals.
    pooled_sd = np.where(pooled_sd == 0, np.nan, pooled_sd)
    return mean_diff / pooled_sd

# Epoch window used to build the time axis below. It must mirror the tmin/tmax
# passed to mne.Epochs inside compute_cohens_d.
EPOCH_TMIN, EPOCH_TMAX = -0.2, 0.6
D_COLUMN = "Cohen's d"
GROUP_COLUMNS = ['subject', 'session', 'Stage', 'ROI']

cohens_records = []
# NOTE(review): sessions are looked up in summary_df, so manual_subject only
# contributes if it has rows there — confirm this is intended.
subjects_for_d = sorted(set(summary_df['subject']) | {manual_subject})
for subject in subjects_for_d:
    subject_sessions = summary_df.loc[summary_df['subject'] == subject, 'session'].unique()
    for session in sorted(subject_sessions):
        try:
            raw_before = merge_stage(subject, session, pre_root, 'after_rereferencing')
            events = build_session_events(subject, session, raw_before.info['sfreq'])
            if events.size == 0:
                continue
            raw_after = merge_stage(subject, session, after_root, 'ica_cleaned')
        except FileNotFoundError as err:
            # Missing inputs skip the session, but leave a trace in the log.
            logger.warning("Skipping %s %s: %s", subject, session, err)
            continue

        # The mask must live on the epoch time axis, not on the continuous
        # recording's raw.times: a recording-length mask cannot index the epoch
        # data and every computation would fail.
        sfreq = raw_before.info['sfreq']
        first_idx = int(round(EPOCH_TMIN * sfreq))
        last_idx = int(round(EPOCH_TMAX * sfreq))
        epoch_times = np.arange(first_idx, last_idx + 1, dtype=float) / sfreq
        mask = epoch_times >= 0
        times_ms = (epoch_times[mask] * 1000).round(1)

        for stage_label, raw_stage in [('Before ICA', raw_before), ('After ICA', raw_after)]:
            for roi_label in requested_roi:
                picks, used_channels = pick_roi(raw_stage, roi_label)
                if len(picks) == 0:
                    continue
                try:
                    d_vals = compute_cohens_d(raw_stage, events, picks, mask)
                except Exception as err:
                    # Best effort per stage/ROI, but never silently.
                    logger.warning(
                        "Cohen's d failed for %s %s (%s, %s): %s",
                        subject, session, stage_label, roi_label, err,
                    )
                    continue
                channels_used = ','.join(used_channels)
                cohens_records.extend(
                    {
                        'subject': subject,
                        'session': session,
                        'Stage': stage_label,
                        'ROI': roi_label,
                        'Channels Used': channels_used,
                        'Time (ms)': float(t),
                        D_COLUMN: float(d),
                    }
                    for t, d in zip(times_ms, d_vals)
                )

cohens_df = pd.DataFrame(cohens_records)
if cohens_df.empty:
    raise RuntimeError("Cohen's d computation produced no results.")

cohens_output = RESULTS_DIR / 'cohens_d_summary.csv'
cohens_df.to_csv(cohens_output, index=False)
print(f"Saved Cohen's d timecourse summary → {cohens_output}")

# Select the column with the same ASCII-apostrophe key used in the records, and
# ignore NaN effect sizes so an all-NaN group cannot yield an invalid row label.
finite_df = cohens_df.dropna(subset=[D_COLUMN])
peak_idx = (
    finite_df.assign(_abs_d=finite_df[D_COLUMN].abs())
    .groupby(GROUP_COLUMNS)['_abs_d']
    .idxmax()
)
# One row per group: the time point with the largest absolute effect size.
peak_df = cohens_df.loc[peak_idx]
peak_output = RESULTS_DIR / 'cohens_d_peaks.csv'
peak_df.to_csv(peak_output, index=False)
print(f"Saved Cohen's d peak summary → {peak_output}")
peak_df.head()


NameError: name 'summary_df' is not defined

In [19]:
import mne
from pathlib import Path
import numpy as np

# Channels requested for each ROI label; pick_roi falls back to config_roi when
# none of the requested channels exist in a recording.
requested_roi = {
    'Frontal ROI': ['FP1', 'FP2'],
    'Parietal ROI': ['P3', 'P3"', 'P4', 'P4"', 'PZ', 'PZ"', 'CZ'],
}
config_roi = config['erp_analysis']['roi']

# Event labels from the events TSV, grouped into the two contrasted conditions.
familiar_labels = {
    'animal_target', 'nonanimal_target', 'easy_target', 'difficult_target'
}
new_labels = {
    'animal_distractor', 'nonanimal_distractor', 'easy_distractor', 'difficult_distractor'
}
valid_labels = familiar_labels | new_labels

# Input locations: raw dataset (event TSVs) and the two preprocessing stages compared.
raw_events_root = project_root / 'ds002680'
pre_root = project_root / 'data' / 'preprocessed' / 'after_rereferencing'
after_root = project_root / 'data' / 'preprocessed' / 'after_ica'

# This cell depends on `summary_df` created by an earlier cell. Fail with an
# actionable message (same exception type as before) instead of a bare NameError.
if 'summary_df' not in globals():
    raise NameError(
        "name 'summary_df' is not defined: run the cell that builds the summary "
        "table (it must provide 'subject' and 'session' columns) before this cell."
    )

# Number of summary rows per (subject, session) pair.
summary_session_counts = (
    summary_df.groupby(['subject', 'session']).size().rename('n_runs').to_dict()
)


def load_run_events(subject: str, session: str, run_token: str, sfreq: float) -> np.ndarray:
    """Build an MNE-style events array for one run from its events TSV.

    Parameters
    ----------
    subject, session : str
        Directory / filename components used to locate the TSV under
        ``raw_events_root``.
    run_token : str
        Run part of the filename, e.g. ``"run-1"``.
    sfreq : float
        Sampling frequency used to convert the ``onset`` column to sample indices
        (assumes onsets are in seconds — TODO confirm against the dataset).

    Returns
    -------
    np.ndarray
        Integer array of shape (n_events, 3): sample index, 0, condition code
        (1 for labels in ``familiar_labels``, 2 for the other valid labels).
        Shape (0, 3) when the file has no rows with a valid label.

    Raises
    ------
    FileNotFoundError
        If the events TSV does not exist.
    """
    events_file = (
        raw_events_root
        / subject
        / session
        / 'eeg'
        / f"{subject}_{session}_task-gonogo_{run_token}_events.tsv"
    )
    if not events_file.exists():
        raise FileNotFoundError(f"Events TSV missing: {events_file}")

    # Use the explicit escape rather than a literal tab character in the source:
    # a literal tab is invisible and is silently turned into spaces by many
    # editors/formatters, which would break the parsing of the file.
    df = pd.read_csv(events_file, sep='\t')
    stim_df = df[df['value'].isin(valid_labels)]
    if stim_df.empty:
        return np.empty((0, 3), dtype=int)

    samples = (stim_df['onset'].to_numpy() * sfreq).round().astype(int)
    # Every remaining row carries a valid label, so "not familiar" means "new".
    codes = np.where(stim_df['value'].isin(familiar_labels), 1, 2).astype(int)
    events = np.column_stack([samples, np.zeros(len(samples), dtype=int), codes])
    return events


def merge_stage(subject: str, session: str, stage_root: Path, suffix: str) -> mne.io.Raw:
    """Load one preprocessing stage of a session as a single preloaded Raw object.

    For ``suffix == 'ica_cleaned'`` a session-level file matching
    ``{subject}_{session}_preprocessed_ica*_cleaned.fif`` is preferred (the first
    match in sorted order is used). Otherwise, or when no such file exists, all
    ``{subject}_{session}_run-*_preprocessed_{suffix}.fif`` files are read in
    sorted order and concatenated.

    Raises
    ------
    FileNotFoundError
        If the session directory is missing or contains no matching run files.
    """
    session_dir = stage_root / subject / session
    if not session_dir.exists():
        raise FileNotFoundError(f"Missing stage directory: {session_dir}")

    # Only look for the merged ICA output when the caller asked for that stage.
    if suffix == 'ica_cleaned':
        candidates = sorted(session_dir.glob(f"{subject}_{session}_preprocessed_ica*_cleaned.fif"))
        if candidates:
            # preload=True already reads the data; no extra load_data() call needed.
            return mne.io.read_raw_fif(str(candidates[0]), preload=True, verbose='ERROR')

    pattern = f"{subject}_{session}_run-*_preprocessed_{suffix}.fif"
    run_files = sorted(session_dir.glob(pattern))
    if not run_files:
        raise FileNotFoundError(f"No run files found for {subject} {session} at {session_dir}")

    raws = [
        mne.io.read_raw_fif(str(path), preload=True, verbose='ERROR')
        for path in run_files
    ]

    if len(raws) == 1:
        return raws[0]
    return mne.concatenate_raws(raws, preload=True, verbose=False)


def build_session_events(subject: str, session: str, sfreq: float) -> np.ndarray:
    """Assemble the events of all runs of a session on a concatenated sample axis.

    Runs are taken from the ``after_rereferencing`` stage in sorted filename order.
    The events of each run are shifted by the total number of samples of the runs
    before it, so the result lines up with the run files concatenated in that order.

    Parameters
    ----------
    subject, session : str
        Identify the session directory under ``pre_root``.
    sfreq : float
        Unused; kept for interface compatibility. Each run's own
        ``raw.info['sfreq']`` is used for the onset-to-sample conversion.

    Returns
    -------
    np.ndarray
        Integer events array of shape (n_events, 3) sorted by sample index, or
        shape (0, 3) when no run has any valid event.

    Raises
    ------
    FileNotFoundError
        If no run files exist (or, via load_run_events, an events TSV is missing).
    """
    session_dir = pre_root / subject / session
    run_files = sorted(session_dir.glob(f"{subject}_{session}_run-*_preprocessed_after_rereferencing.fif"))
    if not run_files:
        raise FileNotFoundError(f"No after_rereferencing runs for {subject} {session}")

    events_list = []
    sample_offset = 0
    for path in run_files:
        # Only the sampling rate and the sample count are needed here, so the
        # signal data is not loaded into memory.
        raw = mne.io.read_raw_fif(str(path), preload=False, verbose='ERROR')
        # "sub_ses_run-3_preprocessed_..." -> "3"
        run_token = path.stem.split('run-')[-1].split('_')[0]
        run_events = load_run_events(subject, session, f"run-{run_token}", raw.info['sfreq'])
        if run_events.size > 0:
            run_events[:, 0] += sample_offset
            events_list.append(run_events)
        # Advance the offset even for runs without events to stay aligned.
        sample_offset += raw.n_times
    if not events_list:
        return np.empty((0, 3), dtype=int)
    events = np.vstack(events_list)
    order = np.argsort(events[:, 0])
    return events[order]


def pick_roi(raw: mne.io.Raw, roi_label: str):
    """Resolve an ROI label to channel indices present in ``raw``.

    The channels listed for ``roi_label`` in ``requested_roi`` are tried first.
    If none of them exist in the recording, the matching entry of ``config_roi``
    (``'parietal'`` or ``'frontal'``) is used as a fallback.

    Returns
    -------
    tuple
        ``(picks, channel_names)``: channel indices and the names they refer to.
        ``(empty int array, [])`` when neither source yields a channel.
    """
    ch_names = raw.ch_names

    requested = [ch for ch in requested_roi.get(roi_label, []) if ch in ch_names]
    if requested:
        return mne.pick_channels(ch_names, requested, ordered=True), requested

    # Match on the common stem so that 'Parietal ROI' (and any 'Parieto-...'
    # label) maps to the parietal config entry. The previous test for 'Parieto'
    # never matched 'Parietal ROI', which sent the parietal ROI to frontal channels.
    key = 'parietal' if 'pariet' in roi_label.lower() else 'frontal'
    fallback = [ch for ch in config_roi.get(key, []) if ch in ch_names]
    if fallback:
        return mne.pick_channels(ch_names, fallback, ordered=True), fallback

    return np.array([], dtype=int), []


def compute_cohens_d(raw: mne.io.Raw, events: np.ndarray, roi_picks, mask=None):
    """Compute a Cohen's d time course (familiar minus new) for an ROI.

    Epochs from -0.2 s to 0.6 s are cut around ``events`` with a (-0.2, 0.0) s
    baseline. The ROI channels are averaged per epoch, and for every retained
    time sample d is the difference of the condition means divided by the
    pooled standard deviation.

    Parameters
    ----------
    raw : mne.io.Raw
        Continuous recording to epoch.
    events : np.ndarray
        Events array with codes 1 (familiar) and 2 (new).
    roi_picks : array-like of int
        Channel indices to average.
    mask : array-like or None
        Selector applied along the epoch time axis. ``None`` selects the
        post-stimulus samples (``epochs.times >= 0``). A boolean mask whose
        length differs from the epoch time axis (e.g. one derived from the
        continuous recording's time vector) cannot index the epoch data, so the
        post-stimulus selection is used in that case as well.

    Returns
    -------
    np.ndarray
        One d value per retained time sample; NaN where the pooled SD is 0.
    """
    event_id = {'familiar': 1, 'new': 2}
    epochs = mne.Epochs(
        raw,
        events,
        event_id=event_id,
        tmin=-0.2,
        tmax=0.6,
        baseline=(-0.2, 0.0),
        preload=True,
        detrend=None,
        event_repeated='drop',
        verbose='ERROR'
    )

    post_stimulus = epochs.times >= 0
    if mask is None:
        mask = post_stimulus
    else:
        mask = np.asarray(mask)
        if mask.dtype == bool and mask.shape != post_stimulus.shape:
            # Wrong time axis: indexing would raise IndexError, so fall back.
            mask = post_stimulus

    def _roi_timecourses(condition):
        # (n_epochs, n_channels, n_times) -> ROI mean -> (n_epochs, n_selected_times)
        data = epochs[condition].get_data()
        return data[:, roi_picks, :].mean(axis=1)[:, mask]

    fam = _roi_timecourses('familiar')
    new = _roi_timecourses('new')
    n_fam, n_new = fam.shape[0], new.shape[0]

    mean_diff = fam.mean(axis=0) - new.mean(axis=0)
    std_fam = fam.std(axis=0, ddof=1)
    std_new = new.std(axis=0, ddof=1)
    # max(..., 1) guards the degrees of freedom against zero/negative values.
    pooled_sd = np.sqrt(((n_fam - 1) * std_fam**2 + (n_new - 1) * std_new**2) / max(n_fam + n_new - 2, 1))
    # Avoid division by zero: report NaN instead of inf for flat signals.
    pooled_sd = np.where(pooled_sd == 0, np.nan, pooled_sd)
    return mean_diff / pooled_sd

# Epoch window used to build the time axis below. It must mirror the tmin/tmax
# passed to mne.Epochs inside compute_cohens_d.
EPOCH_TMIN, EPOCH_TMAX = -0.2, 0.6
D_COLUMN = "Cohen's d"
GROUP_COLUMNS = ['subject', 'session', 'Stage', 'ROI']

cohens_records = []
# NOTE(review): sessions are looked up in summary_df, so manual_subject only
# contributes if it has rows there — confirm this is intended.
subjects_for_d = sorted(set(summary_df['subject']) | {manual_subject})
for subject in subjects_for_d:
    subject_sessions = summary_df.loc[summary_df['subject'] == subject, 'session'].unique()
    for session in sorted(subject_sessions):
        try:
            raw_before = merge_stage(subject, session, pre_root, 'after_rereferencing')
            events = build_session_events(subject, session, raw_before.info['sfreq'])
            if events.size == 0:
                continue
            raw_after = merge_stage(subject, session, after_root, 'ica_cleaned')
        except FileNotFoundError as err:
            # Missing inputs skip the session, but leave a trace in the log.
            logger.warning("Skipping %s %s: %s", subject, session, err)
            continue

        # The mask must live on the epoch time axis, not on the continuous
        # recording's raw.times: a recording-length mask cannot index the epoch
        # data and every computation would fail.
        sfreq = raw_before.info['sfreq']
        first_idx = int(round(EPOCH_TMIN * sfreq))
        last_idx = int(round(EPOCH_TMAX * sfreq))
        epoch_times = np.arange(first_idx, last_idx + 1, dtype=float) / sfreq
        mask = epoch_times >= 0
        times_ms = (epoch_times[mask] * 1000).round(1)

        for stage_label, raw_stage in [('Before ICA', raw_before), ('After ICA', raw_after)]:
            for roi_label in requested_roi:
                picks, used_channels = pick_roi(raw_stage, roi_label)
                if len(picks) == 0:
                    continue
                try:
                    d_vals = compute_cohens_d(raw_stage, events, picks, mask)
                except Exception as err:
                    # Best effort per stage/ROI, but never silently.
                    logger.warning(
                        "Cohen's d failed for %s %s (%s, %s): %s",
                        subject, session, stage_label, roi_label, err,
                    )
                    continue
                channels_used = ','.join(used_channels)
                cohens_records.extend(
                    {
                        'subject': subject,
                        'session': session,
                        'Stage': stage_label,
                        'ROI': roi_label,
                        'Channels Used': channels_used,
                        'Time (ms)': float(t),
                        D_COLUMN: float(d),
                    }
                    for t, d in zip(times_ms, d_vals)
                )

cohens_df = pd.DataFrame(cohens_records)
if cohens_df.empty:
    raise RuntimeError("Cohen's d computation produced no results.")

cohens_output = RESULTS_DIR / 'cohens_d_summary.csv'
cohens_df.to_csv(cohens_output, index=False)
print(f"Saved Cohen's d timecourse summary → {cohens_output}")

# Select the column with the same ASCII-apostrophe key used in the records, and
# ignore NaN effect sizes so an all-NaN group cannot yield an invalid row label.
finite_df = cohens_df.dropna(subset=[D_COLUMN])
peak_idx = (
    finite_df.assign(_abs_d=finite_df[D_COLUMN].abs())
    .groupby(GROUP_COLUMNS)['_abs_d']
    .idxmax()
)
# One row per group: the time point with the largest absolute effect size.
peak_df = cohens_df.loc[peak_idx]
peak_output = RESULTS_DIR / 'cohens_d_peaks.csv'
peak_df.to_csv(peak_output, index=False)
print(f"Saved Cohen's d peak summary → {peak_output}")
peak_df.head()


NameError: name 'summary_df' is not defined