## **Multi EEG-ICA Preproccesing Framework**

In [None]:
# FUNCTIONS

# Libraries
import os
import mne
import json
import scipy
import traceback
import numpy as np
from tqdm import tqdm
from scipy.stats import kurtosis
from typing import List, Dict, Tuple
from mne_icalabel import label_components


# Force non-interactive backend
import matplotlib
matplotlib.use('Agg')  # ← This prevents plots from showing
import matplotlib.pyplot as plt

mne.set_log_level('WARNING')


# =============================================================================
# 1. DEFINE EventTrimmer CLASS
# =============================================================================
class EventTrimmer:
    """
    Trim EEG data based on trigger channels, preserve stim channels, align events,
    and optionally save cropping metadata to JSON.
    """
    def __init__(
        self,
        raw_filtered: mne.io.Raw,
        output_path: str,
        subject: str,
        trigger_pairs: tuple = ('1a', '6a'),
        pre_time: float = 2.0,
        post_time: float = 2.0,
        stim_channels: List[str] = None,
        events_base: str = None,
        project_id: str = "Sbj01",
        plot: bool = False,
        save: bool = True,
        save_json: bool = True,
        align_events: bool = True
    ):
        self.raw = raw_filtered
        self.output_path = output_path
        self.subject = subject
        self.first_trig, self.last_trig = trigger_pairs
        self.pre_time = pre_time
        self.post_time = post_time
        self.stim_channels = stim_channels or ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a']
        self.events_base = events_base
        self.project_id = project_id
        self.plot = plot
        self.save = save
        self.save_json = save_json
        self.align_events = align_events

        if self.events_base is None:
            self.events_base = os.path.join(os.path.dirname(self.output_path), "Events")

        self.raw_cropped = None
        self.stim_raw = None
        self.onsets = {}
        self.tmin_trim = None
        self.tmax_trim = None

    def detect_onsets(self) -> Dict[str, np.ndarray]:
        sfreq = self.raw.info['sfreq']
        picks = [self.first_trig, self.last_trig]
        picks = [ch for ch in picks if ch in self.raw.ch_names]
        if len(picks) < 2:
            raise ValueError(f"Trigger channels missing: {picks}")

        data, times = self.raw[picks, :]
        sig_1a = data[0] if len(data) > 0 else np.zeros_like(times)
        sig_6a = data[1] if len(data) > 1 else np.zeros_like(times)

        def find_onsets(signal):
            diff = np.diff((signal > 0.5).astype(int))
            return np.where(diff == 1)[0] + 1

        onsets_1a = find_onsets(sig_1a) / sfreq
        onsets_6a = find_onsets(sig_6a) / sfreq
        self.onsets = {self.first_trig: onsets_1a, self.last_trig: onsets_6a}

        self._log(f"✅ {self.first_trig}: {len(onsets_1a)} onsets | First: {onsets_1a[0]:.3f}s")
        self._log(f"✅ {self.last_trig}: {len(onsets_6a)} onsets | Last: {onsets_6a[-1]:.3f}s")
        return self.onsets

    def _log(self, msg: str):
        """Use parent's log if available, otherwise print"""
        if hasattr(self, 'output_path') and hasattr(self, 'subject'):
            log_file = os.path.join(self.output_path, f"{self.subject}_preproc_log.txt")
            with open(log_file, 'a') as f:
                f.write(msg + '\n')
        print(msg)

    def trim_data(self) -> mne.io.Raw:
        onsets = self.onsets
        sfreq = self.raw.info['sfreq']
        tmax_orig = self.raw.times[-1]

        self.tmin_trim = max(0.0, onsets[self.first_trig][0] - self.pre_time)
        self.tmax_trim = min(tmax_orig, onsets[self.last_trig][-1] + self.post_time)

        self._log(f"\n✂️ Trimming data to: {self.tmin_trim:.3f}s → {self.tmax_trim:.3f}s "
                  f"({self.tmax_trim - self.tmin_trim:.2f}s)")

        self.raw_cropped = self.raw.copy().crop(tmin=self.tmin_trim, tmax=self.tmax_trim)

        if self.plot:
            self._plot_triggers_and_crop()

        if self.save_json:
            self._save_cropping_json()

        return self.raw_cropped

    def _plot_triggers_and_crop(self):
        raw = self.raw
        times = raw.times
        sig_1a = raw.get_data(picks=[self.first_trig])[0] if self.first_trig in raw.ch_names else np.zeros_like(times)
        sig_6a = raw.get_data(picks=[self.last_trig])[0] if self.last_trig in raw.ch_names else np.zeros_like(times)

        plt.figure(figsize=(16, 6))
        plt.plot(times, sig_6a, color='lavender', linewidth=1, label=self.last_trig)
        plt.vlines(self.onsets[self.first_trig], ymin=1.0, ymax=1.4, color='indigo', alpha=0.7, label=f"{self.first_trig} onsets")
        plt.vlines(self.onsets[self.last_trig], ymin=-0.2, ymax=0.2, color='darkviolet', alpha=0.7, label=f"{self.last_trig} onsets")
        plt.axvspan(self.tmin_trim, self.tmax_trim, color='azure', alpha=0.3, label="Cropped Region")
        plt.axvline(self.onsets[self.first_trig][0], color='purple', linestyle='--', linewidth=2, label=f"First '{self.first_trig}'")
        plt.axvline(self.onsets[self.last_trig][-1], color='indigo', linestyle='--', linewidth=2, label=f"Last '{self.last_trig}'")

        plt.title(f"Trigger Channels '{self.first_trig}' and '{self.last_trig}' — Trim Region", fontsize=14)
        plt.xlabel("Time (s)")
        plt.ylabel("Digital Signal")
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.close()

    def _save_cropping_json(self):
        json_path = os.path.join(self.output_path, f"{self.subject}_cropping_info.json")
        crop_info = {
            "subject": self.subject,
            "tmin_trim": float(self.tmin_trim),
            "tmax_trim": float(self.tmax_trim),
            "duration_seconds": float(self.tmax_trim - self.tmin_trim),
            "first_trigger": self.first_trig,
            "last_trigger": self.last_trig,
            "pre_time_seconds": self.pre_time,
            "post_time_seconds": self.post_time,
            "onset_first_trigger_first": float(self.onsets[self.first_trig][0]),
            "onset_last_trigger_last": float(self.onsets[self.last_trig][-1]),
            "n_onsets_first_trigger": int(len(self.onsets[self.first_trig])),
            "n_onsets_last_trigger": int(len(self.onsets[self.last_trig])),
            "sampling_frequency": float(self.raw.info['sfreq']),
            "stim_channels_preserved": [ch for ch in self.stim_channels if ch in self.raw_cropped.ch_names],
            "cropped_n_samples": int(self.raw_cropped.n_times),
            "meas_date": self.raw_cropped.info.get('meas_date', None)
        }

        with open(json_path, 'w') as f:
            json.dump(crop_info, f, indent=2, default=str)
        self._log(f"📄 Cropping metadata saved to: {json_path}")

    def preserve_stim_channels(self) -> mne.io.RawArray:
        ch_names = [ch for ch in self.stim_channels if ch in self.raw_cropped.ch_names]
        if not ch_names:
            raise ValueError("No stim channels found after cropping.")
        data = self.raw_cropped.get_data(picks=ch_names)
        info = mne.create_info(ch_names, sfreq=self.raw_cropped.info['sfreq'], ch_types='stim')
        self.stim_raw = mne.io.RawArray(data, info)
        self.stim_raw.set_meas_date(self.raw_cropped.info['meas_date'])
        self._log(f"✅ Preserved stim channels: {ch_names}")
        return self.stim_raw

    def align_external_events(self):
        if not self.align_events or not self.save:
            self._log("⏭️ Skipping external event alignment (disabled).")
            return

        events_fif = os.path.join(self.events_base, f"{self.project_id}_events_mne_binary-eve.fif")
        events_json = os.path.join(self.events_base, f"{self.project_id}_event_id_binary.json")

        # Fixed: Gracefully handle missing files instead of crashing
        for path in [events_fif, events_json]:
            if not os.path.exists(path):
                self._log(f"⏭️ Skipping event alignment - missing file: {path}")
                return

        events_full = mne.read_events(events_fif)
        with open(events_json, 'r') as f:
            event_id = json.load(f)

        sfreq = self.raw.info['sfreq']
        event_times_sec = events_full[:, 0] / sfreq
        in_window = (event_times_sec >= self.tmin_trim) & (event_times_sec <= self.tmax_trim)
        events_cropped = events_full[in_window].copy()
        events_cropped[:, 0] -= int(self.tmin_trim * sfreq)

        event_id_clean = {k.replace('-', '').replace(' ', ''): v for k, v in event_id.items()}

        out_events = os.path.join(self.output_path, f"{self.subject}_events_mne_binary-eve.fif")
        out_id = os.path.join(self.output_path, f"{self.subject}_event_id_binary.json")

        mne.write_events(out_events, events_cropped, overwrite=True)
        with open(out_id, 'w') as f:
            json.dump(event_id_clean, f, indent=2)

        self._log("\n✅ EVENT ALIGNMENT COMPLETE")
        self._log(f"   Original events: {len(events_full)}")
        self._log(f"   After trimming:  {len(events_cropped)}")
        self._log(f"   Duration: {self.tmin_trim:.2f}s → {self.tmax_trim:.2f}s")
        self._log(f"   Events saved to: {out_events}")
        self._log(f"   Event ID saved to: {out_id}")
        self._log(f"   Event IDs: {event_id_clean}")

    def run(self) -> tuple[mne.io.Raw, mne.io.RawArray]:
        self._log("🔍 Starting event-based trimming...")
        self.detect_onsets()
        self.trim_data()
        self.preserve_stim_channels()
        self.align_external_events()
        self._log("✅ Trimming & event alignment complete.")
        return self.raw_cropped, self.stim_raw


# =============================================================================
# 2. DEFINE EEGICAProcessor CLASS
# =============================================================================
class EEGICAProcessor:
    def __init__(
        self,
        subject: str,
        session: str,
        task: str,
        mff_path: str,
        gpsc_file: str,
        events_base: str,
        project_id: str,
        base_output_path: str,
        trigger_pairs: Tuple[str, str] = ('1a', '6a'),
        pre_time: float = 2.0,
        post_time: float = 2.0,
        plot: bool = False,
        random_state: int = 99,
        log_to_file: bool = True
    ):
        self.subject = subject
        self.session = session
        self.task = task
        self.mff_path = mff_path
        self.gpsc_file = gpsc_file
        self.events_base = events_base
        self.project_id = project_id
        self.trigger_pairs = trigger_pairs
        self.pre_time = pre_time
        self.post_time = post_time
        self.plot = plot
        self.random_state = random_state
        self.log_to_file = log_to_file

        # Set up output path and subfolders
        self.output_path = os.path.join(base_output_path, subject, f"{task}_{session}")
        os.makedirs(self.output_path, exist_ok=True)
        os.makedirs(os.path.join(self.output_path, "plots"), exist_ok=True)

        self.log_file = os.path.join(self.output_path, f"{subject}_preproc_log.txt")
        if self.log_to_file:
            self._log(f"Initialized EEGICAProcessor for {subject}/{session}")

        self.raw = None
        self.raw_filtered = None
        self.raw_cropped = None
        self.stim_raw = None
        self.cleaned_data = None
        self.ica_obj = None



    def _log(self, msg: str, detail: str = "normal"):
        """
        Log messages:
        - 'normal': to both console and file
        - 'debug':  only to file
        """
        # Always write to file
        with open(self.log_file, 'a') as f:
            f.write(msg + '\n')
        
        # Only print to console if 'normal'
        if detail == "normal":
            print(msg)

    def load_and_montage(self):
        self._log("Loading raw data from .mff...")
        self.raw = mne.io.read_raw_egi(self.mff_path, preload=True)
        channel_map = {str(i): f'E{i}' for i in range(1, 281)}
        channel_map['REF CZ'] = 'Cz'
        self.raw.rename_channels(channel_map)
        self._log("Parsing and applying montage from .gpsc...")
        channels = self._parse_gpsc(self.gpsc_file)
        if not channels:
            raise ValueError("No valid channels in .gpsc file")
        gpsc_array = np.array([ch[1:4] for ch in channels])
        mean_pos = np.mean(gpsc_array, axis=0)
        self._log(f"Original mean position (mm): {mean_pos}")
        channels_normalized = [(ch[0], ch[1] - mean_pos[0], ch[2] - mean_pos[1], ch[3] - mean_pos[2]) for ch in channels]
        ch_pos = {ch[0]: np.array(ch[1:4]) / 1000.0 for ch in channels_normalized}
        fid_nz = ch_pos.get('FidNz')
        fid_t9 = ch_pos.get('FidT9')
        fid_t10 = ch_pos.get('FidT10')
        montage = mne.channels.make_dig_montage(ch_pos=ch_pos, nasion=fid_nz, lpa=fid_t9, rpa=fid_t10, coord_frame='head')
        self.raw.set_montage(montage, on_missing='warn')
        self._log("Montage applied.")

    def _parse_gpsc(self, filepath: str):
        channels = []
        with open(filepath, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 4:
                    continue
                name = parts[0]
                try:
                    x, y, z = map(float, parts[1:4])
                    channels.append((name, x, y, z))
                except ValueError:
                    continue
        return channels

    def filter_data(self):
        self._log("Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...")
        self.raw_filtered = self.raw.copy().filter(
            l_freq=1.0, h_freq=100.0, picks=['eeg'], method='fir', phase='zero',
            fir_window='hamming', fir_design='firwin', n_jobs=1
        )
        nyquist = self.raw_filtered.info["sfreq"] / 2
        notch_freqs = np.arange(60, nyquist, 60)
        self.raw_filtered.notch_filter(
            freqs=notch_freqs, picks='eeg', method='spectrum_fit',
            filter_length='auto', mt_bandwidth=1.0, p_value=0.05
        )
        if np.std(self.raw_filtered.get_data(picks=['Cz'])[0]) < 1e-6:
            self.raw_filtered.info['bads'].append('Cz')
            self._log("Marked Cz as bad (flat signal).")

    def trim_events(self):
        # ✅ Improved: Include TT*, *a, and specific event codes 'c', 'o', 'x'
        stim_chs = [
            ch for ch in self.raw_filtered.ch_names
            if 'TT' in ch or ch.endswith('a') or ch in ['c', 'o', 'x']
        ]
        self._log(f"Stim channels: {stim_chs}")

        trimmer = EventTrimmer(
            raw_filtered=self.raw_filtered,
            output_path=self.output_path,
            subject=self.subject,
            trigger_pairs=self.trigger_pairs,
            pre_time=self.pre_time,
            post_time=self.post_time,
            stim_channels=stim_chs,
            events_base=self.events_base,
            project_id=self.project_id,
            plot=self.plot,
            save_json=True,
            align_events=True
        )
        self.raw_cropped, self.stim_raw = trimmer.run()

    def detect_bad_channels(self):
        self._log("Detecting bad channels using MAD...")
        raw_eeg = self.raw_cropped.copy().pick("eeg")
        data_uV = raw_eeg.get_data() * 1e6
        mad_scores = [scipy.stats.median_abs_deviation(row, scale=1) for row in data_uV]
        median_mad = np.nanmedian(mad_scores)
        mad_mad = scipy.stats.median_abs_deviation(mad_scores, scale=1)
        z_scores = 0.6745 * (np.array(mad_scores) - median_mad) / mad_mad
        bads = [raw_eeg.ch_names[i] for i in np.where(np.abs(z_scores) >= 9.5)[0]]
        self.raw_cropped.info['bads'].extend(bads)
        self._log(f"Bad channels (MAD): {bads}")

    def run_automatic_ica_cleaning(self, eeg_data, n_components=0.99, random_state=99, cmap='plasma'):
        """Full ICA cleaning pipeline with artifact detection."""
        results = {}

        # === 1. Data Preparation ===
        raw = eeg_data.copy()
        vveog_anode, vveog_cathode = 'E31', 'E19'
        heog_anode, heog_cathode = 'E41', 'E274'
        ecg_anode, ecg_cathode = 'E227', 'E229'
        emg_chs = ['E280', 'E52']
        ft_chs = ['E31', 'E19']  # Used for frontal low-frequency artifacts

        raw.pick_types(eeg=True, eog=True, ecg=True, emg=True)
        raw.set_channel_types({'E280': 'emg', 'E52': 'emg'})
        raw_filtered = raw.copy()

        # === 2. Add Bipolar Reference Channels ===
        if all(ch in raw_filtered.ch_names for ch in [vveog_anode, vveog_cathode]):
            raw_filtered = mne.set_bipolar_reference(
                raw_filtered, anode=vveog_anode, cathode=vveog_cathode,
                ch_name='vVEOG', drop_refs=False
            ).set_channel_types({'vVEOG': 'eog'})
            self._log("Created vVEOG (E31-E19) for blink detection", detail="debug")

        if all(ch in raw_filtered.ch_names for ch in [heog_anode, heog_cathode]):
            raw_filtered = mne.set_bipolar_reference(
                raw_filtered, anode=heog_anode, cathode=heog_cathode,
                ch_name='BLINK_H', drop_refs=False
            ).set_channel_types({'BLINK_H': 'eog'})
            self._log("Created BLINK_H (E41-E274) for horizontal eye movement", detail="debug")

        if ecg_anode in raw_filtered.ch_names and ecg_cathode in raw_filtered.ch_names:
            raw_filtered = mne.set_bipolar_reference(
                raw_filtered, anode=ecg_anode, cathode=ecg_cathode,
                ch_name='ECG_BIO', drop_refs=False
            ).set_channel_types({'ECG_BIO': 'ecg'})
            self._log("Created ECG_BIO (E227-E229) for cardiac artifact detection", detail="debug")

        # === 3. Fit ICA ===
        self._log("Fitting ICA with Extended Infomax...", detail="debug")
        ica = mne.preprocessing.ICA(
            n_components=n_components,
            random_state=random_state,
            method='picard',
            fit_params=dict(ortho=False, extended=True),
            max_iter='auto'
        )
        try:
            ica.fit(raw_filtered)
            self._log(f"ICA fitted with {ica.n_components_} components", detail="debug")
        except Exception as e:
            self._log(f"❌ ICA fitting failed: {e}", detail="normal")
            raise

        # === 4. Artifact Detection ===
        ica.exclude = []
        blink_idx = []
        h_saccade_idx = []
        ecg_idx = []
        muscle_idx = []
        frontal_lf_idx = []  # Renamed from "drift"
        line_noise_idx = []

        # --- Blink Detection ---
        if 'vVEOG' in raw_filtered.ch_names:
            try:
                idx, _ = ica.find_bads_eog(raw_filtered, ch_name='vVEOG', measure='zscore', threshold=4.0)
                blink_idx = [int(i) for i in idx]
                ica.exclude.extend(blink_idx)
                self._log(f"Blink components detected: {blink_idx}", detail="debug")
            except Exception as e:
                self._log(f"Blink detection failed: {e}", detail="debug")

        # --- Horizontal Eye Movement ---
        if 'BLINK_H' in raw_filtered.ch_names:
            try:
                idx, _ = ica.find_bads_eog(raw_filtered, ch_name='BLINK_H', measure='zscore', threshold=4.0)
                h_saccade_idx = [int(i) for i in idx if i not in ica.exclude]
                ica.exclude.extend(h_saccade_idx)
                self._log(f"Horizontal eye components: {h_saccade_idx}", detail="debug")
            except Exception as e:
                self._log(f"Horizontal eye detection failed: {e}", detail="debug")

        # --- ECG Detection ---
        if 'ECG_BIO' in raw_filtered.ch_names:
            try:
                idx, _ = ica.find_bads_ecg(raw_filtered, ch_name='ECG_BIO', method='correlation', measure='zscore', threshold=4.5)
                ecg_idx = [int(i) for i in idx]
                ica.exclude.extend(ecg_idx)
                self._log(f"ECG components detected: {ecg_idx}", detail="debug")
            except Exception as e:
                self._log(f"ECG detection failed: {e}", detail="debug")

        # --- Muscle Artifacts ---
        self._log("Detecting muscle artifacts (30–100 Hz)...", detail="debug")
        for ch_name in emg_chs:
            if ch_name not in raw_filtered.ch_names:
                continue
            try:
                idx, _ = ica.find_bads_eog(
                    raw_filtered, ch_name=ch_name,
                    measure='zscore', l_freq=30, h_freq=100,
                    threshold=3.5
                )
                idx = [int(i) for i in idx]
                new_idx = [i for i in idx if i not in ica.exclude]
                muscle_idx.extend(new_idx)
                if new_idx:
                    self._log(f"Muscle components linked to {ch_name}: {new_idx}", detail="debug")
            except Exception as e:
                self._log(f"EMG detection failed for {ch_name}: {e}", detail="debug")
        muscle_idx = list(set(muscle_idx))
        ica.exclude.extend(muscle_idx)

        # --- Low-Frequency Frontal Artifacts (replaces "drift") ---
        self._log("Detecting low-frequency frontal artifacts (1–10 Hz) using E31/E19...", detail="debug")
        for ch_name in ft_chs:
            if ch_name not in raw_filtered.ch_names:
                continue
            try:
                idx, _ = ica.find_bads_eog(
                    raw_filtered, ch_name=ch_name,
                    measure='zscore', l_freq=1.0, h_freq=10.0, threshold=5.0
                )
                idx = [int(i) for i in idx]
                new_idx = [i for i in idx if i not in ica.exclude]
                frontal_lf_idx.extend(new_idx)
                if new_idx:
                    self._log(f"Frontal LF components linked to {ch_name}: {new_idx}", detail="debug")
            except Exception as e:
                self._log(f"Frontal LF detection failed for {ch_name}: {e}", detail="debug")
        frontal_lf_idx = list(set(frontal_lf_idx))
        ica.exclude.extend(frontal_lf_idx)

        # --- Line Noise (Improved: Sharp Peak Detection) ---
        self._log("Detecting line noise (58–62 Hz) with sharp peak criteria...", detail="debug")
        try:
            sfreq = raw.info['sfreq']
            src_data = ica.get_sources(raw_filtered).get_data()
            for i in range(ica.n_components_):
                if i in ica.exclude:
                    continue
                psd, freqs = mne.time_frequency.psd_array_welch(
                    src_data[i], sfreq=sfreq, fmin=1, fmax=100, verbose=False
                )
                line_band = (freqs >= 58) & (freqs <= 62)
                ref_band = (freqs >= 1) & (freqs <= 100)
                flank_band = ((freqs >= 50) & (freqs < 58)) | ((freqs > 62) & (freqs <= 70))

                if psd[ref_band].mean() == 0:
                    continue

                line_ratio = psd[line_band].mean() / psd[ref_band].mean()
                peak_in_line = psd[line_band].max()
                nearby_avg = psd[flank_band].mean()
                if nearby_avg == 0:
                    continue
                peak_prominence = peak_in_line / nearby_avg

                if line_ratio > 0.8 and peak_prominence > 5.0:
                    line_noise_idx.append(i)
                    self._log(f"  C{i} → line noise (ratio: {line_ratio:.2f}, prominence: {peak_prominence:.1f})", detail="debug")
            ica.exclude.extend(line_noise_idx)
            self._log(f"Line noise components: {sorted(line_noise_idx)}", detail="debug")
        except Exception as e:
            self._log(f"Line noise detection failed: {e}", detail="debug")

        # --- ICLabel: High-Confidence Artifact Detection ---
        self._log("Running mne-icalabel...", detail="debug")
        results['icalabel_excluded'] = []
        results['icalabel_confidences'] = {}
        results['icalabel_labels'] = {}  # Now stores labels
        try:
            labels_dict = label_components(raw_filtered, ica, method="iclabel")
            labels = labels_dict["labels"]
            probas = labels_dict["y_pred_proba"]
            CONFIDENCE_THRESHOLDS = {
                'eye blink': 0.90,
                'heart beat': 0.95,
                'muscle artifact': 0.95,
                'line noise': 0.95,
                'channel noise': 0.95
            }

            detected = []
            for i, (label, prob) in enumerate(zip(labels, probas)):
                lbl = label.lower().strip()
                if lbl in CONFIDENCE_THRESHOLDS and prob > CONFIDENCE_THRESHOLDS[lbl]:
                    detected.append((i, lbl, prob))
            new_excluded = [i for i, _, _ in detected if i not in ica.exclude]
            ica.exclude.extend(new_excluded)
            results['icalabel_excluded'] = sorted(new_excluded)
            results['icalabel_confidences'] = {i: probas[i] for i in new_excluded}
            results['icalabel_labels'] = {i: labels[i] for i in new_excluded}

            # Console: show type and confidence
            if new_excluded:
                iclabel_strs = [
                    f"C{i}({labels[i]}: {probas[i].max():.2f})"
                    for i in sorted(new_excluded)
                ]
                self._log(f"ICLabel added {len(new_excluded)}: {', '.join(iclabel_strs)}", detail="normal")
            else:
                self._log("ICLabel: No components added", detail="normal")

        except Exception as e:
            self._log(f"❌ ICLabel failed: {e}", detail="normal")

        # --- Signal Metrics ---
        try:
            src_data = ica.get_sources(raw_filtered).get_data()
            extreme_excluded = []
            for i in range(ica.n_components_):
                if i in ica.exclude:
                    continue
                x = src_data[i]
                var = np.var(x)
                kurt = kurtosis(x)
                ptp = np.ptp(x)
                is_dead_flat = var < 1e-14
                is_pure_spikes = kurt > 10000
                is_saturated = ptp > 100000
                if is_dead_flat or is_pure_spikes or is_saturated:
                    ica.exclude.append(i)
                    extreme_excluded.append(i)
                    self._log(f"Excluded component {i} via signal metrics", detail="debug")
            self._log(f"Signal metrics excluded: {extreme_excluded}", detail="debug")
        except Exception as e:
            self._log(f"Signal metrics failed: {e}", detail="debug")

        # === Apply ICA ===
        self._log(f"Applying ICA, excluding {len(ica.exclude)} components", detail="debug")
        cleaned_data = ica.apply(eeg_data.copy())

        # === FINAL PROFESSIONAL SUMMARY ===
        self._log("\n" + "━" * 60)
        self._log("🧩 ICA ARTIFACT REJECTION SUMMARY")
        self._log("━" * 60)
        self._log(f"{'Total components':<18} {ica.n_components_}")
        self._log(f"{'Excluded':<18} {len(ica.exclude)}")
        self._log("")
        self._log(f"{'Blink':<18} {sorted(blink_idx)}")
        self._log(f"{'Horizontal eye':<18} {sorted(h_saccade_idx)}")
        self._log(f"{'ECG':<18} {sorted(ecg_idx)}")
        self._log(f"{'Muscle':<18} {sorted(muscle_idx)}")
        self._log(f"{'Frontal LF':<18} {sorted(frontal_lf_idx)}")  # Updated name
        self._log(f"{'Line noise':<18} {sorted(line_noise_idx)}")
        if results.get('icalabel_excluded'):
            conf_str = ", ".join([
                f"C{i}({results['icalabel_labels'][i]}: {results['icalabel_confidences'][i].max():.2f})"
                for i in sorted(results['icalabel_excluded'])
            ])
            self._log(f"{'ICLabel':<18} {conf_str}")
        else:
            self._log(f"{'ICLabel':<18} []")
        self._log(f"{'Signal metrics':<18} {sorted(extreme_excluded) if 'extreme_excluded' in locals() else []}")
        self._log("")
        self._log(f"🔧 Final exclude list: {sorted(ica.exclude)}")
        self._log("━" * 60)

        # === Save ICA Plots ===

        if ica.exclude:
            try:
                fig_components = ica.plot_components(cmap=cmap, show=False)
                # ✅ Handle single figure (when only 1 page)
                if not isinstance(fig_components, list):
                    fig_components = [fig_components]

                plots_saved = []
                for i, fig in enumerate(fig_components):
                    comp_fig_path = os.path.join(self.output_path, "plots", f"{self.subject}_ica_components_page{i}.png")
                    # ✅ fig is already a Figure — no need for .get_figure()
                    fig.savefig(comp_fig_path, dpi=150, bbox_inches='tight')
                    plt.close(fig)  # ← Close the figure directly
                    plots_saved.append(comp_fig_path)
                self._log(f"🖼️ Saved {len(plots_saved)} ICA component page(s)")
            except Exception as e:
                self._log(f"⚠️ Failed to save ICA plots: {e}")

        # === Return ICA Object ===
        ica_object = {
            'ica_model': ica,
            'original_data': eeg_data,
            'filtered_data': raw_filtered,
            'auto_excluded': ica.exclude.copy(),
            'detection_results': {
                'blink_indices': sorted(blink_idx),
                'horizontal_eye_movement_indices': sorted(h_saccade_idx),
                'ecg_indices': sorted(ecg_idx),
                'muscle_indices': sorted(muscle_idx),
                'frontal_lf_indices': sorted(frontal_lf_idx),  # Updated key
                'line_noise_components': sorted(line_noise_idx),
                'icalabel_excluded': sorted(results.get('icalabel_excluded', [])),
                'signal_metrics_excluded': sorted(extreme_excluded) if 'extreme_excluded' in locals() else []
            },
            'parameters': {
                'n_components': n_components,
                'random_state': random_state,
                'vertical_eog': f"{vveog_anode}-{vveog_cathode} (vVEOG)",
                'horizontal_eog': f"{heog_anode}-{heog_cathode} (BLINK_H)",
                'ecg_derivation': f"{ecg_anode}-{ecg_cathode}",
                'emg_channels': emg_chs
            }
        }
        return cleaned_data, ica_object

    def run_ica_cleaning(self):
        self._log("Running automatic ICA cleaning...")
        cleaned, ica_obj = self.run_automatic_ica_cleaning(
            self.raw_cropped,
            n_components=0.99,
            random_state=self.random_state,
            cmap='viridis'
        )
        self.cleaned_data = cleaned
        self.ica_obj = ica_obj

    def plot_psd_comparison(self):
        """Plot and save PSD comparison before vs after ICA."""
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), sharex=True)
        
        self.raw_cropped.compute_psd(fmax=120, picks='eeg', exclude='bads').plot(axes=ax1, show=False)
        ax1.set_title('Before ICA', fontsize=12)
        ax1.set_xlabel('')
        
        self.cleaned_data.compute_psd(fmax=120, picks='eeg', exclude='bads').plot(axes=ax2, show=False)
        ax2.set_title('After ICA', fontsize=12)
        
        fig.suptitle('Power Spectral Density: Before vs. After ICA', fontsize=16)
        
        # Avoid tight_layout warning
        plt.subplots_adjust(top=0.94, hspace=0.3)

        fig_path = os.path.join(self.output_path, "plots", f"{self.subject}_psd_comparison.png")
        fig.savefig(fig_path, dpi=150, bbox_inches='tight')
        self._log(f"📊 PSD comparison saved to: {fig_path}")
        plt.close(fig)

    def save_data(self):
        sub_id = self.subject.split('-')[1]
        fname = f"sub-{sub_id}_ses-{self.session}_task-{self.task}_eeg_ica_cleaned_raw.fif"
        full_path = os.path.join(self.output_path, fname)
        self.cleaned_data.save(full_path, overwrite=True)
        self._log(f"Cleaned data saved to: {full_path}")

    def run(self):
        self._log("🔄 Starting preprocessing...")
        
        # Step 1: Load and apply montage
        self.load_and_montage()
        self._log("✅ Loaded and montaged")
        
        # Step 2: Filter data (1–100 Hz + notch)
        self.filter_data()
        self._log("✅ Filtered data")
        
        # Step 3: Trim to task period and align events
        self.trim_events()
        self._log("✅ Trimmed events")
        
        # Step 4: Detect bad channels using MAD
        self.detect_bad_channels()
        self._log("✅ Detected bad channels")
        
        # 🔁 Step 5: Apply CAR *before* interpolation (CORRECT ORDER)
        self._log("🔧 Applying Common Average Reference (CAR) using only good channels...")
        self.raw_cropped = self.raw_cropped.set_eeg_reference('average', verbose=False)
        self._log("✅ Referenced (CAR applied)")

        # 🛠️ Step 6: Interpolate bad channels *after* referencing
        bads = self.raw_cropped.info['bads']
        if len(bads) > 0:
            self._log(f"🔧 Interpolating bad channels after CAR: {bads}")
            self.raw_cropped.interpolate_bads(reset_bads=True)
            self._log("✅ Bad channels interpolated")
        else:
            self._log("✅ No bad channels to interpolate")


        # Step 7: Run ICA cleaning (Picard recommended)
        self._log("🔧 Running ICA cleaning...")
        self.run_ica_cleaning()
        self._log("✅ ICA cleaning done")
        
        # Step 8: Save PSD comparison for QC
        self.plot_psd_comparison()
        self._log("✅ PSD comparison saved")
        
        # Step 9: Save cleaned data
        self.save_data()
        self._log("✅ Cleaned data saved")
        
        # Final message
        self._log("✅ FULL PREPROCESSING COMPLETE\n")



In [14]:
# REST HAND/FOOT BATCH PROCESSING LOOP (EYES CLOSED REST PERIOD)
# =============================================================================
if __name__ == "__main__":
    import re
    import os

    # 📁 PATHS
    base_data = '/home/jaizor/jaizor/xtra/data/PD_Rest/ON'  # ← REST data folder
    base_output = '/home/jaizor/jaizor/xtra/derivatives/eeg'
    gpsc_file = '/home/jaizor/jaizor/xtra/data/ghw280_from_egig.gpsc'
    events_base = '/home/jaizor/jaizor/xtra/data/Events'

    # 👥 AUTO-DETECT SUBJECTS FROM FILENAMES
    def get_subjects_from_data_folder(data_dir: str, pattern: str = r"PD_(\d{3})_rest_hand_foot_DBSON\.mff") -> List[str]:
        subjects = []
        pattern = re.compile(pattern)
        for fname in os.listdir(data_dir):
            match = pattern.match(fname)
            if match:
                subj_num = int(match.group(1))
                subjects.append(f"sub-{subj_num:02d}")
        return sorted(subjects, key=lambda x: int(x.split('-')[1]))

    subjects = get_subjects_from_data_folder(base_data)
    print(f"📁 Found {len(subjects)} subjects: {subjects}")

    # ⚙️ TASK CONFIG
    session = 'DBSON'
    task = 'rest_hand_foot'  # ← Task name for output folder
    project_id_prefix = 'Sbj'
    plot_qc = False
    trigger_pairs = ('c', '3a')  # ✅ EYES CLOSED (c) → FOOT TASK START (3a) = REST PERIOD
    error_log_path = os.path.join(base_output, "processing_errors.txt")

    success_count = 0
    failure_count = 0

    # 🔄 PROCESS EACH SUBJECT
    for sub in tqdm(subjects, desc="EEG Rest Preprocessing (Eyes Closed)", unit="subject"):
        try:
            subj_num = int(sub[4:])  # e.g., 'sub-01' → 1
            mff_filename = f"PD_{subj_num:03d}_{task}_{session}.mff"
            mff_path = os.path.join(base_data, mff_filename)

            if not os.path.exists(mff_path):
                raise FileNotFoundError(f"MFF file not found: {mff_path}")

            processor = EEGICAProcessor(
                subject=sub,
                session=session,
                task=task,
                mff_path=mff_path,
                gpsc_file=gpsc_file,
                events_base=events_base,
                project_id=f"{project_id_prefix}{subj_num}",
                base_output_path=base_output,
                trigger_pairs=trigger_pairs,  # ✅ ('c', '3a') for rest period
                plot=plot_qc,
                random_state=99,
                log_to_file=True
            )
            processor.run()
            tqdm.write(f"✅ SUCCESS: {sub}")
            success_count += 1
        except Exception as e:
            error_msg = f"{sub}: {str(e)}\n{traceback.format_exc()}\n{'-'*60}\n"
            tqdm.write(f"❌ FAILED: {sub} — {e}")
            with open(error_log_path, 'a') as f:
                f.write(error_msg)
            failure_count += 1

        # Update progress bar postfix
        if hasattr(tqdm, '_instances'):
            for instance in list(tqdm._instances):
                instance.set_postfix(SUCCESS=success_count, FAILED=failure_count)

    print(f"\n✅ Processing Complete: {success_count} Success, {failure_count} Failures")

📁 Found 10 subjects: ['sub-01', 'sub-02', 'sub-03', 'sub-05', 'sub-06', 'sub-07', 'sub-09', 'sub-10', 'sub-12', 'sub-14']


EEG Rest Preprocessing (Eyes Closed):   0%|          | 0/10 [00:00<?, ?subject/s]

Initialized EEGICAProcessor for sub-01/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a', '2a', '3a']
🔍 Starting event-based trimming...
✅ c: 1 onsets | First: 392.660s
✅ 3a: 2 onsets | Last: 967.362s

✂️ Trimming data to: 391.660s → 968.362s (576.70s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-01/rest_hand_foot_DBSON/sub-01_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a', '2a', '3a']
⏭️ Skipping event alignment - missing file: /home/jaizor/jaizor/xtra/data/Events/Sbj1_events_mne_binary-eve.fif
✅ Trimming & event alignment complete.
✅ Trimmed events
Detecting bad channels using MAD...
Bad

EEG Rest Preprocessing (Eyes Closed):  10%|█         | 1/10 [04:49<43:21, 289.02s/subject, FAILED=0, SUCCESS=1]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-01/rest_hand_foot_DBSON/sub-01_ses-DBSON_task-rest_hand_foot_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-01
Initialized EEGICAProcessor for sub-02/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'c', 'TT255', 'x', 'o', '1a', '2a', '3a']
🔍 Starting event-based trimming...
✅ c: 2 onsets | First: 247.960s
✅ 3a: 2 onsets | Last: 840.742s

✂️ Trimming data to: 246.960s → 841.742s (594.78s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-02/rest_hand_foot_DBSON/sub-02_cropping_info.json
✅ Preserved stim channels: ['TT140', 'c', 'TT255', 'x', 'o', '1

EEG Rest Preprocessing (Eyes Closed):  20%|██        | 2/10 [12:19<51:11, 383.92s/subject, FAILED=0, SUCCESS=2]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-02/rest_hand_foot_DBSON/sub-02_ses-DBSON_task-rest_hand_foot_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-02
Initialized EEGICAProcessor for sub-03/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a', '2a', '3a']
🔍 Starting event-based trimming...
✅ c: 1 onsets | First: 86.416s
✅ 3a: 2 onsets | Last: 567.016s

✂️ Trimming data to: 85.416s → 568.016s (482.60s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-03/rest_hand_foot_DBSON/sub-03_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a'

EEG Rest Preprocessing (Eyes Closed):  30%|███       | 3/10 [13:00<26:31, 227.41s/subject, FAILED=0, SUCCESS=3]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-03/rest_hand_foot_DBSON/sub-03_ses-DBSON_task-rest_hand_foot_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-03
Initialized EEGICAProcessor for sub-05/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['c', 'TT255', 'x', 'o', '1a', '2a', '3a']
🔍 Starting event-based trimming...
✅ c: 1 onsets | First: 31.184s
✅ 3a: 2 onsets | Last: 522.484s

✂️ Trimming data to: 30.184s → 523.484s (493.30s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-05/rest_hand_foot_DBSON/sub-05_cropping_info.json
✅ Preserved stim channels: ['c', 'TT255', 'x', 'o', '1a', '2a', '3a']
⏭️ S

EEG Rest Preprocessing (Eyes Closed):  40%|████      | 4/10 [15:39<20:02, 200.39s/subject, FAILED=0, SUCCESS=4]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-05/rest_hand_foot_DBSON/sub-05_ses-DBSON_task-rest_hand_foot_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-05
Initialized EEGICAProcessor for sub-06/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a', '2a', '3a']
🔍 Starting event-based trimming...
✅ c: 1 onsets | First: 60.532s
✅ 3a: 2 onsets | Last: 549.526s

✂️ Trimming data to: 59.532s → 550.526s (490.99s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-06/rest_hand_foot_DBSON/sub-06_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a'

EEG Rest Preprocessing (Eyes Closed):  50%|█████     | 5/10 [18:13<15:18, 183.63s/subject, FAILED=0, SUCCESS=5]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-06/rest_hand_foot_DBSON/sub-06_ses-DBSON_task-rest_hand_foot_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-06
Initialized EEGICAProcessor for sub-07/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a', '2a', '3a']
🔍 Starting event-based trimming...
✅ c: 1 onsets | First: 43.656s
✅ 3a: 2 onsets | Last: 564.648s

✂️ Trimming data to: 42.656s → 565.648s (522.99s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-07/rest_hand_foot_DBSON/sub-07_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a'

EEG Rest Preprocessing (Eyes Closed):  60%|██████    | 6/10 [18:48<08:52, 133.23s/subject, FAILED=0, SUCCESS=6]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-07/rest_hand_foot_DBSON/sub-07_ses-DBSON_task-rest_hand_foot_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-07
Initialized EEGICAProcessor for sub-09/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a', '2a', '3a']
🔍 Starting event-based trimming...
✅ c: 1 onsets | First: 82.966s
✅ 3a: 2 onsets | Last: 548.012s

✂️ Trimming data to: 81.966s → 549.012s (467.05s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-09/rest_hand_foot_DBSON/sub-09_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a'

EEG Rest Preprocessing (Eyes Closed):  70%|███████   | 7/10 [20:25<06:03, 121.18s/subject, FAILED=0, SUCCESS=7]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-09/rest_hand_foot_DBSON/sub-09_ses-DBSON_task-rest_hand_foot_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-09
Initialized EEGICAProcessor for sub-10/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a', '2a', '3a']
🔍 Starting event-based trimming...
✅ c: 1 onsets | First: 42.374s
✅ 3a: 2 onsets | Last: 566.236s

✂️ Trimming data to: 41.374s → 567.236s (525.86s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-10/rest_hand_foot_DBSON/sub-10_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', 'c', 'x', 'o', '1a'

EEG Rest Preprocessing (Eyes Closed):  80%|████████  | 8/10 [23:32<04:44, 142.14s/subject, FAILED=0, SUCCESS=8]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-10/rest_hand_foot_DBSON/sub-10_ses-DBSON_task-rest_hand_foot_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-10
Initialized EEGICAProcessor for sub-12/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['c', 'TT255', 'x', 'o', '1a', '2a', '3a']
🔍 Starting event-based trimming...
✅ c: 1 onsets | First: 172.508s
✅ 3a: 2 onsets | Last: 657.616s

✂️ Trimming data to: 171.508s → 658.616s (487.11s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-12/rest_hand_foot_DBSON/sub-12_cropping_info.json
✅ Preserved stim channels: ['c', 'TT255', 'x', 'o', '1a', '2a', '3a']

✅

EEG Rest Preprocessing (Eyes Closed):  90%|█████████ | 9/10 [25:37<02:16, 136.99s/subject, FAILED=0, SUCCESS=9]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-12/rest_hand_foot_DBSON/sub-12_ses-DBSON_task-rest_hand_foot_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-12
Initialized EEGICAProcessor for sub-14/DBSON
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...


EEG Rest Preprocessing (Eyes Closed): 100%|██████████| 10/10 [26:05<00:00, 156.53s/subject, FAILED=1, SUCCESS=9]

Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: []
🔍 Starting event-based trimming...
❌ FAILED: sub-14 — Trigger channels missing: []

✅ Processing Complete: 9 Success, 1 Failures





In [10]:
# BIMA BATCH PROCESSING LOOP
# =============================================================================
if __name__ == "__main__":
    base_data = '/home/jaizor/jaizor/xtra/data/PD_Bima'
    base_output = '/home/jaizor/jaizor/xtra/derivatives/eeg'
    gpsc_file = '/home/jaizor/jaizor/xtra/data/ghw280_from_egig.gpsc'
    events_base = '/home/jaizor/jaizor/xtra/data/Events'


    subjects = ['sub-01', 'sub-02', 'sub-03', 'sub-05', 'sub-06', 'sub-07', 'sub-08', 'sub-09', 'sub-10', 'sub-11', 'sub-12', 'sub-14']

    session = 'DBSOFF'
    task = 'bima'
    project_id_prefix = 'Sbj'
    plot_qc = False  
    trigger_pairs = ('1a', '6a')
    error_log_path = os.path.join(base_output, "processing_errors.txt")

    success_count = 0
    failure_count = 0

    for sub in tqdm(subjects, desc="ParallelGroup EEG Processing", unit="subject"):
        try:
            subj_num = sub[4:]
            mff_path = f'{base_data}/PD_{subj_num.zfill(3)}_bima_DBSOFF.mff'

            if not os.path.exists(mff_path):
                raise FileNotFoundError(f"MFF file not found: {mff_path}")

            processor = EEGICAProcessor(
                subject=sub,
                session=session,
                task=task,
                mff_path=mff_path,
                gpsc_file=gpsc_file,
                events_base=events_base,
                project_id=f"{project_id_prefix}{subj_num}",
                base_output_path=base_output,
                trigger_pairs=trigger_pairs,
                plot=plot_qc,
                random_state=99,
                log_to_file=True
            )
            processor.run()
            tqdm.write(f"✅ SUCCESS: {sub}")
            success_count += 1
        except Exception as e:
            error_msg = f"{sub}: {str(e)}\n{traceback.format_exc()}\n{'-'*60}\n"
            tqdm.write(f"❌ FAILED: {sub} — {e}")
            with open(error_log_path, 'a') as f:
                f.write(error_msg)
            failure_count += 1

        if hasattr(tqdm, '_instances'):
            for instance in list(tqdm._instances):
                instance.set_postfix(SUCCESS=success_count, FAILED=failure_count)

    print(f"\n✅ Processing Complete: {success_count} Success, {failure_count} Failures")


ParallelGroup EEG Processing:   0%|          | 0/12 [00:00<?, ?subject/s]

Initialized EEGICAProcessor for sub-01/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a', '7a', '8a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 252.418s
✅ 6a: 200 onsets | Last: 679.362s

✂️ Trimming data to: 251.418s → 680.362s (428.94s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-01/bima_DBSOFF/sub-01_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a', '7a', '8a']

✅ EVENT ALIGNMENT COMPLETE
   Original events: 585
   After trimming:  578
   Duration: 251.42s → 680.36s
   Events saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-01/bima_

ParallelGroup EEG Processing:   8%|▊         | 1/12 [04:03<44:34, 243.10s/subject, FAILED=0, SUCCESS=1]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-01/bima_DBSOFF/sub-01_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-01
Initialized EEGICAProcessor for sub-02/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a', '7a', '8a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 117.290s
✅ 6a: 200 onsets | Last: 610.220s

✂️ Trimming data to: 116.290s → 611.220s (494.93s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-02/bima_DBSOFF/sub-02_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a',

ParallelGroup EEG Processing:  17%|█▋        | 2/12 [10:23<53:56, 323.67s/subject, FAILED=0, SUCCESS=2]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-02/bima_DBSOFF/sub-02_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-02
Initialized EEGICAProcessor for sub-03/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a', '7a', '8a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 116.236s
✅ 6a: 200 onsets | Last: 618.380s

✂️ Trimming data to: 115.236s → 619.380s (504.14s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-03/bima_DBSOFF/sub-03_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a',

ParallelGroup EEG Processing:  25%|██▌       | 3/12 [11:01<29:02, 193.56s/subject, FAILED=0, SUCCESS=3]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-03/bima_DBSOFF/sub-03_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-03
Initialized EEGICAProcessor for sub-05/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a', 'TT185']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 15.882s
✅ 6a: 200 onsets | Last: 538.914s

✂️ Trimming data to: 14.882s → 539.914s (525.03s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-05/bima_DBSOFF/sub-05_cropping_info.json
✅ Preserved stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a', 'TT185']



ParallelGroup EEG Processing:  33%|███▎      | 4/12 [13:49<24:25, 183.14s/subject, FAILED=0, SUCCESS=4]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-05/bima_DBSOFF/sub-05_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-05
Initialized EEGICAProcessor for sub-06/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 46.094s
✅ 6a: 200 onsets | Last: 569.138s

✂️ Trimming data to: 45.094s → 570.138s (525.04s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-06/bima_DBSOFF/sub-06_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a']



ParallelGroup EEG Processing:  42%|████▏     | 5/12 [17:17<22:26, 192.43s/subject, FAILED=0, SUCCESS=5]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-06/bima_DBSOFF/sub-06_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-06
Initialized EEGICAProcessor for sub-07/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 35.862s
✅ 6a: 200 onsets | Last: 558.890s

✂️ Trimming data to: 34.862s → 559.890s (525.03s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-07/bima_DBSOFF/sub-07_cropping_info.json
✅ Preserved stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a']

✅ EVENT ALIGNMENT 

ParallelGroup EEG Processing:  50%|█████     | 6/12 [21:14<20:44, 207.39s/subject, FAILED=0, SUCCESS=6]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-07/bima_DBSOFF/sub-07_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-07
Initialized EEGICAProcessor for sub-08/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 176.160s
✅ 6a: 200 onsets | Last: 699.120s

✂️ Trimming data to: 175.160s → 700.120s (524.96s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-08/bima_DBSOFF/sub-08_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a']

ParallelGroup EEG Processing:  58%|█████▊    | 7/12 [23:02<14:35, 175.06s/subject, FAILED=0, SUCCESS=7]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-08/bima_DBSOFF/sub-08_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-08
Initialized EEGICAProcessor for sub-09/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 45.390s
✅ 6a: 200 onsets | Last: 568.400s

✂️ Trimming data to: 44.390s → 569.400s (525.01s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-09/bima_DBSOFF/sub-09_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a']



ParallelGroup EEG Processing:  67%|██████▋   | 8/12 [25:03<10:31, 157.78s/subject, FAILED=0, SUCCESS=8]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-09/bima_DBSOFF/sub-09_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-09
Initialized EEGICAProcessor for sub-10/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 62.100s
✅ 6a: 200 onsets | Last: 585.146s

✂️ Trimming data to: 61.100s → 586.146s (525.05s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-10/bima_DBSOFF/sub-10_cropping_info.json
✅ Preserved stim channels: ['TT140', 'TT255', '1a', '2a', '3a', '4a', '5a', '6a']



ParallelGroup EEG Processing:  75%|███████▌  | 9/12 [27:42<07:54, 158.23s/subject, FAILED=0, SUCCESS=9]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-10/bima_DBSOFF/sub-10_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-10
Initialized EEGICAProcessor for sub-11/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 32.116s
✅ 6a: 200 onsets | Last: 554.828s

✂️ Trimming data to: 31.116s → 555.828s (524.71s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-11/bima_DBSOFF/sub-11_cropping_info.json
✅ Preserved stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a']

✅ EVENT ALIGNMENT 

ParallelGroup EEG Processing:  83%|████████▎ | 10/12 [28:38<04:13, 126.65s/subject, FAILED=0, SUCCESS=10]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-11/bima_DBSOFF/sub-11_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-11
Initialized EEGICAProcessor for sub-12/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a']
🔍 Starting event-based trimming...
✅ 1a: 180 onsets | First: 67.734s
✅ 6a: 160 onsets | Last: 485.044s

✂️ Trimming data to: 66.734s → 486.044s (419.31s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-12/bima_DBSOFF/sub-12_cropping_info.json
✅ Preserved stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a']

✅ EVENT ALIGNMENT 

ParallelGroup EEG Processing:  92%|█████████▏| 11/12 [30:14<01:57, 117.26s/subject, FAILED=0, SUCCESS=11]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-12/bima_DBSOFF/sub-12_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-12
Initialized EEGICAProcessor for sub-14/DBSOFF
🔄 Starting preprocessing...
Loading raw data from .mff...
Parsing and applying montage from .gpsc...
Original mean position (mm): [100.83802817  94.83802817 166.92605634]
Montage applied.
✅ Loaded and montaged
Applying bandpass (1–100 Hz) and notch (60 Hz harmonics) filters...
Marked Cz as bad (flat signal).
✅ Filtered data
Stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a']
🔍 Starting event-based trimming...
✅ 1a: 200 onsets | First: 124.126s
✅ 6a: 200 onsets | Last: 646.836s

✂️ Trimming data to: 123.126s → 647.836s (524.71s)
📄 Cropping metadata saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-14/bima_DBSOFF/sub-14_cropping_info.json
✅ Preserved stim channels: ['1a', 'TT255', '2a', '3a', '4a', '5a', '6a']

✅ EVENT ALIGNMEN

ParallelGroup EEG Processing: 100%|██████████| 12/12 [34:11<00:00, 170.92s/subject, FAILED=0, SUCCESS=12]

Cleaned data saved to: /home/jaizor/jaizor/xtra/derivatives/eeg/sub-14/bima_DBSOFF/sub-14_ses-DBSOFF_task-bima_eeg_ica_cleaned_raw.fif
✅ Cleaned data saved
✅ FULL PREPROCESSING COMPLETE

✅ SUCCESS: sub-14

✅ Processing Complete: 12 Success, 0 Failures



