In [None]:
import mne
import os
import gc
import psutil

import utils.config as config
from utils.helpers import perform_ica_cleaning
from utils.helpers import save_epochs
from utils.config import DATASETS

%matplotlib qt

### Constants and shared state used by the cells below ###

# Dataset descriptor looked up in the project configuration
DATASET = DATASETS["Braboszcz2017"]

# Subject and session identifiers as provided by the dataset descriptor
subjects, sessions = DATASET.subjects, DATASET.sessions

# Input location (raw EEG recordings) and output location (saved epochs)
bids_root = os.path.join(DATASET.path, "study1/raw/eeg/")
path_epochs = os.path.join(config.EPOCHS_PATH, "internal_task/braboszcz2017/")

# Handle on the current process; used to report memory usage after each subject
process = psutil.Process()


### Defining functions ###

def load_task_data(bids_root, subject_id, tasks):
    """
    Load the EEG recordings of selected tasks for one subject of a BIDS-like dataset.

    Files are looked up under ``<bids_root>/sub-<subject_id>/eeg/`` using the
    name pattern ``sub-<subject_id>_task-<task>_<suffix>``. A task whose BDF
    file is missing is skipped with a printed warning.

    Parameters:
    - bids_root: str, path to the root of the dataset.
    - subject_id: str, subject identifier (e.g., '088').
    - tasks: list of str, the task names to load (e.g., ['med2', 'think2']).

    Returns:
    - raw_dict: dict keyed by task name. Each value is a dict with
      'raw' (the preloaded MNE Raw object) and 'metadata' (a dict that holds,
      when the files exist, 'json': the unparsed text of the ``_eeg.json``
      sidecar, and 'tsv': the path of the ``_channels.tsv`` file, which is
      not read here).

    Raises:
    - FileNotFoundError: if the subject's ``eeg`` folder does not exist.
    - ValueError: if none of the requested tasks could be loaded.
    """
    # Define the subject's EEG folder
    subject_path = os.path.join(bids_root, f"sub-{subject_id}", "eeg")

    if not os.path.exists(subject_path):
        raise FileNotFoundError(f"Subject folder not found: {subject_path}")

    raw_dict = {}  # Dictionary to hold Raw objects (and metadata) for each task

    for task in tasks:
        # All files of one task share this path prefix; only the suffix differs
        task_prefix = os.path.join(subject_path, f"sub-{subject_id}_task-{task}")

        # Locate the BDF file for the specific task
        bdf_file = f"{task_prefix}_eeg.bdf"
        if not os.path.exists(bdf_file):
            print(f"WARNING: BDF file not found for task '{task}'. Skipping...")
            continue

        # Load the BDF file
        raw = mne.io.read_raw_bdf(bdf_file, preload=True)
        print(f"Loaded BDF file for task '{task}': {bdf_file}")

        # Optional sidecar metadata files
        json_file = f"{task_prefix}_eeg.json"
        tsv_file = f"{task_prefix}_channels.tsv"

        metadata = {}
        if os.path.exists(json_file):
            # Decode explicitly as UTF-8 so the result does not depend on the
            # platform's default locale encoding.
            with open(json_file, 'r', encoding='utf-8') as f:
                metadata['json'] = f.read()
            print(f"Loaded metadata JSON for task '{task}'")

        if os.path.exists(tsv_file):
            metadata['tsv'] = tsv_file  # Store TSV file path for later use if needed
            print(f"Loaded metadata TSV for task '{task}'")

        # Add the loaded data and metadata to the dictionary
        raw_dict[task] = {
            'raw': raw,
            'metadata': metadata
        }

    if not raw_dict:
        raise ValueError(f"No data was loaded for subject {subject_id} with tasks {tasks}.")

    return raw_dict

In [None]:
# For every subject: load the saved epochs, ICA-clean the "med2" and "think2"
# epochs separately, concatenate the cleaned epochs and save them.
for subject in subjects:
    file_path = os.path.join(path_epochs, f'sub-{subject}_epo.fif')
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"No epochs found for subject {subject} at {file_path}")

    print(f"Loading {file_path}")
    # mne.read_epochs returns an Epochs object or raises, so no None check is needed
    epochs = mne.read_epochs(file_path, preload=True)

    # Extract epochs for each session
    epochs_med2 = epochs["med2"]
    epochs_think2 = epochs["think2"]

    # Perform ICA on each session's epochs independently
    cleaned_epochs_med2, ica_med2 = perform_ica_cleaning(epochs_med2, subject)
    cleaned_epochs_think2, ica_think2 = perform_ica_cleaning(epochs_think2, subject)

    # Concatenate the cleaned epochs of both sessions
    epochs_concat = mne.concatenate_epochs([cleaned_epochs_med2, cleaned_epochs_think2])

    # Save the epochs
    save_epochs(epochs_concat, path_epochs, subject, suffix="ica_cleaned")

    # Drop every per-subject epochs object (not just the loaded one) so that
    # gc.collect() can reclaim them and the reported memory figure is after cleanup.
    del epochs, epochs_med2, epochs_think2
    del cleaned_epochs_med2, cleaned_epochs_think2, epochs_concat
    gc.collect()
    print(f"Memory after cleanup: {process.memory_info().rss / 1e6:.2f} MB")