In [None]:
# data/loaders.py
import numpy as np
import pandas as pd
from typing import Tuple, Optional, Dict, Any
from sklearn.model_selection import train_test_split
import nibabel as nib
from pathlib import Path

class FMRIDataLoader:
    """Loader for fMRI data with multimodal stimuli.

    Files are resolved relative to ``config['data_path']``, with one
    ``sub-<subject_id>`` directory per subject.
    """

    def __init__(self, config: Dict[str, Any]):
        """Store the configuration and resolve the dataset root.

        Args:
            config: Settings mapping; must contain ``'data_path'``.

        Raises:
            KeyError: If ``'data_path'`` is missing from ``config``.
        """
        self.config = config
        self.data_path = Path(config['data_path'])

    @staticmethod
    def _flatten_volumes(data: np.ndarray) -> np.ndarray:
        """Reshape a 4D (x, y, z, time) array into 2D (time, voxels).

        Raises:
            ValueError: If ``data`` is not 4-dimensional. Without this check
                a 3D image would be reshaped with its z axis silently
                treated as time.
        """
        if data.ndim != 4:
            raise ValueError(
                f"Expected 4D fMRI data (x, y, z, time), got shape {data.shape}"
            )
        n_timepoints = data.shape[-1]
        # reshape -> (voxels, time); transpose -> (time, voxels)
        return data.reshape(-1, n_timepoints).T

    def load_fmri_data(self, subject_id: str) -> Tuple[np.ndarray, np.ndarray]:
        """Load fMRI data for a specific subject.

        Reads ``sub-<id>/func/sub-<id>_task-stimuli_bold.nii.gz`` under the
        dataset root.

        Args:
            subject_id: Subject identifier without the ``sub-`` prefix.

        Returns:
            A pair ``(data_2d, affine)`` where ``data_2d`` has shape
            (time, voxels) and ``affine`` is the image's ``affine`` attribute
            as provided by nibabel.

        Raises:
            FileNotFoundError: If the bold image does not exist.
            ValueError: If the image data is not 4-dimensional.
        """
        subject = f"sub-{subject_id}"
        fmri_file = self.data_path / subject / "func" / f"{subject}_task-stimuli_bold.nii.gz"

        if not fmri_file.exists():
            raise FileNotFoundError(f"fMRI data not found: {fmri_file}")

        img = nib.load(str(fmri_file))
        data_2d = self._flatten_volumes(img.get_fdata())

        return data_2d, img.affine

    def load_stimulus_labels(self, subject_id: str) -> np.ndarray:
        """Load stimulus labels from the subject's events TSV.

        The file ``sub-<id>_task-stimuli_events.tsv`` is looked up first in
        the subject directory and, if absent there, in its ``func``
        sub-directory (next to the bold image).

        Args:
            subject_id: Subject identifier without the ``sub-`` prefix.

        Returns:
            The values of the ``stimulus_type`` column, in file order.

        Raises:
            FileNotFoundError: If the events file is in neither location.
            KeyError: If the file has no ``stimulus_type`` column.
        """
        subject = f"sub-{subject_id}"
        file_name = f"{subject}_task-stimuli_events.tsv"
        subject_dir = self.data_path / subject

        # Subject-root location stays first so existing datasets resolve
        # exactly as before; func/ is the fallback next to the bold file.
        candidates = (subject_dir / file_name, subject_dir / "func" / file_name)
        label_file = next((path for path in candidates if path.exists()), None)

        if label_file is None:
            raise FileNotFoundError(
                f"Label file not found: {candidates[0]} (also tried {candidates[1]})"
            )

        events = pd.read_csv(label_file, sep='\t')
        if 'stimulus_type' not in events.columns:
            raise KeyError(f"'stimulus_type' column missing from {label_file}")
        return events['stimulus_type'].values

    def create_train_test_split(self,
                                X: np.ndarray,
                                y: np.ndarray,
                                test_size: float = 0.2,
                                random_state: int = 42) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """Create a train/test split stratified on ``y``.

        Thin wrapper around ``sklearn.model_selection.train_test_split``.

        Args:
            X: Samples to split.
            y: Labels aligned with ``X``; also passed as ``stratify``.
            test_size: Forwarded to ``train_test_split``.
            random_state: Seed forwarded to ``train_test_split``.

        Returns:
            ``X_train, X_test, y_train, y_test`` exactly as returned by
            ``train_test_split``.
        """
        return train_test_split(X, y, test_size=test_size,
                                random_state=random_state, stratify=y)