In [None]:
import os
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# --- 1. Define Your Paths ---
# All three inputs live under a single project directory. Set the
# EEG_PROJECT_ROOT environment variable to point somewhere else; when it is
# unset, the original cluster location is used, so existing runs are unchanged.
PROJECT_ROOT = os.environ.get(
    'EEG_PROJECT_ROOT', '/ocean/projects/cis250019p/gandotra/11785-gp-eeg'
)
BIDS_ROOT = f'{PROJECT_ROOT}/ds005589'
IMAGE_DIR = f'{PROJECT_ROOT}/images'
CAPTIONS_FILE = f'{PROJECT_ROOT}/captions.txt'

# --- 2. Define Your Subject List ---
ALL_SUBJECTS = ['sub-02', 'sub-03', 'sub-05', 'sub-09', 'sub-14', 'sub-15', 
                'sub-17', 'sub-19', 'sub-20', 'sub-23', 'sub-24', 'sub-28', 'sub-29']

# --- 3. Define Image Transforms (e.g., for CLIP) ---
# (You would get the specific transforms from your model)
# NOTE(review): the mean/std below are the ImageNet statistics used by
# torchvision's pretrained models. CLIP's published preprocessing uses
# different values -- confirm against the image encoder actually used.
image_transforms = transforms.Compose([
    transforms.Resize(224),       # shorter side -> 224, aspect ratio kept
    transforms.CenterCrop(224),   # square 224x224 crop
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# DataLoader settings shared by every split.
BATCH_SIZE = 32
NUM_WORKERS = 4


def _make_split_dataset(session_list):
    """Build an EEGMultimodalDataset restricted to the given sessions.

    Every split shares the same BIDS root, image directory, captions file,
    subject list and image transform; only the sessions differ.

    Args:
        session_list: list of session identifiers (e.g. ``['ses-04']``)
            forwarded unchanged to ``EEGMultimodalDataset``.

    Returns:
        The constructed ``EEGMultimodalDataset`` instance.
    """
    return EEGMultimodalDataset(
        bids_root=BIDS_ROOT,
        images_dir=IMAGE_DIR,
        captions_path=CAPTIONS_FILE,
        subject_list=ALL_SUBJECTS,
        session_list=session_list,
        image_transform=image_transforms
    )


# --- 4. Create the 3 Datasets (Train/Val/Test) ---
# Splits are made by session (the original notebook attributes this rule to
# the paper): sessions 1-3 train, session 4 validates, session 5 tests.

print("Creating Training Dataset...")
train_dataset = _make_split_dataset(['ses-01', 'ses-02', 'ses-03'])  # 3 sessions for training

print("\nCreating Validation Dataset...")
val_dataset = _make_split_dataset(['ses-04'])  # 1 session for validation

print("\nCreating Test Dataset...")
test_dataset = _make_split_dataset(['ses-05'])  # 1 session for testing

# --- 5. Create PyTorch DataLoaders ---
# Only the training loader shuffles; evaluation loaders keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
# The test split previously had a dataset but no loader.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# --- 6. Test the loader ---
# Pull a single batch as a smoke test; each batch unpacks into EEG, image and
# caption parts.
print("\nTesting the training loader...")
eeg_batch, image_batch, caption_batch = next(iter(train_loader))

print(f"EEG batch shape:   {eeg_batch.shape}")
print(f"Image batch shape: {image_batch.shape}")
print(f"Caption batch (first item): '{caption_batch[0]}'")

In [None]:
import os
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# NOTE(review): this cell repeats the same setup as the preceding cell;
# consider keeping only one of them so the notebook has a single source of
# truth for paths, datasets and loaders.

# --- 1. Define Your Paths ---
# The three inputs share one project directory. EEG_PROJECT_ROOT (environment
# variable) overrides it; the default is the original cluster location.
PROJECT_ROOT = os.environ.get(
    'EEG_PROJECT_ROOT', '/ocean/projects/cis250019p/gandotra/11785-gp-eeg'
)
BIDS_ROOT = f'{PROJECT_ROOT}/ds005589'
IMAGE_DIR = f'{PROJECT_ROOT}/images'
CAPTIONS_FILE = f'{PROJECT_ROOT}/captions.txt'

# --- 2. Define Your Subject List ---
ALL_SUBJECTS = ['sub-02', 'sub-03', 'sub-05', 'sub-09', 'sub-14', 'sub-15', 
                'sub-17', 'sub-19', 'sub-20', 'sub-23', 'sub-24', 'sub-28', 'sub-29']

# --- 3. Define Image Transforms (e.g., for CLIP) ---
# (You would get the specific transforms from your model)
# NOTE(review): these mean/std values are the ImageNet statistics; CLIP's
# published preprocessing uses different ones -- confirm for your encoder.
image_transforms = transforms.Compose([
    transforms.Resize(224),       # shorter side -> 224, aspect ratio kept
    transforms.CenterCrop(224),   # square 224x224 crop
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# --- 4. Create the 3 Datasets (Train/Val/Test) ---
# Splits are made by session (the original notebook attributes this rule to
# the paper). The table below is the single place that assigns sessions.
SPLIT_SESSIONS = {
    'train': ['ses-01', 'ses-02', 'ses-03'],  # 3 sessions for training
    'val': ['ses-04'],                        # 1 session for validation
    'test': ['ses-05'],                       # 1 session for testing
}

# Constructor arguments that are identical for every split.
shared_dataset_kwargs = dict(
    bids_root=BIDS_ROOT,
    images_dir=IMAGE_DIR,
    captions_path=CAPTIONS_FILE,
    subject_list=ALL_SUBJECTS,
    image_transform=image_transforms,
)

print("Creating Training Dataset...")
train_dataset = EEGMultimodalDataset(
    session_list=SPLIT_SESSIONS['train'], **shared_dataset_kwargs
)

print("\nCreating Validation Dataset...")
val_dataset = EEGMultimodalDataset(
    session_list=SPLIT_SESSIONS['val'], **shared_dataset_kwargs
)

print("\nCreating Test Dataset...")
test_dataset = EEGMultimodalDataset(
    session_list=SPLIT_SESSIONS['test'], **shared_dataset_kwargs
)

# --- 5. Create PyTorch DataLoaders ---
# Batch size and worker count are shared by all loaders; only training shuffles.
BATCH_SIZE = 32
NUM_WORKERS = 4
shared_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **shared_loader_kwargs)
# The test split previously had a dataset but no loader.
test_loader = DataLoader(test_dataset, shuffle=False, **shared_loader_kwargs)

# --- 6. Test the loader ---
# Fetch one training batch as a smoke test; it unpacks into EEG, image and
# caption parts.
print("\nTesting the training loader...")
eeg_batch, image_batch, caption_batch = next(iter(train_loader))

print(f"EEG batch shape:   {eeg_batch.shape}")
print(f"Image batch shape: {image_batch.shape}")
print(f"Caption batch (first item): '{caption_batch[0]}'")