In [4]:

import torch
from torch.utils.data import Dataset
import numpy as np


class SimpleEEGDataset(Dataset):
    """Simple dataset to load pre-processed numpy arrays"""
    def __init__(self, features_path='zuco/features.npy', labels_path='zuco/labels.npy', connections_path='zuco/connections.npy'):
        # Load the numpy arrays
        self.features = np.load(features_path)  # Shape: (50, 12, 250)
        self.labels = np.load(labels_path)      # Shape: (50,)
        self.connections = np.load(connections_path)  # Shape: (50, 12, 12)
        
        print(f"Loaded dataset:")
        print(f"Features shape: {self.features.shape}")
        print(f"Labels shape: {self.labels.shape}")
        print(f"Connections shape: {self.connections.shape}")
        
        # Verify shapes match
        assert self.features.shape[0] == self.labels.shape[0] == self.connections.shape[0], \
            "Number of samples must match across features, labels, and connections"
    
    def __len__(self):
        return self.features.shape[0]
    
    def __getitem__(self, idx):
        # Return feature matrix, adjacency matrix, and label
        feature_matrix = torch.as_tensor(self.features[idx], dtype=torch.float32)
        adjacency_matrix = torch.as_tensor(self.connections[idx], dtype=torch.float32)
        label = torch.as_tensor(self.labels[idx], dtype=torch.long)
        return feature_matrix, adjacency_matrix, label


In [6]:
dataset = SimpleEEGDataset(
    features_path= 'zuco/features.npy',
    labels_path='zuco/labels.npy',
    connections_path= 'zuco/connections.npy'
    )

Loaded dataset:
Features shape: (100, 12, 250)
Labels shape: (100,)
Connections shape: (100, 12, 12)


In [8]:
print(dataset.labels.shape)

(100,)


In [None]:
import h5py
from zuco_loader import load_matlab_string


with h5py.File('zuco/resultsYAC_TSR.mat', 'r') as f:

    # 2. Get sentence data
    sentence_data = f['sentenceData']
    contentData = sentence_data['content']
    
    # 3. Loop through ANY sentence you want
    for idx in range(len(contentData)):  # This goes through ALL sentences
        obj_reference_content = contentData[idx][0]  # Get reference to sentence idx
        sentence_string = load_matlab_string(f[obj_reference_content])  # Get actual sentence
        
        # Now you have the sentence!
        print(f"Sentence {idx}: {sentence_string}")


Sentence 0: <HDF5 dataset "b": shape (121, 1), type "<u2">
Sentence 1: <HDF5 dataset "c": shape (43, 1), type "<u2">
