# SHD & SSC Dataset Loader from Zenodo

Download neuromorphic datasets (SHD-Norm and SSC-Norm) from Zenodo and create data loaders for training.

In [2]:
import requests
from tqdm import tqdm
import os

def download_dataset(url, filename, timeout=30, block_size=1024):
    """Download ``url`` to ``filename``, showing a tqdm progress bar.

    The response is streamed into ``<filename>.part`` and only renamed to
    ``filename`` once every byte has been written. A failed or interrupted
    download therefore never leaves a truncated file behind that the
    "already exists" check below would mistake for a complete dataset.

    Args:
        url: Address of the file to fetch.
        filename: Destination path. If it already exists nothing is downloaded.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
        block_size: Chunk size in bytes requested from ``iter_content``.

    Returns:
        None.

    Raises:
        Whatever ``requests`` raises for connection problems or for an HTTP
        error status (via ``raise_for_status``), and ``OSError`` for local
        file-system failures.
    """
    if os.path.exists(filename):
        print(f" {filename} already exists, skipping download.")
        return

    print(f" Downloading {filename}...")
    partial_path = f"{filename}.part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Fail loudly on 4xx/5xx instead of saving the error page as data.
            response.raise_for_status()
            # A missing header becomes None so tqdm shows an open-ended bar
            # rather than a bar whose total is 0.
            total_size = int(response.headers.get('content-length', 0)) or None

            with open(partial_path, "wb") as file, tqdm(
                desc=f"Downloading {filename}",
                total=total_size,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in response.iter_content(block_size):
                    if not data:
                        # Empty chunks carry no payload.
                        continue
                    file.write(data)
                    bar.update(len(data))

        # Publish the file only after the whole body was written.
        os.replace(partial_path, filename)
    finally:
        # After a successful os.replace the partial file is gone; otherwise
        # remove the incomplete download.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print(f" Downloaded: {filename}")

# Zenodo record 16153275: local filename -> download URL
datasets = {
    "shd_norm.mat": "https://zenodo.org/record/16153275/files/shd_norm.mat",
    "ssc_norm.h5": "https://zenodo.org/record/16153275/files/ssc_norm.h5",
}

# Fetch every listed file in turn; download_dataset skips files already on disk
print("=== Downloading Neuromorphic Datasets from Zenodo ===")
for filename in datasets:
    url = datasets[filename]
    download_dataset(url, filename)

print("\n All datasets downloaded successfully!")

=== Downloading Neuromorphic Datasets from Zenodo ===
 Downloading shd_norm.mat...


Downloading shd_norm.mat: 100%|██████████| 117M/117M [00:23<00:00, 5.31MB/s] 



 Downloaded: shd_norm.mat
 Downloading ssc_norm.h5...


Downloading ssc_norm.h5: 100%|██████████| 33.4M/33.4M [00:07<00:00, 4.89MB/s]

 Downloaded: ssc_norm.h5

 All datasets downloaded successfully!





## Setup and Imports

Import necessary libraries for loading and preprocessing neuromorphic datasets.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
from scipy import io
import h5py
import os

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


## Dataset Loading and DataLoader Creation

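Load both SHD-Norm and SSC-Norm datasets and create PyTorch DataLoaders for easy batch processing. With the default ratios (60% train / 15% validation / 15% test) the split is taken in file order, and the remaining 10% of the samples are not used.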

In [4]:
# === Map-style Dataset wrapping spike arrays and their labels ===
class SpikeDataset(Dataset):
    """Pairs each entry of ``X`` with the label at the same index in ``Y``.

    Items are converted to tensors on access: the sample as ``float32`` and
    the label as ``long``. ``dataset_name`` is stored for identification
    only and is not used by any method of this class.
    """

    def __init__(self, X, Y, dataset_name="unknown"):
        self.X, self.Y = X, Y
        self.dataset_name = dataset_name

    def __len__(self):
        # The number of labels defines the dataset size.
        return len(self.Y)

    def __getitem__(self, idx):
        spikes = torch.tensor(self.X[idx], dtype=torch.float32)
        label = torch.tensor(self.Y[idx], dtype=torch.long)
        return spikes, label

# === Load SHD-Norm dataset (.mat format) ===
def load_shd_norm(path="shd_norm.mat"):
    """Load the SHD-Norm dataset from a MATLAB ``.mat`` file.

    Args:
        path: Location of the ``.mat`` file. Defaults to ``shd_norm.mat`` in
            the current working directory, which is where the download cell
            stores it.

    Returns:
        ``(X, Y)`` where ``X`` is the array stored under the key ``"X"`` and
        ``Y`` is the array stored under ``"Y"`` flattened to 1-D, or
        ``(None, None)`` when ``path`` does not exist.
    """
    if not os.path.exists(path):
        print(f" {path} not found. Please run the download cell first.")
        return None, None

    print(" Loading SHD-Norm dataset...")
    data = io.loadmat(path)
    X = data["X"]  # shape: (samples, neurons, time_steps)
    # ravel() flattens the label array to 1-D whatever shape loadmat returned.
    Y = data["Y"].ravel()

    print(f"    Dataset shape: X={X.shape}, Y={Y.shape}")
    print(f"    Number of classes: {len(np.unique(Y))}")
    # np.bincount needs non-negative integer labels.
    print(f"    Class distribution: {np.bincount(Y)}")

    return X, Y

# === Load SSC-Norm dataset (.h5 format) ===
def load_ssc_norm(path="ssc_norm.h5"):
    """Load the SSC-Norm dataset from an HDF5 file.

    Args:
        path: Location of the ``.h5`` file. Defaults to ``ssc_norm.h5`` in
            the current working directory, which is where the download cell
            stores it.

    Returns:
        ``(X, Y)`` where ``X`` is the ``"X"`` dataset read fully into memory
        and cast to ``float32`` and ``Y`` is the ``"Y"`` dataset flattened to
        1-D, or ``(None, None)`` when ``path`` does not exist.
    """
    if not os.path.exists(path):
        print(f" {path} not found. Please run the download cell first.")
        return None, None

    print(" Loading SSC-Norm dataset...")
    with h5py.File(path, 'r') as f:
        # [:] copies the data out of the file so the arrays stay valid after
        # the file handle is closed.
        X = f["X"][:].astype(np.float32)  # shape: (samples, neurons, time_steps)
        Y = f["Y"][:].ravel()

    print(f"    Dataset shape: X={X.shape}, Y={Y.shape}")
    print(f"    Number of classes: {len(np.unique(Y))}")
    # np.bincount needs non-negative integer labels.
    print(f"    Class distribution: {np.bincount(Y)}")

    return X, Y

# === Create DataLoaders ===
def create_dataloaders(X, Y, dataset_name, batch_size=128, train_ratio=0.6, val_ratio=0.15, test_ratio=0.15):
    """
    Create train, validation, and test DataLoaders from dataset.

    The split is positional: the first ``train_ratio`` of the samples form
    the training set, the next ``val_ratio`` the validation set and the next
    ``test_ratio`` the test set. Samples are NOT shuffled before splitting.
    NOTE(review): if the stored samples are ordered (e.g. by class), shuffle
    X and Y together before calling this function.

    The default ratios sum to 0.9, so by default the last 10% of the samples
    belong to no split; a note is printed whenever samples are left unused.

    Args:
        X: Input spike data
        Y: Labels
        dataset_name: Name of the dataset for logging
        batch_size: Batch size for DataLoaders
        train_ratio: Proportion of data for training
        val_ratio: Proportion of data for validation
        test_ratio: Proportion of data for testing

    Returns:
        train_loader, val_loader, test_loader
        (``None, None, None`` when ``X`` or ``Y`` is ``None``)

    Raises:
        ValueError: If a ratio is negative or the ratios sum to more than 1.
    """
    if X is None or Y is None:
        return None, None, None

    ratios = (train_ratio, val_ratio, test_ratio)
    ratio_sum = sum(ratios)
    if any(r < 0 for r in ratios):
        raise ValueError(f"Split ratios must be non-negative, got {ratios}")
    # Small tolerance so that e.g. three floats meant to sum to 1 are accepted.
    if ratio_sum > 1.0 + 1e-9:
        raise ValueError(
            f"Split ratios must sum to at most 1, got {ratios} (sum={ratio_sum})"
        )

    N = len(Y)

    def _boundary(fraction):
        # The epsilon stops float error (e.g. 28.999999999999996) from
        # truncating one sample too low; min() keeps the index inside the data.
        return min(N, int(N * fraction + 1e-9))

    # Define splits
    train_end = _boundary(train_ratio)
    val_end = _boundary(train_ratio + val_ratio)
    test_end = _boundary(train_ratio + val_ratio + test_ratio)

    # Split indices
    train_indices = np.arange(0, train_end)
    val_indices = np.arange(train_end, val_end)
    test_indices = np.arange(val_end, test_end)

    # Shuffle training indices (only reorders samples inside the training
    # slice; the train DataLoader below additionally shuffles every epoch)
    np.random.shuffle(train_indices)

    # Create datasets
    train_dataset = SpikeDataset(X[train_indices], Y[train_indices], f"{dataset_name}_train")
    val_dataset = SpikeDataset(X[val_indices], Y[val_indices], f"{dataset_name}_val")
    test_dataset = SpikeDataset(X[test_indices], Y[test_indices], f"{dataset_name}_test")

    # Create DataLoaders (drop_last=True on train discards a final short batch)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

    print(f"     {dataset_name} DataLoaders created:")
    print(f"      Train: {len(train_dataset)} samples ({len(train_loader)} batches)")
    print(f"      Val:   {len(val_dataset)} samples ({len(val_loader)} batches)")
    print(f"      Test:  {len(test_dataset)} samples ({len(test_loader)} batches)")
    if test_end < N:
        # Make the silently discarded tail visible to the user.
        print(f"      Note:  {N - test_end} of {N} samples are not assigned to any split "
              f"(ratios sum to {ratio_sum:.2f})")

    return train_loader, val_loader, test_loader

# === Load both datasets ===
print("=== Loading Neuromorphic Datasets ===")

# Load SHD-Norm
X_shd, Y_shd = load_shd_norm()
shd_train_loader, shd_val_loader, shd_test_loader = create_dataloaders(
    X_shd, Y_shd, "SHD-Norm", batch_size=128
)

print()

# Load SSC-Norm  
X_ssc, Y_ssc = load_ssc_norm()
ssc_train_loader, ssc_val_loader, ssc_test_loader = create_dataloaders(
    X_ssc, Y_ssc, "SSC-Norm", batch_size=128
)

# A missing file makes the loaders None; only claim success when both
# datasets actually produced DataLoaders.
missing_datasets = [
    name
    for name, loader in (("SHD-Norm", shd_train_loader), ("SSC-Norm", ssc_train_loader))
    if loader is None
]
if missing_datasets:
    print(f"\n Could not load: {', '.join(missing_datasets)}. "
          "Run the download cell, then re-run this cell.")
else:
    print("\n All datasets loaded and DataLoaders created successfully!")

=== Loading Neuromorphic Datasets ===
 Loading SHD-Norm dataset...
    Dataset shape: X=(5460, 224, 100), Y=(5460,)
    Number of classes: 20
    Class distribution: [273 273 273 273 273 273 273 273 273 273 273 273 273 273 273 273 273 273
 273 273]
     SHD-Norm DataLoaders created:
      Train: 3276 samples (25 batches)
      Val:   819 samples (7 batches)
      Test:  819 samples (7 batches)

 Loading SSC-Norm dataset...
     SHD-Norm DataLoaders created:
      Train: 3276 samples (25 batches)
      Val:   819 samples (7 batches)
      Test:  819 samples (7 batches)

 Loading SSC-Norm dataset...
    Dataset shape: X=(40390, 285, 100), Y=(40390,)
    Number of classes: 35
    Class distribution: [1154 1154 1154 1154 1154 1154 1154 1154 1154 1154 1154 1154 1154 1154
 1154 1154 1154 1154 1154 1154 1154 1154 1154 1154 1154 1154 1154 1154
 1154 1154 1154 1154 1154 1154 1154]
    Dataset shape: X=(40390, 285, 100), Y=(40390,)
    Number of classes: 35
    Class distribution: [1154 1154 115