In [None]:
import json
from pathlib import Path
import torch
from torch.utils.data import Dataset, DataLoader
import soundfile as sf

class AudioEffectsDataset(Dataset):
    def __init__(self, metadata_json, transform=None):
        with open(metadata_json, 'r') as f:
            metadata = json.load(f)
        self.samples = []
        for orig_file, data in metadata.items():
            for chunk_dict in data["Chunk Paths"]:
                raw_path = list(chunk_dict.values())[0]
                self.samples.append({
                    "audio_path": raw_path,
                    "effect": "raw",
                    "original_file": data["File"]
                })
            for chunk_effects in data["Effect File Outputs"]:
                for eff_file in chunk_effects:
                    parts = Path(eff_file).stem.split('_')
                    effect_label = parts[-2] if len(parts) >= 2 else "unknown"
                    self.samples.append({
                        "audio_path": eff_file,
                        "effect": effect_label,
                        "original_file": data["File"]
                    })
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        audio_path = sample["audio_path"]
        audio, sr = sf.read(audio_path)
        sample["audio"] = torch.tensor(audio, dtype=torch.float32)
        sample["sr"] = sr
        if self.transform:
            sample = self.transform(sample)
        return sample

dataset = AudioEffectsDataset("labels.json")
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [103]:
for batch in dataloader:
    print(batch)
    break

RuntimeError: stack expects each tensor to be equal size, but got [341909] at entry 0 and [332488] at entry 1