# Fast Audio Clip Dataset for PyTorch

- Reads WAV audio at ~10M samples/sec (from an SSD; no resampling)
- Splits audio files into short clips (very fast)
- Supports resampling and downmixing to mono
- Supports WAV, FLAC, OGG audio

In [1]:
%load_ext autoreload
%autoreload 2

## Import dependencies

In [2]:
from time import time
from tqdm.auto import tqdm
from pathlib import Path
from IPython.display import display, Audio

from torch.utils.data import DataLoader

from beatbrain.datasets.audio import AudioClipDataset

## Define constants

In [3]:
AUDIO_DIR = Path("../data/edm/wav/")
MAX_SEGMENT_LENGTH = 5
MIN_SEGMENT_LENGTH = 5
SAMPLE_RATE = 22050
MONO = True

## Create dataset

In [4]:
dataset = AudioClipDataset(AUDIO_DIR, max_segment_length=MAX_SEGMENT_LENGTH, min_segment_length=MIN_SEGMENT_LENGTH, sample_rate=SAMPLE_RATE, mono=MONO)

## Preview audio clip

In [5]:
audio, sr = dataset[0]
display(Audio(audio, rate=sr))

## Benchmark read performance

In [7]:
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, pin_memory=True)
start = time()
for audio, sr in tqdm(dataloader):
    pass
elapsed = time() - start
print(f"Total time: {elapsed:.2f}s")
print(f"{len(dataset) * SAMPLE_RATE / (elapsed * 1e6):.5f} M samples/sec")

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1062.0), HTML(value='')))


Total time: 50.93s
14.70195 M samples/sec
