## 🎧 Using Built-in Audio Datasets in PyTorch (torchaudio)
This notebook demonstrates loading the SPEECHCOMMANDS dataset with `torchaudio.datasets`, inspecting a randomly chosen sample, and visualizing it as a waveform and as a mel spectrogram.

In [None]:
import torch
import torchaudio
from torchaudio.datasets import SPEECHCOMMANDS
import os
import random
import matplotlib.pyplot as plt
import torchaudio.transforms as T

### 1. Load the SPEECHCOMMANDS Dataset

In [None]:
# Define the root path for dataset download
root = './data'

# Load training subset
dataset = SPEECHCOMMANDS(root=root, download=True, subset='training')

print("Number of samples:", len(dataset))

# Inspect one example
waveform, sample_rate, label, *_ = random.choice(dataset)
print(f"Label: {label}, Sample rate: {sample_rate}, Waveform shape: {waveform.shape}")

### 2. Visualize the Waveform

In [None]:
plt.figure(figsize=(10, 3))
plt.plot(waveform.t().numpy())
plt.title(f"Waveform of '{label}'")
plt.xlabel("Time")
plt.ylabel("Amplitude")
plt.grid(True)
plt.show()

### 3. Convert to Mel Spectrogram

In [None]:
mel_spectrogram = T.MelSpectrogram(sample_rate=sample_rate, n_mels=64)
mel_spec = mel_spectrogram(waveform)

# Plot mel spectrogram
plt.figure(figsize=(10, 4))
plt.imshow(mel_spec.log2()[0,:,:].numpy(), cmap='viridis', aspect='auto')
plt.title(f"Mel Spectrogram of '{label}'")
plt.xlabel("Frame")
plt.ylabel("Mel Bin")
plt.colorbar()
plt.show()