# 🎙️ Speech Dataset EDA
This notebook visualizes the waveform, spectrogram, and MFCCs of a sample audio clip as exploratory analysis for speech-to-text preprocessing.

In [1]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# Load sample audio
# `y` (the samples) and `sr` (the sample rate) are reused by every plotting cell below.
audio_path = '../data/train/audio/sample.wav'  # Replace with actual file
# sr=16000 asks librosa for a 16 kHz sample rate instead of the file's native rate.
y, sr = librosa.load(audio_path, sr=16000)
# Duration = number of samples / sample rate (assumes `y` is a 1-D mono signal,
# which is librosa.load's default — confirm if mono=False is ever passed).
print(f'Sample rate: {sr}, Duration: {len(y)/sr:.2f} seconds')

In [2]:
# Time-domain view of the clip: amplitude against time on a wide, short canvas.
# Uses an explicit Figure/Axes pair rather than the implicit pyplot "current axes".
fig, ax = plt.subplots(figsize=(14, 4))
librosa.display.waveshow(y, sr=sr, ax=ax)
ax.set_title('Audio Waveform')
plt.show()

In [3]:
# Plot spectrogram
# librosa.stft returns a complex-valued matrix. Take its magnitude explicitly
# before converting to decibels: handing the complex array to amplitude_to_db
# makes librosa emit a warning and silently drop the phase.
stft_magnitude = np.abs(librosa.stft(y))
# ref=np.max makes the dB scale relative to the loudest bin (peak = 0 dB).
D = librosa.amplitude_to_db(stft_magnitude, ref=np.max)

fig, ax = plt.subplots(figsize=(10, 4))
# Keep the returned mesh so the colorbar is tied to this image explicitly.
img = librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=ax)
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set_title('Spectrogram')
plt.show()

In [4]:
# Mel-frequency cepstral coefficients: 13 coefficients requested via n_mfcc.
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

# Draw on an explicit Axes and attach the colorbar to the returned image,
# instead of relying on pyplot's "current figure / current image" state.
fig, ax = plt.subplots(figsize=(10, 4))
img = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=ax)
fig.colorbar(img, ax=ax)
ax.set_title('MFCCs')
plt.show()