# Read Audio Files

This notebook demonstrates how to:
- Load and inspect WAV audio files using librosa and pydub
- Display waveforms and spectrograms
- Normalize audio to a target peak level (dBFS)
- Play audio inline in Jupyter

## 1. Imports and Configuration

In [None]:
import os
import sys

# Add project root to path for imports
# This prepends the *parent* of the current working directory to sys.path so
# that `from src.utils import Config` below can be resolved.
# NOTE(review): that parent is the project root only when the kernel's working
# directory is a first-level subfolder of the project — confirm.
sys.path.insert(0, os.path.dirname(os.path.abspath(".")))

import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
from pydub import AudioSegment

from src.utils import Config

In [None]:
# Load configuration
# `Config` is imported from src.utils and built with no arguments; later cells
# use this object to obtain the audio-assets and playground directories.
cfg = Config()
# NOTE(review): presumably prints the paths the config resolved to — confirm
# against src.utils.Config.print_paths.
cfg.print_paths()

## 2. List Available Audio Samples

In [None]:
# Get samples directory from config
SAMPLES_DIR = cfg.get_audio_assets_dir() / "samples"

print(f"Samples directory: {SAMPLES_DIR}")
print("\nAvailable audio files:")

# Match the extension case-insensitively so files such as "CLIP.WAV" are not
# skipped, and sort so that list indices used by later cells are stable.
wav_files = sorted(f for f in os.listdir(SAMPLES_DIR) if f.lower().endswith(".wav"))
for f in wav_files:
    print(f"  {f}")

# Make an empty directory obvious instead of printing a blank list.
if not wav_files:
    print("  (none found)")

## 3. Load and Inspect a Single Audio File

In [None]:
# Select an audio file to analyze
# Fail with a clear message rather than a bare IndexError when the samples
# directory contained no WAV files.
if not wav_files:
    raise FileNotFoundError(f'No .wav files were found in "{SAMPLES_DIR}".')

file = SAMPLES_DIR / wav_files[0]  # Change index to select different file

if not os.path.exists(file):
    print(f'ERROR: File "{file}" was NOT found.')
else:
    # Load audio with librosa.
    # sr=None keeps the file's native sampling rate; by default librosa
    # resamples to 22050 Hz, which would make the rate and sample count
    # reported below describe the resampled signal rather than the file.
    # Multi-channel audio is still mixed down to mono (librosa's default).
    audio_sample, sr = librosa.load(file, sr=None)
    samples = len(audio_sample)

    print(f"File       : {os.path.basename(file)}")
    print(f"Sample rate: {sr} Hz")
    print(f"Samples    : {samples}")
    print(f"Duration   : {1000 * samples / sr:.0f} ms")
    print()

    # Play audio inline
    ipd.display(ipd.Audio(str(file)))

## 4. Display Waveform

In [None]:
%matplotlib inline
plt.figure(figsize=(14, 5))
librosa.display.waveshow(audio_sample, sr=sr, color="yellow")
plt.title(f"Waveform: {os.path.basename(file)}")
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.show()

## 5. Display Spectrogram

In [None]:
# Short-Time Fourier Transform of the signal; the magnitude of the complex
# result is then expressed in decibels for display.
X = librosa.stft(audio_sample)
Xdb = librosa.amplitude_to_db(abs(X))

# Draw on an explicit Axes and hand the returned mesh to the colorbar, instead
# of relying on pyplot's notion of the "current" image.
fig, ax = plt.subplots(figsize=(14, 5))
mesh = librosa.display.specshow(Xdb, sr=sr, x_axis="time", y_axis="hz", ax=ax)
fig.colorbar(mesh, ax=ax, format="%+2.0f dB")
ax.set_title(f"Spectrogram: {os.path.basename(file)}")
plt.show()

## 6. Audio Normalization (dBFS)

Normalize audio to a target peak level using pydub. This is useful for:
- Ensuring consistent volume across samples
- Data augmentation (creating samples at different volume levels)

In [None]:
def maximize_audio(audio_sample: AudioSegment, target_max_dBFS: float = -0.1) -> AudioSegment:
    """Normalize audio to a target peak level.

    The applied gain is the difference between the requested peak and the
    segment's current peak (``max_dBFS``), so the loudest sample ends up at
    ``target_max_dBFS``.

    Args:
        audio_sample: PyDub AudioSegment to normalize
        target_max_dBFS: Target peak level in dBFS (0 = maximum, negative = quieter)

    Returns:
        Normalized AudioSegment. A completely silent segment (peak of
        ``-inf`` dBFS) is returned unchanged, because no finite gain can
        raise silence to the target level.
    """
    current_peak = audio_sample.max_dBFS

    # Silence reports a peak of -inf dBFS; the gain computed below would then
    # be +inf, which cannot be applied meaningfully.
    if current_peak == float("-inf"):
        return audio_sample

    gain = target_max_dBFS - current_peak
    return audio_sample.apply_gain(gain)


# Decode the selected file a second time, now through pydub, so that its peak
# level can be read in dBFS.
audio_pydub = AudioSegment.from_file(str(file))
peak_before = audio_pydub.max_dBFS
print(f"Original peak level: {peak_before:.2f} dBFS")

# Bring the peak to -0.1 dBFS, i.e. just below full scale, to avoid clipping.
audio_normalized = maximize_audio(audio_pydub, target_max_dBFS=-0.1)
peak_after = audio_normalized.max_dBFS
print(f"Normalized peak level: {peak_after:.2f} dBFS")

## 7. Compare Original vs Normalized Waveform

In [None]:
# Export normalized audio to temporary file for visualization
temp_dir = cfg.get_playground_dir() / "demo-read-audiofiles"
os.makedirs(temp_dir, exist_ok=True)
temp_file = temp_dir / "temp_normalized.wav"

try:
    # export() returns the open output file handle; close it explicitly so the
    # data is flushed and the file can be removed afterwards on every platform.
    audio_normalized.export(str(temp_file), format="wav").close()

    # Load normalized audio for plotting, at the same sampling rate as the
    # original signal so both panels are directly comparable.
    audio_normalized_np, sr_norm = librosa.load(str(temp_file), sr=sr)

    # Plot comparison. A shared y-axis makes the amplitude difference visible;
    # with independently auto-scaled axes both panels would look alike.
    fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharey=True)

    axes[0].set_title(f"Original (peak: {audio_pydub.max_dBFS:.2f} dBFS)")
    librosa.display.waveshow(audio_sample, sr=sr, ax=axes[0], color="yellow", alpha=0.7)
    axes[0].set_ylabel("Amplitude")

    axes[1].set_title(f"Normalized (peak: {audio_normalized.max_dBFS:.2f} dBFS)")
    librosa.display.waveshow(audio_normalized_np, sr=sr_norm, ax=axes[1], color="cyan", alpha=0.7)
    axes[1].set_ylabel("Amplitude")
    axes[1].set_xlabel("Time (s)")

    plt.tight_layout()
    plt.show()

    # Play normalized audio
    print("\nNormalized audio:")
    ipd.display(ipd.Audio(str(temp_file)))
finally:
    # Clean up, even when loading or plotting failed part-way through.
    if os.path.exists(temp_file):
        os.remove(temp_file)