# Loudness Normalization Demo

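This notebook demonstrates batch audio processing for loudness normalization. "Loudness" here means peak level: each file is scaled so that its loudest sample reaches a chosen target in dBFS.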

**Purpose:**
- Load audio samples from a source directory
- Normalize each sample to multiple target peak levels (dBFS)
- Export augmented samples for training data preparation

This is a key data augmentation technique: by creating versions of each sample
at different volume levels, we can train models that are robust to variations in input volume.

## 1. Imports and Configuration

In [None]:
import os
import sys
from pathlib import Path

# Add project root to path for imports
# The inserted entry is the parent of the current working directory, so the
# `src` package imported below is only found when the kernel runs one level
# below the project root — TODO confirm for your setup.
sys.path.insert(0, os.path.dirname(os.path.abspath(".")))

import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
from pydub import AudioSegment

from src.utils import Config

In [None]:
# Load configuration
# `cfg` is reused by later cells to resolve the audio-assets and playground directories.
cfg = Config()
# Print the configured paths (presumably as a quick sanity check — see Config.print_paths).
cfg.print_paths()

## 2. Utility Functions

In [None]:
def find_files_by_extension(dir_path: Path, extension: str, recursive: bool = True) -> list[Path]:
    """Collect the paths under a directory whose names end with an extension.

    Args:
        dir_path: Directory to search (anything accepted by ``Path``)
        extension: Extension to match; a leading dot is added when missing,
            so ``'wav'`` and ``'.wav'`` are equivalent
        recursive: Search subdirectories as well when True, otherwise only
            the directory itself

    Returns:
        Sorted list of matching paths
    """
    suffix = extension if extension.startswith('.') else f'.{extension}'
    root = Path(dir_path)

    # rglob descends into subdirectories, glob stays at the top level
    search = root.rglob if recursive else root.glob
    return sorted(search(f'*{suffix}'))


def normalize_audio(audio_sample: AudioSegment, target_max_dBFS: float = -0.1) -> AudioSegment:
    """Normalize audio to a target peak level.

    The gain applied is the difference between the target peak and the
    sample's current peak (``max_dBFS``).

    Args:
        audio_sample: PyDub AudioSegment to normalize
        target_max_dBFS: Target peak level in dBFS (0 = maximum, negative = quieter)

    Returns:
        Normalized AudioSegment. A completely silent segment is returned
        unchanged because it has no peak to scale.
    """
    peak_dBFS = audio_sample.max_dBFS

    # Digital silence reports a peak of -inf dBFS; the gain would then be +inf,
    # which cannot be applied meaningfully. Leave such audio untouched.
    if peak_dBFS == float("-inf"):
        return audio_sample

    return audio_sample.apply_gain(target_max_dBFS - peak_dBFS)

## 3. Find Source Audio Files

In [None]:
# Source directory: audio assets samples
SOURCE_DIR = cfg.get_audio_assets_dir() / "samples"

# Find all WAV files
# The search is recursive, so files in nested subfolders are included.
source_files = find_files_by_extension(SOURCE_DIR, '.wav', recursive=True)
print(f"Source directory: {SOURCE_DIR}")
print(f"Found {len(source_files)} '.wav' files.")
# Only file names are listed (not their folders), capped at ten entries.
print("\nFirst 10 files:")
for f in source_files[:10]:
    print(f"  {f.name}")

## 4. Demonstrate Single File Normalization

Before batch processing, let's visualize what normalization does to a single file.

In [None]:
# Select a sample file for demonstration.
# The following cells use `demo_file` and `audio_original` unconditionally, so
# stop here with a clear error instead of letting them fail later with a
# NameError (or silently reuse stale values from an earlier kernel run).
if not source_files:
    raise FileNotFoundError(
        "No source files found. Please add WAV files to the samples directory."
    )

demo_file = source_files[0]
print(f"Demo file: {demo_file.name}")

# Load with pydub
audio_original = AudioSegment.from_file(str(demo_file))
print(f"Original peak level: {audio_original.max_dBFS:.2f} dBFS")
print(f"Duration: {len(audio_original)} ms")
print(f"Channels: {audio_original.channels}")
print(f"Sample rate: {audio_original.frame_rate} Hz")

In [None]:
# Target dBFS levels for normalization
# -0.1 dBFS = near maximum (loudest)
# -48 dBFS = very quiet
TARGET_DBFS_LEVELS = [-0.1, -6, -12, -24, -48]

print("Normalizing to different levels:")
print(f"{'Level':<12} {'Original dBFS':<15} {'Target dBFS':<15} {'Actual dBFS':<15}")
print("-" * 57)

# pydub AudioSegments are immutable: apply_gain() returns a new segment and
# leaves its input untouched. Every level can therefore be derived from the
# single `audio_original` loaded in the previous cell, without re-reading the
# file once per level.
original_dBFS = audio_original.max_dBFS

# Maps each target level to its normalized AudioSegment (insertion-ordered)
normalized_samples = {}
for target_dBFS in TARGET_DBFS_LEVELS:
    # Normalize
    audio_normalized = normalize_audio(audio_original, target_dBFS)
    normalized_samples[target_dBFS] = audio_normalized

    level_name = f"{target_dBFS:+.1f} dBFS"
    print(f"{level_name:<12} {original_dBFS:<15.2f} {target_dBFS:<15.1f} {audio_normalized.max_dBFS:<15.2f}")

## 5. Visualize Normalized Waveforms

In [None]:
# Create temporary files for visualization
temp_dir = cfg.get_playground_dir() / "demo-loudness-normalisation" / "temp"
os.makedirs(temp_dir, exist_ok=True)

# One row per normalized sample. squeeze=False makes plt.subplots always return
# a 2-D array of axes, so indexing also works when there is only a single level.
n_levels = len(normalized_samples)
fig, axes = plt.subplots(n_levels, 1, figsize=(14, 3 * n_levels), squeeze=False)
axes = axes[:, 0]

for idx, (target_dBFS, audio_normalized) in enumerate(normalized_samples.items()):
    temp_file = temp_dir / f"temp_{target_dBFS}.wav"
    try:
        # Export to temp file
        audio_normalized.export(str(temp_file), format="wav")

        # Load with librosa for visualization. sr=None keeps the file's native
        # sample rate; the default resamples to 22050 Hz, which is slower and
        # can shift the sample peaks this plot is meant to show.
        y, sr = librosa.load(str(temp_file), sr=None)
    finally:
        # Clean up temp file, also when exporting or loading failed
        if temp_file.exists():
            os.remove(temp_file)

    # Plot waveform
    ax = axes[idx]
    librosa.display.waveshow(y, sr=sr, ax=ax, color="cyan", alpha=0.7)
    ax.set_title(f"Target: {target_dBFS:+.1f} dBFS (actual: {audio_normalized.max_dBFS:.2f} dBFS)")
    ax.set_ylabel("Amplitude")
    ax.set_ylim(-1, 1)  # Fixed scale for comparison

axes[-1].set_xlabel("Time (s)")
plt.tight_layout()
plt.show()

# Clean up temp directory. os.rmdir only removes empty directories, so leave the
# folder in place if it unexpectedly contains other files.
if not any(temp_dir.iterdir()):
    os.rmdir(temp_dir)

## 6. Batch Processing (Dry Run)

This cell demonstrates the batch processing logic without actually creating files.
Set `DRY_RUN = False` to actually generate the normalized files.

In [None]:
# Configuration for batch processing
DRY_RUN = True  # Set to False to actually create files

# Output directory for normalized files (in playground)
OUTPUT_DIR = cfg.get_playground_dir() / "demo-loudness-normalisation" / "output"

# Target levels for batch processing
# NOTE: this list differs from TARGET_DBFS_LEVELS used in the single-file demo
# (it adds -3 and omits -48).
BATCH_TARGET_LEVELS = [-0.1, -3, -6, -12, -24]

# Echo the settings before running the batch.
print(f"Source directory: {SOURCE_DIR}")
print(f"Output directory: {OUTPUT_DIR}")
print(f"Target dBFS levels: {BATCH_TARGET_LEVELS}")
print(f"Dry run: {DRY_RUN}")
print()
# files x levels is an upper bound: outputs that already exist are skipped by
# batch_normalize and are not regenerated.
print(f"Will generate {len(source_files) * len(BATCH_TARGET_LEVELS)} files from {len(source_files)} source files.")

In [None]:
def batch_normalize(source_files: list[Path], 
                    source_dir: Path,
                    output_dir: Path, 
                    target_levels: list[float],
                    dry_run: bool = True) -> dict:
    """Batch normalize audio files to multiple target levels.

    For every source file and target level one output file named
    ``<stem>_maxdBFS-<level><suffix>`` is written (e.g. ``sample.wav`` at
    -0.1 dBFS becomes ``sample_maxdBFS-00.1.wav``). The sub-directory of each
    source file relative to ``source_dir`` is mirrored below ``output_dir`` so
    that equally named files from different folders do not overwrite or shadow
    each other. Outputs that already exist are left untouched and counted as
    skipped.

    Args:
        source_files: List of source audio file paths
        source_dir: Base source directory; the location of each source file
            relative to it determines the sub-directory of its outputs. Files
            outside ``source_dir`` are written directly into ``output_dir``.
        output_dir: Output directory for normalized files
        target_levels: List of target dBFS levels
        dry_run: If True, don't actually create files (sources are still
            loaded, so unreadable files are reported)

    Returns:
        Dictionary with processing statistics:
            processed: source files handled without a load error
            skipped:   outputs that already existed
            created:   outputs written (or that would be written in a dry run)
            errors:    source files that were missing or could not be loaded
    """
    stats = {
        'processed': 0,
        'skipped': 0,
        'created': 0,
        'errors': 0
    }

    prefix = "[DRY RUN] " if dry_run else ""
    source_dir = Path(source_dir)
    output_dir = Path(output_dir)

    for file_idx, source_file in enumerate(source_files):
        source_file = Path(source_file)
        if not source_file.exists():
            print(f"{prefix}ERROR: Source file does not exist: {source_file}")
            stats['errors'] += 1
            continue

        # Mirror the source's sub-directory below output_dir. relative_to()
        # raises ValueError for files outside source_dir; those fall back to
        # the flat layout.
        try:
            target_dir = output_dir / source_file.parent.relative_to(source_dir)
        except ValueError:
            target_dir = output_dir

        # Load audio once per source file, and only if some output is missing
        audio_original = None
        load_failed = False

        for target_dBFS in target_levels:
            # Generate output filename
            # e.g., "sample.wav" -> "sample_maxdBFS-00.1.wav"
            level_str = f"maxdBFS-{-target_dBFS:04.1f}"
            output_name = source_file.stem + f"_{level_str}" + source_file.suffix
            output_file = target_dir / output_name

            # Skip if output already exists
            if output_file.exists():
                stats['skipped'] += 1
                continue

            # Lazy load audio
            if audio_original is None:
                try:
                    audio_original = AudioSegment.from_file(str(source_file))
                except Exception as e:
                    print(f"{prefix}ERROR loading {source_file}: {e}")
                    stats['errors'] += 1
                    load_failed = True
                    break

            if dry_run:
                if file_idx < 3:  # Only print first few in dry run
                    # Path relative to output_dir, so nested outputs stay distinguishable
                    print(f"{prefix}Would create: {output_file.relative_to(output_dir)}")
            else:
                # Create output directory if needed
                target_dir.mkdir(parents=True, exist_ok=True)

                # Normalize and export
                audio_normalized = normalize_audio(audio_original, target_dBFS)
                audio_normalized.export(str(output_file), format="wav")

            stats['created'] += 1

        # A file that could not be loaded is an error, not a processed file
        if not load_failed:
            stats['processed'] += 1

        # Progress update
        if (file_idx + 1) % 50 == 0:
            print(f"{prefix}Processed {file_idx + 1}/{len(source_files)} files...")

    return stats


# Run batch processing
print("Starting batch normalization...")
print("=" * 50)
stats = batch_normalize(
    source_files=source_files,
    source_dir=SOURCE_DIR,
    output_dir=OUTPUT_DIR,
    target_levels=BATCH_TARGET_LEVELS,
    dry_run=DRY_RUN
)
print("=" * 50)
print("\nSummary:")

# In a dry run nothing is written, so the 'created' counter holds the number of
# files that WOULD be created; label it accordingly.
created_label = "Files to create:" if DRY_RUN else "Files created:"
print(f"  Files processed: {stats['processed']}")
print(f"  {created_label:<16} {stats['created']}")
print(f"  Files skipped:   {stats['skipped']}")
print(f"  Errors:          {stats['errors']}")

if DRY_RUN:
    print("\nThis was a dry run. Set DRY_RUN = False to actually create files.")