In [None]:
"""
audio_denoising_pipeline.ipynb

Audio denoising pipeline for longitudinal multimodal poultry monitoring dataset.

Applies noise reduction to raw WAV audio recordings captured across poultry barns.
Parameters were calibrated using Audacity through visual inspection of spectrograms
and auditory validation, then implemented programmatically using noisereduce and
librosa for automated batch processing.

Input folder structure expected:
    Sample_Dataset/
    └── Audio/
          └── *.wav

Output files are saved in:
    Sample_Dataset/
    └── Audio_Denoised/
          └── *_denoised.wav

Parameters (calibrated in Audacity; the values used in code are set in the PARAMETERS cell):
    - Noise reduction     : 12 dB
    - Sensitivity         : 12
    - Frequency smoothing : 4 bands
    - Sample rate         : 48000 Hz
    - Bit depth           : 24-bit

Dependencies:
    pip install noisereduce librosa tqdm soundfile
"""

import os
import librosa
import soundfile as sf
import noisereduce as nr
from tqdm import tqdm

In [None]:
# =============================================================================
# PARAMETERS
# =============================================================================

SAMPLE_RATE        = 48000   # Recording sample rate (48 kHz)
NOISE_REDUCE_DB    = 12      # Noise reduction strength in dB
SENSITIVITY        = 12      # Sensitivity level for noise gate threshold
FREQ_SMOOTH_BANDS  = 4       # Number of frequency smoothing bands

INPUT_DIR  = os.path.join("Sample_Dataset", "Audio")
OUTPUT_DIR = os.path.join("Sample_Dataset", "Audio_Denoised")

In [None]:
# =============================================================================
# SINGLE FILE DENOISING
# =============================================================================

def denoise_audio(input_path, output_path):
    # Load audio file at original sample rate
    audio, sr = librosa.load(input_path, sr=SAMPLE_RATE, mono=False)

    noise_sample = audio[..., :int(sr * 0.5)]

    audio_denoised = nr.reduce_noise(
        y                = audio,
        sr               = sr,
        y_noise          = noise_sample,
        prop_decrease    = NOISE_REDUCE_DB / 20,   # convert dB to proportion
        n_fft            = 1024,
        freq_mask_smooth_hz = FREQ_SMOOTH_BANDS * 100,  # frequency smoothing bands
        time_mask_smooth_ms = SENSITIVITY * 10,          # sensitivity as time mask
        stationary       = False,
        n_jobs           = 1
    )

    # Write denoised audio to output path preserving original sample rate
    sf.write(output_path, audio_denoised.T if audio_denoised.ndim > 1 else audio_denoised, sr)

In [None]:
# =============================================================================
# BATCH PIPELINE
# =============================================================================

def run_batch_pipeline(input_dir, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    audio_files = [f for f in os.listdir(input_dir) if f.lower().endswith(".wav")]

    if not audio_files:
        print(f"[INFO] No .wav files found in: {input_dir}")
        return

    print(f"\n{'='*60}")
    print(f"  Audio Denoising Pipeline")
    print(f"  Input  : {input_dir}")
    print(f"  Output : {output_dir}")
    print(f"  Files  : {len(audio_files)}")
    print(f"  Sample rate       : {SAMPLE_RATE} Hz")
    print(f"  Noise reduction   : {NOISE_REDUCE_DB} dB")
    print(f"  Sensitivity       : {SENSITIVITY}")
    print(f"  Frequency bands   : {FREQ_SMOOTH_BANDS}")
    print(f"{'='*60}\n")

    for idx, filename in enumerate(tqdm(audio_files, desc="Overall progress", unit="file"), start=1):
        input_path  = os.path.join(input_dir, filename)
        base_name   = os.path.splitext(filename)[0]
        output_path = os.path.join(output_dir, f"{base_name}_denoised.wav")

        print(f"[{idx}/{len(audio_files)}] {filename}")
        denoise_audio(input_path, output_path)
        print(f"  Saved → {base_name}_denoised.wav\n")

    print(f"{'='*60}")
    print(f"  Done. {len(audio_files)} file(s) denoised.")
    print(f"  Output saved to: {output_dir}")
    print(f"{'='*60}\n")

In [None]:
# =============================================================================
# RUN
# =============================================================================

run_batch_pipeline(INPUT_DIR, OUTPUT_DIR)