# Biodenoising Denoise Demo (ZIP input)

This notebook lets you:
- Upload a ZIP archive of audio files (e.g., WAV/FLAC)
- Extract into a timestamped folder
- Run denoising using `denoise.py` on the extracted folder

Works in both Jupyter and Google Colab. On Colab, switch the runtime to a GPU (Runtime → Change runtime type) for faster denoising.


In [None]:
# Optional: install biodenoising (marius/fixes) + runtime deps on Colab
from __future__ import annotations
import sys
import subprocess
import importlib

# True when the google.colab module has already been loaded into this kernel.
IN_COLAB = "google.colab" in sys.modules
# pip VCS requirement pointing at the marius/fixes branch of the repository.
BRANCH_URL = "git+https://github.com/earthspecies/biodenoising@marius/fixes"


def _pip_install(*pip_args):
    """Run ``pip install -q <pip_args>`` with the current interpreter.

    Raises ``subprocess.CalledProcessError`` when pip exits non-zero
    (``check=True``).
    """
    subprocess.run([sys.executable, "-m", "pip", "install", "-q", *pip_args], check=True)


# Base runtime dependencies: only probed (and installed on failure) on Colab.
if IN_COLAB:
    try:
        import torch  # type: ignore
        import torchaudio  # type: ignore
        import soundfile  # type: ignore
        import yaml  # type: ignore
        import pandas  # type: ignore
    except Exception:
        # Any failed import triggers installation of the whole dependency set.
        _pip_install("torch", "torchaudio", "--index-url", "https://download.pytorch.org/whl/cu121")
        _pip_install("soundfile", "pyyaml", "pandas", "numpy", "scipy", "tqdm", "librosa")

# Make sure biodenoising comes from the branch: upgrade it in place when it is
# already importable, otherwise fall back to a fresh install.
try:
    import biodenoising  # type: ignore
    print("Found biodenoising. Ensuring branch version...")
    _pip_install("--upgrade", "--no-cache-dir", BRANCH_URL)
    # Re-execute the already-imported top-level package after the upgrade.
    biodenoising = importlib.reload(biodenoising)  # type: ignore
    print("Ensured biodenoising from marius/fixes branch.")
except Exception:
    # Reached when the import, the upgrade, or the reload above fails.
    print("Installing biodenoising from GitHub branch marius/fixes...")
    _pip_install("--no-cache-dir", BRANCH_URL)
    import biodenoising  # type: ignore
    print("Installed biodenoising from marius/fixes branch.")


In [None]:
# Setup: environment and paths (Colab/Jupyter-friendly)
from __future__ import annotations
import os
import sys
import time
from pathlib import Path

# Anchor everything at the current working directory so the same cell works
# on Colab (/content) and in a local Jupyter session.
WORKSPACE = Path.cwd().resolve()
print(f"CWD: {Path.cwd()}")
print("Python:", sys.version)

# Demo folders live under <cwd>/scripts; both are created up front and
# creating them again on a re-run is a no-op (exist_ok=True).
BASE = WORKSPACE / "scripts"
UPLOADS_DIR = BASE / "denoise_uploads"
OUTPUTS_DIR = BASE / "denoise_outputs"
UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
print("Uploads dir:", UPLOADS_DIR)
print("Outputs dir:", OUTPUTS_DIR)


In [None]:
# Upload ZIP (Colab or Jupyter)
from __future__ import annotations
from datetime import datetime
import zipfile

# Path of the saved ZIP; stays None until an upload has been written to disk.
zip_path = None


def _save_upload(payload):
    """Write uploaded bytes to a timestamped ZIP under UPLOADS_DIR.

    Args:
        payload: bytes-like object holding the archive contents.

    Returns:
        Path of the file that was written.
    """
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = UPLOADS_DIR / f"upload_{ts}.zip"
    with open(target, "wb") as f:
        f.write(payload)
    print("Saved:", target)
    return target


# 1) Colab: blocking file chooser. Only the first uploaded file is used.
try:
    from google.colab import files as colab_files  # type: ignore
    print("Detected Colab. Use chooser to upload a ZIP.")
    uploaded = colab_files.upload()
    if uploaded:
        name = next(iter(uploaded.keys()))
        zip_path = _save_upload(uploaded[name])
except Exception as e:
    # Best-effort: outside Colab the import fails and we fall through to the widget.
    print("Colab uploader unavailable:", repr(e))

# 2) Jupyter: ipywidgets FileUpload. The widget is non-blocking, so the user
#    uploads first and then re-runs this cell to persist the file.
if zip_path is None:
    try:
        import ipywidgets as widgets  # type: ignore
        from IPython.display import display  # type: ignore

        # Reuse the widget left by a previous run of this cell. Creating a new
        # one on every run would discard the file the user just selected, so
        # the re-run could never see it.
        file_uploader = globals().get("file_uploader")
        if not isinstance(file_uploader, widgets.FileUpload):
            file_uploader = widgets.FileUpload(accept=".zip", multiple=False)
        display(file_uploader)

        selection = file_uploader.value
        if selection:
            # ipywidgets 7 exposes a dict {filename: entry}; ipywidgets 8
            # exposes a tuple of entries. Both kinds of entry support ["content"].
            if isinstance(selection, dict):
                entries = list(selection.values())
            else:
                entries = list(selection)
            zip_path = _save_upload(entries[0]["content"])
        else:
            print("Use the widget above, then re-run this cell once.")
    except Exception as e:
        print("ipywidgets uploader unavailable:", repr(e))

zip_path


In [None]:
# Extract ZIP to a timestamped folder under scripts/denoise_uploads
from __future__ import annotations
import time
import zipfile
from pathlib import Path

# Validate with an explicit exception instead of `assert`: assertions are
# removed when Python runs with -O, and the message now shows the bad value.
if zip_path is None or not Path(zip_path).is_file():
    raise FileNotFoundError(
        f"zip_path must be set to an existing file (got {zip_path!r})"
    )

# Each extraction gets its own folder, named with a second-resolution timestamp.
extract_ts = time.strftime("%Y%m%d_%H%M%S")
extract_dir = UPLOADS_DIR / f"unzipped_{extract_ts}"
extract_dir.mkdir(parents=True, exist_ok=True)

with zipfile.ZipFile(zip_path, "r") as zf:
    zf.extractall(extract_dir)

# Count audio files anywhere below the extraction root. Only regular files are
# counted, so a directory whose name ends in an audio extension is ignored.
AUDIO_EXTS = {".wav", ".flac", ".mp3", ".ogg", ".m4a", ".aac"}
audio_files = [
    p for p in extract_dir.rglob("*")
    if p.is_file() and p.suffix.lower() in AUDIO_EXTS
]
print(f"Extracted to: {extract_dir}")
print(f"Found {len(audio_files)} audio files (recursively)")

extract_dir


In [None]:
# Configure and run denoising via denoise.py
from __future__ import annotations
from pathlib import Path
import torch  # type: ignore

from biodenoising import denoiser as dn  # package import
from biodenoising.denoiser import denoise as denoise_cli  # packaged denoise module

# Start from the denoise CLI parser's defaults by parsing an empty argv.
args = denoise_cli.parser.parse_args([])

# IO: read from the extracted upload, write into a fresh timestamped run folder.
# NOTE: `time` is not imported in this cell; it relies on an earlier cell's import.
run_ts = time.strftime("%Y%m%d_%H%M%S")
run_out_dir = OUTPUTS_DIR / f"denoise_run_{run_ts}"
run_out_dir.mkdir(parents=True, exist_ok=True)
args.noisy_dir = str(extract_dir)
args.out_dir = str(run_out_dir)

# Fallback values, applied only for options the parser did not already define.
_FALLBACKS = {
    "method": "biodenoising16k_dns48",
    "transform": "none",
    "keep_original_sr": True,
    "noise_reduce": False,
    "selection_table": False,
    "window_size": 0,
    "batch_size": 1,
    "num_workers": 2,
}
for _name, _default in _FALLBACKS.items():
    if not hasattr(args, _name):
        setattr(args, _name, _default)

# Device is always overridden: GPU when torch reports one, CPU otherwise.
if torch.cuda.is_available():
    args.device = "cuda"
else:
    args.device = "cpu"

# Echo the effective configuration before the run starts.
summary = {
    key: getattr(args, key)
    for key in (
        "noisy_dir",
        "out_dir",
        "method",
        "device",
        "transform",
        "keep_original_sr",
        "window_size",
    )
}
print(summary)

# Replace get_dataset in the packaged denoise module (done to avoid a NameError
# on a global `args` there). The flag attribute makes the patch apply only once
# per kernel session.
if not hasattr(denoise_cli, "_patched_get_dataset"):
    def _patched_get_dataset(noisy_dir, sample_rate, channels, keep_original_sr):
        """Build the Audioset for `noisy_dir`, or return None when it is empty/unset."""
        if not noisy_dir:
            print("No noisy_dir provided; skipping denoising.")
            return None
        # Resampling target is passed only when the original rate is not kept.
        resample_to_sr = None if keep_original_sr else sample_rate
        files = dn.audio.find_audio_files(noisy_dir)
        return dn.audio.Audioset(
            files,
            with_path=True,
            sample_rate=sample_rate,
            channels=channels,
            convert=True,
            resample_to_sr=resample_to_sr,
        )

    denoise_cli.get_dataset = _patched_get_dataset
    denoise_cli._patched_get_dataset = True

# Run denoising through the module's denoise(args) entry point.
denoise_cli.denoise(args, local_out_dir=args.out_dir)
print("Denoising completed.")
run_out_dir


In [None]:
# Inspect outputs
from __future__ import annotations
from pathlib import Path

# Look at the folder produced by the denoising run.
out_dir = run_out_dir
print("Output dir:", out_dir)

if not out_dir.exists():
    print("No outputs found.")
else:
    # Collect WAVs recursively, sorted by path, and list at most the first ten
    # relative to the run folder.
    enhanced = sorted(Path(out_dir).rglob("*.wav"))
    print(f"Enhanced WAV files: {len(enhanced)}")
    for wav_path in enhanced[:10]:
        print("-", wav_path.relative_to(out_dir))
