In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Wave (ocean) sound --> slow 'wave curve' (amplitude envelope) extractor.

- Loads audio (mono), optional high-pass to remove DC/rumble
- Extracts Hilbert envelope (amplitude)
- Smooths and downsamples the envelope to a low rate (default 50 Hz)
- Produces:
    * Broad envelope (nice overall loudness)
    * Swell-scale envelopes with cutoffs at 0.3 Hz and 0.1 Hz
    * Mid-scale "splash" band on envelope (1-5 Hz)
- Exports a figure and a CSV with time + curves

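Usage (notebook): call process() directly, e.g.
    results = process("input.wav", out_prefix="my_wave", swell_cut_1=0.3, swell_cut_2=0.1)

Usage (script, once an argparse entry point is wired up):
    python wave_envelope.py input.wav --out-prefix my_wave --swell-1 0.3 --swell-2 0.1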

Dependencies:
    pip install numpy scipy soundfile matplotlib
    (optional) librosa for resampling; otherwise SciPy polyphase is used
"""

import argparse
import os
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt
from scipy.signal import butter, sosfiltfilt, hilbert, resample_poly

try:
    import librosa
    HAVE_LIBROSA = True
except Exception:
    HAVE_LIBROSA = False


# ----------------------- DSP helpers -----------------------

def to_mono(x: np.ndarray) -> np.ndarray:
    """Collapse a 2-D array to 1-D by averaging over axis 1.

    Arrays whose number of dimensions is not exactly 2 are returned unchanged
    (the very same object, no copy).
    """
    if x.ndim != 2:
        return x
    return x.mean(axis=1)

def resample_audio(x: np.ndarray, sr: int, target_sr: int) -> tuple[np.ndarray, int]:
    """Resample `x` from `sr` to `target_sr` and return `(samples, rate)`.

    - When both rates are equal the input is handed back untouched together
      with `sr` (no copy, no dtype conversion).
    - Otherwise librosa's "kaiser_best" resampler is used when the module-level
      flag HAVE_LIBROSA is true; if not, SciPy's polyphase resampler is used
      with the up/down factors reduced by their greatest common divisor.
    - Resampled output is cast to float32.
    """
    if sr != target_sr:
        if HAVE_LIBROSA:
            resampled = librosa.resample(
                y=x, orig_sr=sr, target_sr=target_sr, res_type="kaiser_best"
            )
        else:
            # Polyphase fallback: express target_sr / sr as a reduced fraction.
            import math
            divisor = math.gcd(sr, target_sr)
            resampled = resample_poly(x, target_sr // divisor, sr // divisor)
        return resampled.astype(np.float32), target_sr
    return x, sr

def butter_sos(cut, fs, btype, order=4):
    """Design a Butterworth filter and return it as second-order sections.

    `cut` is a single cut-off or a sequence of band edges, in the same units
    as `fs`; it is normalised by the Nyquist frequency (fs / 2) before being
    passed to `scipy.signal.butter`. `btype` is forwarded as-is.
    """
    half_rate = 0.5 * fs
    normalised = np.atleast_1d(cut) / half_rate
    return butter(order, normalised, btype=btype, output="sos")

def highpass(x, fs, cut=20.0, order=2):
    """High-pass `x` with a Butterworth design applied forward and backward.

    The filter comes from `butter_sos(cut, fs, 'highpass', order)` and is run
    through `sosfiltfilt`, so the result has the same length as `x`.
    """
    hp_sections = butter_sos(cut, fs, btype='highpass', order=order)
    return sosfiltfilt(hp_sections, x)

def lowpass(x, fs, cut=5.0, order=4):
    """Low-pass `x` with a Butterworth design applied forward and backward.

    The filter comes from `butter_sos(cut, fs, 'lowpass', order)` and is run
    through `sosfiltfilt`, so the result has the same length as `x`.
    """
    lp_sections = butter_sos(cut, fs, btype='lowpass', order=order)
    return sosfiltfilt(lp_sections, x)

def bandpass(x, fs, lo, hi, order=4):
    """Band-pass `x` between `lo` and `hi` with a forward-backward Butterworth filter.

    The filter comes from `butter_sos([lo, hi], fs, 'bandpass', order)` and is
    run through `sosfiltfilt`, so the result has the same length as `x`.
    """
    band_edges = [lo, hi]
    bp_sections = butter_sos(band_edges, fs, btype='bandpass', order=order)
    return sosfiltfilt(bp_sections, x)

def envelope_hilbert(x):
    """Return the amplitude envelope of `x`: the magnitude of its analytic signal."""
    return np.abs(hilbert(x))

def compress_log(x, eps=1e-8):
    """Log-compress a curve and rescale it to the range [0, 1].

    Parameters
    ----------
    x : array-like
        Curve to compress. Values below zero are treated as zero.
    eps : float
        Offset added before the logarithm so that zeros map to a finite value.

    Returns
    -------
    numpy.ndarray
        `log10(eps + max(x, 0))`, shifted and scaled so its minimum is 0 and
        its maximum is (just under) 1. An empty input yields an empty array.
    """
    # Zero-phase filtering / polyphase resampling of an envelope can undershoot
    # slightly below zero. log10 of a negative number is NaN, and one NaN makes
    # min()/max() NaN, which would turn the *entire* normalized curve into NaN.
    clamped = np.maximum(np.asarray(x), 0)
    y = np.log10(eps + clamped)
    if y.size == 0:
        # min()/max() raise on empty arrays; nothing to normalize anyway.
        return y
    # Normalize to [0,1] for nice plotting/CSV comparability
    floor = y.min()
    span = y.max() - floor
    return (y - floor) / (span + 1e-12)

def downsample_signal(x, fs, target_fs=50):
    """Resample `x` from rate `fs` to `target_fs` with SciPy's polyphase filter.

    Returns `(samples, rate)`. If the two rates compare equal, `x` and `fs`
    are returned untouched. Otherwise both rates are truncated to `int`, the
    up/down factors are reduced by their greatest common divisor, and the
    resampled signal is cast to float32 and returned with `target_fs`.
    """
    if fs != target_fs:
        import math
        src_rate = int(fs)
        dst_rate = int(target_fs)
        common = math.gcd(src_rate, dst_rate)
        resampled = resample_poly(x, dst_rate // common, src_rate // common)
        return resampled.astype(np.float32), target_fs
    return x, fs

# ----------------------- Main routine -----------------------

def _hz_tag(freq_hz: float) -> str:
    """Format a frequency for use in a CSV column name (0.3 -> '0p3', 1.0 -> '1')."""
    return f"{freq_hz:g}".replace(".", "p")


def process(
    wav_path: str,
    out_prefix: str,
    target_sr_audio: int = 22050,
    env_ds_fs: int = 50,
    hp_cut: float = 20.0,
    lpf_broad: float = 10.0,
    swell_cut_1: float = 0.3,   # Hz
    swell_cut_2: float = 0.1,   # Hz
    mid_band: tuple[float, float] = (1.0, 5.0),  # Hz on the envelope
    min_sec: float | None = None,                # start time (s) to cut
    max_sec: float | None = None                 # end time (s) to cut
):
    """Extract slow envelope curves from an audio file and save them as CSV + PNG.

    Pipeline: read file -> mono float32 -> optional time cut -> optional
    high-pass -> resample to `target_sr_audio` -> Hilbert envelope ->
    low-pass at `lpf_broad` -> downsample to `env_ds_fs` -> two swell
    low-passes and one mid band-pass -> log-compress/normalize each curve.

    Parameters
    ----------
    wav_path : path of the audio file, read with `soundfile.read`.
    out_prefix : prefix for the outputs `<prefix>_curves.csv` / `<prefix>_curves.png`.
    target_sr_audio : sample rate the audio is resampled to before the envelope.
    env_ds_fs : sample rate of the exported envelope curves.
    hp_cut : high-pass cut-off on the raw audio; `None` or a value <= 0 skips it.
    lpf_broad : low-pass cut-off applied to the envelope before downsampling.
    swell_cut_1, swell_cut_2 : low-pass cut-offs for the two swell curves.
    mid_band : (lo, hi) band edges for the band-passed envelope.
    min_sec, max_sec : optional segment bounds in seconds. `None` means
        "from the start" / "to the end"; bounds are clamped to the file.

    Returns
    -------
    dict with keys `csv`, `png` (output paths), `fs_env` (envelope rate) and
    `length_s` (time stamp of the last envelope sample, 0.0 if there is none).

    Raises
    ------
    ValueError
        If the file or the requested segment contains no samples.
    """
    # --- Load
    x, sr = sf.read(wav_path, always_2d=False)
    x = to_mono(x).astype(np.float32)

    # --- Cut to segment if requested
    # Explicit None checks: a bound of 0 is a real value, not "missing".
    # The default end is the sample count itself; converting the duration back
    # through floats (len/sr*sr) can round down and drop the final sample.
    n_total = len(x)
    start = 0 if min_sec is None else int(min_sec * sr)
    end = n_total if max_sec is None else int(max_sec * sr)
    # Clamp so negative or overshooting bounds cannot wrap around via
    # Python's negative-index slicing.
    start = min(max(start, 0), n_total)
    end = min(max(end, start), n_total)
    x = x[start:end]
    if len(x) == 0:
        raise ValueError(
            f"No audio samples selected from {wav_path!r} "
            f"(min_sec={min_sec}, max_sec={max_sec}, duration={n_total / sr:.3f} s)"
        )

    # --- Optional clean-up on the raw audio
    if hp_cut is not None and hp_cut > 0:
        x = highpass(x, fs=sr, cut=hp_cut, order=2)

    # --- Resample audio for consistent processing cost
    x, sr = resample_audio(x, sr, target_sr_audio)

    # --- Envelope via Hilbert
    env = envelope_hilbert(x)

    # --- Light broadband smoothing on envelope (removes very fast ripple)
    env_broad = lowpass(env, fs=sr, cut=lpf_broad, order=4)

    # --- Downsample the envelope to low rate for precise sub-Hz filters
    env_ds, fs_ds = downsample_signal(env_broad, fs=sr, target_fs=env_ds_fs)

    # --- Swell-scale low-pass curves
    env_swell_1 = lowpass(env_ds, fs=fs_ds, cut=swell_cut_1, order=4)  # e.g., 0.3 Hz
    env_swell_2 = lowpass(env_ds, fs=fs_ds, cut=swell_cut_2, order=4)  # e.g., 0.1 Hz

    # --- Mid-scale "splash" band on envelope (e.g., 1–5 Hz)
    lo, hi = mid_band
    env_mid = bandpass(env_ds, fs=fs_ds, lo=lo, hi=hi, order=4)

    # --- Compressed/normalized versions for plotting/CSV
    env_broad_n = compress_log(env_ds)
    swell_1_n   = compress_log(env_swell_1)
    swell_2_n   = compress_log(env_swell_2)
    # The band-passed curve oscillates around zero, so its magnitude is compressed.
    env_mid_n   = compress_log(np.abs(env_mid) + 1e-8)

    # --- Time bases
    t_ds = np.arange(len(env_ds)) / fs_ds

    # --- Save CSV
    csv_path = f"{out_prefix}_curves.csv"
    data = np.column_stack([t_ds, env_broad_n, swell_1_n, swell_2_n, env_mid_n])
    # Column names follow the actual cut-offs so non-default runs are not
    # mislabelled; the defaults still give
    # "time_s,env_broad,env_swell_0p3hz,env_swell_0p1hz,env_mid_1_5hz".
    header = (
        "time_s,env_broad,"
        f"env_swell_{_hz_tag(swell_cut_1)}hz,"
        f"env_swell_{_hz_tag(swell_cut_2)}hz,"
        f"env_mid_{_hz_tag(lo)}_{_hz_tag(hi)}hz"
    )
    np.savetxt(csv_path, data, delimiter=",", header=header, comments="")
    print(f"[OK] Saved curves CSV → {csv_path}")

    # --- Plot (explicit figure/axes instead of pyplot's implicit current figure)
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(t_ds, env_broad_n, label=f"Broad env (~{lpf_broad} Hz LP + {env_ds_fs} Hz DS)", linewidth=1.1, alpha=0.3)
    ax.plot(t_ds, swell_1_n, label=f"Swell LP @ {swell_cut_1} Hz", linewidth=2.0)
    ax.plot(t_ds, swell_2_n, label=f"Swell LP @ {swell_cut_2} Hz", linewidth=2.0)
    ax.plot(t_ds, env_mid_n,  label=f"Mid band {mid_band[0]}–{mid_band[1]} Hz (env)", linewidth=1.1, alpha=0.3)
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Normalized amplitude (log-compressed)")
    ax.set_title("Wave Sound Envelope Curves")
    ax.grid(True, alpha=0.25)
    ax.legend(loc="upper right", ncols=2)
    fig.tight_layout()

    fig_path = f"{out_prefix}_curves.png"
    fig.savefig(fig_path, dpi=160)
    # Close so repeated calls in one kernel do not accumulate open figures.
    plt.close(fig)
    print(f"[OK] Saved plot → {fig_path}")

    return {
        "csv": csv_path,
        "png": fig_path,
        "fs_env": fs_ds,
        "length_s": float(t_ds[-1]) if len(t_ds) else 0.0
    }


In [2]:
# Notebook driver: parameters are plain variables here (no argparse needed).
wav_path = "../data/sea_test.wav"   # path to your audio file

# Segment of the recording to analyse, in seconds.
# Named once so the output prefix and the process() call cannot drift apart.
min_sec = 60
max_sec = 120
out_prefix = f"sea_{min_sec}-{max_sec}"   # prefix for CSV/plot outputs -> "sea_60-120"

# Parameters
target_sr_audio = 22050   # resample audio before envelope
env_ds_fs = 50            # envelope downsample rate (Hz)
hp_cut = 20.0             # high-pass cutoff on audio (Hz); set None to disable
lpf_broad = 10.0          # first low-pass on envelope before DS (Hz)
swell_cut_1 = 0.3         # swell envelope cutoff 1 (Hz)
swell_cut_2 = 0.1         # swell envelope cutoff 2 (Hz)
mid_band = (1.0, 5.0)     # mid-band (Hz) on envelope

# Run processing
results = process(
    wav_path=wav_path,
    out_prefix=out_prefix,
    min_sec=min_sec, max_sec=max_sec,
    target_sr_audio=target_sr_audio,
    env_ds_fs=env_ds_fs,
    hp_cut=hp_cut,
    lpf_broad=lpf_broad,
    swell_cut_1=swell_cut_1,
    swell_cut_2=swell_cut_2,
    mid_band=mid_band,
)

# Bare last expression so the notebook renders the summary dict.
results


[OK] Saved curves CSV → sea_60-120_curves.csv
[OK] Saved plot → sea_60-120_curves.png


{'csv': 'sea_60-120_curves.csv',
 'png': 'sea_60-120_curves.png',
 'fs_env': 50,
 'length_s': 59.98}