In [None]:
import os
from pathlib import Path
import re
import glob
import numpy as np
import pandas as pd
import soundfile as sf
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy import signal

## Repo directory overview (17.02.26)

```
data/
├── 1m straight pipe y branch/
│   ├── clogged pipe, no noisy first obs.wav
│   ├── clogged pipe, no noisy.WAV
│   ├── free pipe, no noisy first obs.wav
│   └── free pipe, no noisy.WAV
│
└── 3m straight pipe/
    ├── baseline/
    │   └── 16_02/
    │       ├── 16_02_26_baseline(bad).WAV
    │       └── baseline.WAV
    │
    ├── Ø20w120/
    │   ├── 16_02/
    │   │   ├── pos1.WAV
    │   │   ├── pos2.WAV
    │   │   ├── pos3.WAV
    │   │   ├── pos4.WAV
    │   │   └── pos5.WAV
    │   │
    │   └── 16_02(bad batch)/
    │       ├── ZOOM0001.WAV
    │       ├── ZOOM0002.WAV
    │       ├── ZOOM0003.WAV
    │       ├── ZOOM0004.WAV
    │       └── ZOOM0005.WAV
    │
    ├── Ø20w40/
    │   └── 16_02/
    │       ├── pos1.WAV
    │       ├── pos2.WAV
    │       ├── pos3.WAV
    │       ├── pos4.WAV
    │       └── pos5.WAV
    │
    ├── Ø40w120/
    │   └── 16_02/
    │       ├── pos1.WAV
    │       ├── pos2.WAV
    │       ├── pos3.WAV
    │       ├── pos4.WAV
    │       └── pos5.WAV
    │
    └── Ø40w40/
        └── 16_02/
            ├── pos1.WAV
            ├── pos2.WAV
            ├── pos3.WAV
            ├── pos4.WAV
            └── pos5.WAV
```



In [None]:
# All paths below are resolved relative to the current working directory
# at the time this cell runs.
PROJECT_ROOT = Path.cwd()
DATA_ROOT = PROJECT_ROOT.joinpath("data")

In [None]:
def load_wav(filepath, normalize=True, to_mono=True):
    """
    Load a .wav file.

    Parameters
    ----------
    filepath : str or os.PathLike
        Path to the .wav file.
    normalize : bool
        If True, scales integer PCM signals to [-1, 1). Signed samples are
        divided by the magnitude of the most negative representable value
        (e.g. 32768 for int16); unsigned samples (8-bit PCM, which is stored
        offset around mid-scale) are re-centred on zero first. Floating-point
        files are returned as read.
    to_mono : bool
        If True, converts multi-channel audio to mono by averaging channels.

    Returns
    -------
    fs : int
        Sampling frequency (Hz).
    signal : np.ndarray
        Audio signal. float32 when an integer file was normalized. With
        ``normalize=False`` the original dtype is kept, except that averaging
        an integer multi-channel signal to mono yields float64 (``np.mean``).
    """

    fs, samples = wavfile.read(filepath)

    # Convert integer PCM to float in [-1, 1)
    if normalize and np.issubdtype(samples.dtype, np.integer):
        info = np.iinfo(samples.dtype)
        if info.min == 0:
            # Unsigned PCM: silence sits at mid-scale (128 for uint8), so
            # remove that offset before scaling; otherwise the result would
            # land in [0, 1] with a large DC component.
            half_range = (info.max + 1) / 2
            samples = (samples.astype(np.float32) - half_range) / half_range
        else:
            # Divide by |min| rather than max so the most negative sample
            # maps to exactly -1.0 instead of slightly below it.
            samples = samples.astype(np.float32) / -float(info.min)

    # Convert stereo (n_samples, n_channels) to mono
    if to_mono and samples.ndim == 2:
        samples = np.mean(samples, axis=1)

    return fs, samples

In [None]:
# Prepare to collect signals grouped by insert spec and position
wav_data = {}

# Pattern to match insert spec and position (written with "/" separators)
pattern = re.compile(r"Ø(\d+)w(\d+)/16_02/pos(\d+)\.WAV")

# Search for all position recordings in the 3m straight pipe directory.
# sorted() makes the load/insertion order independent of the filesystem.
for wav_path in sorted(glob.glob(str(DATA_ROOT / "3m straight pipe" / "*" / "16_02" / "pos*.WAV"))):
    # Match against the POSIX form of the path: the pattern uses "/" and would
    # never match a native Windows path with "\\" separators.
    match = pattern.search(Path(wav_path).as_posix())
    if match:
        insert_spec = f"Ø{match.group(1)}w{match.group(2)}"
        position = f"pos{match.group(3)}"
        # Use `samples` rather than `signal` as the variable name so the
        # top-level `scipy.signal` module import is not overwritten.
        fs, samples = load_wav(wav_path)
        wav_data.setdefault(insert_spec, {})[position] = {"fs": fs, "signal": samples, "path": wav_path}

# Load baseline (every .WAV in the folder is loaded, keyed by file name)
baseline_dir = DATA_ROOT / "3m straight pipe" / "baseline" / "16_02"
baseline_files = sorted(baseline_dir.glob("*.WAV"))
baseline_data = {}
for baseline_file in baseline_files:
    fs, samples = load_wav(baseline_file)
    baseline_data[baseline_file.name] = {"fs": fs, "signal": samples, "path": baseline_file}

# wav_data: dict of insert_spec -> position -> dict(fs, signal, path)   (path is a str)
# baseline_data: dict of filename -> dict(fs, signal, path)             (path is a Path)