# Lecture 10B — Notebook 10B.0: Recording Queue, Imports, and Segment Selections

**Purpose:** Create the L10B project + manifest workflow. Record/import multiple clips and store reusable named segments for mel/MFCC analysis.


In [None]:
import os, json, math, re
from pathlib import Path

import numpy as np
import scipy.signal as sig
import scipy.fft as fft
import matplotlib.pyplot as plt

# Optional audio playback
try:
    from IPython.display import Audio, display
    HAS_IPY_AUDIO = True
except Exception:
    HAS_IPY_AUDIO = False

# Optional recording
try:
    import sounddevice as sd
    HAS_SD = True
except Exception as e:
    HAS_SD = False
    print("sounddevice not available (recording disabled).", e)

# ---------- Project paths ----------
# All notebook inputs/outputs live under one project folder inside the
# current working directory.
PROJECT_ROOT = Path.cwd().joinpath("EE519_L10B_Project")
REC_DIR = PROJECT_ROOT.joinpath("recordings")   # WAV clips (recorded or imported)
FIG_DIR = PROJECT_ROOT.joinpath("figures")      # plots written by save_fig
RES_DIR = PROJECT_ROOT.joinpath("results")      # results folder (created here)
MANIFEST_PATH = PROJECT_ROOT.joinpath("manifest.json")

# Create the sub-folders up front; parents=True also creates PROJECT_ROOT,
# and exist_ok=True makes re-running this cell harmless.
for d in (REC_DIR, FIG_DIR, RES_DIR):
    d.mkdir(parents=True, exist_ok=True)

def load_manifest(path=MANIFEST_PATH):
    """Return the project manifest as a dict.

    If ``path`` exists its contents are parsed as JSON and returned.
    Otherwise a fresh manifest skeleton with no clips is returned; nothing
    is written to disk by this function.
    """
    if not path.exists():
        # First run: no clips yet and the creation time is still unset.
        return {"course":"EE519","lecture":"10B","created_utc":None,"clips":[]}
    raw_text = path.read_text()
    return json.loads(raw_text)

def save_manifest(manifest, path=MANIFEST_PATH):
    """Write ``manifest`` to ``path`` as indented JSON.

    On the first save (``created_utc`` missing or ``None``) the manifest is
    stamped in place with the current ``np.datetime64("now")`` value, so the
    caller's dict is mutated. Prints the destination path when done.
    """
    created = manifest.get("created_utc")
    if created is None:
        manifest["created_utc"] = str(np.datetime64("now"))
    serialized = json.dumps(manifest, indent=2)
    path.write_text(serialized)
    print("Saved manifest:", path)

def save_fig(fig, name, dpi=150):
    """Save ``fig`` into ``FIG_DIR`` under ``name`` and return the output path.

    The figure is written with a tight bounding box at ``dpi`` dots per inch,
    and the destination is printed.
    """
    target = FIG_DIR.joinpath(name)
    fig.savefig(target, dpi=dpi, bbox_inches="tight")
    print("Saved figure:", target)
    return target

# ---------- WAV I/O ----------
import wave
def write_wav(path: Path, x: np.ndarray, fs: int):
    """Write ``x`` to ``path`` as a mono, 16-bit PCM WAV file at rate ``fs``.

    Samples are converted to float32, clipped to [-1, 1], scaled by 32767
    and truncated to int16 before being written.
    """
    samples = np.clip(np.asarray(x, dtype=np.float32), -1.0, 1.0)
    pcm = (samples * 32767.0).astype(np.int16)
    payload = pcm.tobytes()
    with wave.open(str(path), "wb") as out:
        out.setnchannels(1)   # mono
        out.setsampwidth(2)   # 2 bytes per sample = 16-bit
        out.setframerate(fs)
        out.writeframes(payload)

def read_wav(path: Path):
    """Read a 16-bit PCM WAV file and return ``(fs, x)``.

    Returns:
        fs: sample rate reported by the file header.
        x: 1-D float32 array scaled by 1/32768, so values lie in [-1, 1).
           Multi-channel files are averaged across channels to mono.

    Raises:
        ValueError: if the file is not 16-bit (sample width != 2 bytes);
            decoding such data as int16 would silently produce garbage.
    """
    with wave.open(str(path), "rb") as wf:
        fs = wf.getframerate()
        n_channels = wf.getnchannels()
        sample_width = wf.getsampwidth()
        if sample_width != 2:
            raise ValueError(
                f"{path}: expected 16-bit PCM, got {8 * sample_width}-bit samples"
            )
        raw = wf.readframes(wf.getnframes())
    # WAV data is little-endian regardless of the host byte order.
    x = np.frombuffer(raw, dtype="<i2").astype(np.float32) / 32768.0
    if n_channels > 1:
        # Samples are interleaved per frame; drop any trailing partial frame
        # and average the channels so callers always get a mono signal.
        usable = len(x) - (len(x) % n_channels)
        x = x[:usable].reshape(-1, n_channels).mean(axis=1)
    return fs, x

def peak_normalize(x, target=0.98):
    """Scale ``x`` so its largest absolute sample is (almost exactly) ``target``.

    A tiny constant (1e-12) is added to the peak so an all-zero signal
    divides safely and stays all-zero.
    """
    peak = np.abs(x).max() + 1e-12
    unit_scaled = x / peak
    return unit_scaled * target

def play_audio(x, fs, label="audio"):
    """Show an inline audio player for ``x`` at sample rate ``fs``.

    When the IPython audio widgets could not be imported, a notice that
    includes ``label`` is printed instead. Returns ``None`` either way.
    """
    if HAS_IPY_AUDIO:
        display(Audio(x, rate=fs))
    else:
        print("(Audio playback not available)", label)

def record_clip(seconds=2.0, fs=16000):
    """Record ``seconds`` of single-channel float32 audio at ``fs`` Hz.

    Returns:
        ``(fs, x)`` where ``x`` is the recording with singleton dimensions
        squeezed out.

    Raises:
        RuntimeError: if ``sounddevice`` could not be imported.
    """
    if not HAS_SD:
        raise RuntimeError("sounddevice not available. Load wav files instead.")
    print(f"Recording {seconds:.1f}s @ {fs} Hz ...")
    n_frames = int(seconds*fs)
    buffer = sd.rec(n_frames, samplerate=fs, channels=1, dtype="float32")
    sd.wait()  # wait for the recording to finish before using the buffer
    mono = buffer.squeeze()
    return fs, mono

def add_clip_to_manifest(filename, label, fs, notes=""):
    """Register a clip in the module-level ``manifest`` and save it to disk.

    If a clip with the same ``filename`` is already registered (for example
    because the recording cell was re-run and overwrote the WAV file), its
    metadata is refreshed in place instead of appending a duplicate entry;
    any selections already stored on that entry are kept.

    Args:
        filename: WAV file name (used as the clip's identity).
        label: short label for the clip.
        fs: sample rate; stored as ``int``.
        notes: optional free-text note.

    Returns:
        Index of the clip inside ``manifest["clips"]``.
    """
    clips = manifest["clips"]
    fields = {
        "filename": filename,
        "label": label,
        "fs": int(fs),
        "notes": notes,
        "added_utc": str(np.datetime64("now")),
    }

    # Reuse the existing entry for this file so one WAV never appears twice.
    for idx, known in enumerate(clips):
        if known.get("filename") == filename:
            known.update(fields)
            known.setdefault("selections", {})
            save_manifest(manifest)
            return idx

    clips.append({**fields, "selections": {}})
    save_manifest(manifest)
    return len(clips) - 1

def list_clips():
    """Print one line per clip in the module-level ``manifest``.

    Each line shows the clip index, label (padded to 14 characters),
    filename, sample rate, and notes (empty when absent).
    """
    for idx, entry in enumerate(manifest["clips"]):
        note_text = entry.get('notes','')
        print(f"[{idx}] {entry['label']:14s}  {entry['filename']}  fs={entry['fs']}  notes={note_text}")

# ---------- Selection + framing helpers ----------
def seconds_to_samples(t0, t1, fs, xlen):
    """Convert a time range in seconds to a sample range ``(s0, s1)``.

    Times are multiplied by ``fs`` and rounded; the start is clamped to 0
    and the end is clamped to ``xlen``.

    Raises:
        ValueError: if the clamped range is empty (``s1 <= s0``).
    """
    first = int(max(0, round(t0*fs)))
    last = int(min(xlen, round(t1*fs)))
    if first < last:
        return first, last
    raise ValueError("Bad selection: t1 must be > t0")

def samples_to_frame_range(s0, s1, N, H, xlen):
    """Map a sample range ``[s0, s1)`` to a frame index range ``(f0, f1)``.

    Frame ``k`` is taken to start at sample ``k*H`` and span ``N`` samples,
    matching ``frame_signal``. ``f0`` is the first frame that reaches past
    ``s0``; ``f1`` is ``s1 // H`` limited to the last full frame of a signal
    of length ``xlen``.

    If that leaves no frame (the selection lies beyond the last full frame,
    or falls in a gap between frames when ``H > N``), a single frame is
    returned instead: the one at ``s0 // H``, clamped to the valid range.
    The result therefore always satisfies ``0 <= f0 <= f1``.

    Signals shorter than ``N`` are treated as having one frame (index 0),
    because ``frame_signal`` zero-pads them to a single frame.
    """
    # Never negative: a too-short signal still yields frame 0 after padding.
    last_frame = max(0, int((xlen - N)//H))

    f0 = max(0, int((s0 - N)//H) + 1)
    f1 = min(int(s1//H), last_frame)
    if f1 < f0:
        # Fall back to one frame, clamped so the range can never be inverted
        # or point past the last available frame.
        f0 = min(max(0, int(s0//H)), last_frame)
        f1 = f0
    return f0, f1

def frame_signal(x, N, H):
    """Slice ``x`` into overlapping frames of length ``N`` with hop ``H``.

    Signals shorter than ``N`` are zero-padded at the end to one full frame.
    Returns a 2-D array of shape ``(num_frames, N)`` where row ``k`` holds
    samples ``k*H`` through ``k*H + N - 1``; trailing samples that do not
    fill a whole frame are dropped.
    """
    shortfall = N - len(x)
    if shortfall > 0:
        x = np.pad(x, (0, shortfall))
    n_frames = 1 + (len(x) - N)//H
    within_frame = np.arange(N)
    frame_starts = H*np.arange(n_frames)
    # Broadcast (1, N) + (n_frames, 1) into a full index grid.
    grid = within_frame[None,:] + frame_starts[:,None]
    return x[grid]

def db(x):
    """Return ``20*log10(x)``, with ``x`` floored at 1e-12 to avoid log of zero."""
    floored = np.maximum(x, 1e-12)
    return 20*np.log10(floored)

# ---------- Auditory scale helpers ----------
def hz_to_mel(hz):
    """Convert frequency in Hz to mel using ``2595 * log10(1 + hz/700)``."""
    ratio = hz/700.0
    return 2595.0 * np.log10(1.0 + ratio)

def mel_to_hz(mel):
    """Convert mel to frequency in Hz using ``700 * (10**(mel/2595) - 1)``."""
    exponent = mel/2595.0
    return 700.0 * (10**exponent - 1.0)

def mel_filterbank(fs, nfft, n_mels=26, fmin=0.0, fmax=None):
    """Build a bank of ``n_mels`` triangular filters spaced evenly in mel.

    Args:
        fs: sample rate in Hz.
        nfft: FFT size; filters cover the ``nfft//2 + 1`` non-negative bins.
        n_mels: number of filters.
        fmin: lowest band edge in Hz.
        fmax: highest band edge in Hz; defaults to ``fs/2``.

    Returns:
        ``(fb, freqs, hz_pts)`` where ``fb`` has shape
        ``(n_mels, nfft//2 + 1)``, ``freqs`` are the bin frequencies from 0
        to ``fs/2``, and ``hz_pts`` are the ``n_mels + 2`` band-edge
        frequencies in Hz.
    """
    fmax = fs/2 if fmax is None else fmax

    # n_mels triangles share n_mels + 2 edge points, equally spaced in mel.
    mel_edges = np.linspace(hz_to_mel(fmin), hz_to_mel(fmax), n_mels+2)
    hz_pts = mel_to_hz(mel_edges)

    # Frequencies of the FFT bins and the bin index of every band edge.
    freqs = np.linspace(0, fs/2, nfft//2 + 1)
    n_bins = len(freqs)
    bins = np.floor((nfft+1) * hz_pts / fs).astype(int)

    fb = np.zeros((n_mels, n_bins), dtype=np.float64)
    edge_triples = zip(bins[:-2], bins[1:-1], bins[2:])
    for row, (b0, b1, b2) in enumerate(edge_triples):
        # Keep every edge on a valid bin index.
        b0 = np.clip(b0, 0, n_bins-1)
        b1 = np.clip(b1, 0, n_bins-1)
        b2 = np.clip(b2, 0, n_bins-1)
        # Force each slope to be at least one bin wide.
        if b1 == b0:
            b1 += 1
        if b2 == b1:
            b2 += 1
        rising_bins = np.arange(b0, b1)
        falling_bins = np.arange(b1, b2)
        # Rising slope: 0 at b0, approaching 1 just below b1.
        fb[row, b0:b1] = (rising_bins - b0) / (b1 - b0 + 1e-12)
        # Falling slope: ~1 at b1, decreasing towards b2.
        fb[row, b1:b2] = (b2 - falling_bins) / (b2 - b1 + 1e-12)
    return fb, freqs, hz_pts

# Load the manifest (or start an empty one) into the module-level name that
# add_clip_to_manifest() and list_clips() read, then report the project state.
manifest = load_manifest()
print("Project root:", PROJECT_ROOT)
print("Clips in manifest:", len(manifest["clips"]))


## What you will do
- Record (or import) multiple audio clips
- Store them in `EE519_L10B_Project/recordings/`
- Create **named selections** (time ranges) per clip inside `manifest.json`
- Save a few “quick-look” plots for sanity checks


## 1) Recording queue (recommended)

Targets (suggested):
- `vowel_a` (steady /a/)
- `vowel_i` (steady /i/)
- `fricative_s` (steady /s/)
- `sentence` (short sentence)

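If recording isn't available (for example, `sounddevice` failed to import), set `DO_RECORD = False` in the cell below and skip to **2) Import WAV files**.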


In [None]:
# Recording queue: record each target in turn, normalise it, save it as a WAV
# under REC_DIR, register it in the manifest, and play it back.
DO_RECORD = True  # True records from the microphone; set False to skip this cell's recording

# (label, duration in seconds, notes) for every clip to record.
TARGETS = [
    ("vowel_a", 2.0, "steady /a/"),
    ("vowel_i", 2.0, "steady /i/"),
    ("fricative_s", 2.0, "steady /s/"),
    ("sentence", 3.0, "short sentence"),
]

if DO_RECORD:
    for label, seconds, notes in TARGETS:
        # record_clip raises RuntimeError when sounddevice is unavailable.
        fs, x = record_clip(seconds=seconds, fs=16000)
        x = peak_normalize(x)
        # The file name depends only on the label, so re-running this cell
        # overwrites the earlier recording of the same target.
        fname = f"student10B_{label}.wav"
        write_wav(REC_DIR/fname, x, fs)
        add_clip_to_manifest(fname, label=label, fs=fs, notes=notes)
        print("Recorded:", fname)
        play_audio(x, fs, label)
else:
    print("Skipping recording (set DO_RECORD=True).")


## 2) Import WAV files (fallback)

Drop 16-bit PCM WAV files (preferably mono) into:
`EE519_L10B_Project/recordings/`
and run the cell below.


In [None]:
# Register every WAV in REC_DIR that the manifest does not know about yet.
existing = {c["filename"] for c in manifest["clips"]}
wav_files = sorted(p.name for p in REC_DIR.glob("*.wav"))
added = 0
for wf in wav_files:
    if wf in existing:
        continue  # already tracked; leave its entry untouched
    # Only the sample rate is needed here; the samples are discarded.
    fs, _ = read_wav(REC_DIR/wf)
    add_clip_to_manifest(wf, label="imported", fs=fs, notes="auto-import")
    added += 1

print("WAV files:", wav_files)
print("Newly added:", added)
print("Total clips:", len(manifest["clips"]))


## 3) Create named selections (time ranges)

We store selections at:
`clip["selections"]["analysis_segments"][segment_name]`

Recommended segment names:
- `vowel_mid`
- `fricative_mid`
- `sentence_voiced`
- `sentence_unvoiced` (optional)


In [None]:
# Choose the clip to annotate: print the available clips, then load the one at
# CLIP_IDX. Later cells reuse CLIP_IDX, clip, fs and x.
list_clips()
CLIP_IDX = 3  # index into manifest["clips"]; must refer to an existing clip
clip = manifest["clips"][CLIP_IDX]
fs, x = read_wav(REC_DIR / clip["filename"])
# Normalise after loading so the playback and plot below use the scaled signal.
x = peak_normalize(x)

print("Selected:", CLIP_IDX, clip["label"], clip["filename"], "fs=", fs, "len_sec=", len(x)/fs)
play_audio(x, fs, clip["label"])


In [None]:
# Segment parameters (stored with selection)
# Window and hop are given in milliseconds and converted to sample counts
# using the fs of the clip loaded in the previous cell (int() truncates).
WIN_MS, HOP_MS = 25, 10
N = int(WIN_MS*1e-3*fs)
H = int(HOP_MS*1e-3*fs)

segment_name = "seg1"  # change to vowel_mid, fricative_mid, etc.
t0, t1 = 0.50, 1.20    # seconds

# Convert the time range to samples, then to frame indices.
s0, s1 = seconds_to_samples(t0, t1, fs, len(x))
f0, f1 = samples_to_frame_range(s0, s1, N, H, len(x))

# Store the selection under clip["selections"]["analysis_segments"][name],
# creating the nested dicts if they are missing. An existing selection with
# the same name is overwritten.
clip = manifest["clips"][CLIP_IDX]
clip.setdefault("selections", {}).setdefault("analysis_segments", {})
clip["selections"]["analysis_segments"][segment_name] = {
    "t0": float(t0), "t1": float(t1),
    "s0": int(s0), "s1": int(s1),
    "win_ms": float(WIN_MS), "hop_ms": float(HOP_MS),
    "N": int(N), "H": int(H),
    "frame_range": [int(f0), int(f1)]
}
# clip is the same dict object already held in the list, so this assignment
# is a no-op kept for explicitness.
manifest["clips"][CLIP_IDX] = clip
save_manifest(manifest)

print("Stored selection:", segment_name, "frames", (f0, f1))


### Quick-look waveform + selection (save to figures/)


In [None]:
# Plot the whole waveform with the selected time range shaded, then save it.
t = np.arange(len(x))/fs  # time axis in seconds
fig = plt.figure(figsize=(10,3))
plt.plot(t, x, linewidth=0.8)
plt.axvspan(t0, t1, alpha=0.2)  # shade the selection [t0, t1]
plt.title(f"Waveform + selection '{segment_name}': clip {CLIP_IDX} ({clip['label']})")
plt.xlabel("Time (s)"); plt.ylabel("Amplitude")
plt.tight_layout(); plt.show()
# Saving goes through the fig handle; characters in segment_name other than
# letters, digits and underscores are replaced with "_" to keep the file name safe.
save_fig(fig, f"L10B_sel_clip{CLIP_IDX}_{re.sub(r'[^a-zA-Z0-9_]+','_',segment_name)}.png")


## Wrap-up
**What you learned:** project+manifest workflow for L10B.  
**What’s next:** 10B.1 builds mel scale + filterbanks and shows how they tile frequency.


## Reflection
1) Which segments are most stationary (good for short-time analysis)? Why?  
2) Why do we store selections in the manifest instead of hardcoding time indices?  
3) What naming scheme helps you reuse selections across notebooks?
