# Voice Input Pipeline (PI1–PI5)

This notebook implements the microphone recording pipeline requirements with explicit sections:

- **PI1 (Basic):** Capture audio and save in a standard format (`.wav`)
- **PI2 (Basic):** Start/stop recording UI
- **PI3 (Expected):** Real-time audio level monitoring + quality feedback
- **PI4 (Expected):** Save metadata (timestamp, duration, sample rate, path)
- **PI5 (Advanced):** Automated preprocessing (noise reduction + normalization)

> If a package is missing, install it in your environment (examples in next cell).

In [None]:
# Optional installs (run only if needed):
# %pip install sounddevice scipy ipywidgets pandas matplotlib
# %pip install noisereduce

from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
import sounddevice as sd
from scipy.io.wavfile import write as wav_write
from scipy.io.wavfile import read as wav_read
from scipy.signal import butter, lfilter

import ipywidgets as widgets
from IPython.display import display, clear_output

# Capture settings used as defaults by the recording helpers.
SAMPLE_RATE = 16_000
CHANNELS = 1

# Output locations (relative to the current working directory).
RECORDINGS_DIR = Path("data/raw/recordings")
PROCESSED_DIR = Path("data/processed/recordings")
METADATA_CSV = Path("data/processed/recordings_metadata.csv")

# Create both output folders up front; existing folders are left untouched.
for _output_dir in (RECORDINGS_DIR, PROCESSED_DIR):
    _output_dir.mkdir(parents=True, exist_ok=True)

# Mutable notebook-wide state shared between cells.
state = dict(
    is_recording=False,
    stream=None,
    frames=[],
    started_at=None,
    latest_raw_path=None,
    latest_processed_path=None,
    latest_duration_sec=0.0,
    latest_level=0.0,
)

print("Setup complete.")

## PI1 (Basic): Capture audio from microphone and save in standard format

In [None]:
def save_wav(audio_float32: np.ndarray, sample_rate: int, out_path: Path) -> Path:
    """Write floating-point audio to ``out_path`` as an int16 WAV file.

    Samples are clipped to [-1.0, 1.0], scaled by 32767 and converted to
    ``int16`` (truncating toward zero) before being written.

    Args:
        audio_float32: Audio samples, nominally in the range [-1.0, 1.0].
        sample_rate: Sample rate in Hz written to the WAV header.
        out_path: Destination file; missing parent directories are created.

    Returns:
        ``out_path``, exactly as it was passed in.
    """
    # NaN survives np.clip and has no defined int16 value, so map it to
    # silence first; infinities saturate to full scale like any other
    # out-of-range sample.
    samples = np.nan_to_num(
        np.asarray(audio_float32), nan=0.0, posinf=1.0, neginf=-1.0
    )
    samples = np.clip(samples, -1.0, 1.0)
    pcm16 = (samples * 32767).astype(np.int16)

    # Writing into a folder that does not exist yet would otherwise fail.
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    wav_write(str(out_path), sample_rate, pcm16)
    return out_path


def record_fixed_duration(seconds: float = 3.0, sample_rate: int = SAMPLE_RATE, channels: int = CHANNELS) -> Path:
    """Record from the default input device for a fixed time and save a WAV.

    The capture is written to ``RECORDINGS_DIR`` under a timestamped name,
    and ``state["latest_raw_path"]`` / ``state["latest_duration_sec"]`` are
    updated to describe it.

    Args:
        seconds: Requested recording length in seconds; must be positive and
            finite, and long enough to yield at least one sample.
        sample_rate: Capture sample rate in Hz; must be positive.
        channels: Number of input channels. Mono captures are flattened to a
            1-D array before saving.

    Returns:
        Path of the saved WAV file.

    Raises:
        ValueError: If ``seconds`` or ``sample_rate`` cannot produce at least
            one audio frame.
    """
    # Reject NaN, infinite and non-positive durations before touching the
    # audio device, so the caller gets a clear error instead of a failure
    # from inside the audio backend.
    if not (0 < seconds < float("inf")) or sample_rate <= 0:
        raise ValueError(
            f"seconds and sample_rate must be positive and finite "
            f"(got seconds={seconds!r}, sample_rate={sample_rate!r})"
        )
    num_frames = int(seconds * sample_rate)
    if num_frames < 1:
        raise ValueError(
            f"seconds={seconds!r} is too short to capture a single frame "
            f"at {sample_rate} Hz"
        )

    print(f"Recording for {seconds:.1f}s...")
    recording = sd.rec(num_frames, samplerate=sample_rate, channels=channels, dtype="float32")
    sd.wait()  # wait for the capture to finish before reading the buffer
    if channels == 1:
        # Drop the channel axis so mono audio is saved as a 1-D signal.
        recording = recording[:, 0]

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = RECORDINGS_DIR / f"mic_capture_{timestamp}.wav"
    save_wav(recording, sample_rate, out_path)

    state["latest_raw_path"] = str(out_path)
    # Store the duration actually captured: the frame count is truncated to
    # an integer, so it can be slightly shorter than the requested time.
    state["latest_duration_sec"] = float(num_frames / sample_rate)

    print(f"Saved: {out_path}")
    return out_path

# Example:
# raw_path = record_fixed_duration(seconds=3.0)