# Whisper Smoke Test

**Purpose**: Verify Whisper installation and basic functionality

**Test**: Transcribe the first 5 seconds of the canonical test recording

---

This is the first notebook to run for Whisper testing.
It validates that the model loads and performs basic ASR.

In [None]:
# === SETUP ===

import sys
import os
from pathlib import Path

# Add harness to path.
# The directory is resolved relative to the current working directory
# (two levels up, then 'harness'), so the result depends on where the
# kernel was started.
# NOTE(review): this puts the 'harness' directory itself at the front of
# sys.path, while the imports below use `from harness import ...` --
# confirm the package resolves from this entry as intended.
harness_path = Path.cwd().parent.parent.joinpath('harness')
sys.path[:0] = [str(harness_path)]

import torch
import whisper
import numpy as np

from harness import AudioLoader, ModelRegistry, PerformanceTimer
from harness.metrics_asr import ASRMetrics

# Pick the compute device once: the MPS backend when PyTorch reports it
# as available, otherwise the CPU. `device` is reused by later cells.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'

# Environment banner, one item per output line.
print(
    "=== Whisper Smoke Test ===",
    f"PyTorch: {torch.__version__}",
    f"Device: {torch.device(device)}",
    f"Using device: {device}",
    sep="\n",
)

In [None]:
# === LOAD MODEL ===

import yaml

# Load config
# The file is looked up one directory above the current working directory.
# It must provide at least a 'model_name' entry, which is read below.
config_path = Path.cwd().parent / 'config.yaml'
# Explicit encoding so the file is decoded the same way on every platform
# instead of depending on the locale default.
with open(config_path, 'r', encoding='utf-8') as f:
    # This statement has to sit inside the `with` body: the file handle is
    # only open here, and an unindented body is a syntax error.
    config = yaml.safe_load(f)

print(f"Loading Whisper model: {config['model_name']}")

# Load model using registry
# The registry result is indexed like a mapping; the model object itself is
# stored under the 'model' key.
model_wrapper = ModelRegistry.load_model('whisper', config, device)
model = model_wrapper['model']

print(f"✓ Whisper loaded successfully")
print(f"  Device: {device}")
print(f"  Model: {config['model_name']}")

In [None]:
# === LOAD TEST AUDIO ===

# Use canonical test audio, located three directory levels above the
# current working directory under data/audio/PRIMARY.
test_audio_path = Path.cwd().parent.parent.parent.joinpath(
    'data', 'audio', 'PRIMARY', 'llm_recording_pranay.m4a'
)

print(f"Loading audio: {test_audio_path}")

try:
    # Whisper requires 16kHz, so the loader is configured for that rate.
    loader = AudioLoader(target_sample_rate=16000)
    audio, sr, metadata = loader.load_audio(test_audio_path, 'whisper')

    # Use first 5 seconds for smoke test. A shorter recording is kept whole
    # by the slice (assumes samples run along the first axis -- TODO confirm).
    clip_end = 5 * sr
    audio_5s = audio[:clip_end]
    clip_seconds = len(audio_5s) / sr

    print(f"✓ Audio loaded successfully")
    print(f"  Duration: {clip_seconds:.1f}s")
    print(f"  Sample rate: {sr}Hz")

except FileNotFoundError:
    # Report the missing path, then re-raise so the cell still fails.
    print(f"✗ Audio file not found: {test_audio_path}")
    raise

In [None]:
# === RUN TRANSCRIPTION ===

# Time a single transcription of the 5-second clip, with the language
# fixed to English.
timer = PerformanceTimer()

with timer.time_operation("whisper_transcribe"):
    result = model.transcribe(audio_5s, language='en')

# Transcribed text with surrounding whitespace removed, plus the latency
# reported by the timer.
text = result['text'].strip()
latency_ms = timer.elapsed_time_ms

print(
    f"=== SMOKE TEST RESULTS ===",
    f"Latency: {latency_ms:.1f}ms",
    f"Text length: {len(text)} characters",
    f"Text: {text}",
    sep="\n",
)

# Validate: the only pass criterion is a non-empty transcription.
if not text:
    print("\n❌ SMOKE TEST FAILED")
    print("✗ No transcription generated")
else:
    print("\n✅ SMOKE TEST PASSED")
    print("✓ Whisper is working correctly")

## ✅ Smoke Test Complete

If you see "SMOKE TEST PASSED" above, Whisper loaded and produced a non-empty transcription. This check does not measure transcription accuracy.

### Next Steps:
1. Run `10_asr.ipynb` for full ASR evaluation
2. Compare results with LFM2.5-Audio in `compare/00_scorecard.ipynb`