# LFM-2.5-Audio Advanced Testing (Core)

**Purpose**: Core advanced testing of LFM-2.5-Audio model capabilities

**Model**: LFM2.5-Audio-1.5B from HuggingFace (LiquidAI/LFM2.5-Audio-1.5B)

**Capabilities**: Audio transcription, generation, interleaved processing

---

This notebook provides the core advanced testing framework.
Run it first to verify that the model works, then expand to the full evaluation.

In [None]:
# === CORE SETUP ===

import torch
import torchaudio
import numpy as np
import time
import json
from datetime import datetime
from pathlib import Path

# Import harness modules
import sys
sys.path.append('harness')

# Select the compute device once: the MPS backend when PyTorch reports it as
# available, otherwise the CPU. Both device lines below reuse this choice.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'

# Report the interpreter and library versions so a run can be reproduced.
print("=== LFM-2.5-Audio Advanced Testing ===")
print(f"Python: {sys.version.split()[0]}")
print(f"PyTorch: {torch.__version__}")
print(f"Torchaudio: {torchaudio.__version__}")
print(f"Device: {torch.device(device)}")
print(f"Using device: {device}")
print("âœ… Setup complete")

In [None]:
# === MODEL LOADING ===

from liquid_audio import LFM2AudioModel, LFM2AudioProcessor, ChatState

# Repository identifier handed to both from_pretrained() calls
HF_REPO = 'LiquidAI/LFM2.5-Audio-1.5B'

print(f"Loading model: {HF_REPO}")

# The timed region covers processor load, model load and the device transfer
load_start = time.time()

processor = LFM2AudioProcessor.from_pretrained(HF_REPO)
processor = processor.eval()

model = LFM2AudioModel.from_pretrained(HF_REPO)
model = model.eval()

# The model is only moved when a non-CPU device was selected during setup
if device != 'cpu':
    model = model.to(device)

load_time = time.time() - load_start

# Total number of elements across all parameter tensors of the model
total_params = sum(p.numel() for p in model.parameters())

print("âœ“ Model loaded successfully!")
print(f"  Load time: {load_time:.1f}s")
print(f"  Parameters: {total_params:,}")
print(f"  Device: {device}")

In [None]:
# === AUDIO LOADING ===

# Reference clip used for the smoke test
test_audio_path = Path('data/audio/GROUND_TRUTH/clean_speech_10s.wav')

print(f"Loading audio: {test_audio_path}")

try:
    waveform, sr = torchaudio.load(str(test_audio_path))
except FileNotFoundError:
    # Explain what is missing, then re-raise so the notebook stops here
    print(f"âœ— Audio file not found: {test_audio_path}")
    print("Create canonical test audio first")
    raise
else:
    # Average the channels down to a single one when the clip is multi-channel
    num_channels = waveform.shape[0]
    if num_channels > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Bring the clip to 24 kHz when it was stored at a different rate
    if sr != 24000:
        resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=24000)
        waveform = resampler(waveform)
        sr = 24000

    duration_s = waveform.shape[1] / sr
    print("âœ“ Audio loaded successfully")
    print(f"  Shape: {waveform.shape}")
    print(f"  Sample rate: {sr}Hz")
    print(f"  Duration: {duration_s:.1f}s")

In [None]:
# === CORE TRANSCRIPTION ===

def transcribe_audio(model, processor, waveform, sr, *, max_new_tokens=512):
    """Transcribe ``waveform`` to text with the LFM audio model.

    Builds a three-turn chat (a system turn with the prompt "Perform ASR.",
    a user turn holding the audio, and an open assistant turn), iterates over
    ``model.generate_sequential`` and decodes the collected tokens.

    Any exception raised along the way is caught and reported through the
    returned metadata instead of propagating, so callers always get a tuple.

    Args:
        model: Object providing ``generate_sequential(**chat, max_new_tokens=...)``.
        processor: Passed to ``ChatState``; its ``text.decode`` turns the
            collected tokens into a string.
        waveform: Audio handed unchanged to ``chat.add_audio``.
        sr: Sample rate of ``waveform``, handed to ``chat.add_audio``.
        max_new_tokens: Generation budget forwarded to
            ``generate_sequential``. Keyword-only; defaults to 512.

    Returns:
        A ``(text, latency, metadata)`` tuple. ``text`` is the stripped
        transcription ("" on failure or when no text token was produced),
        ``latency`` is the elapsed time in seconds, and ``metadata`` is
        ``{'success': True}`` or
        ``{'success': False, 'error': <message>, 'error_type': <class name>}``.
    """
    # perf_counter is monotonic, so the interval cannot go negative or jump
    # when the system clock is adjusted mid-run.
    start_time = time.perf_counter()

    try:
        # Create chat state
        chat = ChatState(processor)

        # System turn: instruct the model to transcribe
        chat.new_turn("system")
        chat.add_text("Perform ASR.")
        chat.end_turn()

        # User turn: the audio to transcribe
        chat.new_turn("user")
        chat.add_audio(waveform, sr)
        chat.end_turn()

        # Leave the assistant turn open so generation continues from it
        chat.new_turn("assistant")

        # Keep only single-element tokens; these are treated as text tokens
        text_tokens = [
            token
            for token in model.generate_sequential(**chat, max_new_tokens=max_new_tokens)
            if token.numel() == 1
        ]

        # Decode text
        if text_tokens:
            text_tensor = torch.stack(text_tokens, 1)
            text = processor.text.decode(text_tensor[0])
        else:
            text = ""

        latency = time.perf_counter() - start_time

        return text.strip(), latency, {'success': True}

    except Exception as e:
        # Deliberate best-effort boundary: report the failure to the caller
        # rather than aborting the notebook cell.
        latency = time.perf_counter() - start_time
        return "", latency, {
            'success': False,
            'error': str(e),
            # str(e) alone can be ambiguous (e.g. KeyError), so keep the type
            'error_type': type(e).__name__,
        }

print("âœ“ Transcription function defined")

In [None]:
# === CORE TESTING ===

def _char_error_rate(reference, hypothesis):
    """Return the character error rate of ``hypothesis`` against ``reference``.

    The rate is the Levenshtein edit distance (insertions, deletions and
    substitutions) divided by ``len(reference)``. Unlike a position-by-position
    comparison, missing or extra characters count as errors, so an empty
    hypothesis scores 1.0 rather than 0.0. ``reference`` must be non-empty.
    """
    # Row-by-row dynamic programme; only the previous row is kept in memory.
    previous_row = list(range(len(hypothesis) + 1))
    for ref_index, ref_char in enumerate(reference, start=1):
        current_row = [ref_index]
        for hyp_index, hyp_char in enumerate(hypothesis, start=1):
            current_row.append(min(
                previous_row[hyp_index] + 1,                         # deletion
                current_row[hyp_index - 1] + 1,                      # insertion
                previous_row[hyp_index - 1] + (ref_char != hyp_char),  # substitution
            ))
        previous_row = current_row
    return previous_row[-1] / len(reference)


print("=== CORE TRANSCRIPTION TESTING ===")

# Run one transcription; failures come back through `metadata`, not exceptions
text, latency, metadata = transcribe_audio(model, processor, waveform, sr)

print(f"Latency: {latency*1000:.1f}ms")
print(f"Text length: {len(text)} characters")
print(f"Text preview: {text[:100]}..." if len(text) > 100 else f"Text: {text}")

if metadata['success']:
    print("âœ“ Transcription successful")
else:
    print(f"âœ— Transcription failed: {metadata.get('error', 'Unknown')}")

# Load ground truth for comparison
ground_truth_path = Path('data/text/GROUND_TRUTH/clean_speech_10s.txt')

if ground_truth_path.exists():
    # Explicit encoding so the result does not depend on the platform default
    ground_truth = ground_truth_path.read_text(encoding='utf-8').strip()

    print(f"\n=== GROUND TRUTH COMPARISON ===")
    print(f"Expected: {ground_truth[:100]}...")
    print(f"Got:      {text[:100]}...")

    # Case-insensitive character error rate; skipped for an empty reference
    # because the rate is normalised by the reference length.
    if ground_truth:
        cer = _char_error_rate(ground_truth.lower(), text.lower())
        print(f"Approximate CER: {cer:.3f}")

# Only claim success when the transcription actually succeeded
if metadata['success']:
    print(f"\nðŸŽ‰ Core testing completed!")
    print(f"âœ… Model loaded and tested successfully")
    print(f"âœ… Audio processing working")
    print(f"âœ… Transcription capabilities verified")
else:
    print("\nCore testing completed with failures: transcription did not succeed")