# LFM-2.5-Audio Working Test

A simple working test of the LFM-2.5-Audio model for transcription and audio processing.

In [None]:
# Cell 1: Setup and imports
import sys
import time
from pathlib import Path

import torch
import torchaudio

sys.path.append(str(Path.cwd().parent / "harness"))

from liquid_audio import ChatState, LFM2AudioModel, LFM2AudioProcessor
from liquid_audio.processor import PreprocessorConfig

# Setup device: prefer a CUDA GPU, then Apple-Silicon MPS, and fall back to
# the CPU so the notebook still runs on machines without an accelerator.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using device: {device.upper()}")

# Model info: identifier passed to the tokenizer and model loaders below.
HF_REPO = "LiquidAI/LFM2.5-Audio-1.5B"
print(f"Model: {HF_REPO}")
print("‚úÖ Setup complete")

In [None]:
# Cell 2: Load model components
print("Loading LFM model components...")

# Feature-extraction settings handed to the audio processor.
# NOTE(review): confirm these values (sample_rate in particular) match the
# preprocessing configuration the checkpoint was trained with.
mel_config = PreprocessorConfig(
    sample_rate=24000,
    features=128,
    normalize="per_feature",
    window_size=0.02,
    window_stride=0.01,
    window="hann",
    n_fft=512,
    log=True,
    frame_splicing=1,
    dither=1e-5,
    pad_to=16,
    pad_value=0,
)

# Build the processor from the repo's text tokenizer plus the config above.
processor = LFM2AudioProcessor(
    text_tokenizer_path=HF_REPO,
    audio_processor_config=mel_config,
)
print("‚úÖ Audio processor ready")

# Fetch the pretrained weights onto the selected device and call eval().
model = LFM2AudioModel.from_pretrained(HF_REPO, device=device)
model.eval()
print("‚úÖ Model loaded successfully")

# Conversation state object built around the processor.
chat = ChatState(processor)
print("‚úÖ Chat state initialized")

# Summary figures: total parameter element count and tokenizer length.
param_count = sum(weights.numel() for weights in model.parameters())
print(f"Model parameters: {param_count:,}")
vocab_size = len(processor.text_tokenizer)
print(f"Vocabulary size: {vocab_size:,} tokens")

In [None]:
# Cell 3: Load test audio
# Sample rate (Hz) this notebook resamples every clip to before handing it on.
TARGET_SAMPLE_RATE = 24000

audio_path = Path("data/audio/clean_speech_10s.wav")

# Fail fast when the clip is missing. Merely printing a message would leave
# `waveform` undefined (or stale from an earlier run of this cell), so the
# following cells would either crash with a NameError or silently process old
# audio. The resolved path makes working-directory mistakes easy to spot.
if not audio_path.exists():
    raise FileNotFoundError(f"Audio file not found: {audio_path.resolve()}")

# Load audio; shape[0] is treated as channels and shape[1] as samples below.
waveform, sr = torchaudio.load(str(audio_path))
print(f"‚úÖ Audio loaded: {waveform.shape}, sample rate: {sr}")
print(f"   Duration: {waveform.shape[1] / sr:.1f}s")

# Convert to mono if needed by averaging the channels.
if waveform.shape[0] > 1:
    waveform = waveform.mean(dim=0, keepdim=True)
    print("‚úÖ Converted to mono")

# Resample to 24kHz if needed (LFM expects 24kHz)
if sr != TARGET_SAMPLE_RATE:
    resampler = torchaudio.transforms.Resample(sr, TARGET_SAMPLE_RATE)
    waveform = resampler(waveform)
    print("‚úÖ Resampled to 24kHz")

print(f"Final audio shape: {waveform.shape}")

In [None]:
# Cell 4: Prepare audio for LFM
# LFM expects audio in a specific format with mel spectrogram preprocessing


def prepare_audio_for_lfm(waveform, processor):
    """Peak-normalize a waveform so its largest absolute sample becomes 1.0.

    Args:
        waveform: Audio samples as a ``torch.Tensor``.
        processor: Currently unused; kept so existing call sites keep working.

    Returns:
        The waveform divided by its peak absolute value. Empty or all-zero
        (silent) input is returned unchanged, because dividing by a zero peak
        would fill the result with NaNs and ``max()`` of an empty tensor
        raises.
    """
    # Nothing to scale, and max() is undefined on an empty tensor.
    if waveform.numel() == 0:
        return waveform

    peak = waveform.abs().max()

    # Silent clip: skip the division instead of producing 0 / 0 = NaN.
    if peak == 0:
        return waveform

    return waveform / peak


# Run the preparation helper on the waveform loaded earlier and report the
# resulting tensor shape.
processed_audio = prepare_audio_for_lfm(waveform=waveform, processor=processor)
print("‚úÖ Audio prepared for LFM processing")
print(f"Processed audio shape: {processed_audio.shape}")

In [None]:
# Cell 5: Simple transcription test
print("Testing LFM transcription...")
print("Note: This is a basic test - full implementation requires understanding LFM API")

# Open a user turn on the chat state; no audio or text is attached yet
# because the input format expected by the liquid-audio library is still
# to be worked out.
chat.new_turn("user")

print("üîß LFM system loaded successfully")
print("üìù Ready for transcription testing")
print("‚ö†Ô∏è  Note: Full transcription requires understanding specific LFM API format")

# Sanity check: list the model's top-level submodules by name and class.
top_level_modules = model._modules
print(f"Model has {len(top_level_modules)} main components")
for child_name, child in top_level_modules.items():
    print(f"  - {child_name}: {type(child).__name__}")

In [None]:
# Cell 6: Results and timing

# Time the placeholder step below. No model inference runs yet, so the
# reported figure only covers building a dummy token tensor and moving it to
# the device. perf_counter() is used because it is a monotonic,
# high-resolution clock intended for measuring short intervals.
start_time = time.perf_counter()

with torch.no_grad():
    # Placeholder input: random token ids drawn from the text vocabulary.
    # It is not fed to the model because the exact input format is not yet
    # known; actual transcription requires a properly formatted input.
    dummy_input = torch.randint(0, len(processor.text_tokenizer), (1, 10)).to(device)

end_time = time.perf_counter()

print(f"‚è±Ô∏è  Setup time: {(end_time - start_time) * 1000:.1f}ms")
print("‚úÖ LFM system is ready for systematic testing")
print()
print("üéØ Next steps:")
print("   1. Understand exact LFM input/output format")
print("   2. Implement proper transcription pipeline")
print("   3. Test with canonical audio files")
print("   4. Compare with other models")