# 🔧 Environment Validation Test

This notebook validates that your UV environment is properly configured for model testing.

## Expected Results:
- ✅ All imports should work without errors
- ✅ Torch should detect MPS (Apple Silicon) device
- ✅ All libraries should show correct versions
- ✅ Audio files should be accessible

In [None]:
# Cell 1: Python Environment Check
# Prints which interpreter the kernel is running and reports whether its
# prefix lies inside the expected model-lab virtual environment.
import sys
import os

print('üêç Python Environment Check')
print('=' * 50)
print(f'Python executable: {sys.executable}')
print(f'Python version: {sys.version}')
print(f'Python prefix: {sys.prefix}')
print()

# Check if we're in the right environment.
# The default is the path this notebook was written against; set the
# MODEL_LAB_VENV environment variable to validate a different checkout
# without editing this cell.
expected_path = os.environ.get(
    'MODEL_LAB_VENV',
    '/Users/pranay/Projects/speech_experiments/model-lab/.venv',
)

# Compare normalised paths and require a path-separator boundary, so that a
# sibling directory such as '.venv-old' is not mistaken for '.venv'.
prefix_norm = os.path.normpath(sys.prefix)
expected_norm = os.path.normpath(expected_path)
in_expected_env = (
    prefix_norm == expected_norm
    or prefix_norm.startswith(expected_norm + os.sep)
)

if in_expected_env:
    print('‚úÖ CORRECT: Using model-lab UV environment')
else:
    print('‚ùå WRONG: Not using model-lab environment')
    print(f'Expected: {expected_path}')
    print(f'Got: {sys.prefix}')

In [None]:
# Cell 2: Critical Library Imports
# Each library is imported on its own so that one missing package is reported
# without hiding the status of the others. On success the version (where the
# cell reads one) is printed; on ImportError the error text is printed.
print('üìö Critical Library Imports')
print('=' * 50)

# torch
try:
    import torch
except ImportError as err:
    print(f'‚ùå torch: {err}')
else:
    print(f'‚úÖ torch: {torch.__version__}')

# torchaudio
try:
    import torchaudio
except ImportError as err:
    print(f'‚ùå torchaudio: {err}')
else:
    print(f'‚úÖ torchaudio: {torchaudio.__version__}')

# liquid-audio (only the import itself is checked; no version is read)
try:
    from liquid_audio import LFM2AudioModel, LFM2AudioProcessor, ChatState
except ImportError as err:
    print(f'‚ùå liquid-audio: {err}')
else:
    print('‚úÖ liquid-audio: imports work')

# numpy
try:
    import numpy as np
except ImportError as err:
    print(f'‚ùå numpy: {err}')
else:
    print(f'‚úÖ numpy: {np.__version__}')

# librosa
try:
    import librosa
except ImportError as err:
    print(f'‚ùå librosa: {err}')
else:
    print(f'‚úÖ librosa: {librosa.__version__}')

# pandas
try:
    import pandas as pd
except ImportError as err:
    print(f'‚ùå pandas: {err}')
else:
    print(f'‚úÖ pandas: {pd.__version__}')

# matplotlib
try:
    import matplotlib
except ImportError as err:
    print(f'‚ùå matplotlib: {err}')
else:
    print(f'‚úÖ matplotlib: {matplotlib.__version__}')

In [None]:
# Cell 3: Device and Hardware Check
# Reports CUDA and MPS availability, picks the device name to use
# (MPS first, then CUDA, then CPU) and prints basic platform information.
print('üñ•Ô∏è  Device and Hardware Check')
print('=' * 50)

import torch

# Check CUDA
cuda_available = torch.cuda.is_available()
print(f'CUDA available: {cuda_available}')
if cuda_available:
    print(f'CUDA device: {torch.cuda.get_device_name(0)}')

# Check MPS (Apple Silicon)
# Look the backend up defensively: a torch build without torch.backends.mps
# should be reported as "not available" rather than raise AttributeError.
mps_backend = getattr(torch.backends, 'mps', None)
mps_available = bool(mps_backend is not None and mps_backend.is_available())
print(f'MPS available: {mps_available}')
if mps_available:
    print('‚úÖ MPS (Apple Silicon) acceleration available')
    device = 'mps'
elif cuda_available:
    # CUDA was detected above, so do not claim a CPU fallback.
    print('‚ö†Ô∏è  MPS not available, using CUDA')
    device = 'cuda'
else:
    print('‚ö†Ô∏è  MPS not available, using CPU')
    device = 'cpu'

# Check CPU
import platform
print(f'CPU: {platform.processor()}')
print(f'Machine: {platform.machine()}')
print(f'Platform: {platform.platform()}')

print(f'\nüéØ Using device: {device.upper()}')

In [None]:
# Cell 4: Test File System Access
# Prints the working directory and, for each data directory, how many
# matching files it holds plus the first five file names.
from pathlib import Path

print('üìÅ File System Access Check')
print('=' * 50)

# Check current directory
current_dir = Path.cwd()
print(f'Current directory: {current_dir}')


def report_data_dir(label, directory, pattern, kind):
    """Print a status line for one data directory and return its matching files.

    Args:
        label: Human-readable directory name used in the printed messages.
        directory: Path of the directory to inspect (relative paths are
            resolved against the current working directory).
        pattern: Glob pattern selecting the files to count, e.g. ``'*.wav'``.
        kind: Short file-type label used in the printed count, e.g. ``'WAV'``.

    Returns:
        The matching paths sorted by name, or an empty list when ``directory``
        is missing or is not a directory.
    """
    directory = Path(directory)
    # is_dir() rather than exists(): a regular file with this name is not a
    # usable data directory.
    if not directory.is_dir():
        print(f'‚ùå {label} directory not found')
        return []
    # Sort so the "first five" listing is the same on every run.
    matches = sorted(directory.glob(pattern))
    print(f'‚úÖ {label} directory exists: {len(matches)} {kind} files')
    for match in matches[:5]:
        print(f'   - {match.name}')
    return matches


# Check data directories
audio_dir = Path('data/audio')
text_dir = Path('data/text')

audio_files = report_data_dir('Audio', audio_dir, '*.wav', 'WAV')
text_files = report_data_dir('Text', text_dir, '*.txt', 'TXT')

In [None]:
# Cell 5: Test Audio Loading
# Loads the two reference clips with torchaudio and prints basic facts about
# each. A missing file or a load error is reported instead of raised.
import torchaudio
import torch
from pathlib import Path

print('üéµ Audio Loading Test')
print('=' * 50)

# Test loading canonical audio
audio_path = Path('data/audio/clean_speech_10s.wav')

if audio_path.exists():
    try:
        waveform, sr = torchaudio.load(str(audio_path))
        print(f'‚úÖ Successfully loaded: {audio_path.name}')
        print(f'   Shape: {waveform.shape}')
        print(f'   Sample rate: {sr} Hz')
        # Duration is the size of the second tensor dimension divided by the sample rate
        print(f'   Duration: {waveform.shape[1]/sr:.1f} seconds')
        print(f'   Data type: {waveform.dtype}')
        print(f'   Min/Max: {waveform.min():.3f} / {waveform.max():.3f}')
    except Exception as e:
        print(f'‚ùå Error loading audio: {e}')
else:
    print(f'‚ùå Audio file not found: {audio_path}')

# Test conversation audio
conv_path = Path('data/audio/conversation_2ppl_10s.wav')
if conv_path.exists():
    try:
        conv_waveform, conv_sr = torchaudio.load(str(conv_path))
        print(f'\n‚úÖ Successfully loaded: {conv_path.name}')
        print(f'   Shape: {conv_waveform.shape}')
        print(f'   Duration: {conv_waveform.shape[1]/conv_sr:.1f} seconds')
    except Exception as e:
        print(f'\n‚ùå Error loading conversation: {e}')
else:
    # Report the missing clip like the first one, instead of skipping silently.
    print(f'\n‚ùå Audio file not found: {conv_path}')

In [None]:
# Cell 6: Test LFM Model Loading (Basic)
# Builds a PreprocessorConfig, then an LFM2AudioProcessor that uses it, and
# prints the tokenizer length. Any failure is printed with its traceback
# rather than left to abort the cell.
import traceback

from liquid_audio import LFM2AudioProcessor
from liquid_audio.processor import PreprocessorConfig

print('üîß LFM Model Component Test')
print('=' * 50)

HF_REPO = 'LiquidAI/LFM2.5-Audio-1.5B'

try:
    # Settings handed unchanged to PreprocessorConfig as keyword arguments
    preprocessor_kwargs = dict(
        sample_rate=24000,
        features=128,
        normalize='per_feature',
        window_size=0.02,
        window_stride=0.01,
        window='hann',
        n_fft=512,
        log=True,
        frame_splicing=1,
        dither=1e-5,
        pad_to=16,
        pad_value=0,
    )
    audio_config = PreprocessorConfig(**preprocessor_kwargs)
    print('‚úÖ PreprocessorConfig created')

    # The processor receives the repository id as its text tokenizer path
    processor = LFM2AudioProcessor(
        text_tokenizer_path=HF_REPO,
        audio_processor_config=audio_config,
    )
    print('‚úÖ LFM2AudioProcessor created')
    print(f'   Vocabulary size: {len(processor.text_tokenizer):,} tokens')

    print(f'\nüéâ LFM components working correctly!')
    print(f'üìä Repository: {HF_REPO}')

except Exception as err:
    print(f'‚ùå LFM setup failed: {err}')
    traceback.print_exc()

In [None]:
# Cell 7: Test Simple Audio Processing
# Runs a handful of torchaudio operations on the reference clip (resample,
# mono mix-down, peak normalisation, spectrogram, mel spectrogram) and prints
# the resulting shapes. Each result is only displayed; later steps keep
# working on the originally loaded waveform.
import torchaudio
import torch
from pathlib import Path

print('üéõÔ∏è  Audio Processing Test')
print('=' * 50)

audio_path = Path('data/audio/clean_speech_10s.wav')

if audio_path.exists():
    try:
        # Load audio
        waveform, sr = torchaudio.load(str(audio_path))
        print(f'Original: {waveform.shape}, {sr} Hz')

        # Test resampling (only exercised when the clip is not already 24 kHz)
        if sr != 24000:
            resampler = torchaudio.transforms.Resample(sr, 24000)
            waveform_24k = resampler(waveform)
            print(f'‚úÖ Resampled to: {waveform_24k.shape}, 24000 Hz')

        # Test mono conversion (only exercised when the first dimension is > 1)
        if waveform.shape[0] > 1:
            waveform_mono = waveform.mean(dim=0, keepdim=True)
            print(f'‚úÖ Converted to mono: {waveform_mono.shape}')

        # Test normalization
        # Guard against an all-zero clip: dividing by a zero peak would fill
        # the result with NaNs, so such a clip is left unchanged.
        peak = waveform.abs().max()
        waveform_norm = waveform / peak if peak > 0 else waveform
        print(f'‚úÖ Normalized: range [{waveform_norm.min():.3f}, {waveform_norm.max():.3f}]')

        # Test spectrogram
        spectrogram_transform = torchaudio.transforms.Spectrogram()
        spectrogram = spectrogram_transform(waveform)
        print(f'‚úÖ Spectrogram: {spectrogram.shape}')

        # Test Mel spectrogram (uses the clip's own sample rate, not 24 kHz)
        mel_transform = torchaudio.transforms.MelSpectrogram(sample_rate=sr, n_mels=128)
        mel_spectrogram = mel_transform(waveform)
        print(f'‚úÖ Mel spectrogram: {mel_spectrogram.shape}')

        print('\nüéâ Audio processing pipeline working!')

    except Exception as e:
        print(f'‚ùå Audio processing failed: {e}')
        import traceback
        traceback.print_exc()
else:
    print(f'‚ùå Test audio not found: {audio_path}')

In [None]:
# Cell 8: Summary and Recommendations
# Prints a fixed closing summary. The text is static: this cell does not
# inspect the outcome of the earlier cells, so read their output to see
# whether each check actually passed.
summary_lines = [
    'üìã ENVIRONMENT VALIDATION SUMMARY',
    '=' * 50,
    '',
    '‚úÖ Checks completed:',
    '   ‚Ä¢ Python environment configuration',
    '   ‚Ä¢ Library import validation',
    '   ‚Ä¢ Hardware acceleration detection',
    '   ‚Ä¢ File system access',
    '   ‚Ä¢ Audio loading capability',
    '   ‚Ä¢ LFM component initialization',
    '   ‚Ä¢ Audio processing pipeline',
    '',
    'üéØ Next Steps:',
    '   1. Run full LFM model testing notebook',
    '   2. Test transcription capabilities',
    '   3. Run performance benchmarks',
    '   4. Compare with other models',
    '',
    'üöÄ Your environment is ready for systematic model testing!',
]
# One joined write produces the same lines as printing each one separately
print('\n'.join(summary_lines))