# Whisper Model Debug Notebook

This notebook allows direct testing of the BeautyAI transcription services to diagnose voice recognition issues.

In [None]:
# Import required libraries
import sys
import os

# Make the BeautyAI backend sources importable from this notebook.
# The membership check keeps the cell idempotent: re-running it must not
# append the same directory to sys.path again.
BACKEND_SRC = '/home/lumi/beautyai/backend/src'
if BACKEND_SRC not in sys.path:
    sys.path.append(BACKEND_SRC)

import json
import time
import logging
from pathlib import Path
import IPython.display as ipd
import numpy as np

# Configure logging
# Set the root logger to INFO and create a logger named after this module
# (`__name__`) for the notebook's own messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Confirmation that every import in this cell succeeded.
print("✅ Libraries imported successfully")

In [None]:
# Import BeautyAI transcription services
from beautyai_inference.services.voice.transcription.transcription_factory import create_transcription_service
from beautyai_inference.services.voice.transcription.faster_whisper_service import FasterWhisperTranscriptionService
from beautyai_inference.services.voice.transcription.transformers_whisper_service import TransformersWhisperService
from beautyai_inference.config.voice_config_loader import get_voice_config

print("✅ BeautyAI transcription services imported successfully")

In [None]:
# Load the voice configuration and show its summary as indented JSON
voice_config = get_voice_config()
config_summary = voice_config.get_config_summary()

print("🔧 Voice Configuration Summary:")
summary_json = json.dumps(config_summary, indent=2)
print(summary_json)

# Let the factory build the transcription service and report its class name
transcription_service = create_transcription_service()
service_type = type(transcription_service).__name__
print(f"\n🎯 Active Transcription Service: {service_type}")

# Model details are optional: only services that expose `get_model_info`
# are asked for them
supports_model_info = hasattr(transcription_service, 'get_model_info')
if supports_model_info:
    model_info = transcription_service.get_model_info()
    print(f"📊 Model Info: {model_info}")

In [None]:
# File upload widget
from ipywidgets import FileUpload, VBox, HBox, Button, Output, Dropdown, HTML
import ipywidgets as widgets

# Create upload widget
upload_widget = FileUpload(
    accept='.wav,.mp3,.webm,.pcm,.ogg,.m4a',
    multiple=False,
    description='Choose audio file:'
)

# Language selection
language_dropdown = Dropdown(
    options=[('Arabic', 'ar'), ('English', 'en'), ('Auto-detect', 'auto')],
    value='ar',
    description='Language:'
)

# Service selection
# `service_results` maps a service name to a dict with a 'loaded' flag and the
# 'service' object itself. No earlier cell of this notebook defines it, so
# instead of failing with NameError fall back to the single service that the
# configuration cell created through the factory.
if 'service_results' not in globals():
    service_results = {
        type(transcription_service).__name__: {
            'loaded': transcription_service is not None,
            'service': transcription_service,
        }
    }

# Only offer services that report themselves as loaded; a missing 'loaded'
# key is treated as "not loaded" rather than raising KeyError.
available_services = [
    (name, name)
    for name, result in service_results.items()
    if result.get('loaded')
]
if not available_services:
    # Sentinel entry; the transcription callback checks for the value 'none'.
    available_services = [('No services loaded', 'none')]

service_dropdown = Dropdown(
    options=available_services,
    description='Service:'
)

# Test button
test_button = Button(
    description='Test Transcription',
    button_style='primary',
    icon='microphone'
)

# Output widget
output_widget = Output()

# Layout
controls = VBox([
    HTML("<h3>🎤 Audio File Transcription Test</h3>"),
    upload_widget,
    HBox([language_dropdown, service_dropdown]),
    test_button,
    output_widget
])

display(controls)

In [None]:
# Test transcription function
# Maps a lower-cased file extension to the format name handed to the service.
_AUDIO_FORMATS = {
    '.wav': 'wav',
    '.mp3': 'mp3',
    '.webm': 'webm',
    '.pcm': 'pcm',
    '.ogg': 'ogg',
    '.m4a': 'm4a',
}


def _first_uploaded_file(upload_value):
    """Return ``(name, content_bytes)`` for the first uploaded file, or ``None``.

    ``FileUpload.value`` changed shape between ipywidgets releases:

    * 7.x: a dict ``{filename: {'metadata': {...}, 'content': bytes}}``
    * 8.x: a tuple of entries with ``'name'`` and ``'content'`` keys, where
      ``content`` is a ``memoryview``

    Both layouts are accepted and the content is always returned as ``bytes``.
    """
    if not upload_value:
        return None
    if isinstance(upload_value, dict):
        file_name, file_info = next(iter(upload_value.items()))
    else:
        file_info = upload_value[0]
        file_name = file_info['name']
    return file_name, bytes(file_info['content'])


def test_transcription(button):
    """Button callback: transcribe the uploaded file and print diagnostics.

    Reads the uploaded file, the selected service and the selected language
    from the notebook widgets, calls ``transcribe_audio_bytes`` on the service
    and prints the transcript with timing statistics into ``output_widget``.

    Args:
        button: The clicked button (supplied by ``on_click``; not used).
    """
    with output_widget:
        output_widget.clear_output()

        uploaded = _first_uploaded_file(upload_widget.value)
        if uploaded is None:
            print("❌ Please upload an audio file first")
            return
        file_name, audio_bytes = uploaded

        selected_service_name = service_dropdown.value
        if selected_service_name == 'none':
            print("❌ No transcription services available")
            return

        # NOTE(review): 'auto' is passed to the service verbatim; confirm that
        # the transcription services interpret it as auto-detection.
        selected_language = language_dropdown.value

        print("🔄 Testing transcription with:")
        print(f"   Service: {selected_service_name}")
        print(f"   Language: {selected_language}")
        print(f"   File: {file_name}")

        # Get the service; an unknown name ends up in the same "not available"
        # branch as an entry whose service is empty, instead of a KeyError.
        service = service_results.get(selected_service_name, {}).get('service')
        if not service:
            print("❌ Service not available")
            return

        print(f"📄 File size: {len(audio_bytes)} bytes")

        # Determine audio format from the file extension
        audio_format = _AUDIO_FORMATS.get(Path(file_name).suffix.lower(), 'unknown')
        print(f"🎵 Detected format: {audio_format}")

        # Test transcription. The broad except is deliberate: this is a debug
        # tool and every failure should be shown with its traceback.
        try:
            # perf_counter is monotonic and high-resolution, unlike time.time()
            start_time = time.perf_counter()

            transcript = service.transcribe_audio_bytes(
                audio_bytes=audio_bytes,
                audio_format=audio_format,
                language=selected_language
            )

            processing_time = time.perf_counter() - start_time

            print(f"\n⏱️  Processing time: {processing_time:.2f} seconds")
            print("📝 Transcript:")
            print(f"   '{transcript}'")

            if transcript:
                print("✅ Transcription successful!")
                print("📊 Stats:")
                print(f"   - Length: {len(transcript)} characters")
                print(f"   - Words: {len(transcript.split())}")
                # Skip the throughput line when no time elapsed, so a
                # successful run is never reported as failed by a
                # ZeroDivisionError.
                if processing_time > 0:
                    print(f"   - Speed: {len(audio_bytes) / 1024 / processing_time:.1f} KB/s")
            else:
                print("⚠️  Empty transcript returned")

        except Exception as e:
            print(f"❌ Transcription failed: {str(e)}")
            import traceback
            print("📋 Full error:")
            traceback.print_exc()

# Connect button to function
# NOTE(review): every execution of this cell registers one more click handler
# on the same `test_button` object, so re-running it without re-creating the
# widgets presumably triggers several transcriptions per click — confirm and
# re-run the widget cell first when iterating on the callback.
test_button.on_click(test_transcription)

print("🎯 Upload an audio file and click 'Test Transcription' to test the whisper models")