# In [None]:
# Phi-4 Audio Transcription Example
#
# This notebook demonstrates how to load the Phi-4 model and use it for audio transcription.

import os
import torch
import soundfile as sf
from IPython.display import Audio
from pathlib import Path

# Import our utility functions
from model_downloader import download_phi4_model
from audio_transcriber import transcribe_audio

# 1. Download the model (this may take some time on the first run).
# The three objects returned here are the ones handed to transcribe_audio()
# by the upload callback defined below.
model, processor, generation_config = download_phi4_model()

# 2. Display an audio upload widget
from IPython.display import display
import ipywidgets as widgets

# Options for a single-file picker limited to the listed audio extensions.
upload_options = {
    'accept': '.wav,.mp3,.flac,.ogg,.m4a',
    'multiple': False,
    'description': 'Upload Audio',
}
upload_button = widgets.FileUpload(**upload_options)

# Render the picker in the notebook output.
display(upload_button)

# 3. Function to handle the uploaded file
def process_uploaded_audio(change):
    """Play back and transcribe the file currently held by ``upload_button``.

    Intended as the observer callback for the widget's ``value`` trait. The
    upload is written to a temporary file in the working directory, shown in
    an audio player, transcribed with ``transcribe_audio`` and printed. The
    temporary file is removed afterwards, even if playback or transcription
    raises.

    Args:
        change: Change notification supplied by ``observe``. It is not
            inspected; the upload is read from ``upload_button.value``.

    Returns:
        None. The transcription is printed rather than returned.
    """
    uploads = upload_button.value
    if not uploads:
        return

    # The shape of ``value`` depends on the ipywidgets major version:
    #   7.x -> {filename: {'metadata': {'name': ...}, 'content': bytes}}
    #   8.x -> ({'name': ..., 'content': memoryview, ...},)
    # Take the first upload from whichever container we were given.
    if isinstance(uploads, dict):
        uploaded_file = next(iter(uploads.values()))
    else:
        uploaded_file = uploads[0]

    # 7.x nests the name under 'metadata'; 8.x keeps it at the top level.
    filename = uploaded_file.get('metadata', uploaded_file)['name']
    content = uploaded_file['content']

    # Use only the final path component so a name containing directory parts
    # cannot make us write outside the working directory.
    temp_path = f"temp_{Path(filename).name}"

    try:
        # Save the file temporarily
        with open(temp_path, 'wb') as f:
            f.write(content)

        # Display audio player
        print(f"Uploaded: {filename}")
        display(Audio(temp_path))

        # Transcribe the audio
        result = transcribe_audio(temp_path, model, processor, generation_config)

        # Display the transcription
        print("\nTranscription:")
        print("--------------")
        print(result)
    finally:
        # Clean up the temporary file on success and on failure alike. The
        # existence check covers the case where open() itself failed.
        if os.path.exists(temp_path):
            os.remove(temp_path)

# Attach the callback to the upload widget. names='value' restricts the
# observer to changes of the widget's 'value' trait, i.e. the uploaded file(s).
upload_button.observe(process_uploaded_audio, names='value')

# 4. Example with a sample audio file
# To test with a sample audio file instead of the upload widget, uncomment the
# lines below and point sample_path at your file:
# sample_path = "path/to/your/audio.mp3"
# display(Audio(sample_path))
# transcription = transcribe_audio(sample_path, model, processor, generation_config)
# print("\nTranscription of sample audio:")
# print("--------------")
# print(transcription)