# STEP 1: Install Whisper and Dependencies

In [None]:
# Install the openai-whisper package with pip (-q reduces pip's console output).
!pip install -q openai-whisper
# Install the ffmpeg system package with apt-get (-qq minimises output, -y
# answers prompts automatically). Presumably needed for audio decoding — confirm.
!apt-get -qq install -y ffmpeg
# NOTE: this message is printed without checking whether the two commands
# above actually succeeded.
print("✅ Installation complete!")

# STEP 2: Import Libraries

In [None]:
import whisper
import os
from google.colab import files
from IPython.display import Audio, display

# STEP 3: Upload Audio File

In [None]:
def upload_audio():
    """Prompt the user to upload an audio file and return its filename.

    Opens the Colab upload widget via ``files.upload()`` and reads the
    filenames from the keys of the mapping it returns.

    Returns:
        The name of the first uploaded file.

    Raises:
        RuntimeError: If the upload finished without any file (for example
            because the dialog was dismissed).
    """
    print("📁 Click 'Choose Files' to upload your audio file...")
    uploaded = files.upload()

    # Fail with an actionable message instead of the opaque IndexError that
    # indexing an empty key list would produce.
    if not uploaded:
        raise RuntimeError(
            "No file was uploaded. Re-run this cell and choose an audio file."
        )

    # Only a single file is processed downstream: take the first key.
    filename = next(iter(uploaded))
    if len(uploaded) > 1:
        # Make the previously silent "extra files are ignored" behavior visible.
        print(f"⚠️ {len(uploaded)} files uploaded; only the first one will be used.")

    print(f"✅ Uploaded: {filename}")
    return filename

# STEP 4: Transcribe Audio

In [None]:
def transcribe_audio(audio_file, model_size="base", language=None):
    """
    Load a Whisper model and run it on an audio file.

    Parameters:
    - audio_file: path to audio file, handed to ``model.transcribe``
    - model_size: model name handed to ``whisper.load_model``
      ('tiny', 'base', 'small', 'medium', 'large')
    - language: e.g., 'en', 'es', 'fr'; any falsy value (None, '') means the
      ``language`` argument is not passed at all (None for auto-detect)

    Returns whatever ``model.transcribe`` returns, unmodified.
    """
    print(f"🤖 Loading {model_size} model...")
    whisper_model = whisper.load_model(model_size)

    # Forward `language` only when the caller supplied a truthy value;
    # otherwise call transcribe with the audio path alone.
    extra_options = {"language": language} if language else {}

    print("🎙️ Transcribing audio...")
    return whisper_model.transcribe(audio_file, **extra_options)

# STEP 5: Display and Save Results

In [None]:
def save_transcription(result, output_file="transcription.txt"):
    """Write a transcription report to a file, download it, and return the text.

    Parameters:
    - result: mapping with a "text" entry (the full transcript), a "segments"
      entry (iterable of mappings with "start", "end" and "text"), and an
      optional "language" entry
    - output_file: path of the UTF-8 text file to write

    The report holds the full transcript, then one timestamped line per
    segment, then the detected language when present. After writing, the
    file is passed to ``files.download``.

    Returns the full transcript (``result["text"]``).
    """
    transcript = result["text"]
    rule = "=" * 50

    # Two banner-headed sections: the full text, then the segment list.
    pieces = [
        rule + "\n",
        "FULL TRANSCRIPTION\n",
        rule + "\n\n",
        transcript + "\n\n",
        rule + "\n",
        "TIMESTAMPED SEGMENTS\n",
        rule + "\n\n",
    ]

    # One "[start - end] text" line per segment, times shown to 2 decimals.
    pieces.extend(
        f"[{seg['start']:.2f}s - {seg['end']:.2f}s] {seg['text'].strip()}\n"
        for seg in result["segments"]
    )

    # The language footer is optional.
    if "language" in result:
        pieces.append(f"\n\nDetected Language: {result['language']}\n")

    # Assemble the report before opening the file, then write it in one call.
    report = "".join(pieces)
    with open(output_file, "w", encoding="utf-8") as handle:
        handle.write(report)

    print(f"✅ Transcription saved to {output_file}")

    # Hand the written file to the Colab download helper.
    files.download(output_file)

    return transcript

# STEP 6: Run the Full Workflow

In [None]:
# Upload: ask for an audio file and remember its filename
audio_file = upload_audio()

# Transcribe: model_size may be 'tiny', 'base', 'small', 'medium' or 'large';
# 'base' is recommended for balance of speed and accuracy
result = transcribe_audio(audio_file=audio_file, model_size="base")

# Show the transcript under a banner (the three banner lines go out in one call)
print("\n" + "=" * 50, "📝 TRANSCRIPTION:", "=" * 50, sep="\n")
print(result["text"])

# Save the report to disk and trigger the download; keep the returned text
transcription = save_transcription(result=result)

# Optional: embed a player for the uploaded audio
print("\n🔊 Audio Player:")
display(Audio(audio_file))