In [4]:
import whisper
import torch
from pathlib import Path
import urllib.request

def safe_load_model(model_name="base", device=None):
    """
    Securely load Whisper model using weights_only=True with clear logs.
    """
    from whisper.model import Whisper, ModelDimensions

    print(f"🔍 Checking model name: {model_name}")
    
    # Available model URLs
    _MODELS = {
        "tiny": "https://openaipublic.blob.core.windows.net/whisper/models/tiny.pt",
        "base": "https://openaipublic.blob.core.windows.net/whisper/models/base.pt",
        "small": "https://openaipublic.blob.core.windows.net/whisper/models/small.pt",
        "medium": "https://openaipublic.blob.core.windows.net/whisper/models/medium.pt",
        "large": "https://openaipublic.blob.core.windows.net/whisper/models/large-v2.pt",
    }

    if model_name not in _MODELS:
        raise ValueError(f"❌ Unknown model: {model_name}")

    model_url = _MODELS[model_name]
    model_path = Path.home() / ".cache/whisper" / f"{model_name}.pt"
    model_path.parent.mkdir(parents=True, exist_ok=True)

    # Download if missing
    if not model_path.exists():
        print(f"⬇️ Downloading {model_name} model from {model_url} ...")
        urllib.request.urlretrieve(model_url, model_path)
        print(f"✅ Downloaded and saved to {model_path}")
    else:
        print(f"📦 Found cached model at {model_path}")

    # Load the model checkpoint securely
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    print(f"⚙️ Loading model on device: {device} with weights_only=True ...")
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)

    print("🧠 Constructing Whisper model object ...")
    dims = ModelDimensions(**checkpoint["dims"])
    model = Whisper(dims)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.to(device)
    print("✅ Model loaded successfully!\n")

    return model

# Step 1: Load the model
print("🚀 Starting model load...")
model = safe_load_model("large")

# Step 2: File to translate
audio_path = "4-russian-japanese-war.mp3"
print(f"📂 Audio file selected: {audio_path}")

# Step 3: Translate (Russian → English)
print("📝 Translating audio to English subtitles ...")
result = model.transcribe(audio_path, task="translate", language="ru", verbose=True)

# Step 4: Save subtitles
output_format = "srt"  # or "vtt"
output_file = audio_path.rsplit(".", 1)[0] + f".translated.{output_format}"
print(f"💾 Saving translated subtitles to: {output_file}")

with open(output_file, "w", encoding="utf-8") as f:
    f.write(result[output_format])

print(f"✅ Done! Translated subtitles saved as: {output_file}")


🚀 Starting model load...
🔍 Checking model name: large
⬇️ Downloading large model from https://openaipublic.blob.core.windows.net/whisper/models/large-v2.pt ...


HTTPError: HTTP Error 404: The specified resource does not exist.

In [3]:
torch.cuda.is_available()

True