# 🎬 Video Dubbing Application - Google Colab

AI-powered video dubbing with WhisperX, OpenAI GPT, Coqui TTS, and MuseTalk

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/phillip1029/video-dubbing/blob/main/Video_Dubbing_Colab.ipynb)

## Features:
- 🎤 **WhisperX**: High-quality speech recognition with alignment
- 🧠 **OpenAI GPT**: Context-aware translation (GPT-4o, GPT-4)
- 🗣️ **Coqui XTTS**: Natural multilingual text-to-speech
- 💋 **MuseTalk**: Advanced lip synchronization
- ⚡ **GPU Accelerated**: Optimized for Colab's free GPU

## 🚀 Setup Instructions

**IMPORTANT**: Enable the GPU runtime before running any cells:
1. Go to `Runtime` → `Change runtime type`
2. Select `T4 GPU` (free) or `A100/V100` (Pro)
3. Click `Save`


In [None]:
# Report the GPU, disk, and Python details of the current runtime
import os
import subprocess
import sys

import torch

print("🔥 GPU & System Check")
print("=" * 50)

# GPU status: always report CUDA availability, then either the device
# details or a reminder to switch the runtime type.
print(f"CUDA available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    print("⚠️ No GPU detected! Please enable GPU runtime.")
else:
    gpu_name = torch.cuda.get_device_name(0)
    # total_memory is reported in bytes; convert to GiB for display.
    device_properties = torch.cuda.get_device_properties(0)
    gpu_memory = device_properties.total_memory / 1024**3
    print(f"📱 GPU: {gpu_name}")
    print(f"💾 VRAM: {gpu_memory:.1f}GB")

# Disk usage of the /content mount, as reported by `df -h`
result = subprocess.run(
    ['df', '-h', '/content'],
    capture_output=True,
    text=True,
)
print("\n💽 Disk Space:")
print(result.stdout)

# Interpreter version: the first whitespace-separated token of sys.version
python_version = sys.version.split()[0]
print(f"🐍 Python: {python_version}")


In [None]:
# Install system dependencies
# Uses IPython shell escapes (`!`) to install the ffmpeg and git-lfs packages
# with apt. `-qq` keeps apt's output quiet.
print("📦 Installing system dependencies...")

# Refresh the package index first so the install below sees current packages.
!apt-get update -qq
# `-y` answers apt's confirmation prompt automatically (the cell is non-interactive).
!apt-get install -y -qq ffmpeg git-lfs

# NOTE: printed unconditionally - the exit status of the apt commands is not checked.
print("✅ System dependencies installed!")


In [None]:
# Clone repository and install Python dependencies
import os

if not os.path.exists('/content/video-dubbing'):
    print("📥 Cloning video dubbing repository...")
    !git clone https://github.com/phillip1029/video-dubbing.git /content/video-dubbing
    print("✅ Repository cloned!")
else:
    print("📁 Repository exists, updating...")
    !cd /content/video-dubbing && git pull

# Change to project directory
os.chdir('/content/video-dubbing')
print(f"📂 Current directory: {os.getcwd()}")

# Install Python dependencies
print("\n🐍 Installing Python dependencies...")
!pip install --upgrade pip
!pip install -q -r requirements.txt


In [None]:
# API Key Setup
# Prompts (without echo) for an OpenAI API key. When a key is given it is
# exported as OPENAI_API_KEY and `translation_service` is set to "openai";
# otherwise `translation_service` is set to "google".
import os
from getpass import getpass

print("🔑 OpenAI API Key Setup")
print("For best translation quality, enter your OpenAI API key.")
print("You can skip this to use Google Translate (free but lower quality).")

# Strip once, up front: a pasted key often carries stray whitespace or a
# trailing newline, and the stored value must be the clean key - not just
# the value used for the emptiness test.
openai_key = getpass("Enter your OpenAI API key (or press Enter to skip): ").strip()

if openai_key:
    os.environ['OPENAI_API_KEY'] = openai_key
    print("✅ OpenAI API key set! Using GPT for translations.")
    translation_service = "openai"
else:
    print("📝 No API key. Using Google Translate.")
    translation_service = "google"

print(f"🌐 Translation service: {translation_service}")


In [None]:
# Upload video file
# Opens the Colab upload widget and records the first uploaded file in
# `video_filename` / `video_path` (always under /content). When nothing is
# uploaded, `video_path` is set to None.
from google.colab import files
import os

print("📤 Upload your video file:")
print("Supported: MP4, AVI, MOV, MKV")
print("Recommended: < 100MB for free Colab")

uploaded = files.upload()

if uploaded:
    # Only the first uploaded file is used.
    video_filename = next(iter(uploaded))
    video_path = f"/content/{video_filename}"

    # files.upload() saves into the *current working directory*. An earlier
    # cell changes that to the project directory, so the file is usually not
    # at /content/<name>. Move it there; if it cannot be found on disk at
    # all, write it out from the in-memory bytes returned by upload().
    saved_path = os.path.abspath(video_filename)
    if saved_path != video_path and os.path.exists(saved_path):
        os.replace(saved_path, video_path)
    elif not os.path.exists(video_path):
        with open(video_path, 'wb') as video_file:
            video_file.write(uploaded[video_filename])

    file_size = os.path.getsize(video_path) / (1024*1024)  # MB
    print(f"\n✅ Video uploaded: {video_filename}")
    print(f"📏 Size: {file_size:.1f} MB")

    if file_size > 200:
        print("⚠️ Large file - may hit memory limits")
else:
    print("❌ No video uploaded")
    video_path = None


In [None]:
# Choose the language to dub into
print("🌍 Select target language:")

# Language codes offered to the user, mapped to their display names.
languages = {
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "pt": "Portuguese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "nl": "Dutch",
    "pl": "Polish",
    "tr": "Turkish",
}

# Show the menu in the order the table is written.
for lang_code in languages:
    print(f"  {lang_code}: {languages[lang_code]}")

# Normalise the answer so "ES " and "es" are treated alike.
target_language = input("\nEnter language code (e.g., 'es'): ").strip().lower()

# Unknown codes fall back to Spanish rather than aborting.
if target_language not in languages:
    print("❌ Invalid code. Using Spanish (es)")
    target_language = "es"
else:
    print(f"✅ Target: {languages[target_language]} ({target_language})")


In [None]:
# Process video dubbing
# Configures and runs the dubbing pipeline on the uploaded video, or resumes
# a previous session when `resume_session_id` is filled in. On success the
# result is written to `output_path` (under /content).
#
# Relies on names set by earlier cells: `translation_service` (API key cell)
# and `target_language` / `languages` (language cell). `video_path` (upload
# cell) is optional when resuming.
import os
import sys
import time

# Make the cloned project importable even when the working directory is not
# the repository root: the `from src...` imports below need the repo root on
# sys.path. Guarded so re-running the cell does not add duplicates.
for _project_path in ('/content/video-dubbing', '/content/video-dubbing/src'):
    if _project_path not in sys.path:
        sys.path.append(_project_path)

# @markdown ### 🔄 Resume Previous Session?
# @markdown Enter a session ID here to resume a crashed or stopped run. Leave blank for a new run.
resume_session_id = "" #@param {type:"string"}

# When resuming (e.g. after a runtime restart) the upload cell may not have
# been run, so `video_path` may not exist yet; treat that as "no upload"
# instead of failing with a NameError.
video_path = globals().get('video_path')

if (video_path and os.path.exists(video_path)) or resume_session_id:
    print("🚀 Starting video dubbing pipeline...")

    from src.utils.config import AppConfig
    from src.pipeline import VideoDubbingPipeline

    # Create config
    config = AppConfig()
    openai_api_key = os.environ.get('OPENAI_API_KEY')

    # --- ASR Configuration ---
    config.asr.service = "openai_api"
    config.asr.api_key = openai_api_key
    config.asr.split_long_audio = True
    config.asr.split_threshold_min = 20
    if not openai_api_key:
        # Transcription is configured for the OpenAI API regardless of the
        # chosen translation service, so flag a missing key early.
        print("⚠️ OPENAI_API_KEY is not set - transcription via the OpenAI API may fail.")

    # --- Translation Configuration ---
    config.translation.service = translation_service
    if translation_service == "openai":
        config.translation.model_name = "gpt-4o"
        config.translation.api_key = openai_api_key

    # Optimize for Colab
    config.whisperx.batch_size = 8
    config.whisperx.model_size = "medium"

    # Create pipeline (a blank session ID means "start a new session")
    pipeline = VideoDubbingPipeline(
        config,
        session_id=resume_session_id or None,
        resume=bool(resume_session_id),
    )

    print(f"SESSION ID: {pipeline.session_id}")

    # Determine input video path: a resumed session takes it from the saved
    # pipeline state, a new session uses the uploaded file.
    if resume_session_id:
        input_video = pipeline.pipeline_state.get("metadata", {}).get("input_video")
        if input_video:
            print(f"Resuming with video: {input_video}")
        else:
            print("❌ Cannot resume: Original video path not found in saved state.")
    else:
        input_video = video_path

    # Only continue when an input video is known; without this guard the
    # path handling below would raise TypeError on a None input.
    if input_video:
        # Create output path
        output_filename = f"dubbed_{os.path.basename(input_video)}_{target_language}.mp4"
        output_path = f"/content/{output_filename}"

        print(f"📹 Input: {os.path.basename(input_video)}")
        print(f"🌍 Target: {languages.get(target_language, target_language)}")
        print(f"🎤 Transcription: {config.asr.service}")
        print(f"🧠 Translation: {config.translation.service.upper()}")
        print("\n⏳ Processing... This may take several minutes.")

        start_time = time.time()

        # Top-level boundary of the notebook cell: report any failure with
        # troubleshooting hints instead of a raw traceback.
        try:
            result = pipeline.process_video(
                video_path=input_video,
                target_language=target_language,
                output_path=output_path,
                speaker_reference=None,
                auto_approve=True
            )

            end_time = time.time()

            if result["success"]:
                # Prefer the duration recorded by the pipeline; fall back to
                # the wall-clock time measured around this call.
                processing_time = result['pipeline_state']['metadata'].get('duration', end_time - start_time)
                print(f"\n🎉 SUCCESS! Dubbing completed!")
                print(f"⏱️ Time: {processing_time:.1f} seconds")
                print(f"📁 Output: {output_filename}")

                if os.path.exists(output_path):
                    output_size = os.path.getsize(output_path) / (1024*1024)
                    print(f"📏 Size: {output_size:.1f} MB")
            else:
                print(f"\n❌ FAILED: {result['error']}")

        except Exception as e:
            print(f"\n💥 ERROR: {str(e)}")
            print("\n🔧 Troubleshooting:")
            print("1. Try smaller video (< 50MB)")
            print("2. Ensure GPU is enabled")
            print("3. Check your OpenAI API Key")
            print("4. Restart runtime if out of memory")

else:
    print("❌ No video file to process. Upload a video or provide a session ID to resume.")


In [None]:
# Download results
# Sends the dubbed video produced by the pipeline cell to the browser, then
# lists the media files currently present in /content with their sizes.
import os

from google.colab import files

# `output_path` / `output_filename` only exist once the pipeline cell has
# run far enough to define them.
if 'output_path' in globals() and os.path.exists(output_path):
    print(f"📥 Downloading: {output_filename}")
    files.download(output_path)
    print("✅ Download started! Check your browser downloads.")
else:
    print("❌ No output file. Run dubbing pipeline first.")

# List available files (sorted so the listing is stable between runs)
print("\n📁 Available files:")
for media_name in sorted(os.listdir('/content')):
    if media_name.endswith(('.mp4', '.avi', '.mov', '.wav', '.mp3')):
        size = os.path.getsize(f'/content/{media_name}') / (1024*1024)
        print(f"  {media_name} ({size:.1f} MB)")
