# VoiceDub - YouTube Dubbing Backend (GPU)

This notebook runs the dubbing backend on Google Colab's free T4 GPU.

**Setup:**
1. Go to **Runtime > Change runtime type > T4 GPU**
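2. Run the cells below in order. Cell 2 restarts the runtime after installing dependencies; once the runtime reconnects, continue from cell 3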
3. Copy the `ngrok` public URL and paste it in your frontend

**Features:** Chatterbox TTS (human-like voice) + Whisper (GPU transcription) + Gemini translation

In [None]:
#@title 1. Check GPU
!nvidia-smi
import torch
print(f"\nCUDA available: {torch.cuda.is_available()}")
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

In [None]:
#@title 2. Clone repo & install dependencies
import os

if os.path.exists('/content/app'):
    !cd /content/app && git pull
else:
    !git clone https://github.com/sasmalgiri/youtube-dubbing.git /content/app

# Install build tools
!apt-get -qq install -y libsndfile1 > /dev/null 2>&1

# Install chatterbox deps manually (pip build fails on Colab)
!pip install -q numpy==1.26.4
!pip install -q "librosa>=0.11.0" "s3tokenizer" "torch>=2.6.0" "torchaudio>=2.6.0" \
    "transformers==4.46.3" "diffusers==0.29.0" "resemble-perth>=1.0.1" \
    "conformer>=0.3.2" "safetensors>=0.5.3" "spacy-pkuseg" "pykakasi>=2.3.0" \
    "pyloudnorm" "omegaconf"
!pip install -q chatterbox-tts --no-deps

# Install remaining backend deps
!pip install -q fastapi uvicorn[standard] python-multipart pydantic edge-tts \
    faster-whisper deep-translator google-genai openai elevenlabs \
    sse-starlette rich yt-dlp pyngrok

# Restart runtime so numpy 1.26.4 loads properly
print("\nAll installed! Restarting runtime for numpy fix...")
print("After restart, skip this cell and run cells 3, 4, 5, 6.")
import os
os._exit(0)

In [None]:
#@title 3. Set API Keys + Upload YouTube Cookies
#@markdown Enter your API keys below:

GEMINI_API_KEY = "" #@param {type:"string"}
ELEVENLABS_API_KEY = "" #@param {type:"string"}
OPENAI_API_KEY = "" #@param {type:"string"}
NGROK_AUTH_TOKEN = "" #@param {type:"string"}
NGROK_DOMAIN = "" #@param {type:"string"}

#@markdown ---
#@markdown **Required:** `GEMINI_API_KEY` + `NGROK_AUTH_TOKEN`
#@markdown
#@markdown **Optional:** `NGROK_DOMAIN`, `ELEVENLABS_API_KEY`, `OPENAI_API_KEY`

# Exports the keys to this kernel's environment, writes them to the backend's
# .env file, and stores an uploaded cookies.txt next to the backend code.
import os

BACKEND_DIR = '/content/app/backend'

# Values pasted into the form often carry stray spaces or a trailing newline,
# which would end up inside the .env file and the auth headers.
GEMINI_API_KEY = GEMINI_API_KEY.strip()
ELEVENLABS_API_KEY = ELEVENLABS_API_KEY.strip()
OPENAI_API_KEY = OPENAI_API_KEY.strip()
NGROK_AUTH_TOKEN = NGROK_AUTH_TOKEN.strip()
NGROK_DOMAIN = NGROK_DOMAIN.strip()

# Fail with an actionable message instead of a bare open() error further down.
if not os.path.isdir(BACKEND_DIR):
    raise FileNotFoundError(
        f"{BACKEND_DIR} not found - run cell 2 (clone repo & install dependencies) first"
    )

# Surface missing "Required" values now rather than when a later cell fails.
if not GEMINI_API_KEY:
    print("WARNING: GEMINI_API_KEY is empty (listed as required above).")
if not NGROK_AUTH_TOKEN:
    print("WARNING: NGROK_AUTH_TOKEN is empty - cell 5 will refuse to start without it.")

# GEMINI_API_KEY is always exported (even when empty); the optional keys are
# only exported / written when the user actually supplied them.
optional_keys = {
    'ELEVENLABS_API_KEY': ELEVENLABS_API_KEY,
    'OPENAI_API_KEY': OPENAI_API_KEY,
}

os.environ['GEMINI_API_KEY'] = GEMINI_API_KEY
env_lines = [f'GEMINI_API_KEY={GEMINI_API_KEY}\n']
for key_name, key_value in optional_keys.items():
    if key_value:
        os.environ[key_name] = key_value
        env_lines.append(f'{key_name}={key_value}\n')

with open(os.path.join(BACKEND_DIR, '.env'), 'w') as env_file:
    env_file.writelines(env_lines)

print("API keys set!")

# Upload YouTube cookies (required — YouTube blocks Colab IPs)
print("\n--- YouTube Cookies ---")
print("YouTube blocks downloads from Colab. You need to upload cookies.")
print("Steps:")
print("  1. Install 'Get cookies.txt LOCALLY' Chrome extension")
print("  2. Go to youtube.com (make sure you're logged in)")
print("  3. Click the extension → Export → saves cookies.txt")
print("  4. Upload it below:\n")

from google.colab import files
uploaded = files.upload()

if uploaded:
    # Only the first uploaded file is used; it is always stored as cookies.txt
    # regardless of its original name.
    name, data = next(iter(uploaded.items()))
    with open(os.path.join(BACKEND_DIR, 'cookies.txt'), 'wb') as cookie_file:
        cookie_file.write(data)
    print(f"\nCookies saved! ({name}, {len(data)} bytes)")
else:
    # Previously a cancelled upload ended the cell without any message.
    print("\nWARNING: no cookies file was uploaded. Re-run this cell to upload cookies.txt.")

In [None]:
#@title 4. Pre-download Whisper model + fix yt-dlp
%cd /content/app/backend

# Install deno (required by yt-dlp for YouTube extraction)
!curl -fsSL https://deno.land/install.sh | sh
import os
os.environ["PATH"] = "/root/.deno/bin:" + os.environ["PATH"]

# Verify yt-dlp works
!yt-dlp --version
print("yt-dlp + deno ready!")

# Pre-download Whisper model
from faster_whisper import WhisperModel
print("\nDownloading Whisper 'small' model...")
model = WhisperModel("small", device="cuda", compute_type="float16")
del model
print("Whisper model cached!")

In [None]:
#@title 5. Start Backend Server + ngrok Tunnel
# Launches uvicorn for the backend in the background, waits until it accepts
# connections, then opens an ngrok tunnel to it and prints the public URL.
# Requires NGROK_AUTH_TOKEN / NGROK_DOMAIN from cell 3.
import os
import socket
import subprocess
import sys
import time
from pyngrok import ngrok, conf

BACKEND_PORT = 8000
STARTUP_TIMEOUT_S = 60  # upper bound on waiting for the port to open

# Ensure deno is in PATH for yt-dlp (added once, even if this cell is re-run)
deno_bin_dir = "/root/.deno/bin"
current_path = os.environ.get("PATH", "")
if deno_bin_dir not in current_path.split(os.pathsep):
    os.environ["PATH"] = deno_bin_dir + os.pathsep + current_path

# Set ngrok auth token
if not NGROK_AUTH_TOKEN:
    raise ValueError("Missing NGROK_AUTH_TOKEN")
conf.get_default().auth_token = NGROK_AUTH_TOKEN

# Re-running this cell must not leave an old server holding the port or an
# old tunnel holding the domain.
previous_proc = globals().get('proc')
if previous_proc is not None and previous_proc.poll() is None:
    previous_proc.terminate()
    try:
        previous_proc.wait(timeout=10)
    except subprocess.TimeoutExpired:
        previous_proc.kill()
        previous_proc.wait()
ngrok.kill()

# Start uvicorn in background (inherits PATH with deno). sys.executable is the
# interpreter the packages were installed into.
# NOTE: stdout is a pipe that only cell 6 drains; run cell 6 so the server's
# log output does not back up.
proc = subprocess.Popen(
    [sys.executable, '-m', 'uvicorn', 'app:app', '--host', '0.0.0.0', '--port', str(BACKEND_PORT)],
    cwd='/content/app/backend',
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    env=os.environ.copy(),
)

# Wait for the server to actually listen instead of sleeping a fixed time, and
# stop immediately (showing its output) if it died during startup.
server_ready = False
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while time.monotonic() < deadline:
    if proc.poll() is not None:
        startup_output = proc.stdout.read().decode('utf-8', errors='replace')
        raise RuntimeError(
            f"Backend exited during startup (exit code {proc.returncode}):\n{startup_output}"
        )
    try:
        with socket.create_connection(("127.0.0.1", BACKEND_PORT), timeout=1):
            server_ready = True
            break
    except OSError:
        time.sleep(0.5)

if not server_ready:
    print(f"WARNING: backend not accepting connections after {STARTUP_TIMEOUT_S}s; "
          "opening the tunnel anyway. Check cell 6 for server logs.")

# Use static domain if provided
if NGROK_DOMAIN:
    public_url = ngrok.connect(BACKEND_PORT, "http", domain=NGROK_DOMAIN)
    url = f"https://{NGROK_DOMAIN}"
else:
    public_url = ngrok.connect(BACKEND_PORT, "http")
    # connect() returns a tunnel object whose str() is a description
    # ('NgrokTunnel: "https://..." -> "http://localhost:8000"'), not a URL.
    # Fall back to str() only if the attribute is absent.
    url = getattr(public_url, "public_url", None) or str(public_url)

print("=" * 60)
print("Backend running on GPU!")
print(f"\nPUBLIC URL: {url}")
print(f"\nTest: {url}/api/health")
print("=" * 60)

In [None]:
#@title 6. Monitor Server Logs (run this to see live output)
#@markdown Keep this cell running to see backend logs in real-time.

# Streams the output of the server process started in cell 5 (`proc`) until
# the process closes its output, then reports how it exited. Interrupting the
# cell only stops the streaming; the server is left running.

print("Monitoring server... (this cell keeps running)")
print("Submit a dubbing job from your frontend to see progress here.")
print("-" * 60)

try:
    # readline() blocks until a line arrives and returns b'' only at EOF, so
    # this loop also prints whatever the server wrote just before exiting
    # (e.g. a crash traceback) instead of stopping as soon as it has died.
    for raw_line in iter(proc.stdout.readline, b''):
        print(raw_line.decode('utf-8', errors='replace').rstrip())

    exit_code = proc.wait()
    print("-" * 60)
    print(f"Server process exited with code {exit_code}")
except KeyboardInterrupt:
    print("\nStopped monitoring (server still running)")