<a href="https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_colab_parallel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# WhisperJAV Two-Pass Edition v1.8.2

**Adaptive Two-Pass Processing** - Automatically optimizes for your platform.

| Platform | GPUs | How it works |
|----------|------|---------------|
| **Kaggle** | 2x T4 (8GB each) | **Parallel** - Pass 1 on GPU 0, Pass 2 on GPU 1 simultaneously |
| **Colab L4/A100** | 1x GPU (16-24GB) | **Sequential** - Pass 1 first, then Pass 2 (avoids memory issues) |

| Option | What it controls |
|--------|------------------|
| **Speech Segmenter** | How to detect speech in audio (silero, ten, none) |
| **Model** | Which AI model to use (large-v2, large-v3, turbo, kotoba) |

---
<div style="font-size: 8px; line-height: 1.0;">
1. Upload your videos to <code>Google Drive/WhisperJAV/</code><br>
2. Select settings and click <b>Runtime → Run all</b> in the menu<br>
3. <b>Connect Google Drive</b> when prompted<br>
4. Wait for your subtitles!
</div>

<small>The notebook will automatically disconnect when finished to save your GPU credits.</small>

In [None]:
#@title Step 1: Settings { display-mode: "form" }
# Form cell: every assignment below carries a `#@param` annotation listing the
# choices offered for it. The values are display labels; the mapping tables
# further down in this cell translate them into the identifiers that are
# stored in WHISPERJAV_CONFIG.

#@markdown **Pass 1 Configuration**
pass1_quality = "balanced" #@param ["faster", "fast", "balanced", "fidelity", "transformers"]
pass1_sensitivity = "aggressive" #@param ["conservative", "balanced", "aggressive"]
pass1_speech_segmenter = "automatic" #@param ["automatic", "silero", "ten", "none"]
pass1_model = "automatic" #@param ["automatic", "large-v2", "large-v3", "turbo", "kotoba-bilingual", "kotoba-v2.0", "kotoba-v2.1", "kotoba-v2.2"]

#@markdown ---
#@markdown **Pass 2 Configuration**
pass2_quality = "transformers" #@param ["faster", "fast", "balanced", "fidelity", "transformers"]
pass2_sensitivity = "aggressive" #@param ["conservative", "balanced", "aggressive"]
pass2_speech_segmenter = "ten" #@param ["automatic", "silero", "ten", "none"]
pass2_model = "kotoba-bilingual" #@param ["automatic", "large-v2", "large-v3", "turbo", "kotoba-bilingual", "kotoba-v2.0", "kotoba-v2.1", "kotoba-v2.2"]

#@markdown ---
#@markdown **Merge Strategy**
merge_method = "prefer first pass" #@param ["automatic", "keep all", "prefer first pass", "prefer second pass"]

#@markdown ---
#@markdown **Files & Output**
# Folder name; Step 2 resolves it under /content/drive/MyDrive/ and creates it if missing.
folder_name = "WhisperJAV" #@param {type:"string"}
subtitle_language = "Japanese" #@param ["Japanese", "English (auto-translate)", "English (AI translate)"]

#@markdown ---
#@markdown **AI Translation** *(if selected "English (AI translate)")*
translation_service = "local" #@param ["local", "deepseek", "openrouter", "gemini", "claude", "gpt"]
local_model = "gemma-9b" #@param ["gemma-9b", "llama-8b", "llama-3b", "auto"]
#@markdown <font size="1">local: Free, runs on GPU. gemma-9b (8GB+ VRAM), llama-8b (6GB+), llama-3b (3GB+). Cloud providers require API key.</font>
# Only consulted for the cloud providers; Step 3 exports it as that provider's environment variable.
api_key = "" #@param {type:"string"}
translation_style = "standard" #@param ["standard", "explicit"]

#@markdown ---
#@markdown **Credits**
# Step 2 adds a non-empty credit to each output SRT as an extra cue (opening first, closing last).
opening_credit = "" #@param {type:"string"}
closing_credit = "Subs by WhisperJAV" #@param {type:"string"}

#@markdown ---
#@markdown **Session**
auto_disconnect = True #@param {type:"boolean"}
#@markdown ☝️ Auto-disconnect when done (saves GPU credits)

# Lookup tables: form display label -> identifier stored in WHISPERJAV_CONFIG.

# Merge-strategy label -> strategy name handed to merge_srt in Step 2.
combine_map = {
    "automatic": "smart_merge",
    "keep all": "full_merge",
    "prefer first pass": "pass1_primary",
    "prefer second pass": "pass2_primary",
}

# Subtitle-language label -> internal language mode.
language_map = {
    "Japanese": "native",
    "English (auto-translate)": "direct-to-english",
    "English (AI translate)": "llm",
}

# Translation-style label -> value passed as the translator's --tone.
tone_map = dict(standard="standard", explicit="pornify")

# Speech-segmenter label -> CLI value; None means "leave the pipeline default".
segmenter_map = dict(automatic=None, silero="silero", ten="ten", none="none")

# Model label -> model identifier; None means "leave the pipeline default".
model_map = {
    "automatic": None,
    "large-v2": "large-v2",
    "large-v3": "large-v3",
    "turbo": "large-v3-turbo",
    "kotoba-bilingual": "kotoba-tech/kotoba-whisper-bilingual-v1.0",
    "kotoba-v2.0": "kotoba-tech/kotoba-whisper-v2.0",
    "kotoba-v2.1": "kotoba-tech/kotoba-whisper-v2.1",
    "kotoba-v2.2": "kotoba-tech/kotoba-whisper-v2.2",
}

# Compatibility rules between models and pipelines:
#   * Kotoba (HuggingFace) models run ONLY under the "transformers" pipeline.
#   * Legacy models (large-v2/v3/turbo) run under every pipeline.
KOTOBA_MODELS = {"kotoba-bilingual", "kotoba-v2.0", "kotoba-v2.1", "kotoba-v2.2"}
LEGACY_PIPELINES = {"faster", "fast", "balanced", "fidelity"}

# One message per setting that gets rewritten below; shown to the user later.
warnings_list = []

# Pass 1: a Kotoba model paired with a legacy pipeline is switched to transformers.
if pass1_quality in LEGACY_PIPELINES and pass1_model in KOTOBA_MODELS:
    warnings_list.append(
        f"Pass 1: {pass1_model} requires 'transformers' pipeline. "
        f"Auto-correcting from '{pass1_quality}' to 'transformers'."
    )
    pass1_quality = "transformers"

# Pass 2: same rule.
if pass2_quality in LEGACY_PIPELINES and pass2_model in KOTOBA_MODELS:
    warnings_list.append(
        f"Pass 2: {pass2_model} requires 'transformers' pipeline. "
        f"Auto-correcting from '{pass2_quality}' to 'transformers'."
    )
    pass2_quality = "transformers"

# Settings shared with Steps 2 and 3. Plain keys hold the mapped identifiers
# the tools consume; keys with a leading underscore keep the labels as chosen
# in the form (after any auto-correction) for display purposes.
WHISPERJAV_CONFIG = dict(
    pass1_pipeline=pass1_quality,
    pass1_sensitivity=pass1_sensitivity,
    pass1_speech_segmenter=segmenter_map[pass1_speech_segmenter],
    pass1_model=model_map[pass1_model],
    pass2_pipeline=pass2_quality,
    pass2_sensitivity=pass2_sensitivity,
    pass2_speech_segmenter=segmenter_map[pass2_speech_segmenter],
    pass2_model=model_map[pass2_model],
    merge_strategy=combine_map[merge_method],
    folder_name=folder_name,
    subtitle_language=language_map[subtitle_language],
    translation_service=translation_service,
    local_model=local_model,
    api_key=api_key,
    translation_style=tone_map[translation_style],
    opening_credit=opening_credit,
    closing_credit=closing_credit,
    auto_disconnect=auto_disconnect,
    # Display values
    _pass1_quality=pass1_quality,
    _pass1_sensitivity=pass1_sensitivity,
    _pass1_speech_segmenter=pass1_speech_segmenter,
    _pass1_model=pass1_model,
    _pass2_quality=pass2_quality,
    _pass2_sensitivity=pass2_sensitivity,
    _pass2_speech_segmenter=pass2_speech_segmenter,
    _pass2_model=pass2_model,
    _merge_method=merge_method,
    _subtitle_language=subtitle_language,
    _translation_style=translation_style,
)

from IPython.display import display, HTML

# Show one yellow banner per setting that was auto-corrected above.
# (The warning sign and the arrows below were stored as mis-decoded UTF-8 and
# rendered as garbage; they are restored to the intended glyphs.)
for warning in warnings_list:
    display(HTML(f'<div style="padding:6px 10px;background:#fef9c3;border-radius:4px;font-size:10px;margin-bottom:4px"><b>⚠ Auto-corrected:</b> {warning}</div>'))

# Summarise each pass as "pipeline[/segmenter][/model]", leaving out any part
# that is still set to "automatic".
p1_info = "/".join(
    [pass1_quality]
    + [choice for choice in (pass1_speech_segmenter, pass1_model) if choice != "automatic"]
)
p2_info = "/".join(
    [pass2_quality]
    + [choice for choice in (pass2_speech_segmenter, pass2_model) if choice != "automatic"]
)

display(HTML(f'<div style="padding:6px 10px;background:#e0f2fe;border-radius:4px;font-size:10px"><b>Parallel Mode:</b> Pass1({p1_info}) ⇆ Pass2({p2_info}) | Merge: {merge_method} | Folder: {folder_name}</div>'))

In [None]:
#@title Step 2: Two-Pass Transcribe { display-mode: "form" }
#@markdown Connect Drive ‚Üí Install ‚Üí Run passes (parallel on Kaggle, sequential on Colab) ‚Üí Merge results

import os, sys, subprocess, shlex, time, re
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from IPython.display import display, HTML, clear_output
from dataclasses import dataclass
from typing import List, Dict, Any, Tuple

def status(msg, ok=True):
    """Print ``msg`` prefixed with a check mark when ``ok``, else a cross mark."""
    # The icons were stored as mis-decoded UTF-8; these are the intended glyphs.
    icon = "✓" if ok else "✗"
    print(f"{icon} {msg}")

def section(title):
    """Print ``title`` framed by two 50-character horizontal rules."""
    # The rule character was stored as mis-decoded UTF-8 (three junk characters
    # per repeat); build it from the intended box-drawing glyph instead.
    rule = "─" * 50
    print(f"\n{rule}\n{title}\n{rule}")

# Step 1 leaves its settings in the module-level WHISPERJAV_CONFIG; stop here
# with a visible error when this cell is run before it.
if 'WHISPERJAV_CONFIG' not in globals():
    display(HTML(
        '<div style="padding:8px;background:#fef2f2;border-radius:4px;color:#991b1b;font-size:10px">'
        '<b>Error:</b> Run Step 1 first</div>'
    ))
    raise SystemExit()
cfg = WHISPERJAV_CONFIG

# ═══════════════════════════════════════════
# PRE-FLIGHT CHECKS
# ═══════════════════════════════════════════
section("PRE-FLIGHT CHECKS")

# Supported interpreter range is 3.10 through 3.12; anything outside aborts the cell.
py_version = sys.version_info
print(f"Python: {sys.version.split()[0]}")
if py_version < (3, 10):
    raise SystemExit(f"Python {sys.version.split()[0]} too old. Requires 3.10-3.12.")
if py_version >= (3, 13):
    display(HTML(
        '<div style="padding:8px;background:#fef2f2;border-radius:4px;color:#991b1b;font-size:10px">'
        '<b>Error:</b> Python 3.13+ not supported. WhisperJAV requires Python 3.10-3.12 (openai-whisper incompatible)</div>'
    ))
    raise SystemExit(f"Python {sys.version.split()[0]} not supported")
status("Python version OK")

# ═══════════════════════════════════════════
# CONNECT GOOGLE DRIVE
# ═══════════════════════════════════════════
section("CONNECTING GOOGLE DRIVE")
# Mount Drive and make sure the working folder exists. Any failure in the
# import, the mount or the mkdir is reported and aborts the cell.
# NOTE(review): `google.colab` is imported unconditionally, so on a platform
# without that module this cell exits here, before the "/kaggle" install
# branch further down can run. Confirm how the Kaggle path is meant to reach
# its media.
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
    folder_path = Path(f"/content/drive/MyDrive/{cfg['folder_name']}")
    folder_path.mkdir(parents=True, exist_ok=True)
except Exception as mount_error:
    status(f"Failed to connect: {mount_error}", False)
    raise SystemExit()
else:
    status(f"Connected: {folder_path}")

# ═══════════════════════════════════════════
# CHECK GPUs AND DETERMINE MODE
# ═══════════════════════════════════════════
section("DETECTING PLATFORM")
# One CSV line per GPU ("name, memory"); a non-zero exit or empty output is
# treated as "no GPU available".
gpu_check = subprocess.run("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader", shell=True, capture_output=True, text=True)
if gpu_check.returncode != 0 or not gpu_check.stdout.strip():
    # Arrows and icons in this section were stored as mis-decoded UTF-8;
    # the intended glyphs are restored.
    status("No GPU detected. Go to Runtime → Change runtime type → T4 GPU", False)
    raise SystemExit()

gpu_lines = [line.strip() for line in gpu_check.stdout.splitlines() if line.strip()]
num_gpus = len(gpu_lines)

for i, gpu_info in enumerate(gpu_lines):
    status(f"GPU {i}: {gpu_info}")

# Adaptive mode selection: with two or more GPUs each pass gets its own
# device and both run at once; with one GPU the passes share device 0 and
# run back to back. gpu_assignment maps pass number -> CUDA device id.
if num_gpus >= 2:
    PARALLEL_MODE = True
    gpu_assignment = {1: "0", 2: "1"}
    print("\n  ⚡ Kaggle Mode: PARALLEL (Pass 1 → GPU 0, Pass 2 → GPU 1)")
else:
    PARALLEL_MODE = False
    gpu_assignment = {1: "0", 2: "0"}
    print("\n  📝 Colab Mode: SEQUENTIAL (avoids memory contention)")

# ═══════════════════════════════════════════
# INSTALL WHISPERJAV
# ═══════════════════════════════════════════
section("INSTALLING WHISPERJAV")
install_start = time.time()

REPO_URL = "https://github.com/meizhong986/WhisperJAV.git"
REPO_PATH = "/content/WhisperJAV"
SCRIPT_PATH = f"{REPO_PATH}/installer/install_colab.sh"
VENV_PATH = "/content/whisperjav_env"

# Two install paths: Kaggle installs globally, Colab installs into a venv.
if os.path.exists("/kaggle"):
    # Kaggle fallback (simplified manual install since root is allowed).
    #
    # The direct-reference requirement "name[extras] @ git+url" contains
    # spaces and square brackets, so it must reach pip as ONE argument.
    # Unquoted, the shell splits it into three words and pip rejects the
    # stand-alone "@"; shlex.quote keeps it intact.
    whisperjav_requirement = (
        "whisperjav[cli,enhance,translate,huggingface,analysis,compatibility]"
        f" @ git+{REPO_URL}@main"
    )
    steps = [
        ("apt-get update -qq && apt-get install -y -qq ffmpeg portaudio19-dev libc++1 libc++abi1 libsndfile1 libgl1 > /dev/null 2>&1", "System tools"),
        (f"pip install -q {shlex.quote(whisperjav_requirement)}", "WhisperJAV + Deps"),
    ]
    for cmd, name in steps:
        if subprocess.run(cmd, shell=True).returncode != 0:
            raise SystemExit(f"{name} failed")

    # Kaggle doesn't use a venv; the console script is expected on PATH.
    cfg['whisperjav_cmd'] = "whisperjav"
    status("Installation complete (Kaggle)")

else:
    # Colab: use install_colab.sh for an isolated environment.
    def run_installer():
        """Run install_colab.sh, echoing its output live; return its exit code."""
        # ~/.local/bin is appended to PATH for the installer process.
        env = {**os.environ, "PATH": f"{os.environ.get('PATH', '')}:{os.path.expanduser('~/.local/bin')}"}
        with subprocess.Popen(["bash", SCRIPT_PATH], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, text=True, env=env) as process:
            for line in process.stdout:
                print(line, end='', flush=True)
        # Leaving the `with` block waits for the process, so returncode is set.
        return process.returncode

    # Reuse an existing venv only if it can actually import whisperjav.
    venv_python = f"{VENV_PATH}/bin/python"
    if os.path.exists(venv_python) and subprocess.run([venv_python, "-c", "import whisperjav"], capture_output=True).returncode == 0:
        status("WhisperJAV already installed (skipping)")
    else:
        # A venv that exists but failed the import check is rebuilt from scratch.
        if os.path.exists(VENV_PATH):
            subprocess.run(["rm", "-rf", VENV_PATH])
        if not os.path.exists(REPO_PATH):
            # Stop on a failed clone: otherwise the installer script is missing
            # and the error reported later hides the real cause.
            if subprocess.run(["git", "clone", REPO_URL, REPO_PATH]).returncode != 0:
                raise SystemExit("Installation failed: could not clone the WhisperJAV repository")
        if run_installer() != 0:
            raise SystemExit("Installation failed")

    # Later steps launch the tool through the venv's console script.
    cfg['whisperjav_cmd'] = f"{VENV_PATH}/bin/whisperjav"
    status(f"Installation complete ({time.time()-install_start:.0f}s)")

# ═══════════════════════════════════════════
# FIND MEDIA FILES
# ═══════════════════════════════════════════
section("SCANNING FILES")
video_types = {'.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv', '.webm', '.m4v', '.mp3', '.wav', '.flac', '.m4a'}
# Only direct children of the folder are considered (no recursion).
# is_file() skips sub-folders whose names merely end in a media extension;
# sorting makes the processing order stable between runs.
videos = sorted(
    f for f in folder_path.iterdir()
    if f.is_file() and f.suffix.lower() in video_types
)

if not videos:
    status(f"No media files in {cfg['folder_name']}/", False)
    raise SystemExit()

status(f"Found {len(videos)} file(s)")
# Preview at most five names (the bullet glyph was stored as mis-decoded UTF-8).
for v in videos[:5]:
    print(f"  • {v.name}")
if len(videos) > 5:
    print(f"  ... and {len(videos)-5} more")

# ═══════════════════════════════════════════
# MERGE FUNCTIONS (from whisperjav/ensemble/merge.py)
# ═══════════════════════════════════════════

@dataclass
class Subtitle:
    """A single SRT cue as produced by ``parse_srt``."""

    # Cue number as it appears in the SRT file (rewritten when cues are merged).
    index: int
    # Cue start, in seconds from the beginning of the media.
    start_time: float
    # Cue end, in seconds from the beginning of the media.
    end_time: float
    # Cue text; may span several lines.
    text: str

    @property
    def duration(self) -> float:
        """Return how long the cue lasts: ``end_time`` minus ``start_time``."""
        begin, finish = self.start_time, self.end_time
        return finish - begin

def parse_srt(path: Path) -> List[Subtitle]:
    """Read an SRT file and return its cues as ``Subtitle`` objects.

    Args:
        path: Location of the SRT file, read as UTF-8.

    Returns:
        The cues in file order, with times converted to seconds. The list is
        empty when ``path`` does not exist or no cue matches the pattern.
    """
    if not path.exists():
        return []

    def to_seconds(stamp):
        # "HH:MM:SS,mmm" -> seconds as a float.
        hours, minutes, seconds, millis = (int(part) for part in re.split(r'[:,]', stamp))
        return hours * 3600 + minutes * 60 + seconds + millis / 1000.0

    # Cue = index line, timing line, then text up to the next blank line.
    cue_re = re.compile(r'(\d+)\n(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\n((?:(?!\n\n).)*)', re.DOTALL)
    body = path.read_text(encoding='utf-8').strip()

    return [
        Subtitle(
            index=int(found.group(1)),
            start_time=to_seconds(found.group(2)),
            end_time=to_seconds(found.group(3)),
            text=found.group(4).strip(),
        )
        for found in cue_re.finditer(body)
    ]

def merge_srt(path1: Path, path2: Path, output_path: Path, strategy: str = "smart_merge") -> Dict:
    """Combine the SRT files of two passes into one file (simplified notebook version).

    Args:
        path1: SRT produced by pass 1.
        path2: SRT produced by pass 2.
        output_path: Destination file, written as UTF-8 (overwritten if present).
        strategy: ``"pass1_primary"`` keeps only pass 1's cues,
            ``"pass2_primary"`` keeps only pass 2's cues; any other value
            concatenates both, sorts by start time and renumbers from 1.
            No overlap detection is performed here.

    Returns:
        Dict with ``pass1_count``, ``pass2_count`` and ``merged_count``.
    """
    def format_timestamp(seconds: float) -> str:
        # Round to whole milliseconds BEFORE splitting into fields. Parsed
        # times are floats, and e.g. 1.001 * 1000 evaluates to
        # 1000.9999999999999; truncating that per field would write
        # "00:00:01,000" and lose a millisecond on every such cue.
        total_ms = int(round(seconds * 1000))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    subs1 = parse_srt(path1)
    subs2 = parse_srt(path2)

    if strategy == "pass1_primary":
        final_subs = subs1
    elif strategy == "pass2_primary":
        final_subs = subs2
    else:
        # Fallback used for every other strategy name: keep everything,
        # ordered by start time (stable sort, so pass 1 wins ties), and
        # renumber so indices are sequential again.
        final_subs = sorted(subs1 + subs2, key=lambda s: s.start_time)
        for i, sub in enumerate(final_subs, 1):
            sub.index = i

    with open(output_path, 'w', encoding='utf-8') as f:
        for sub in final_subs:
            f.write(f"{sub.index}\n")
            f.write(f"{format_timestamp(sub.start_time)} --> {format_timestamp(sub.end_time)}\n")
            f.write(f"{sub.text}\n\n")

    return {
        'pass1_count': len(subs1),
        'pass2_count': len(subs2),
        'merged_count': len(final_subs)
    }

# ═══════════════════════════════════════════
# EXECUTION ENGINE
# ═══════════════════════════════════════════

def build_pass_command(pass_num: int, video_path: Path, output_dir: Path, cfg: dict) -> Tuple[List[str], Path]:
    """Assemble the whisperjav command line for one pass.

    Each pass writes into its own ``pass<N>`` sub-directory of ``output_dir``
    (created here) so two passes running at once cannot overwrite each
    other's output; the tool names the SRT itself.

    Args:
        pass_num: Pass number, used to pick the ``pass<N>_*`` entries of ``cfg``.
        video_path: Media file to transcribe.
        output_dir: Parent directory for the per-pass output directory.
        cfg: Settings dict; ``whisperjav_cmd`` is optional.

    Returns:
        ``(argv, pass_output_dir)``.
    """
    prefix = f"pass{pass_num}"
    pass_output_dir = output_dir / prefix
    pass_output_dir.mkdir(parents=True, exist_ok=True)

    # Fixed part: launcher (venv binary when configured), input, destination,
    # pipeline and sensitivity.
    cmd = [
        cfg.get('whisperjav_cmd', 'whisperjav'),
        str(video_path),
        '--output-dir', str(pass_output_dir),
        '--mode', cfg[f'{prefix}_pipeline'],
        '--sensitivity', cfg[f'{prefix}_sensitivity'],
    ]

    # Optional part: a falsy setting means "leave the tool's default alone".
    for flag, setting in (('--speech-segmenter', 'speech_segmenter'), ('--model', 'model')):
        chosen = cfg[f'{prefix}_{setting}']
        if chosen:
            cmd.extend([flag, chosen])

    # Every language mode other than direct-to-english transcribes natively.
    subs_language = 'direct-to-english' if cfg['subtitle_language'] == 'direct-to-english' else 'native'
    cmd.extend(['--subs-language', subs_language])

    # The caller locates the generated SRT inside this directory afterwards.
    return cmd, pass_output_dir

def find_output_srt(pass_output_dir: Path, video_name: str) -> "Path | None":
    """Locate the SRT generated for ``video_name`` inside ``pass_output_dir``.

    The expected name is ``{basename}.{lang}.whisperjav.srt``
    (e.g. ``video.ja.whisperjav.srt`` or ``video.en.whisperjav.srt``); looser
    patterns are tried afterwards.

    Args:
        pass_output_dir: Directory the pass wrote into.
        video_name: File name of the source media (extension included).

    Returns:
        The matching SRT path, or ``None`` when the directory holds no SRT.
    """
    # Function-scope import: the cell's import line does not bring in glob.
    import glob

    # Escape the stem before building patterns. A name such as
    # "[ABC-123] title" would otherwise be read as a character class, match
    # nothing, and fall through to the "any SRT" step below. Because the
    # pass directory is shared by all videos, that step can return another
    # video's subtitles.
    base_name = glob.escape(Path(video_name).stem)
    patterns = [
        f"{base_name}.*.whisperjav.srt",  # Standard format
        f"{base_name}.srt",                # Fallback
        f"{base_name}*.srt",               # Any SRT with base name
    ]
    for pattern in patterns:
        # Sorted so the choice is deterministic when several files match.
        matches = sorted(pass_output_dir.glob(pattern))
        if matches:
            return matches[0]

    # Last resort: any SRT in the directory. Prefer the most recently
    # modified one, since older files belong to videos processed earlier.
    all_srts = list(pass_output_dir.glob("*.srt"))
    if not all_srts:
        return None
    return max(all_srts, key=lambda candidate: candidate.stat().st_mtime)

def run_pass(pass_num: int, video: Path, output_dir: Path, cfg: dict, gpu_id: str) -> Dict:
    """Run one transcription pass as a subprocess pinned to one GPU.

    Args:
        pass_num: Pass number (selects the ``pass<N>_*`` settings).
        video: Media file to transcribe.
        output_dir: Parent directory for the per-pass output directory.
        cfg: Settings dict.
        gpu_id: Value exported to the child as ``CUDA_VISIBLE_DEVICES``.

    Returns:
        Dict with ``pass``, ``video``, ``success`` (bool), ``output`` (SRT path
        or ``None``), ``output_dir``, ``elapsed`` (seconds), ``gpu`` and
        ``stderr`` (at most the last 500 characters).
    """
    cmd, pass_output_dir = build_pass_command(pass_num, video, output_dir, cfg)

    # The child sees only the assigned device.
    env = os.environ.copy()
    env['CUDA_VISIBLE_DEVICES'] = gpu_id

    start_time = time.time()
    try:
        # Pass argv directly instead of quoting it into a shell string:
        # same command, no shell parsing of file names.
        completed = subprocess.run(cmd, capture_output=True, text=True, env=env)
        returncode, stderr = completed.returncode, completed.stderr
    except OSError as exc:
        # Without a shell a missing or non-executable launcher raises instead
        # of exiting with 127; report it as a failed pass so the batch goes on.
        returncode, stderr = 127, str(exc)
    elapsed = time.time() - start_time

    # Find the output SRT file
    actual_output = find_output_srt(pass_output_dir, video.name)

    return {
        'pass': pass_num,
        'video': video.name,
        # bool() so callers get True/False rather than a Path or None.
        'success': bool(returncode == 0 and actual_output and actual_output.exists()),
        'output': actual_output,
        'output_dir': pass_output_dir,
        'elapsed': elapsed,
        'gpu': gpu_id,
        'stderr': stderr[-500:] if stderr else ''  # Last 500 chars for debugging
    }

# Process each video: run both passes, then merge (or fall back to the single
# pass that succeeded). Icons and arrows in the messages were stored as
# mis-decoded UTF-8; the intended glyphs are restored.
import shutil  # hoisted out of the per-video loop


def _report_pass(result, show_gpu):
    """Print one pass's outcome line, plus the start of its stderr tail on failure."""
    icon = "✓" if result['success'] else "✗"
    gpu_note = f" (GPU {result['gpu']})" if show_gpu else ""
    print(f"    {icon} Pass {result['pass']}{gpu_note}: {result['elapsed']:.1f}s")
    if not result['success'] and result['stderr']:
        print(f"        Error: {result['stderr'][:200]}")


all_results = []      # one {pass_num: result} dict per video
merged_outputs = []   # final SRT written for each video that produced one

for video_idx, video in enumerate(videos, 1):
    print(f"\n[{video_idx}/{len(videos)}] Processing: {video.name}")

    results = {}

    if PARALLEL_MODE:
        # ═══════════════════════════════════════════
        # KAGGLE: Run both passes in parallel on separate GPUs
        # ═══════════════════════════════════════════
        # Threads suffice: each one only waits on its own subprocess.
        with ThreadPoolExecutor(max_workers=2) as executor:
            futures = {
                executor.submit(run_pass, 1, video, folder_path, cfg, gpu_assignment[1]): 1,
                executor.submit(run_pass, 2, video, folder_path, cfg, gpu_assignment[2]): 2
            }

            # Report each pass as soon as it finishes.
            for future in as_completed(futures):
                pass_num = futures[future]
                result = future.result()
                results[pass_num] = result
                _report_pass(result, show_gpu=True)
    else:
        # ═══════════════════════════════════════════
        # COLAB: Run passes sequentially on same GPU
        # ═══════════════════════════════════════════
        for pass_num in [1, 2]:
            result = run_pass(pass_num, video, folder_path, cfg, gpu_assignment[pass_num])
            results[pass_num] = result
            _report_pass(result, show_gpu=False)

    # Merge results if both passes succeeded
    if results[1]['success'] and results[2]['success']:
        merged_output = folder_path / f"{video.stem}.merged.whisperjav.srt"
        stats = merge_srt(results[1]['output'], results[2]['output'], merged_output, cfg['merge_strategy'])
        print(f"    ✓ Merged: {stats['pass1_count']} + {stats['pass2_count']} → {stats['merged_count']} subtitles")
        merged_outputs.append(merged_output)
    else:
        # Use whichever pass succeeded (pass 1 is checked first).
        for p in [1, 2]:
            if results[p]['success']:
                # Copy to main folder with consistent naming
                final_output = folder_path / f"{video.stem}.whisperjav.srt"
                shutil.copy2(results[p]['output'], final_output)
                merged_outputs.append(final_output)
                print(f"    ⚠ Using Pass {p} only (other pass failed)")
                break
        else:
            # for/else: reached only when neither pass succeeded.
            print("    ✗ Both passes failed!")

    all_results.append(results)

# Store for Step 3
WHISPERJAV_NEW_SRTS = merged_outputs
WHISPERJAV_FOLDER_PATH = folder_path

# Blank line first, then the status line. Passing "\n..." to status() printed
# the icon on its own line, detached from the message.
print()
status(f"Created {len(merged_outputs)} merged subtitle file(s)")

# ═══════════════════════════════════════════
# ADD CREDITS
# ═══════════════════════════════════════════
section("ADDING CREDITS")

if not (cfg['opening_credit'] or cfg['closing_credit']):
    status("No credits configured")
else:
    # The opening credit becomes cue "0" at the very start of the file; the
    # closing credit becomes cue "9999" appended after everything else.
    credits_count = 0
    for srt_file in merged_outputs:
        try:
            content = srt_file.read_text(encoding='utf-8')
            if cfg['opening_credit']:
                content = "".join((f"0\n00:00:00,000 --> 00:00:00,500\n{cfg['opening_credit']}\n\n", content))
            if cfg['closing_credit']:
                content = content + f"\n9999\n23:59:58,000 --> 23:59:59,000\n{cfg['closing_credit']}\n"
            srt_file.write_text(content, encoding='utf-8')
        except Exception as credit_error:
            # Best effort: a file that cannot be updated is reported and skipped.
            print(f"  Warning: Could not add credits to {srt_file.name}: {credit_error}")
        else:
            credits_count += 1
    status(f"Credits added to {credits_count} file(s)")

# ═══════════════════════════════════════════
# COMPLETE
# ═══════════════════════════════════════════
section("TWO-PASS TRANSCRIPTION COMPLETE")

mode_text = "parallel" if PARALLEL_MODE else "sequential"

# Decide whether Step 3 still has work to do. Step 3 runs the "local"
# provider without an API key, so a missing key only rules out the cloud
# providers. Treating "no key" as "no translation" made the default local
# setup report a skip and disconnect the runtime before Step 3 could start.
wants_translation = cfg['subtitle_language'] == 'llm'
translation_pending = wants_translation and (
    cfg['translation_service'] == 'local' or bool(cfg['api_key'])
)

if translation_pending:
    display(HTML(f'<div style="padding:8px 10px;background:#fef9c3;border-radius:4px;border-left:2px solid #ca8a04;font-size:10px"><b>✓ Transcription done ({mode_text})!</b> {len(merged_outputs)} file(s). AI Translation will start next...</div>'))
else:
    display(HTML(f'<div style="padding:8px 10px;background:#f0fdf4;border-radius:4px;border-left:2px solid #16a34a;font-size:10px"><b>✓ Done ({mode_text})!</b> {len(merged_outputs)} subtitle(s) saved to Google Drive/{cfg["folder_name"]}/</div>'))
    if wants_translation:
        # Only reachable for a cloud provider with an empty key.
        print("Note: AI translation skipped (no API key provided)")

    # Nothing else will run, so release the runtime now if requested.
    if cfg['auto_disconnect']:
        print("\nAuto-disconnecting in 10s to save GPU credits...")
        time.sleep(10)
        try:
            from google.colab import runtime
            runtime.unassign()
        except Exception as disconnect_error:
            # Best effort: say why instead of failing silently.
            print(f"Auto-disconnect unavailable: {disconnect_error}")

In [None]:
#@title Step 3: AI Translation (if selected) { display-mode: "form" }
#@markdown Translate each subtitle file using AI (only runs if "English (AI translate)" selected)

import os, sys, subprocess, shlex, time
from pathlib import Path
from IPython.display import display, HTML

def status(msg, ok=True):
    """Print ``msg`` prefixed with a check mark when ``ok``, else a cross mark."""
    # The icons were stored as mis-decoded UTF-8; these are the intended glyphs.
    icon = "✓" if ok else "✗"
    print(f"{icon} {msg}")

def section(title):
    """Print ``title`` framed by two 40-character horizontal rules."""
    # The rule character was stored as mis-decoded UTF-8 (three junk characters
    # per repeat); build it from the intended box-drawing glyph instead.
    rule = "─" * 40
    print(f"\n{rule}\n{title}\n{rule}")

# Check prerequisites: Steps 1 and 2 hand their results over through
# module-level names, so both must have run in this session.
if 'WHISPERJAV_CONFIG' not in dir():
    display(HTML('<div style="padding:8px;background:#fef2f2;border-radius:4px;color:#991b1b;font-size:10px"><b>Error:</b> Run Step 1 first</div>'))
    raise SystemExit()

if 'WHISPERJAV_NEW_SRTS' not in dir():
    display(HTML('<div style="padding:8px;background:#fef2f2;border-radius:4px;color:#991b1b;font-size:10px"><b>Error:</b> Run Step 2 first</div>'))
    raise SystemExit()

cfg = WHISPERJAV_CONFIG
new_srts = WHISPERJAV_NEW_SRTS
folder_path = WHISPERJAV_FOLDER_PATH

# Nothing to do unless "English (AI translate)" was selected in Step 1.
# (The info sign below was stored as mis-decoded UTF-8; it is restored.)
if cfg['subtitle_language'] != 'llm':
    display(HTML('<div style="padding:8px 10px;background:#f0f9ff;border-radius:4px;border-left:2px solid #3b82f6;font-size:10px"><b>ℹ Skipped:</b> AI translation not selected</div>'))
    raise SystemExit()

# Check API key requirement (not needed for local provider)
is_local = cfg['translation_service'] == 'local'
if not is_local and not cfg['api_key']:
    display(HTML('<div style="padding:8px;background:#fef2f2;border-radius:4px;color:#991b1b;font-size:10px"><b>Error:</b> No API key provided for cloud translation. Use "local" provider for free GPU translation.</div>'))
    raise SystemExit()

# Step 2 may have produced no subtitles at all.
if not new_srts:
    display(HTML('<div style="padding:8px;background:#fef2f2;border-radius:4px;color:#991b1b;font-size:10px"><b>Error:</b> No subtitle files to translate</div>'))
    raise SystemExit()

# Cloud providers only: publish the key under the environment variable named
# for the chosen provider ("API_KEY" when the provider is not in the table).
if not is_local:
    env_map = dict(
        deepseek="DEEPSEEK_API_KEY",
        openrouter="OPENROUTER_API_KEY",
        gemini="GEMINI_API_KEY",
        claude="ANTHROPIC_API_KEY",
        gpt="OPENAI_API_KEY",
    )
    os.environ[
        env_map.get(cfg['translation_service'], "API_KEY")
    ] = cfg['api_key']

# Translate each SRT file
section("AI TRANSLATION")
if is_local:
    print(f"Provider: local ({cfg.get('local_model', 'gemma-9b')})")
    print("Note: First run downloads model (~5GB) and llama-cpp-python (~700MB)")
else:
    print(f"Provider: {cfg['translation_service']}")
print(f"Style: {cfg['_translation_style']}")
print(f"Files to translate: {len(new_srts)}\n")

# Step 2 records the whisperjav launcher it installed. On Colab that is a path
# inside the virtualenv, which is not on PATH, so the bare command name would
# not be found there. Use the translate script next to that launcher when it
# exists; otherwise fall back to the bare name.
translate_exe = 'whisperjav-translate'
launcher = Path(cfg.get('whisperjav_cmd', 'whisperjav'))
sibling = launcher.with_name('whisperjav-translate')
if launcher.parent != Path('.') and sibling.exists():
    translate_exe = str(sibling)

translated_files = []
failed_files = []

for i, srt_file in enumerate(new_srts, 1):
    print(f"[{i}/{len(new_srts)}] Translating: {srt_file.name}")

    translate_cmd = [
        translate_exe,
        '-i', str(srt_file),
        '--provider', cfg['translation_service'],
        '-t', 'english',
        '--tone', cfg['translation_style'],
        '--stream'
    ]

    # Add model for local provider
    if is_local:
        translate_cmd.extend(['--model', cfg.get('local_model', 'gemma-9b')])

    try:
        # argv is passed directly (no shell), so file names need no quoting.
        process = subprocess.Popen(
            translate_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            bufsize=1,
            text=True
        )

        # Progress arrives on stderr and is echoed live; stdout is read only
        # after stderr closes.
        # NOTE(review): this relies on the tool writing very little to stdout
        # (its output is used below as the result path); confirm against the
        # tool's actual stdout volume.
        for line in process.stderr:
            print(f"    {line}", end='')

        stdout_output, _ = process.communicate()

        if process.returncode == 0:
            # On success stdout is taken to be the translated file's path.
            output_path = stdout_output.strip()
            if output_path:
                translated_files.append(Path(output_path))
            status(f"Completed: {srt_file.name}")
        else:
            status(f"Failed: {srt_file.name}", False)
            failed_files.append(srt_file)

    except Exception as e:
        # Includes a launcher that cannot be started; go on with the next file.
        status(f"Error translating {srt_file.name}: {e}", False)
        failed_files.append(srt_file)

    print()

# Complete: summarise the translation run. The warning sign and check mark
# were stored as mis-decoded UTF-8; the intended glyphs are restored.
section("COMPLETE")

if failed_files:
    display(HTML(f'<div style="padding:8px 10px;background:#fef9c3;border-radius:4px;border-left:2px solid #ca8a04;font-size:10px"><b>⚠ Partially done!</b> {len(translated_files)}/{len(new_srts)} translated. {len(failed_files)} failed.</div>'))
else:
    display(HTML(f'<div style="padding:8px 10px;background:#f0fdf4;border-radius:4px;border-left:2px solid #16a34a;font-size:10px"><b>✓ All done!</b> {len(new_srts)} Japanese + {len(translated_files)} English subtitle(s) in Google Drive/{cfg["folder_name"]}/</div>'))

# Auto-disconnect
if cfg['auto_disconnect']:
    print("\nAuto-disconnecting in 10s to save GPU credits...")
    time.sleep(10)
    try:
        from google.colab import runtime
        runtime.unassign()
    except Exception as disconnect_error:
        # Best effort: say why instead of failing silently.
        print(f"Auto-disconnect unavailable: {disconnect_error}")
else:
    print("\nRemember to disconnect manually to save GPU credits.")