<a href="https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_colab_edition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🎌 WhisperJAV - Colab Edition v1.7.0-beta

**Japanese AV Subtitle Generator** with AI-powered transcription.

### New in 1.7.0:
- üáØüáµ **Kotoba Model** - Japanese-optimized whisper for better dialogue recognition
- üéØ **Ensemble Mode** - Two-pass processing for maximum accuracy
- ‚ö° **Faster Processing** - Improved performance with GPU acceleration

---

### How to Use:
1. **Choose your experience level below** (Quick, Standard, or Advanced)
2. **Run all cells** (`Runtime` → `Run all`)
3. **Upload your video** when prompted
4. **Download subtitles** when complete!

---

In [None]:
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "intro_markdown"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_colab_edition.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
        "# WhisperJAV - Colab Edition\n",
        "üéå 2025.09.02: updated to always use the latest version of WhisperJAV release\n",
        "1. Make sure your audios are in drive folder WhisperJAV\n",
        "2. If you want to change the default settings use below form\n",
        "3. Click `Runtime` ‚Üí `Run all` in the menu to start everything.\n",
        "4. **Connect Google Drive** when prompted, then continue, continue.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "master_control_cell",
        "title": "1Ô∏è‚É£ Configure, Run & Disconnect"
      },
      "outputs": [],
      "source": [
        "#@title Configuration & Settings\n",
        "\n",
        "\n",
        "#@markdown Choose accuracy vs. speed level\n",
        "mode = \"balanced\"  #@param [\"balanced\", \"fast\", \"faster\"]\n",
        "\n",
        "#@markdown Select details vs. noise tolerance level\n",
        "sensitivity = \"aggressive\"  #@param [\"balanced\", \"aggressive\", \"conservative\"]\n",
        "\n",
        "#@markdown Select subtitle output format\n",
        "subs_language = \"native\"  #@param [\"native\", \"direct-to-english\"]\n",
        "\n",
        "#@markdown ‚ÑπÔ∏è **Subtitle options:** 'native' = keep in source language (Japanese), 'direct-to-english' = translate to English via Whisper\n",
        "\n",
        "\n",
        "\n",
        "adaptive_classification = False\n",
        "adaptive_audio_enhancement = False\n",
        "smart_postprocessing = True\n",
        "opening_credits = \"Subtitles by yourname\" #@param {type:\"string\"}\n",
        "closing_credits_text = \"Subs by WhisperJAV Colab\"\n",
        "\n",
        "#@markdown ---\n",
        "#@markdown üîå Session Management *Automatically disconnect when finished to save GPU credits.*\n",
        "auto_disconnect = True #@param {type:\"boolean\"}\n",
        "#@markdown ---\n",
        "\n",
        "#===============================================================================\n",
        "#  ‚úÖ END OF CONFIGURATION - THE REST OF THE NOTEBOOK IS AUTOMATED\n",
        "#===============================================================================\n",
        "import os\n",
        "import sys\n",
        "import subprocess\n",
        "import shlex\n",
        "import time\n",
        "from pathlib import Path\n",
        "import html\n",
        "from google.colab import drive\n",
        "from IPython.display import display, HTML\n",
        "from tqdm.notebook import tqdm\n",
        "\n",
        "print(\"--- STEP 1: PRE-FLIGHT CHECKS ---\")\n",
        "!nvidia-smi --query-gpu=name,driver_version,memory.total,memory.used --format=csv,noheader\n",
        "print(\"‚úÖ GPU check complete.\\n\")\n",
        "\n",
        "print(\"--- STEP 2: CONNECTING GOOGLE DRIVE ---\")\n",
        "try:\n",
        "    drive.mount('/content/drive', force_remount=True)\n",
        "    drive_folder = Path('/content/drive/MyDrive/WhisperJAV')\n",
        "    drive_folder.mkdir(exist_ok=True)\n",
        "    print(f\"‚úÖ Google Drive connected. Using folder: {drive_folder}\\n\")\n",
        "except Exception as e:\n",
        "    display(HTML(f'<div style=\\\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\\\"><h3 style=\\\"color: #721c24;\\\">‚ùå ERROR: Failed to connect Google Drive.</h3><p style=\\\"color: #721c24;\\\">Please re-run the cell and ensure you accept the authorization pop-up.</p></div>'))\n",
        "    sys.exit()\n",
        "\n",
        "# === Installation Snippet ===\n",
        "import torch\n",
        "import subprocess\n",
        "import sys\n",
        "import time\n",
        "from IPython.display import HTML, display\n",
        "\n",
        "def print_step_html(step_num, title):\n",
        "    display(HTML(f\"\"\"\n",
        "    <div style='margin-top:16px;margin-bottom:8px'>\n",
        "        <span style='font-weight:bold;color:#1f77b4'>Step {step_num}:</span>\n",
        "        <span style='margin-left:8px'>{title}</span>\n",
        "    </div>\n",
        "    \"\"\"))\n",
        "\n",
        "def print_status_html(success, message, duration=None):\n",
        "    color = \"#2ecc71\" if success else \"#e74c3c\"\n",
        "    icon = \"‚úî\" if success else \"‚úñ\"\n",
        "    time_str = f\" <span style='color:#7f8c8d;font-size:0.9em'>({duration:.1f}s)</span>\" if duration else \"\"\n",
        "    display(HTML(f\"\"\"\n",
        "    <div style='margin-left:24px;margin-bottom:4px'>\n",
        "        <span style='color:{color};font-weight:bold'>{icon}</span>\n",
        "        <span style='margin-left:6px'>{message}{time_str}</span>\n",
        "    </div>\n",
        "    \"\"\"))\n",
        "\n",
        "def run_install_command(command, success_msg, error_msg):\n",
        "    start_time = time.time()\n",
        "    try:\n",
        "        result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)\n",
        "        print_status_html(True, success_msg, time.time()-start_time)\n",
        "        return True\n",
        "    except subprocess.CalledProcessError as e:\n",
        "        print_status_html(False, f\"{error_msg}\")\n",
        "        print(f\"--- DETAILED ERROR LOG FOR '{success_msg}' ---\\n{e.stderr.strip()}\\n-------------------------------------\", file=sys.stderr)\n",
        "        return False\n",
        "\n",
        "def verify_pytorch():\n",
        "    try:\n",
        "        import torch\n",
        "        import torchvision\n",
        "        import torchaudio\n",
        "        print_status_html(True, f\"PyTorch {torch.__version__}, TorchVision {torchvision.__version__}, TorchAudio {torchaudio.__version__}\")\n",
        "        if torch.cuda.is_available():\n",
        "            print_status_html(True, f\"CUDA {torch.version.cuda} available\")\n",
        "        else:\n",
        "            print_status_html(False, \"CUDA not available - GPU required\")\n",
        "            return False\n",
        "        return True\n",
        "    except ImportError as e:\n",
        "        print_status_html(False, f\"PyTorch check failed: {str(e)}\")\n",
        "        return False\n",
        "\n",
        "def install_whisperjav():\n",
        "    display(HTML(\"<h3 style='color:#1f77b4; border-bottom: 1px solid #ccc; padding-bottom: 5px;'>STEP 3: Installing Dependencies <span style='font-size: 0.9em; font-weight: normal;'>Takes about 3min ‚òï</span></h3>\"))\n",
        "\n",
        "    # 3.1: PyTorch verification\n",
        "    print_step_html(3.1, \"Verifying PyTorch installation\")\n",
        "    if not verify_pytorch():\n",
        "        print_status_html(False, \"Cannot proceed without valid PyTorch/CUDA\")\n",
        "        return False\n",
        "\n",
        "    # 3.2: System dependencies\n",
        "    print_step_html(3.2, \"Installing system packages\")\n",
        "    sys_cmd = \"apt-get update -qq && apt-get install -y -qq portaudio19-dev ffmpeg\"\n",
        "    if not run_install_command(sys_cmd, \"System packages installed\", \"System package install failed\"):\n",
        "        return False\n",
        "\n",
        "    # 3.3: Core Python dependencies (isolated)\n",
        "    print_step_html(3.3, \"Installing Python dependencies (will take a good minute)\")\n",
        "    deps = [\n",
        "        \"tqdm\", \"numba\", \"more-itertools\", \"tiktoken\", \"triton\",\n",
        "        \"ffmpeg-python\", \"soundfile\", \"auditok\", \"numpy\", \"scipy\",\n",
        "        \"pysrt\", \"srt\", \"aiofiles\", \"jsonschema\", \"Pillow\", \"colorama\",\n",
        "        \"librosa\", \"matplotlib\", \"pyloudnorm\", \"requests\", \"transformers\",\n",
        "        \"optimum\", \"accelerate\", \"faster-whisper\"\n",
        "    ]\n",
        "    pip_cmd = f\"pip install -q  {' '.join(deps)}\"\n",
        "    if not run_install_command(pip_cmd, \"Core dependencies installed\", \"Dependency install failed\"):\n",
        "        return False\n",
        "\n",
        "    # 3.4: Specialized components\n",
        "    print_step_html(3.4, \"Installing specialized components\")\n",
        "    components = [\n",
        "        # Whisper (no deps)\n",
        "        (\"pip install -q --no-deps git+https://github.com/openai/whisper.git@main\",\n",
        "         \"OpenAI Whisper\"),\n",
        "\n",
        "        # Stable-TS (with deps)\n",
        "        (\"pip install -q --no-deps git+https://github.com/meizhong986/stable-ts-fix-setup.git@main\",\n",
        "         \"Stable-TS\"),\n",
        "\n",
        "        # WhisperJAV (no deps)\n",
        "        (\"pip install --no-deps -q git+https://github.com/meizhong986/WhisperJAV.git@main\",\n",
        "         \"WhisperJAV\")\n",
        "    ]\n",
        "\n",
        "    for cmd, name in components:\n",
        "        if not run_install_command(cmd, f\"{name} installed\", f\"{name} install failed\"):\n",
        "            return False\n",
        "\n",
        "    # 3.5: Verification\n",
        "    print_step_html(3.5, \"Verifying installation\")\n",
        "    try:\n",
        "        import numpy, torch, torchaudio, torchvision, whisper\n",
        "        display(HTML(f\"\"\"\n",
        "        <div style=\"background:#f8f9fa;padding:12px;border-radius:4px;margin-top:8px\">\n",
        "            <b>‚úÖ Installation Verification:</b><br>\n",
        "            NumPy: {numpy.__version__}<br>\n",
        "            Torch: {torch.__version__} (CUDA: {torch.version.cuda})<br>\n",
        "            TorchAudio: {torchaudio.__version__}<br>\n",
        "            TorchVision: {torchvision.__version__}<br>\n",
        "            Whisper: {whisper.__version__}\n",
        "        </div>\n",
        "        \"\"\"))\n",
        "        print_status_html(True, \"Verification successful\")\n",
        "        return True\n",
        "    except Exception as e:\n",
        "        print_status_html(False, f\"Verification failed: {str(e)}\")\n",
        "        return False\n",
        "\n",
        "if not install_whisperjav():\n",
        "    display(HTML(\"<h3 style='color:#e74c3c'>‚úñ Installation Failed. Notebook halted.</h3>\"))\n",
        "    sys.exit()\n",
        "else:\n",
        "    display(HTML(\"<h3 style='color:#2ecc71'>‚úî Installation Completed</h3>\"))\n",
        "# === End Installation Snippet ===\n",
        "\n",
        "print(\"--- STEP 4: PREPARING TO RUN WHISPERJAV TRANSCRIPTION ---\")\n",
        "# If user doesn't change the example, treat it as empty\n",
        "if opening_credits == \"Subtitles by yourname\": opening_credits = \"\"\n",
        "\n",
        "# Build the command robustly as a list of arguments\n",
        "command_list = [\n",
        "    'whisperjav',\n",
        "    str(drive_folder)\n",
        "]\n",
        "\n",
        "options = {\n",
        "    '--mode': mode,\n",
        "    '--sensitivity': sensitivity,\n",
        "    '--subs-language': subs_language,\n",
        "    '--output-dir': str(drive_folder),\n",
        "    '--adaptive-classification': adaptive_classification,\n",
        "    '--adaptive-audio-enhancement': adaptive_audio_enhancement,\n",
        "    '--smart-postprocessing': smart_postprocessing\n",
        "}\n",
        "\n",
        "for flag, value in options.items():\n",
        "    if isinstance(value, bool):\n",
        "        if value:\n",
        "            command_list.append(flag)\n",
        "    elif value:\n",
        "        command_list.append(flag)\n",
        "        command_list.append(str(value))\n",
        "\n",
        "# Join the list into a shell-safe string to be used with Popen(shell=True)\n",
        "full_command = shlex.join(command_list)\n",
        "print(f\"Executing command: {full_command}\\n\")\n",
        "\n",
        "# Execute with live output and robust error handling\n",
        "try:\n",
        "    with subprocess.Popen(full_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, universal_newlines=True) as process:\n",
        "        for line in process.stdout:\n",
        "            print(line, end='')\n",
        "\n",
        "    if process.returncode != 0:\n",
        "        raise subprocess.CalledProcessError(process.returncode, process.args)\n",
        "\n",
        "except subprocess.CalledProcessError as e:\n",
        "    error_message = f\"The main process failed with exit code {e.returncode}.\"\n",
        "    display(HTML(f'''<div style=\\\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\\\"><h3 style=\\\"color: #721c24;\\\">‚ùå ERROR: Transcription Failed</h3><p style=\\\"color: #721c24;\\\">{html.escape(error_message)} Please check the console output above for the specific error from the script.</p></div>'''))\n",
        "    sys.exit()\n",
        "\n",
        "print(\"\\n--- STEP 5: POST-PROCESSING ---\")\n",
        "srt_files = list(drive_folder.glob('*.srt'))\n",
        "\n",
        "if opening_credits or closing_credits_text:\n",
        "    for srt_file in tqdm(srt_files, desc=\"Final post-process\"):\n",
        "        try:\n",
        "            # Read the original content\n",
        "            original_content = srt_file.read_text(encoding='utf-8')\n",
        "\n",
        "            # Initialize the content with the original\n",
        "            new_content = original_content\n",
        "\n",
        "            # Add opening credits if needed\n",
        "            if opening_credits:\n",
        "                prologue_line = f\"0\\n00:00:00,000 --> 00:00:00,500\\n{opening_credits}\\n\\n\"\n",
        "                new_content = prologue_line + new_content\n",
        "\n",
        "            # Add closing credits if needed\n",
        "            if closing_credits_text:\n",
        "                closing_line = f\"\\n9999\\n23:59:58,000 --> 23:59:59,000\\n{closing_credits_text}\\n\"\n",
        "                new_content += closing_line\n",
        "\n",
        "            # Write the updated content back to the file\n",
        "            srt_file.write_text(new_content, encoding='utf-8')\n",
        "\n",
        "        except Exception as e:\n",
        "            print(f\"   - Warning: Could not add credits to {srt_file.name}: {e}\")\n",
        "print(\"‚úÖ Post-processing complete.\\n\")\n",
        "\n",
        "display(HTML(\"\"\"<div style=\\\"background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 8px; padding: 20px; margin-top: 20px;\\\"><h3 style=\\\"color: #155724; margin-top: 0;\\\">üéâ Success! All tasks are complete.</h3><p style=\\\"color: #155724; margin-bottom: 0;\\\">The session will now disconnect automatically if you enabled the option.</p></div>\"\"\"))\n",
        "\n",
        "time.sleep(5)  # Add a delay to ensure all file operations are completed\n",
        "\n",
        "if auto_disconnect:\n",
        "    print(\"\\nüîå Auto-disconnect enabled. This session will now end to save resources.\")\n",
        "    time.sleep(10)\n",
        "    from google.colab import runtime\n",
        "    runtime.unassign()"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.12"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}

In [None]:
#@title üì¶ Step 2: Installation (Auto-runs)
#@markdown This cell installs WhisperJAV and its dependencies. Takes about 2-3 minutes.

import os
import sys
import subprocess
import time
from pathlib import Path
from IPython.display import display, HTML, clear_output

def show_status(message, status="info", details=None):
    """Render a one-line status banner in the notebook output.

    Args:
        message: Headline text, shown in bold after the status icon. It is
            inserted as-is, so callers may pass inline HTML.
        status: One of "success", "error", "info" or "warning"; selects the
            border colour and the icon. Any other value falls back to a
            neutral colour with no icon.
        details: Optional secondary text shown on a second line. It is
            HTML-escaped because it normally carries raw stderr or exception
            text, which may contain characters such as ``<`` or ``&``.
    """
    # Local import: the cell's import list does not bring in the html module.
    from html import escape

    colors = {"success": "#2ecc71", "error": "#e74c3c", "info": "#3498db", "warning": "#f39c12"}
    icons = {"success": "‚úÖ", "error": "‚ùå", "info": "‚ÑπÔ∏è", "warning": "‚ö†Ô∏è"}
    color = colors.get(status, "#333")
    icon = icons.get(status, "")
    markup = f'<div style="padding:8px;margin:4px 0;border-left:4px solid {color}"><b>{icon} {message}</b>'
    if details:
        # Escape so tool output like "numpy<2" is displayed literally instead of
        # being parsed as markup by the browser.
        markup += f'<br><span style="color:#666;font-size:0.9em">{escape(str(details))}</span>'
    markup += '</div>'
    display(HTML(markup))

def run_cmd(cmd, desc, silent=True):
    """Run a shell command and report a failure through ``show_status``.

    Args:
        cmd: Command line, executed through the shell (``shell=True``).
        desc: Human-readable label used in the failure banner
            ("<desc> failed").
        silent: When True, stdout/stderr are captured instead of being
            streamed to the cell output; the captured text is then used as
            the failure details.

    Returns:
        True when the command exits with status 0, False when it exits with a
        non-zero status.
    """
    try:
        subprocess.run(cmd, shell=True, capture_output=silent, text=True, check=True)
    except subprocess.CalledProcessError as exc:
        # stderr/stdout are None when output was not captured (silent=False);
        # fall back to stdout because some tools report errors there.
        captured = (exc.stderr or exc.stdout or "").strip()
        # Pass None rather than an empty/"None" string so no details line is shown.
        show_status(f"{desc} failed", "error", captured[:200] or None)
        return False
    return True

# ========== INSTALLATION START ==========
# Installs system packages, Python dependencies and the WhisperJAV components,
# then verifies that the whisperjav package can be imported. Sets the
# module-level flag INSTALLED and stops the cell with sys.exit(1) on failure.
display(HTML("<h3>üöÄ Installing WhisperJAV 1.7.0-beta</h3>"))
start_time = time.time()

# 1. GPU Check
show_status("Checking GPU...", "info")
try:
    import torch
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        show_status(f"GPU Available: {gpu_name}", "success")
    else:
        show_status("No GPU detected - CPU mode will be slower", "warning")
except ImportError:
    show_status("PyTorch not found, will be installed", "info")
except Exception as gpu_error:
    # torch imported but querying CUDA failed; report it instead of claiming
    # that PyTorch is missing.
    show_status("GPU check failed", "warning", str(gpu_error))

# 2. System packages
show_status("Installing system packages...", "info")
# No shell redirect here: run_cmd already captures the output and needs stderr
# to explain a failure. Only announce success when the command succeeded.
if run_cmd("apt-get update -qq && apt-get install -y -qq ffmpeg portaudio19-dev", "System packages"):
    show_status("System packages ready", "success")

# 3. Python dependencies
show_status("Installing Python dependencies... (this takes ~2 min)", "info")
deps = [
    "tqdm numba tiktoken ffmpeg-python soundfile auditok",
    "numpy scipy pysrt srt aiofiles jsonschema Pillow colorama",
    "librosa matplotlib pyloudnorm requests faster-whisper",
    "transformers optimum accelerate huggingface-hub pydantic"
]
failed_dep_groups = []
for dep_group in deps:
    if not run_cmd(f"pip install -q {dep_group}", "Dependencies"):
        failed_dep_groups.append(dep_group)
if failed_dep_groups:
    # Keep going (the final import check decides), but do not report success.
    show_status("Some Python dependencies could not be installed", "warning", "; ".join(failed_dep_groups))
else:
    show_status("Python dependencies installed", "success")

# 4. WhisperJAV and components
show_status("Installing WhisperJAV components...", "info")
components = [
    ("pip install -q --no-deps git+https://github.com/openai/whisper.git@main", "OpenAI Whisper"),
    ("pip install -q --no-deps git+https://github.com/meizhong986/stable-ts-fix-setup.git@main", "Stable-TS"),
    ("pip install -q git+https://github.com/meizhong986/WhisperJAV.git@main", "WhisperJAV"),
]
for cmd, name in components:
    if run_cmd(cmd, name):
        show_status(f"{name} installed", "success")

# 5. Verify installation
show_status("Verifying installation...", "info")
try:
    import whisperjav
    from whisperjav.__version__ import __version_display__
    show_status(f"WhisperJAV {__version_display__} ready!", "success")
    INSTALLED = True
except Exception as e:
    # Fall back to the plain __version__ attribute when __version_display__
    # cannot be imported.
    try:
        from whisperjav.__version__ import __version__
        show_status(f"WhisperJAV {__version__} ready!", "success")
        INSTALLED = True
    except Exception:
        # Report the first failure; it is the one that explains what is missing.
        show_status("Installation verification failed", "error", str(e))
        INSTALLED = False

elapsed = time.time() - start_time
if INSTALLED:
    display(HTML(f'<div style="background:#d4edda;padding:16px;border-radius:8px;margin-top:16px"><h3 style="color:#155724;margin:0">‚úÖ Installation Complete ({elapsed:.0f}s)</h3><p style="margin:8px 0 0 0">Scroll down to your selected mode to continue.</p></div>'))
else:
    display(HTML('<div style="background:#f8d7da;padding:16px;border-radius:8px;margin-top:16px"><h3 style="color:#721c24;margin:0">‚ùå Installation Failed</h3><p>Please try running this cell again or check the error messages above.</p></div>'))
    sys.exit(1)

In [None]:
#@title üü¢ QUICK MODE - One-Click Transcription
#@markdown **Best for beginners!** Uses optimal settings for Japanese AV content.
#@markdown
#@markdown Settings: `kotoba-faster-whisper` pipeline + `aggressive` sensitivity

#@markdown ---
#@markdown ### üìÅ File Selection

file_source = "Google Drive" #@param ["Google Drive", "Upload File"]

#@markdown ### üìÇ Google Drive Settings (if using Drive)
drive_folder_name = "WhisperJAV" #@param {type:"string"}

#@markdown ---

import os
import sys
import subprocess
import shlex
from html import escape
from pathlib import Path
from IPython.display import display, HTML
from google.colab import drive, files

# Mount Drive or handle upload
if file_source == "Google Drive":
    print("üìÇ Mounting Google Drive...")
    drive.mount('/content/drive', force_remount=False)
    input_path = Path(f'/content/drive/MyDrive/{drive_folder_name}')
    # parents=True allows a nested folder name such as "Media/WhisperJAV".
    input_path.mkdir(parents=True, exist_ok=True)
    output_path = input_path
    print(f"‚úÖ Using folder: {input_path}")

    # Check for media files. Compare suffixes case-insensitively so files such
    # as "clip.MP4" are recognised (glob patterns are case-sensitive here).
    video_suffixes = {'.mp4', '.mkv', '.avi', '.mov'}
    media_files = [p for p in input_path.iterdir() if p.suffix.lower() in video_suffixes]
    if not media_files:
        display(HTML(f'<div style="background:#fff3cd;padding:16px;border-radius:8px"><h4 style="color:#856404">‚ö†Ô∏è No video files found</h4><p>Please upload video files to your Google Drive folder: <code>MyDrive/{escape(drive_folder_name)}</code></p></div>'))
    else:
        print(f"üìπ Found {len(media_files)} video file(s)")
else:
    print("üì§ Please upload your video file...")
    uploaded = files.upload()
    if uploaded:
        # Only the first uploaded file is transcribed.
        filename = list(uploaded.keys())[0]
        input_path = Path(f'/content/{filename}')
        output_path = Path('/content/output')
        output_path.mkdir(exist_ok=True)
        print(f"‚úÖ Uploaded: {filename}")
    else:
        print("‚ùå No file uploaded")
        sys.exit(1)

# Build and run command
print("\n" + "="*60)
print("üéØ Starting transcription with Japanese Expert preset...")
print("   Pipeline: kotoba-faster-whisper (Japanese-optimized)")
print("   Sensitivity: aggressive (catches all dialogue)")
print("="*60 + "\n")

cmd = [
    'whisperjav',
    str(input_path),
    '--mode', 'kotoba-faster-whisper',
    '--sensitivity', 'aggressive',
    '--output-dir', str(output_path)
]

# full_cmd is only for display; the argument list is executed directly so no
# shell quoting round trip is needed.
full_cmd = shlex.join(cmd)
print(f"Command: {full_cmd}\n")

try:
    # The context manager closes the output pipe and waits for the process.
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        universal_newlines=True, bufsize=1
    ) as process:
        # Stream the tool's combined stdout/stderr line by line.
        for line in process.stdout:
            print(line, end='')

    if process.returncode == 0:
        display(HTML('<div style="background:#d4edda;padding:20px;border-radius:8px;margin-top:16px"><h3 style="color:#155724">üéâ Transcription Complete!</h3><p>Your subtitle files (.srt) are saved in the output folder.</p></div>'))
        # List output files (every .srt currently in the output folder)
        srt_files = list(output_path.glob('*.srt'))
        if srt_files:
            print(f"\nüìÑ Generated {len(srt_files)} subtitle file(s):")
            for f in srt_files:
                print(f"   ‚Ä¢ {f.name}")
    else:
        display(HTML('<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">‚ùå Transcription failed</h4><p>Check the error messages above.</p></div>'))
except Exception as e:
    # Escape the message: exception text may contain HTML-significant characters.
    display(HTML(f'<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">‚ùå Error</h4><p>{escape(str(e))}</p></div>'))

In [None]:
#@title üü° STANDARD MODE - Preset Selection
#@markdown Choose from optimized presets with the option to tweak settings.

#@markdown ---
#@markdown ### üéØ Select Preset

preset = "\U0001F1EF\U0001F1F5 Japanese Expert (RECOMMENDED)" #@param ["\U0001F680 Quick Scan (~5 min/hour)", "\U0001F1EF\U0001F1F5 Japanese Expert (RECOMMENDED)", "\U0001F3AF Maximum Quality (~20 min/hour)"]

#@markdown ---
#@markdown ### ‚öôÔ∏è Optional Adjustments

override_sensitivity = "Use preset default" #@param ["Use preset default", "conservative", "balanced", "aggressive"]
output_language = "Japanese (native)" #@param ["Japanese (native)", "English (direct translation)"]

#@markdown ---
#@markdown ### üìÅ File Selection

file_source_std = "Google Drive" #@param ["Google Drive", "Upload File"]
drive_folder_std = "WhisperJAV" #@param {type:"string"}

#@markdown ---

import os
import sys
import subprocess
import shlex
from html import escape
from pathlib import Path
from IPython.display import display, HTML
from google.colab import drive, files

# Parse preset to CLI options. Keys are matched as substrings of the form value
# selected in `preset`; "ensemble" switches between single-pass and two-pass
# command lines below.
preset_configs = {
    "Quick Scan": {
        "mode": "faster",
        "sensitivity": "balanced",
        "ensemble": False,
        "desc": "Fast preview - ~5 min per hour of video"
    },
    "Japanese Expert": {
        "mode": "kotoba-faster-whisper",
        "sensitivity": "aggressive",
        "ensemble": False,
        "desc": "Best for JAV content - ~8 min per hour"
    },
    "Maximum Quality": {
        "mode": "kotoba-faster-whisper",
        "sensitivity": "aggressive",
        "ensemble": True,
        "pass2": "balanced",
        "pass2_sensitivity": "balanced",
        "merge": "smart_merge",
        "desc": "Two-pass ensemble - ~20 min per hour"
    }
}

# Match preset
config = None
for key in preset_configs:
    if key in preset:
        # Copy the entry so the sensitivity override below does not rewrite
        # the preset table itself.
        config = dict(preset_configs[key])
        break

if not config:
    config = dict(preset_configs["Japanese Expert"])

# Drop the leading emoji token from the form value for display.
print(f"üìã Selected preset: {preset.split(' ', 1)[-1] if ' ' in preset else preset}")
print(f"   {config['desc']}")

# Apply overrides
if override_sensitivity != "Use preset default":
    config["sensitivity"] = override_sensitivity
    print(f"   Sensitivity override: {override_sensitivity}")

subs_lang = "native" if "Japanese" in output_language else "direct-to-english"

# File handling
if file_source_std == "Google Drive":
    drive.mount('/content/drive', force_remount=False)
    input_path = Path(f'/content/drive/MyDrive/{drive_folder_std}')
    # parents=True allows a nested folder name such as "Media/WhisperJAV".
    input_path.mkdir(parents=True, exist_ok=True)
    output_path = input_path
else:
    uploaded = files.upload()
    if uploaded:
        # Only the first uploaded file is transcribed.
        filename = list(uploaded.keys())[0]
        input_path = Path(f'/content/{filename}')
        output_path = Path('/content/output')
        output_path.mkdir(exist_ok=True)
    else:
        # Explain why the cell stops instead of exiting silently.
        print("No file uploaded - nothing to transcribe.")
        sys.exit(1)

# Build command
cmd = ['whisperjav', str(input_path), '--output-dir', str(output_path)]

if config.get('ensemble'):
    cmd.extend(['--ensemble',
                '--pass1-pipeline', config['mode'],
                '--pass1-sensitivity', config['sensitivity'],
                '--pass2-pipeline', config.get('pass2', 'balanced'),
                '--pass2-sensitivity', config.get('pass2_sensitivity', 'balanced'),
                '--merge-strategy', config.get('merge', 'smart_merge')])
else:
    cmd.extend(['--mode', config['mode'], '--sensitivity', config['sensitivity']])

cmd.extend(['--subs-language', subs_lang])

print("\n" + "="*60)
print("üéØ Starting transcription...")
print("="*60 + "\n")

# full_cmd is only for display; the argument list is executed directly so no
# shell quoting round trip is needed.
full_cmd = shlex.join(cmd)
print(f"Command: {full_cmd}\n")

try:
    # The context manager closes the output pipe and waits for the process.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, bufsize=1) as process:
        for line in process.stdout:
            print(line, end='')

    if process.returncode == 0:
        display(HTML('<div style="background:#d4edda;padding:20px;border-radius:8px;margin-top:16px"><h3 style="color:#155724">üéâ Transcription Complete!</h3></div>'))
    else:
        display(HTML('<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">‚ùå Transcription failed</h4></div>'))
except Exception as e:
    # Escape the message: exception text may contain HTML-significant characters.
    display(HTML(f'<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">‚ùå Error: {escape(str(e))}</h4></div>'))

In [None]:
#@title üî¥ ADVANCED MODE - Full Configuration
#@markdown Complete control over all transcription parameters.

#@markdown ---
#@markdown ### üîß Pipeline Configuration

pipeline = "kotoba-faster-whisper" #@param ["faster", "fast", "balanced", "fidelity", "kotoba-faster-whisper"]
sensitivity = "aggressive" #@param ["conservative", "balanced", "aggressive"]
output_language_adv = "native" #@param ["native", "direct-to-english"]

#@markdown ---
#@markdown ### üéØ Ensemble Mode (Two-Pass Processing)

enable_ensemble = False #@param {type:"boolean"}
pass2_pipeline = "balanced" #@param ["faster", "fast", "balanced", "fidelity", "kotoba-faster-whisper"]
pass2_sensitivity = "balanced" #@param ["conservative", "balanced", "aggressive"]
merge_strategy = "smart_merge" #@param ["smart_merge", "full_merge", "pass1_primary", "pass2_primary"]

#@markdown ---
#@markdown ### ‚öôÔ∏è Advanced Options

scene_detection_method = "auditok" #@param ["auditok", "silero"]
disable_vad = False #@param {type:"boolean"}
enable_debug = False #@param {type:"boolean"}

#@markdown ---
#@markdown ### üìÅ File Selection

file_source_adv = "Google Drive" #@param ["Google Drive", "Upload File"]
drive_folder_adv = "WhisperJAV" #@param {type:"string"}

#@markdown ---

import os
import sys
import subprocess
import shlex
from pathlib import Path
from IPython.display import display, HTML
from google.colab import drive, files

# File handling: decide where the input media lives and where subtitles go.
#   - Google Drive: the chosen Drive folder is both input and output.
#   - Upload:       the first uploaded file is the input, /content/output the output.
if file_source_adv == "Google Drive":
    drive.mount('/content/drive', force_remount=False)
    input_path = Path(f'/content/drive/MyDrive/{drive_folder_adv}')
    # parents=True so a nested folder such as "Videos/JAV" can be created;
    # without it mkdir raises FileNotFoundError when an intermediate folder
    # does not exist yet.
    input_path.mkdir(parents=True, exist_ok=True)
    output_path = input_path
else:
    uploaded = files.upload()
    if not uploaded:
        # Explain why the cell stops instead of exiting silently.
        print("No file was uploaded; nothing to transcribe.")
        sys.exit(1)

    filename = next(iter(uploaded))
    if len(uploaded) > 1:
        # Only one input path is passed to the command, so say which file
        # was picked rather than dropping the others without notice.
        print(f"Note: {len(uploaded)} files were uploaded; only '{filename}' will be processed.")

    input_path = Path(f'/content/{filename}')
    output_path = Path('/content/output')
    output_path.mkdir(exist_ok=True)

# Assemble the whisperjav invocation as an argument list.
cmd = ['whisperjav', str(input_path), '--output-dir', str(output_path)]

if not enable_ensemble:
    # Single pass: one pipeline and one sensitivity.
    cmd += ['--mode', pipeline, '--sensitivity', sensitivity]
else:
    # Two passes: the main pipeline settings become pass 1, followed by the
    # pass 2 settings and the strategy used to merge both results.
    cmd += ['--ensemble']
    cmd += ['--pass1-pipeline', pipeline]
    cmd += ['--pass1-sensitivity', sensitivity]
    cmd += ['--pass2-pipeline', pass2_pipeline]
    cmd += ['--pass2-sensitivity', pass2_sensitivity]
    cmd += ['--merge-strategy', merge_strategy]

# Options that are always present.
cmd += [
    '--subs-language', output_language_adv,
    '--scene-detection-method', scene_detection_method,
]

# Boolean switches are appended only when ticked in the form.
for switch_on, switch_flag in ((disable_vad, '--no-vad'), (enable_debug, '--debug')):
    if switch_on:
        cmd.append(switch_flag)

# Summarise the selected settings, then show the exact command to be run.
summary_lines = [
    "üìã Advanced Configuration:",
    f"   Pipeline: {pipeline}",
    f"   Sensitivity: {sensitivity}",
    f"   Scene Detection: {scene_detection_method}",
]
if enable_ensemble:
    summary_lines += [
        f"   Ensemble: {pipeline} ‚Üí {pass2_pipeline} ({merge_strategy})",
        f"   Pass 2 Sensitivity: {pass2_sensitivity}",
    ]
if disable_vad:
    summary_lines.append("   VAD: Disabled")
if enable_debug:
    summary_lines.append("   Debug: Enabled")
print("\n".join(summary_lines))

# Banner framed by two 60-character rules.
banner_rule = "=" * 60
print(f"\n{banner_rule}")
print("üéØ Starting advanced transcription...")
print(f"{banner_rule}\n")

# Shell-quoted rendering of the argument list, kept in `full_cmd`.
full_cmd = shlex.join(cmd)
print("Command: " + full_cmd + "\n")

import html  # stdlib; used to escape exception text before embedding it in HTML

# Run whisperjav and stream its combined stdout/stderr into the cell output,
# then show a success or failure banner based on the exit status.
try:
    # The argument list is executed directly (no shell), so paths containing
    # spaces or shell metacharacters reach the program unchanged. Output is
    # decoded as UTF-8 with replacement so an undecodable byte cannot raise
    # mid-stream and be misreported as a launch error.
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        encoding='utf-8',
        errors='replace',
        bufsize=1,  # line-buffered so progress shows up as it is produced
    ) as process:
        for line in process.stdout:
            print(line, end='')
    # Leaving the `with` block closes the pipe and waits for the process,
    # so `returncode` is populated here.

    if process.returncode == 0:
        display(HTML('<div style="background:#d4edda;padding:20px;border-radius:8px;margin-top:16px"><h3 style="color:#155724">üéâ Transcription Complete!</h3></div>'))
    else:
        display(HTML('<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">‚ùå Transcription failed</h4></div>'))
except Exception as e:
    # Escape the message: it may contain characters such as < or & that would
    # otherwise be interpreted as markup.
    display(HTML(f'<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">‚ùå Error: {html.escape(str(e))}</h4></div>'))

In [None]:
#@title üåê TRANSLATION (Optional)
#@markdown Translate your subtitle files to another language using AI.

# Form cell: the `#@param` values below drive the `whisperjav-translate`
# invocations issued later in this cell, one per .srt file found.

#@markdown ---
#@markdown ### ‚öôÔ∏è Translation Settings

# Nothing is translated unless this is True.
enable_translation = False #@param {type:"boolean"}
# Selects which environment variable receives the API key and is passed to
# the tool as --provider.
translation_provider = "deepseek" #@param ["deepseek", "openrouter", "gemini", "claude", "gpt"]
# Passed as --target.
target_language = "english" #@param ["english", "indonesian", "spanish", "chinese"]
# Passed as --tone.
translation_tone = "standard" #@param ["standard", "pornify"]

#@markdown ---
#@markdown ### üîë API Key
#@markdown Enter your API key for the selected provider:

# Exported to the process environment under the provider-specific variable
# name before the translator is launched.
# NOTE(review): a value typed into this field presumably ends up stored in the
# notebook source as a string literal - confirm, and clear it before saving or
# sharing the notebook.
api_key = "" #@param {type:"string"}

#@markdown ---
#@markdown ### üìÅ SRT Files Location

# Folder name under MyDrive that is searched (non-recursively) for *.srt files.
srt_folder = "WhisperJAV" #@param {type:"string"}

#@markdown ---

import os
import sys
import subprocess
from pathlib import Path
from IPython.display import display, HTML
from google.colab import drive

import html  # stdlib; escapes user-supplied text before embedding it in HTML

# Translate every .srt file in the chosen Drive folder with whisperjav-translate.
# Flow: check the enable switch -> require an API key -> export the key for the
# selected provider -> mount Drive -> run the translator once per file ->
# report the overall outcome.
if not enable_translation:
    print("‚ÑπÔ∏è Translation is disabled. Check the box above to enable.")
elif not api_key:
    display(HTML('<div style="background:#fff3cd;padding:16px;border-radius:8px"><h4 style="color:#856404">‚ö†Ô∏è API Key Required</h4><p>Please enter your API key for the translation provider.</p></div>'))
else:
    # Set API key as environment variable (matches whisperjav.translate.providers)
    env_vars = {
        "deepseek": "DEEPSEEK_API_KEY",
        "openrouter": "OPENROUTER_API_KEY",
        "gemini": "GEMINI_API_KEY",
        "claude": "ANTHROPIC_API_KEY",
        "gpt": "OPENAI_API_KEY"
    }
    os.environ[env_vars.get(translation_provider, "API_KEY")] = api_key

    # Mount drive and find SRT files
    drive.mount('/content/drive', force_remount=False)
    srt_path = Path(f'/content/drive/MyDrive/{srt_folder}')

    # Sorted so files are processed in a stable, predictable order.
    # NOTE(review): this also matches .srt files produced by an earlier
    # translation run in the same folder - confirm whether those should be skipped.
    srt_files = sorted(srt_path.glob('*.srt'))
    if not srt_files:
        # The folder name is user input, so escape it before placing it in HTML.
        display(HTML(f'<div style="background:#fff3cd;padding:16px;border-radius:8px"><h4 style="color:#856404">‚ö†Ô∏è No SRT files found</h4><p>No subtitle files found in: <code>MyDrive/{html.escape(srt_folder)}</code></p></div>'))
    else:
        print(f"üìÑ Found {len(srt_files)} SRT file(s) to translate")
        print(f"   Provider: {translation_provider}")
        print(f"   Target: {target_language}")
        print(f"   Tone: {translation_tone}")
        print("\n" + "="*60 + "\n")

        failed_files = []
        for srt_file in srt_files:
            # Argument list, no shell: file names containing quotes, `$` or
            # backticks are passed through verbatim instead of being
            # interpreted (or breaking the command line).
            translate_cmd = [
                'whisperjav-translate',
                '-i', str(srt_file),
                '--provider', translation_provider,
                '--target', target_language,
                '--tone', translation_tone,
            ]
            print(f"Translating: {srt_file.name}")
            try:
                result = subprocess.run(
                    translate_cmd,
                    capture_output=True,
                    text=True,
                    encoding='utf-8',
                    errors='replace',
                )
            except Exception as e:
                print(f"   ‚ùå Error: {str(e)}")
                failed_files.append(srt_file.name)
                continue

            if result.returncode == 0:
                print("   ‚úÖ Complete")
            else:
                # Show the END of the captured output: the start of a
                # traceback is boilerplate, the actual error is the last line.
                detail = (result.stderr or result.stdout or "").strip()
                print(f"   ‚ùå Failed: {detail[-300:]}")
                failed_files.append(srt_file.name)

        # Only claim success when every file was translated.
        if not failed_files:
            display(HTML('<div style="background:#d4edda;padding:16px;border-radius:8px;margin-top:16px"><h4 style="color:#155724">‚úÖ Translation Complete</h4><p>Translated files saved with language suffix (e.g., _en.srt)</p></div>'))
        else:
            succeeded = len(srt_files) - len(failed_files)
            failed_list = ", ".join(html.escape(name) for name in failed_files)
            display(HTML(
                '<div style="background:#f8d7da;padding:16px;border-radius:8px;margin-top:16px">'
                '<h4 style="color:#721c24">Translation finished with errors</h4>'
                f'<p>{succeeded} of {len(srt_files)} file(s) translated. Failed: {failed_list}</p></div>'
            ))