<a href="https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_colab_edition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🎌 WhisperJAV - Colab Edition v1.7.0-beta

**Japanese AV Subtitle Generator** with AI-powered transcription.

### New in 1.7.0:
- 🇯🇵 **Kotoba Model** - Japanese-optimized whisper for better dialogue recognition
- 🎯 **Ensemble Mode** - Two-pass processing for maximum accuracy
- ⚡ **Faster Processing** - Improved performance with GPU acceleration

---

### How to Use:
1. **Choose your experience level below** (Quick, Standard, or Advanced)
2. **Run all cells** (`Runtime` → `Run all`)
3. **Upload your video** when prompted
4. **Download subtitles** when complete!

---

In [None]:
#@title 🎚️ Step 1: Choose Your Experience Level

#@markdown ### Select how you want to use WhisperJAV:

experience_level = "\U0001F7E2 Quick & Easy (Recommended for beginners)" #@param ["\U0001F7E2 Quick & Easy (Recommended for beginners)", "\U0001F7E1 Standard Mode (More control)", "\U0001F534 Advanced Mode (Full power)"]

#@markdown ---
#@markdown | Level | Description | Best For |
#@markdown |-------|-------------|----------|
#@markdown | 🟢 Quick & Easy | One-click transcription with optimal defaults | Beginners |
#@markdown | 🟡 Standard | Choose from presets, adjust sensitivity | Regular users |
#@markdown | 🔴 Advanced | Full control over all parameters | Power users |

# Map the form selection to a short mode name. Matching is done on the keyword
# inside the label, so it does not depend on the leading emoji. Anything that
# is neither "Quick" nor "Standard" is treated as advanced.
# NOTE(review): none of the cells visible in this notebook read MODE, so
# `Run all` executes every mode cell regardless of the choice made here;
# consider gating the mode cells on MODE.
if "Quick" in experience_level:
    MODE = "quick"
    print("✅ Quick Mode selected - Using Japanese Expert preset (kotoba + aggressive)")
    print("   This is the best setting for most JAV content!")
elif "Standard" in experience_level:
    MODE = "standard"
    print("✅ Standard Mode selected - You can choose from presets below")
else:
    MODE = "advanced"
    print("✅ Advanced Mode selected - Full configuration available")

print(f"\n→ Scroll down to the {MODE.upper()} MODE cell to continue")

In [None]:
#@title üì¶ Step 2: Installation (Auto-runs)
#@markdown This cell installs WhisperJAV and its dependencies. Takes about 2-3 minutes.

import os
import sys
import subprocess
import time
from pathlib import Path
from IPython.display import display, HTML, clear_output

def show_status(message, status="info", details=None):
    """Render a one-line status banner in the notebook output.

    Args:
        message: Headline text, shown in bold next to the status icon.
        status: One of "success", "error", "info" or "warning"; selects the
            accent colour and icon. Any other value falls back to a neutral
            colour ("#333") and no icon.
        details: Optional secondary text shown on a second, smaller line.
            Skipped when falsy (None or empty string).

    ``message`` and ``details`` are HTML-escaped before being embedded, so
    text such as pip/stderr output or exception messages containing ``<``
    or ``&`` is displayed literally instead of corrupting the markup.
    """
    # Imported locally under a distinct name so the block is self-contained.
    from html import escape

    colors = {"success": "#2ecc71", "error": "#e74c3c", "info": "#3498db", "warning": "#f39c12"}
    icons = {"success": "✅", "error": "❌", "info": "ℹ️", "warning": "⚠️"}
    color = colors.get(status, "#333")
    icon = icons.get(status, "")

    markup = (
        f'<div style="padding:8px;margin:4px 0;border-left:4px solid {color}">'
        f'<b>{icon} {escape(str(message))}</b>'
    )
    if details:
        markup += f'<br><span style="color:#666;font-size:0.9em">{escape(str(details))}</span>'
    markup += '</div>'
    display(HTML(markup))

def run_cmd(cmd, desc, silent=True):
    """Run a shell command and report a failure through ``show_status``.

    Args:
        cmd: Command line string. It is executed through the shell
            (``shell=True``), so it may use operators such as ``&&`` and
            redirections.
        desc: Human-readable label used in the failure message.
        silent: When True, stdout/stderr are captured instead of being
            shown, and the first 200 characters of captured stderr are
            attached to the failure message. When False, output goes
            straight to the notebook and no details are attached.

    Returns:
        True if the command exited with status 0, False otherwise.
    """
    try:
        subprocess.run(cmd, shell=True, capture_output=silent, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # e.stderr is None when output was not captured (silent=False);
        # pass None rather than the literal string "None" as details.
        details = e.stderr[:200] if e.stderr else None
        show_status(f"{desc} failed", "error", details)
        return False
    return True

# ========== INSTALLATION START ==========
# Installs system packages, Python dependencies and WhisperJAV itself, then
# verifies that the package imports. Sets INSTALLED (bool) for this cell's
# final banner and exits the cell with status 1 when verification fails.
display(HTML("<h3>🚀 Installing WhisperJAV 1.7.0-beta</h3>"))
start_time = time.time()

# 1. GPU Check (informational only; never aborts the installation)
show_status("Checking GPU...", "info")
try:
    import torch
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        show_status(f"GPU Available: {gpu_name}", "success")
    else:
        show_status("No GPU detected - CPU mode will be slower", "warning")
except ImportError:
    show_status("PyTorch not found, will be installed", "info")
except Exception as gpu_error:
    # Best-effort probe: a broken CUDA/torch setup should not stop the install,
    # but it should not be reported as "PyTorch not found" either.
    show_status("GPU check failed - continuing with installation", "warning", str(gpu_error))

# 2. System packages
show_status("Installing system packages...", "info")
# run_cmd already reports the failure; only claim success when it succeeded.
if run_cmd("apt-get update -qq && apt-get install -y -qq ffmpeg portaudio19-dev > /dev/null 2>&1", "System packages"):
    show_status("System packages ready", "success")

# 3. Python dependencies
show_status("Installing Python dependencies... (this takes ~2 min)", "info")
deps = [
    "tqdm numba tiktoken ffmpeg-python soundfile auditok",
    "numpy scipy pysrt srt aiofiles jsonschema Pillow colorama",
    "librosa matplotlib pyloudnorm requests faster-whisper",
    "transformers optimum accelerate huggingface-hub pydantic"
]
# Every group is attempted even if an earlier one fails (list, not a
# short-circuiting generator), so one bad package does not block the rest.
dep_results = [run_cmd(f"pip install -q {dep_group}", "Dependencies") for dep_group in deps]
if all(dep_results):
    show_status("Python dependencies installed", "success")
else:
    show_status("Some Python dependencies failed to install", "warning",
                "See the error messages above; WhisperJAV may not work correctly.")

# 4. WhisperJAV and components
show_status("Installing WhisperJAV components...", "info")
components = [
    ("pip install -q --no-deps git+https://github.com/openai/whisper.git@main", "OpenAI Whisper"),
    ("pip install -q --no-deps git+https://github.com/meizhong986/stable-ts-fix-setup.git@main", "Stable-TS"),
    ("pip install -q git+https://github.com/meizhong986/WhisperJAV.git@main", "WhisperJAV"),
]
for cmd, name in components:
    if run_cmd(cmd, name):
        show_status(f"{name} installed", "success")

# 5. Verify installation
show_status("Verifying installation...", "info")
try:
    import whisperjav
    from whisperjav.__version__ import __version_display__
    show_status(f"WhisperJAV {__version_display__} ready!", "success")
    INSTALLED = True
except Exception as e:
    # Fall back to the plain __version__ attribute before giving up.
    try:
        from whisperjav.__version__ import __version__
        show_status(f"WhisperJAV {__version__} ready!", "success")
        INSTALLED = True
    except Exception:
        # Report the first failure (e): it is the one that explains why
        # the primary import path did not work.
        show_status("Installation verification failed", "error", str(e))
        INSTALLED = False

elapsed = time.time() - start_time
if INSTALLED:
    display(HTML(f'<div style="background:#d4edda;padding:16px;border-radius:8px;margin-top:16px"><h3 style="color:#155724;margin:0">✅ Installation Complete ({elapsed:.0f}s)</h3><p style="margin:8px 0 0 0">Scroll down to your selected mode to continue.</p></div>'))
else:
    display(HTML('<div style="background:#f8d7da;padding:16px;border-radius:8px;margin-top:16px"><h3 style="color:#721c24;margin:0">❌ Installation Failed</h3><p>Please try running this cell again or check the error messages above.</p></div>'))
    # Raises SystemExit; presumably intended to stop the notebook from
    # continuing into the transcription cells - TODO confirm in Colab.
    sys.exit(1)

In [None]:
#@title 🟢 QUICK MODE - One-Click Transcription
#@markdown **Best for beginners!** Uses optimal settings for Japanese AV content.
#@markdown
#@markdown Settings: `kotoba-faster-whisper` pipeline + `aggressive` sensitivity

#@markdown ---
#@markdown ### 📁 File Selection

file_source = "Google Drive" #@param ["Google Drive", "Upload File"]

#@markdown ### 📂 Google Drive Settings (if using Drive)
drive_folder_name = "WhisperJAV" #@param {type:"string"}

#@markdown ---

import os
import sys
import subprocess
import shlex
from pathlib import Path
from IPython.display import display, HTML
from google.colab import drive, files

# Resolve input_path / output_path from the chosen file source.
#   Google Drive: the Drive folder is both input and output location.
#   Upload File:  the uploaded file is the input, /content/output the output.
if file_source == "Google Drive":
    print("📂 Mounting Google Drive...")
    drive.mount('/content/drive', force_remount=False)
    input_path = Path(f'/content/drive/MyDrive/{drive_folder_name}')
    # parents=True so a nested folder name such as "Videos/JAV" is created
    # instead of raising FileNotFoundError.
    input_path.mkdir(parents=True, exist_ok=True)
    output_path = input_path
    print(f"✅ Using folder: {input_path}")

    # Check for media files (case-insensitive, so ".MP4" is recognised too).
    # This is informational only: the cell keeps going either way.
    video_suffixes = {'.mp4', '.mkv', '.avi', '.mov'}
    media_files = [entry for entry in input_path.iterdir()
                   if entry.suffix.lower() in video_suffixes]
    if not media_files:
        display(HTML(f'<div style="background:#fff3cd;padding:16px;border-radius:8px"><h4 style="color:#856404">⚠️ No video files found</h4><p>Please upload video files to your Google Drive folder: <code>MyDrive/{drive_folder_name}</code></p></div>'))
    else:
        print(f"📹 Found {len(media_files)} video file(s)")
else:
    print("📤 Please upload your video file...")
    uploaded = files.upload()
    if uploaded:
        # Only the first uploaded file is used as input.
        filename = next(iter(uploaded))
        # assumes uploads land in /content (the working directory) - TODO confirm
        input_path = Path(f'/content/{filename}')
        output_path = Path('/content/output')
        output_path.mkdir(exist_ok=True)
        print(f"✅ Uploaded: {filename}")
    else:
        print("❌ No file uploaded")
        sys.exit(1)

# Build and run command
# Runs the whisperjav CLI on input_path with the fixed Quick-mode settings,
# streams its output into the cell, and reports success or failure.
import html  # local to this step: escapes exception text before embedding it in HTML

print("\n" + "="*60)
print("🎯 Starting transcription with Japanese Expert preset...")
print("   Pipeline: kotoba-faster-whisper (Japanese-optimized)")
print("   Sensitivity: aggressive (catches all dialogue)")
print("="*60 + "\n")

cmd = [
    'whisperjav',
    str(input_path),
    '--mode', 'kotoba-faster-whisper',
    '--sensitivity', 'aggressive',
    '--output-dir', str(output_path)
]

# Shell-quoted form, kept for display only; the process is started directly
# from the argument list so no shell parsing of file names is involved.
full_cmd = shlex.join(cmd)
print(f"Command: {full_cmd}\n")

try:
    # The context manager closes the output pipe and waits for the process.
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        text=True, bufsize=1
    ) as process:
        # bufsize=1 + text mode: relay output line by line as it is produced
        for line in process.stdout:
            print(line, end='')

    if process.returncode == 0:
        display(HTML('<div style="background:#d4edda;padding:20px;border-radius:8px;margin-top:16px"><h3 style="color:#155724">🎉 Transcription Complete!</h3><p>Your subtitle files (.srt) are saved in the output folder.</p></div>'))
        # List output files
        srt_files = list(output_path.glob('*.srt'))
        if srt_files:
            print(f"\n📄 Generated {len(srt_files)} subtitle file(s):")
            for f in srt_files:
                print(f"   • {f.name}")
    else:
        display(HTML('<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">❌ Transcription failed</h4><p>Check the error messages above.</p></div>'))
except Exception as e:
    # Cell-level boundary: show any failure (e.g. whisperjav not installed)
    # as a banner instead of a raw traceback.
    display(HTML(f'<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">❌ Error</h4><p>{html.escape(str(e))}</p></div>'))

In [None]:
#@title 🟡 STANDARD MODE - Preset Selection
#@markdown Choose from optimized presets with the option to tweak settings.

#@markdown ---
#@markdown ### 🎯 Select Preset

preset = "\U0001F1EF\U0001F1F5 Japanese Expert (RECOMMENDED)" #@param ["\U0001F680 Quick Scan (~5 min/hour)", "\U0001F1EF\U0001F1F5 Japanese Expert (RECOMMENDED)", "\U0001F3AF Maximum Quality (~20 min/hour)"]

#@markdown ---
#@markdown ### ⚙️ Optional Adjustments

override_sensitivity = "Use preset default" #@param ["Use preset default", "conservative", "balanced", "aggressive"]
output_language = "Japanese (native)" #@param ["Japanese (native)", "English (direct translation)"]

#@markdown ---
#@markdown ### 📁 File Selection

file_source_std = "Google Drive" #@param ["Google Drive", "Upload File"]
drive_folder_std = "WhisperJAV" #@param {type:"string"}

#@markdown ---

import os
import sys
import subprocess
import shlex
from pathlib import Path
from IPython.display import display, HTML
from google.colab import drive, files

# Preset table: each entry holds the CLI settings for one preset.
# "ensemble" presets additionally carry the second-pass pipeline,
# second-pass sensitivity and merge strategy.
preset_configs = {
    "Quick Scan": {
        "mode": "faster",
        "sensitivity": "balanced",
        "ensemble": False,
        "desc": "Fast preview - ~5 min per hour of video"
    },
    "Japanese Expert": {
        "mode": "kotoba-faster-whisper",
        "sensitivity": "aggressive",
        "ensemble": False,
        "desc": "Best for JAV content - ~8 min per hour"
    },
    "Maximum Quality": {
        "mode": "kotoba-faster-whisper",
        "sensitivity": "aggressive",
        "ensemble": True,
        "pass2": "balanced",
        "pass2_sensitivity": "balanced",
        "merge": "smart_merge",
        "desc": "Two-pass ensemble - ~20 min per hour"
    }
}

# Match the form label to a preset by name (the label carries an emoji prefix
# and a suffix, so a substring test is used). The entry is copied so the
# sensitivity override below does not modify the preset table itself.
config = next(
    (dict(settings) for label, settings in preset_configs.items() if label in preset),
    None,
)
if not config:
    # Unknown label: fall back to the recommended preset.
    config = dict(preset_configs["Japanese Expert"])

# Drop the leading emoji token from the label for display.
print(f"📋 Selected preset: {preset.split(' ', 1)[-1]}")
print(f"   {config['desc']}")

# Apply overrides
if override_sensitivity != "Use preset default":
    config["sensitivity"] = override_sensitivity
    print(f"   Sensitivity override: {override_sensitivity}")

# Translate the form label into the value passed to --subs-language.
subs_lang = "native" if "Japanese" in output_language else "direct-to-english"

# File handling
# Resolve input_path / output_path from the chosen file source.
#   Google Drive: the Drive folder is both input and output location.
#   Upload File:  the uploaded file is the input, /content/output the output.
if file_source_std == "Google Drive":
    drive.mount('/content/drive', force_remount=False)
    input_path = Path(f'/content/drive/MyDrive/{drive_folder_std}')
    # parents=True so a nested folder name such as "Videos/JAV" is created
    # instead of raising FileNotFoundError.
    input_path.mkdir(parents=True, exist_ok=True)
    output_path = input_path
else:
    uploaded = files.upload()
    if not uploaded:
        # Say why the cell stops instead of exiting silently.
        print("❌ No file uploaded")
        sys.exit(1)
    # Only the first uploaded file is used as input.
    filename = next(iter(uploaded))
    # assumes uploads land in /content (the working directory) - TODO confirm
    input_path = Path(f'/content/{filename}')
    output_path = Path('/content/output')
    output_path.mkdir(exist_ok=True)

# Build command
# Assembles the whisperjav CLI call from the selected preset (config),
# streams its output into the cell, and reports success or failure.
import html  # local to this step: escapes exception text before embedding it in HTML

cmd = ['whisperjav', str(input_path), '--output-dir', str(output_path)]

if config.get('ensemble'):
    # Two-pass preset: the preset's mode/sensitivity drive pass 1; the
    # pass-2 settings fall back to "balanced" / "smart_merge" when absent.
    cmd.extend(['--ensemble',
                '--pass1-pipeline', config['mode'],
                '--pass1-sensitivity', config['sensitivity'],
                '--pass2-pipeline', config.get('pass2', 'balanced'),
                '--pass2-sensitivity', config.get('pass2_sensitivity', 'balanced'),
                '--merge-strategy', config.get('merge', 'smart_merge')])
else:
    cmd.extend(['--mode', config['mode'], '--sensitivity', config['sensitivity']])

cmd.extend(['--subs-language', subs_lang])

print("\n" + "="*60)
print("🎯 Starting transcription...")
print("="*60 + "\n")

# Shell-quoted form, kept for display only; the process is started directly
# from the argument list so no shell parsing of file names is involved.
full_cmd = shlex.join(cmd)
print(f"Command: {full_cmd}\n")

try:
    # The context manager closes the output pipe and waits for the process.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1) as process:
        # bufsize=1 + text mode: relay output line by line as it is produced
        for line in process.stdout:
            print(line, end='')

    if process.returncode == 0:
        display(HTML('<div style="background:#d4edda;padding:20px;border-radius:8px;margin-top:16px"><h3 style="color:#155724">🎉 Transcription Complete!</h3></div>'))
    else:
        display(HTML('<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">❌ Transcription failed</h4></div>'))
except Exception as e:
    # Cell-level boundary: show any failure (e.g. whisperjav not installed)
    # as a banner instead of a raw traceback.
    display(HTML(f'<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">❌ Error: {html.escape(str(e))}</h4></div>'))

In [None]:
#@title 🔴 ADVANCED MODE - Full Configuration
#@markdown Complete control over all transcription parameters.

#@markdown ---
#@markdown ### 🔧 Pipeline Configuration

pipeline = "kotoba-faster-whisper" #@param ["faster", "fast", "balanced", "fidelity", "kotoba-faster-whisper"]
sensitivity = "aggressive" #@param ["conservative", "balanced", "aggressive"]
output_language_adv = "native" #@param ["native", "direct-to-english"]

#@markdown ---
#@markdown ### 🎯 Ensemble Mode (Two-Pass Processing)

enable_ensemble = False #@param {type:"boolean"}
pass2_pipeline = "balanced" #@param ["faster", "fast", "balanced", "fidelity", "kotoba-faster-whisper"]
pass2_sensitivity = "balanced" #@param ["conservative", "balanced", "aggressive"]
merge_strategy = "smart_merge" #@param ["smart_merge", "full_merge", "pass1_primary", "pass2_primary"]

#@markdown ---
#@markdown ### ⚙️ Advanced Options

scene_detection_method = "auditok" #@param ["auditok", "silero"]
disable_vad = False #@param {type:"boolean"}
enable_debug = False #@param {type:"boolean"}

#@markdown ---
#@markdown ### 📁 File Selection

file_source_adv = "Google Drive" #@param ["Google Drive", "Upload File"]
drive_folder_adv = "WhisperJAV" #@param {type:"string"}

#@markdown ---

import os
import sys
import subprocess
import shlex
from pathlib import Path
from IPython.display import display, HTML
from google.colab import drive, files

# File handling
# Resolve input_path / output_path from the chosen file source.
#   Google Drive: the Drive folder is both input and output location.
#   Upload File:  the uploaded file is the input, /content/output the output.
if file_source_adv == "Google Drive":
    drive.mount('/content/drive', force_remount=False)
    input_path = Path(f'/content/drive/MyDrive/{drive_folder_adv}')
    # parents=True so a nested folder name such as "Videos/JAV" is created
    # instead of raising FileNotFoundError.
    input_path.mkdir(parents=True, exist_ok=True)
    output_path = input_path
else:
    uploaded = files.upload()
    if not uploaded:
        # Say why the cell stops instead of exiting silently.
        print("❌ No file uploaded")
        sys.exit(1)
    # Only the first uploaded file is used as input.
    filename = next(iter(uploaded))
    # assumes uploads land in /content (the working directory) - TODO confirm
    input_path = Path(f'/content/{filename}')
    output_path = Path('/content/output')
    output_path.mkdir(exist_ok=True)

# Build command
# Assembles the whisperjav CLI call from the form values above, prints the
# effective configuration, streams the tool's output into the cell, and
# reports success or failure.
import html  # local to this step: escapes exception text before embedding it in HTML

cmd = ['whisperjav', str(input_path), '--output-dir', str(output_path)]

if enable_ensemble:
    # Two-pass run: `pipeline`/`sensitivity` drive pass 1, the pass2_* values
    # drive pass 2, and merge_strategy selects how the results are combined.
    cmd.extend([
        '--ensemble',
        '--pass1-pipeline', pipeline,
        '--pass1-sensitivity', sensitivity,
        '--pass2-pipeline', pass2_pipeline,
        '--pass2-sensitivity', pass2_sensitivity,
        '--merge-strategy', merge_strategy
    ])
else:
    cmd.extend(['--mode', pipeline, '--sensitivity', sensitivity])

cmd.extend(['--subs-language', output_language_adv])
cmd.extend(['--scene-detection-method', scene_detection_method])

if disable_vad:
    cmd.append('--no-vad')

if enable_debug:
    cmd.append('--debug')

# Display configuration
print("📋 Advanced Configuration:")
print(f"   Pipeline: {pipeline}")
print(f"   Sensitivity: {sensitivity}")
print(f"   Scene Detection: {scene_detection_method}")
if enable_ensemble:
    print(f"   Ensemble: {pipeline} → {pass2_pipeline} ({merge_strategy})")
    print(f"   Pass 2 Sensitivity: {pass2_sensitivity}")
if disable_vad:
    print("   VAD: Disabled")
if enable_debug:
    print("   Debug: Enabled")

print("\n" + "="*60)
print("🎯 Starting advanced transcription...")
print("="*60 + "\n")

# Shell-quoted form, kept for display only; the process is started directly
# from the argument list so no shell parsing of file names is involved.
full_cmd = shlex.join(cmd)
print(f"Command: {full_cmd}\n")

try:
    # The context manager closes the output pipe and waits for the process.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1) as process:
        # bufsize=1 + text mode: relay output line by line as it is produced
        for line in process.stdout:
            print(line, end='')

    if process.returncode == 0:
        display(HTML('<div style="background:#d4edda;padding:20px;border-radius:8px;margin-top:16px"><h3 style="color:#155724">🎉 Transcription Complete!</h3></div>'))
    else:
        display(HTML('<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">❌ Transcription failed</h4></div>'))
except Exception as e:
    # Cell-level boundary: show any failure (e.g. whisperjav not installed)
    # as a banner instead of a raw traceback.
    display(HTML(f'<div style="background:#f8d7da;padding:16px;border-radius:8px"><h4 style="color:#721c24">❌ Error: {html.escape(str(e))}</h4></div>'))

In [None]:
#@title 🌐 TRANSLATION (Optional)
#@markdown Translate your subtitle files to another language using AI.

#@markdown ---
#@markdown ### ⚙️ Translation Settings

enable_translation = False #@param {type:"boolean"}
translation_provider = "deepseek" #@param ["deepseek", "openrouter", "gemini", "claude", "gpt"]
target_language = "english" #@param ["english", "indonesian", "spanish", "chinese"]
translation_tone = "standard" #@param ["standard", "pornify"]

#@markdown ---
#@markdown ### 🔑 API Key
#@markdown Enter your API key for the selected provider:

api_key = "" #@param {type:"string"}

#@markdown ---
#@markdown ### 📁 SRT Files Location

srt_folder = "WhisperJAV" #@param {type:"string"}

#@markdown ---

import os
import sys
import subprocess
from pathlib import Path
from IPython.display import display, HTML
from google.colab import drive

# Runs whisperjav-translate once per .srt file found in the Drive folder.
# Nothing happens unless translation is enabled AND an API key is supplied.
if not enable_translation:
    print("ℹ️ Translation is disabled. Check the box above to enable.")
elif not api_key:
    display(HTML('<div style="background:#fff3cd;padding:16px;border-radius:8px"><h4 style="color:#856404">⚠️ API Key Required</h4><p>Please enter your API key for the translation provider.</p></div>'))
else:
    # Set API key as environment variable (matches whisperjav.translate.providers)
    # so the child process inherits it; unknown providers fall back to API_KEY.
    env_vars = {
        "deepseek": "DEEPSEEK_API_KEY",
        "openrouter": "OPENROUTER_API_KEY",
        "gemini": "GEMINI_API_KEY",
        "claude": "ANTHROPIC_API_KEY",
        "gpt": "OPENAI_API_KEY"
    }
    os.environ[env_vars.get(translation_provider, "API_KEY")] = api_key

    # Mount drive and find SRT files
    drive.mount('/content/drive', force_remount=False)
    srt_path = Path(f'/content/drive/MyDrive/{srt_folder}')

    srt_files = list(srt_path.glob('*.srt'))
    if not srt_files:
        display(HTML(f'<div style="background:#fff3cd;padding:16px;border-radius:8px"><h4 style="color:#856404">⚠️ No SRT files found</h4><p>No subtitle files found in: <code>MyDrive/{srt_folder}</code></p></div>'))
    else:
        print(f"📄 Found {len(srt_files)} SRT file(s) to translate")
        print(f"   Provider: {translation_provider}")
        print(f"   Target: {target_language}")
        print(f"   Tone: {translation_tone}")
        print("\n" + "="*60 + "\n")

        succeeded = 0
        for srt_file in srt_files:
            # Argument list, no shell: file names containing quotes, `$` or
            # backticks are passed through verbatim instead of being
            # interpreted by a shell.
            cmd = [
                'whisperjav-translate',
                '-i', str(srt_file),
                '--provider', translation_provider,
                '--target', target_language,
                '--tone', translation_tone,
            ]
            print(f"Translating: {srt_file.name}")
            try:
                result = subprocess.run(cmd, capture_output=True, text=True)
                if result.returncode == 0:
                    succeeded += 1
                    print(f"   ✅ Complete")
                else:
                    print(f"   ❌ Failed: {result.stderr[:100]}")
            except Exception as e:
                # Keep going with the remaining files (e.g. tool not installed
                # is reported per file rather than aborting the cell).
                print(f"   ❌ Error: {str(e)}")

        # Summarise honestly: only claim completion when every file succeeded.
        total = len(srt_files)
        if succeeded == total:
            display(HTML('<div style="background:#d4edda;padding:16px;border-radius:8px;margin-top:16px"><h4 style="color:#155724">✅ Translation Complete</h4><p>Translated files saved with language suffix (e.g., _en.srt)</p></div>'))
        elif succeeded:
            display(HTML(f'<div style="background:#fff3cd;padding:16px;border-radius:8px;margin-top:16px"><h4 style="color:#856404">⚠️ Translation Partially Complete</h4><p>{succeeded} of {total} file(s) translated. Check the messages above for the files that failed.</p></div>'))
        else:
            display(HTML('<div style="background:#f8d7da;padding:16px;border-radius:8px;margin-top:16px"><h4 style="color:#721c24">❌ Translation Failed</h4><p>No files were translated. Check the error messages above.</p></div>'))