<a href="https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🎌 WhisperJAV - Google Colab Edition
Generate Subtitles for Japanese Adult Videos using Free GPU

## 📋 Quick Start Guide
1. **Configure Your Settings** in the panel below (Cell 1).
2. Click `Runtime` → `Run all` in the menu to start everything.
3. **Connect Google Drive** when prompted.
4. The notebook will run all steps and disconnect automatically if the option is checked.

In [None]:
#@title Configure & Run WhisperJAV
#@markdown ### ← Click here to hide the code and see only the options.
#@markdown Set your options below, then click `Runtime -> Run all`.

#@markdown --- 
#@markdown ### ⚙️ Core Settings
# These three values are forwarded to the WhisperJAV command line as
# --mode, --sensitivity and --subs-language respectively.
mode = "balanced"  #@param ["balanced", "fast", "faster"]
sensitivity = "balanced"  #@param ["balanced", "aggressive", "conservative"]
subs_language = "japanese"  #@param ["japanese", "english-direct"] 

#@markdown --- 
#@markdown ### ✨ Enhancement & Output Settings
# Each boolean below is passed to WhisperJAV as a bare flag only when enabled.
adaptive_classification = False  #@param {type:"boolean"}
adaptive_audio_enhancement = False  #@param {type:"boolean"}
smart_postprocessing = True  #@param {type:"boolean"}
# Leaving the placeholder text unchanged means no opening credit is added.
opening_prologue = "Subtitles by yourname" #@param {type:"string"}
# Added at the end of every .srt file in the Drive folder; empty disables it.
closing_credits_text = "Subs by WhisperJAV Colab" #@param {type:"string"}

#@markdown --- 
#@markdown ### 🔌 Session Management
auto_disconnect = True #@param {type:"boolean"}
#@markdown *Automatically disconnect when finished to save GPU credits.*

#===============================================================================
#  ✅ END OF CONFIGURATION - THE REST OF THE NOTEBOOK IS AUTOMATED
#===============================================================================
import os
import sys
import subprocess
import shlex
import time
from pathlib import Path
from google.colab import drive
from IPython.display import display, HTML
from tqdm.notebook import tqdm

# The form's placeholder text is not a real credit line: if the user left it
# untouched, treat the opening prologue as empty so no opening credit is added.
if opening_prologue == "Subtitles by yourname":
    opening_prologue = ""

# STEP 1: stop early unless PyTorch is importable and reports a CUDA device.
print("STEP 1: PRE-FLIGHT CHECKS...")
try:
    import torch
    if not torch.cuda.is_available():
        # Raised as ImportError so both failure modes share the handler below.
        raise ImportError("PyTorch found, but CUDA is not available.")
    print(f"- ✅ GPU detected: {torch.cuda.get_device_name(0)}\n")
except ImportError as e:
    display(HTML(
        f'<div style="background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;">'
        f'<h3 style="color: #721c24;">❌ ERROR: GPU or PyTorch Issue!</h3>'
        f'<p style="color: #721c24;">{e}<br>This notebook requires a GPU and a working PyTorch installation.</p></div>'
    ))
    sys.exit()

# STEP 2: mount Google Drive and make sure the working folder exists.
# `drive_folder` is used later as both the input and the output directory.
print("STEP 2: CONNECTING GOOGLE DRIVE...")
try:
    drive.mount('/content/drive', force_remount=True)
    drive_folder = Path('/content/drive/MyDrive/WhisperJAV')
    drive_folder.mkdir(exist_ok=True)
    print(f"- ✅ Google Drive connected. Using folder: {drive_folder}\n")
except Exception as e:
    display(HTML(
        '<div style="background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;">'
        '<h3 style="color: #721c24;">❌ ERROR: Failed to connect Google Drive.</h3>'
        '<p style="color: #721c24;">Please re-run the cell and ensure you accept the authorization pop-up.</p></div>'
    ))
    # Keep the underlying cause visible in the console log instead of
    # discarding it, so a failure other than a declined pop-up can be diagnosed.
    print(f"Google Drive error details: {e!r}", file=sys.stderr)
    sys.exit()

# === Faithful Integration of Your Installation Snippet ===
def print_step_html(step_num, title):
    """Display a "Step <step_num>: <title>" heading line as HTML.

    Args:
        step_num: Step label inserted after the word "Step" (e.g. 3.1).
        title: Text shown next to the step label.
    """
    step_markup = f'''
    <div style='margin-top:16px;margin-bottom:8px'>
        <span style='font-weight:bold;color:#1f77b4'>Step {step_num}:</span>
        <span style='margin-left:8px'>{title}</span>
    </div>
    '''
    display(HTML(step_markup))

def print_status_html(success, message, duration=None):
    """Display a one-line status entry (icon, message, optional timing) as HTML.

    Args:
        success: Truthy selects the success colour and check mark; falsy
            selects the failure colour and cross.
        message: Text shown next to the icon.
        duration: Elapsed seconds to show in parentheses, or None to omit
            the timing entirely.
    """
    color = "#2ecc71" if success else "#e74c3c"
    icon = "✔" if success else "✖"
    # Compare against None rather than relying on truthiness, so a measured
    # duration of 0.0 seconds is still reported.
    if duration is not None:
        time_str = f" <span style='color:#7f8c8d;font-size:0.9em'>({duration:.1f}s)</span>"
    else:
        time_str = ""
    display(HTML(f'''
    <div style='margin-left:24px;margin-bottom:4px'>
        <span style='color:{color};font-weight:bold'>{icon}</span>
        <span style='margin-left:6px'>{message}{time_str}</span>
    </div>
    '''))

def run_install_command(command, success_msg, error_msg):
    """Run one installation command through the shell and report the outcome.

    The command's stdout and stderr are captured rather than shown. On a zero
    exit status a success line with the elapsed time is displayed. On a
    non-zero exit status a failure line is displayed and the captured stderr
    is printed to ``sys.stderr``.

    Args:
        command: Shell command line to execute.
        success_msg: Status text shown when the command succeeds; it also
            labels the detailed error log on failure.
        error_msg: Status text shown when the command fails.

    Returns:
        True if the command exited with status 0, otherwise False.
    """
    started_at = time.time()
    try:
        subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as failure:
        stderr_text = failure.stderr.strip()
        # Only the summary goes into the HTML status line; the full log is
        # sent to the standard error stream for debugging.
        print_status_html(False, f"{error_msg}. See console log for details.")
        print(f"\n--- DETAILED ERROR LOG FOR '{success_msg}' ---\n{stderr_text}\n-------------------------------------", file=sys.stderr)
        return False
    print_status_html(True, success_msg, time.time()-started_at)
    return True

def install_whisperjav():
    """Install system packages, Python utilities, core components and WhisperJAV.

    Steps run in order (3.1 to 3.4). Each step prints its heading and then
    runs its commands one by one; the first failing command stops the
    installation.

    Returns:
        True if every command succeeded, False as soon as one fails.
    """
    display(HTML("<h3 style='color:#1f77b4; border-bottom: 1px solid #ccc; padding-bottom: 5px;'>STEP 3: Installing Dependencies</h3>"))

    # (pip requirement, human-readable name) pairs for step 3.3.
    core_components = (
        ("openai-whisper@git+https://github.com/openai/whisper@v20231117", "OpenAI Whisper"),
        ("faster-whisper", "Faster Whisper"),
        ("stable-ts@git+https://github.com/meizhong986/stable-ts-fix-setup.git@main", "Stable TS"),
    )

    # Each entry: (step number, heading, [(command, success text, failure text), ...]).
    install_plan = (
        (3.1, "Installing system packages", [
            ("apt-get update -qq && apt-get install -y -qq portaudio19-dev ffmpeg",
             "System packages installed", "Failed to install system packages"),
        ]),
        (3.2, "Installing Python utilities", [
            ("pip install -q ffmpeg-python soundfile auditok numpy scipy tqdm pysrt srt numba pyaudio",
             "Utilities installed", "Failed to install utilities"),
        ]),
        (3.3, "Installing core components", [
            (f"pip install -q {requirement}", f"{label} installed", f"Failed to install {label}")
            for requirement, label in core_components
        ]),
        (3.4, "Installing WhisperJAV main package", [
            ("pip install --no-deps --ignore-installed -q git+https://github.com/meizhong986/WhisperJAV.git",
             "WhisperJAV installed", "Failed to install WhisperJAV"),
        ]),
    )

    for step_number, heading, commands in install_plan:
        print_step_html(step_number, heading)
        for shell_command, ok_text, fail_text in commands:
            if not run_install_command(shell_command, ok_text, fail_text):
                return False

    return True

# Run the installation and stop the notebook if any step failed.
if install_whisperjav():
    display(HTML("<h3 style='color:#2ecc71'>✔ Installation Completed Successfully</h3>"))
    print("\n")
else:
    display(HTML("<h3 style='color:#e74c3c'>✖ Installation Failed</h3><p>The notebook cannot continue. Please review the error messages above.</p>"))
    sys.exit()
# === End of Snippet Integration ===

# STEP 4: run the WhisperJAV CLI on the Drive folder and stream its log.
print("STEP 4: RUNNING WHISPERJAV TRANSCRIPTION...")

# Options that carry a value; an empty value drops the option entirely.
value_options = {
    '--mode': mode,
    '--sensitivity': sensitivity,
    '--subs-language': subs_language,
    '--output-dir': str(drive_folder),
}
# Boolean switches; the bare flag is passed only when the switch is enabled.
switch_options = {
    '--adaptive-classification': adaptive_classification,
    '--adaptive-audio-enhancement': adaptive_audio_enhancement,
    '--smart-postprocessing': smart_postprocessing,
}

# Build an argument list and run it without a shell, so no value needs shell
# quoting and nothing in a path or option can be reinterpreted by the shell.
command = ["python", "-m", "whisperjav", str(drive_folder)]
for flag, value in value_options.items():
    if value:
        command += [flag, str(value)]
command += [flag for flag, enabled in switch_options.items() if enabled]

# stderr is merged into stdout so the log appears in order. Undecodable bytes
# are replaced rather than aborting the stream halfway through a run.
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                      text=True, encoding='utf-8', errors='replace') as process:
    for line in process.stdout:
        print(line.strip())

# Leaving the with-block waits for the process, so returncode is set here.
if process.returncode != 0:
    display(HTML(
        '<div style="background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;">'
        '<h3 style="color: #721c24;">❌ ERROR: Transcription failed.</h3>'
        '<p style="color: #721c24;">The WhisperJAV process returned an error. Please check the log above.</p></div>'
    ))
    sys.exit()

# STEP 5: add the optional opening and closing credit entries to every .srt
# file found directly inside the Drive folder.
print("STEP 5: POST-PROCESSING CREDITS...")
srt_files = list(drive_folder.glob('*.srt'))

if opening_prologue:
    # Bold tags are stripped from the credit text before it is written.
    prologue_text = opening_prologue.replace('<b>', '').replace('</b>', '')
    prologue_line = f"0\n00:00:00,000 --> 00:00:00,500\n{prologue_text}\n\n"
    for srt_file in tqdm(srt_files, desc="Adding Opening Credits"):
        existing = srt_file.read_text(encoding='utf-8')
        # The glob also matches files from earlier runs; skip any that already
        # start with this exact prologue so re-running does not stack credits.
        if existing.startswith(prologue_line):
            continue
        srt_file.write_text(prologue_line + existing, encoding='utf-8')

if closing_credits_text:
    closing_text = closing_credits_text.replace('<b>', '').replace('</b>', '')
    closing_block = f"\n9999\n23:59:58,000 --> 23:59:59,000\n{closing_text}\n"
    for srt_file in tqdm(srt_files, desc="Adding Closing Credits"):
        # Same re-run guard as above, applied to the end of the file.
        if srt_file.read_text(encoding='utf-8').endswith(closing_block):
            continue
        with open(srt_file, 'a', encoding='utf-8') as f:
            f.write(closing_block)

# Final banner: reached only if no earlier step called sys.exit().
display(HTML(
    '<div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 8px; padding: 20px; margin-top: 20px;">'
    '<h3 style="color: #155724; margin-top: 0;">🎉 Success! All tasks are complete.</h3>'
    '<p style="color: #155724; margin-bottom: 0;">The session will now disconnect automatically if you enabled the option.</p></div>'
))

if auto_disconnect:
    print("\n🔌 Auto-disconnect enabled. This session will now end to save resources.")
    # Short pause before releasing the runtime (presumably so the messages
    # above are rendered first; confirm before changing the delay).
    time.sleep(5)
    # Imported here so the Colab runtime API is only needed when disconnecting.
    from google.colab import runtime
    runtime.unassign()
