<a href="https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🎌 WhisperJAV - Google Colab Edition
Generate subtitles for Japanese adult videos using a free GPU.

## 📋 Quick Start Guide
1. **Configure Your Settings** in the panel below (Cell 1).
2. Click `Runtime` → `Run all` in the menu to start everything.
3. **Connect Google Drive** when prompted.
4. The notebook will run all steps and, if the auto-disconnect option is checked, disconnect automatically when it finishes.

In [None]:
#@title Configure & Run WhisperJAV
#@markdown ### ← Click here to hide the code and see only the options.
#@markdown Set your options below, then click `Runtime -> Run all`.

#@markdown --- 
#@markdown ### ⚙️ Core Settings
# These three values are forwarded to the `python -m whisperjav` command line
# as --mode, --sensitivity and --subs-language respectively.
mode = "balanced"  #@param ["balanced", "fast", "faster"]
sensitivity = "balanced"  #@param ["balanced", "aggressive", "conservative"]
subs_language = "japanese"  #@param ["japanese", "english-direct"] 

#@markdown --- 
#@markdown ### ✨ Enhancement & Output Settings
# Each boolean below becomes a bare command-line switch when True
# (--adaptive-classification, --adaptive-audio-enhancement,
# --smart-postprocessing) and is omitted entirely when False.
adaptive_classification = False  #@param {type:"boolean"}
adaptive_audio_enhancement = False  #@param {type:"boolean"}
smart_postprocessing = True  #@param {type:"boolean"}
# Leaving the placeholder "Subtitles by yourname" unchanged is treated as
# "no opening credit"; any other text is prepended to each .srt as a cue.
opening_prologue = "Subtitles by yourname" #@param {type:"string"}
# Appended to each .srt found in the Drive folder as a final cue; leave empty
# to skip.
closing_credits_text = "Subs by WhisperJAV Colab" #@param {type:"string"}

#@markdown --- 
#@markdown ### 🔌 Session & Logging
# When True, output of the dependency installation commands is streamed live
# instead of being captured silently.
show_install_logs = False #@param {type:"boolean"}
#@markdown *Check this to see all installation details. If an error occurs, logs will be shown automatically.*
# When True, the Colab runtime is released at the end of the run.
auto_disconnect = True #@param {type:"boolean"}
#@markdown *Automatically disconnect when finished to save GPU credits.*
#===============================================================================
#  ✅ END OF CONFIGURATION - THE REST OF THE NOTEBOOK IS AUTOMATED
#===============================================================================
import os
import sys
import subprocess
import shlex
import time
from pathlib import Path
from google.colab import drive
from IPython.display import display, HTML

def run_and_check(command, description, show_logs=False):
    """Run a shell command, report the outcome, and abort the notebook on failure.

    Args:
        command: Command line string; it is executed through the shell
            (``shell=True``), so callers must quote arguments themselves.
        description: Short label printed before the command runs and quoted
            in the error box if it fails.
        show_logs: When True, combined stdout/stderr is streamed live. When
            False, output is captured silently and only shown on failure.

    Returns:
        True when the command exits with status 0.

    Raises:
        SystemExit: If the command exits with a non-zero status, after an
            HTML error box has been displayed.
    """
    import html  # Local import: only needed to escape text for the error box.

    print(f"- {description}...", end='')
    if show_logs:
        # Stream output live. The context manager closes the pipe and reaps
        # the child even if printing raises.
        with subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            shell=True,
            text=True,
            encoding='utf-8',
            errors='replace',  # Tool output is not guaranteed to be valid UTF-8.
        ) as process:
            for line in process.stdout:
                # rstrip (not strip) keeps indentation in tracebacks/progress logs.
                print(line.rstrip())
            return_code = process.wait()
        error_output = "See log above."
    else:
        # Hide output but capture it for error reporting.
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
        )
        return_code = result.returncode
        # Some tools report failures on stdout only; fall back to it so the
        # error box is never silently empty when there is something to show.
        error_output = result.stderr or result.stdout or ""

    if return_code == 0:
        print(" ✅")
        return True

    print(" ❌ FAILED")
    # Escape before embedding: command output routinely contains '<', '>' and
    # '&' (e.g. version specifiers), which would otherwise be parsed as HTML.
    safe_description = html.escape(str(description))
    safe_error = html.escape(error_output)
    display(HTML(f'<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Step \"{safe_description}\" failed.</h3><p style=\"color: #721c24;\">The notebook cannot continue. See error log below:</p><pre style=\"white-space: pre-wrap; word-wrap: break-word;\">{safe_error}</pre></div>'))
    sys.exit()

# If the user left the example prologue untouched, treat it as "no prologue".
if opening_prologue == "Subtitles by yourname":
    opening_prologue = ""

print("STEP 1: PRE-FLIGHT CHECKS...")
# NOTE: this only verifies that the nvidia-smi binary is on PATH; it does not
# query the GPU itself.
run_and_check("which nvidia-smi", "Checking GPU availability")

print("STEP 2: CONNECTING GOOGLE DRIVE...")
try:
    drive.mount('/content/drive', force_remount=True)
    drive_folder = Path('/content/drive/MyDrive/WhisperJAV')
    # parents=True keeps this from failing if an intermediate folder is missing.
    drive_folder.mkdir(parents=True, exist_ok=True)
    print(f"- Google Drive connected. Using folder: {drive_folder}\n")
except Exception as e:
    # Top-level boundary: surface the real cause before showing the friendly
    # banner, otherwise the underlying error is lost.
    print(f"- Google Drive setup failed: {e!r}")
    display(HTML('<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Failed to connect Google Drive.</h3><p style=\"color: #721c24;\">Please re-run the cell and ensure you accept the authorization pop-up.</p></div>'))
    sys.exit()

print("STEP 3: INSTALLING & VERIFYING DEPENDENCIES...")
run_and_check("apt-get update -qq && apt-get install -y -qq ffmpeg", "Installing system libraries (ffmpeg)", show_install_logs)
run_and_check("pip install -q torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124", "Installing PyTorch Stack", show_install_logs)
# --no-deps: the package's dependencies are installed explicitly below.
run_and_check("pip install -q --no-deps git+https://github.com/meizhong986/WhisperJAV.git", "Installing WhisperJAV (core)", show_install_logs)
core_deps = [
    "openai-whisper@git+https://github.com/openai/whisper.git@v20231117",
    "stable-ts@git+https://github.com/meizhong986/stable-ts-fix-setup.git@main",
    "faster-whisper>=1.1.1",
    "ffmpeg-python",
    "soundfile",
    "auditok",
    "numpy",
    "scipy",
    "tqdm",
    "pysrt",
    "srt",
    "numba",
]
# The command runs through a shell, so each requirement must be quoted:
# unquoted, the '>' in "faster-whisper>=1.1.1" is parsed as an output
# redirection, which drops the version constraint and sends pip's stdout to a
# file named "=1.1.1".
quoted_core_deps = " ".join(shlex.quote(dep) for dep in core_deps)
run_and_check(f'pip install -q {quoted_core_deps}', "Installing core dependencies (stable-ts, etc.)", show_install_logs)
print("✅ All dependencies installed successfully.\n")

print("STEP 4: RUNNING WHISPERJAV TRANSCRIPTION...")
# The Drive folder is both the input location and the output directory.
base_cmd = f"python -m whisperjav '{drive_folder}'"
options = {
    '--mode': mode,
    '--sensitivity': sensitivity,
    '--subs-language': subs_language,
    '--output-dir': str(drive_folder),
    '--adaptive-classification': adaptive_classification,
    '--adaptive-audio-enhancement': adaptive_audio_enhancement,
    '--smart-postprocessing': smart_postprocessing,
}

# Split the options into "flag value" pairs and bare on/off switches.
# Valued options come first on the command line, followed by the switches;
# falsy values (empty strings, False) are left out altogether.
valued_args = []
switch_args = []
for flag, value in options.items():
    if isinstance(value, bool):
        if value:
            switch_args.append(flag)
    elif value:
        valued_args.append(f'{flag} {shlex.quote(str(value))}')

cmd_parts = [base_cmd, *valued_args, *switch_args]
full_command = ' '.join(cmd_parts)
print(f"Executing command: {full_command}")
print("--- Processing will start now. You will see the live output below. ---")
# Logs are always streamed for this step, regardless of show_install_logs.
transcription_success = run_and_check(full_command, "Transcription Process", True)
print("--- End of processing. ---\n")

# Safeguard in case run_and_check ever returns a falsy value instead of exiting.
if not transcription_success:
    display(HTML('<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Transcription failed.</h3><p style=\"color: #721c24;\">The WhisperJAV process returned an error. Please check the log above.</p></div>'))
    sys.exit()

print("STEP 5: POST-PROCESSING CREDITS...")
from tqdm.notebook import tqdm
# Every .srt directly inside the Drive folder is considered, which includes
# files left over from earlier runs of this notebook.
srt_files = list(drive_folder.glob('*.srt'))
if not srt_files:
    print("⚠️ No subtitle files were found to add credits to.")
else:
    # Credits must stay on one line; strip so a whitespace-only value does not
    # produce an empty subtitle cue.
    opening_prologue_safe = opening_prologue.replace('\n', ' ').replace('\r', '').strip()
    if opening_prologue_safe:
        prologue_line = f"0\n00:00:00,000 --> 00:00:00,500\n{opening_prologue_safe}\n\n"
        for srt_file in tqdm(srt_files, desc="Adding Opening Credits"):
            original_content = srt_file.read_text(encoding='utf-8')
            # Skip files that already carry this exact prologue, so re-running
            # the notebook does not stack duplicate credits.
            if original_content.startswith(prologue_line):
                continue
            srt_file.write_text(prologue_line + original_content, encoding='utf-8')

    closing_credits_safe = closing_credits_text.replace('\n', ' ').replace('\r', '').strip()
    if closing_credits_safe:
        closing_block = f"\n9999\n23:59:58,000 --> 23:59:59,000\n{closing_credits_safe}\n"
        for srt_file in tqdm(srt_files, desc="Adding Closing Credits"):
            # Same idempotence guard as above, for the trailing credit cue.
            if srt_file.read_text(encoding='utf-8').endswith(closing_block):
                continue
            with open(srt_file, 'a', encoding='utf-8') as f:
                f.write(closing_block)

# Final status banner, shown once every step above has completed.
success_banner_html = """<div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 8px; padding: 20px; margin-top: 20px;"><h3 style="color: #155724; margin-top: 0;">🎉 Success! All tasks are complete.</h3><p style="color: #155724; margin-bottom: 0;">You can now check your Google Drive folder for the results. This session will disconnect automatically if the option is checked.</p></div>"""
display(HTML(success_banner_html))

if auto_disconnect:
    print("\n🔌 Auto-disconnect enabled. This session will now end to save resources.")
    # Short pause before the runtime is released (presumably so the messages
    # above are visible first — TODO confirm).
    time.sleep(5)
    from google.colab import runtime
    # Release the Colab runtime assigned to this session.
    runtime.unassign()
