<a href="https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🎌 WhisperJAV - Google Colab Edition
Generate Subtitles for Japanese Adult Videos using Free GPU

## 📋 Quick Start Guide
1. **Configure Your Settings** in the panel below (Cell 1).
2. Click `Runtime` → `Run all` in the menu to start everything.
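3. **Connect Google Drive** when prompted. Put your media files in a `WhisperJAV` folder at the top level of your Google Drive (`MyDrive/WhisperJAV`) before running; the subtitles are written to the same folder.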
4. The notebook will run all steps and disconnect automatically if the option is checked.

In [None]:
#@title Configure & Run WhisperJAV
#@markdown ### ← Click here to hide the code and see only the options.
#@markdown Set your options below, then click `Runtime -> Run all`.

#@markdown --- 
#@markdown ### ⚙️ Core Settings
# These three values are forwarded to the whisperjav command line as
# --mode, --sensitivity and --subs-language.
mode = "balanced"  #@param ["balanced", "fast", "faster"]
sensitivity = "balanced"  #@param ["balanced", "aggressive", "conservative"]
subs_language = "japanese"  #@param ["japanese", "english-direct"] 

#@markdown --- 
#@markdown ### ✨ Enhancement & Output Settings
# Each boolean that is True adds its bare flag (e.g. --smart-postprocessing)
# to the whisperjav command line; False omits the flag.
adaptive_classification = False  #@param {type:"boolean"}
adaptive_audio_enhancement = False  #@param {type:"boolean"}
smart_postprocessing = True  #@param {type:"boolean"}
# Leaving opening_prologue at its example value is treated as "no prologue".
opening_prologue = "Subtitles by yourname" #@param {type:"string"}
# Appended as a final cue to every .srt file; an empty string skips it.
closing_credits_text = "Subs by WhisperJAV Colab" #@param {type:"string"}

#@markdown --- 
#@markdown ### 🔌 Session & Logging
# When True, "-qq" is passed to the apt-get and pip install commands.
hide_install_output = True #@param {type:"boolean"}
#@markdown *Check to hide installation details (uses `-qq` flag). Uncheck to see full logs.*
# When True, the runtime is released via google.colab.runtime.unassign() at the end.
auto_disconnect = True #@param {type:"boolean"}
#@markdown *Automatically disconnect when finished to save GPU credits.*

#===============================================================================
#  ✅ END OF CONFIGURATION - THE REST OF THE NOTEBOOK IS AUTOMATED
#===============================================================================
import os
import sys
import subprocess
import shlex
import time
from pathlib import Path
from google.colab import drive
from IPython.display import display, HTML

def run_transcription(command, description):
    """Run a shell command, stream its combined output, and halt on failure.

    Args:
        command: Command line to execute; it is run through the shell.
        description: Human-readable step name used in the progress and
            error messages.

    Returns:
        True when the command exits with status 0.

    Raises:
        SystemExit: If the command exits with a non-zero status. An HTML
            error banner is displayed first, and the child's exit status is
            carried by the exception.
    """
    print(f"--- {description} ---")
    # errors='replace': tools that echo file names or media metadata can emit
    # bytes that are not valid UTF-8; a strict decoder would abort the stream
    # (and the whole step) with UnicodeDecodeError.
    # The context manager closes the pipe and waits for the child on exit.
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr with stdout
        shell=True,
        text=True,
        encoding='utf-8',
        errors='replace',
    ) as process:
        for line in process.stdout:
            # Strip only the trailing newline/whitespace so that indentation
            # in tracebacks and progress output stays readable.
            print(line.rstrip())
    return_code = process.returncode
    if return_code != 0:
        display(HTML(f'<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Step \"{description}\" failed with exit code {return_code}.</h3><p style=\"color: #721c24;\">The notebook cannot continue. Please check the error log above.</p></div>'))
        sys.exit(return_code)
    print(f"✅ Step '{description}' completed successfully.\n")
    return True

# If user doesn't change the example, treat it as empty
if opening_prologue == "Subtitles by yourname": opening_prologue = ""

print("STEP 1: PRE-FLIGHT CHECKS...")
!nvidia-smi
print("✅ GPU check complete.\n")

print("STEP 2: CONNECTING GOOGLE DRIVE...")
try:
    # force_remount=True re-runs the mount even if /content/drive is already mounted.
    drive.mount('/content/drive', force_remount=True)
    # This single folder is used both as the input location and as the output directory.
    drive_folder = Path('/content/drive/MyDrive/WhisperJAV')
    drive_folder.mkdir(exist_ok=True)
    print(f"- Google Drive connected. Using folder: {drive_folder}\n")
except Exception as e:
    display(HTML(f'<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Failed to connect Google Drive.</h3><p style=\"color: #721c24;\">Please re-run the cell and ensure you accept the authorization pop-up.</p></div>'))
    # Show the underlying cause as well: the failure is not always a declined
    # authorization (creating the folder can fail too).
    print(f"Details: {type(e).__name__}: {e}")
    sys.exit()

print("STEP 3: INSTALLING & VERIFYING DEPENDENCIES...")
quiet_flag = "-qq" if hide_install_output else ""

# === Faithful Integration of Your Installation Snippet ===
!apt-get update {quiet_flag}
!apt-get install -y {quiet_flag} ffmpeg portaudio19-dev

print("\nInstalling WhisperJAV (core package only)...")
!pip install {quiet_flag} --no-deps git+https://github.com/meizhong986/WhisperJAV.git

core_requirements = ["openai-whisper@git+https://github.com/openai/whisper.git@v20231117", "stable-ts@git+https://github.com/meizhong986/stable-ts-fix-setup.git@main", "faster-whisper>=1.1.1", "ffmpeg-python", "soundfile", "auditok", "numpy", "scipy", "tqdm", "pysrt", "srt", "numba", "pyaudio"]

print("\nChecking PyTorch installation...")
try:
    import torch
    if not torch.cuda.is_available(): raise ImportError("CUDA not available")
    import torchvision
    import torchaudio
    print(f"✅ Using existing PyTorch {torch.__version__} with CUDA.")
except ImportError as e:
    print(f"⚠️ PyTorch stack issue detected ({e}). Installing required versions...")
    !pip install {quiet_flag} torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124

print("\nInstalling remaining core dependencies...")
!pip install {quiet_flag} {' '.join(core_requirements)}
print("✅ Dependency installation commands executed.\n")
# === End of Snippet Integration ===

print("STEP 4: FINAL VERIFICATION...")
# Importing the two key packages is the smoke test for the installation above.
try:
    import whisperjav
    import stable_whisper
except ImportError as err:
    display(HTML(f"""<div style='background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;'><h3 style='color: #721c24;'>❌ ERROR: Final verification failed.</h3><p style='color: #721c24;'>A key module failed to import: <b>{err}</b>. This usually means a step in the installation failed. Please uncheck 'Hide installation output' and run again to see the full log.</p></div>"""))
    sys.exit()
else:
    print("✅ All key modules are importable. Ready to proceed.\n")

print("STEP 5: RUNNING WHISPERJAV TRANSCRIPTION...")
# The Drive folder is the positional input and is also passed as --output-dir.
base_cmd = f"python -m whisperjav '{drive_folder}'"
options = {
    '--mode': mode,
    '--sensitivity': sensitivity,
    '--subs-language': subs_language,
    '--output-dir': str(drive_folder),
    '--adaptive-classification': adaptive_classification,
    '--adaptive-audio-enhancement': adaptive_audio_enhancement,
    '--smart-postprocessing': smart_postprocessing,
}
# Non-boolean options become "flag value" pairs (value shell-quoted);
# empty values are skipped.
valued_args = [
    f'{flag} {shlex.quote(str(value))}'
    for flag, value in options.items()
    if not isinstance(value, bool) and value
]
# Boolean options become bare switches, emitted only when True.
switch_args = [
    flag
    for flag, value in options.items()
    if isinstance(value, bool) and value
]
cmd_parts = [base_cmd, *valued_args, *switch_args]
full_command = ' '.join(cmd_parts)
transcription_success = run_transcription(full_command, "Transcription Process")

if not transcription_success:
    display(HTML('<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Transcription failed.</h3><p style=\"color: #721c24;\">The WhisperJAV process returned an error. Please check the log above.</p></div>'))
    sys.exit()

print("STEP 6: POST-PROCESSING CREDITS...")
from tqdm.notebook import tqdm

# The glob matches every .srt in the folder, including files left by earlier
# runs, so each loop below skips files that already carry the exact credit
# block instead of stacking a duplicate.
srt_files = list(drive_folder.glob('*.srt'))
if opening_prologue:
    # Build the cue text outside the f-string: reusing the enclosing quote
    # character inside an f-string expression only parses on Python 3.12+.
    prologue_text = opening_prologue.replace('<b>', '').replace('</b>', '')
    prologue_line = f"0\n00:00:00,000 --> 00:00:00,500\n{prologue_text}\n\n"
    for srt_file in tqdm(srt_files, desc="Adding Opening Credits"):
        existing = srt_file.read_text(encoding='utf-8')
        if not existing.startswith(prologue_line):
            srt_file.write_text(prologue_line + existing, encoding='utf-8')
if closing_credits_text:
    closing_text = closing_credits_text.replace('<b>', '').replace('</b>', '')
    closing_block = f"\n9999\n23:59:58,000 --> 23:59:59,000\n{closing_text}\n"
    for srt_file in tqdm(srt_files, desc="Adding Closing Credits"):
        if srt_file.read_text(encoding='utf-8').endswith(closing_block):
            continue
        with open(srt_file, 'a', encoding='utf-8') as f:
            f.write(closing_block)

# Green success banner shown once every step above has completed.
success_banner = HTML("""<div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 8px; padding: 20px; margin-top: 20px;"><h3 style="color: #155724; margin-top: 0;">🎉 Success! All tasks are complete.</h3><p style="color: #155724; margin-bottom: 0;">The session will now disconnect automatically if you enabled the option.</p></div>""")
display(success_banner)

if auto_disconnect:
    print("\n🔌 Auto-disconnect enabled. This session will now end to save resources.")
    # Short pause before the runtime is released.
    time.sleep(5)
    from google.colab import runtime
    runtime.unassign()
