<a href="https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🎌 WhisperJAV - Google Colab Edition
Generate subtitles for Japanese adult videos using a free Colab GPU.

## 📋 Quick Start Guide
1. **Configure Your Settings** in the panel below (Cell 1).
2. Click `Runtime` → `Run all` in the menu to start everything.
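3. **Connect Google Drive** when prompted. Media files are read from, and subtitles are written to, the `WhisperJAV` folder in your Drive (`MyDrive/WhisperJAV`), so upload your files there first.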
4. The notebook will run all steps and then, if the **auto_disconnect** option is checked, disconnect the session automatically.

In [None]:
#@title Configure & Run WhisperJAV
#@markdown ### ← Click here to hide the code and see only the options.
#@markdown Set your options below, then click `Runtime -> Run all`.

# NOTE(review): the `#@title` / `#@markdown` / `#@param` comments look like
# Colab form directives (they drive the options panel) -- keep them intact
# when editing. The plain assignments are the values the script below reads.

#@markdown --- 
#@markdown ### ⚙️ Core Settings
# These three values are forwarded to the whisperjav command line as
# --mode, --sensitivity and --subs-language respectively.
mode = "balanced"  #@param ["balanced", "fast", "faster"]
sensitivity = "balanced"  #@param ["balanced", "aggressive", "conservative"]
subs_language = "japanese"  #@param ["japanese", "english-direct"] 

#@markdown --- 
#@markdown ### ✨ Enhancement Settings
# Boolean switches: the matching --flag is added to the command only when True.
adaptive_classification = False  #@param {type:"boolean"}
adaptive_audio_enhancement = False  #@param {type:"boolean"}
smart_postprocessing = True  #@param {type:"boolean"}

#@markdown --- 
#@markdown ### 💬 Opening & Closing Credits
# Text written as an extra cue at the start of each generated .srt file.
# The untouched placeholder value is treated as "no prologue" further down.
opening_prologue = "Subtitles by yourname" #@param {type:"string"}
#@markdown *Change the text above to your desired opening credits, or clear it for none.*
# Text written as an extra cue at the end of each generated .srt file.
closing_credits_text = "Subs by WhisperJAV Colab" #@param {type:"string"}
#@markdown *This text will be added at the very end of the subtitle file. Clear for none.*

#@markdown --- 
#@markdown ### 🔌 Session Management
# When True, the runtime is unassigned at the end of the main cell.
auto_disconnect = True #@param {type:"boolean"}
#@markdown *Automatically disconnect when finished to save GPU credits.*

#===============================================================================
#  ✅ END OF CONFIGURATION - THE REST OF THE NOTEBOOK IS AUTOMATED
#===============================================================================
import os
import sys
import subprocess
import shlex
import re
import time
from pathlib import Path
from google.colab import drive
from IPython.display import display, HTML
from tqdm.notebook import tqdm
import torch

# The form ships with a sample credit line; if the user left it untouched,
# behave as though the field had been cleared so no prologue is written.
opening_prologue = "" if opening_prologue == "Subtitles by yourname" else opening_prologue

def sanitize_subtitle_text(text):
    """Flatten user-supplied credit text onto a single subtitle line.

    A subtitle cue is terminated by a blank line, so embedded line breaks in
    the credit text would corrupt the .srt structure. Every kind of line
    break (``\\n``, ``\\r\\n``, a lone ``\\r``, and the other separators
    recognised by ``str.splitlines``) is replaced by one space, and
    leading/trailing whitespace is removed.

    Args:
        text: The raw text from the form; may be ``None`` or empty.

    Returns:
        The single-line text, or ``""`` when the input is empty, ``None`` or
        whitespace only (so callers can use plain truthiness to decide
        whether a credit cue should be written at all).
    """
    if not text:
        return ""
    # splitlines() treats "\r\n" as ONE break and also handles a lone "\r",
    # which previously was deleted outright and glued adjacent words together.
    return ' '.join(text.splitlines()).strip()

def run_command(command):
    """Run a shell command, streaming its output live, and report success.

    stderr is merged into stdout so that a single pipe is drained. With two
    pipes and only stdout being read, a child that writes more than the OS
    pipe buffer to stderr (progress bars, warnings) blocks forever and the
    notebook hangs. Merging also makes stderr output visible live.

    Args:
        command: The command line, interpreted by the shell (``shell=True``).

    Returns:
        ``True`` if the command exited with status 0, otherwise ``False``.
        On failure the exit status and the last non-empty output lines are
        echoed to ``sys.stderr`` under an ``--- ERROR LOG ---`` header.
    """
    from collections import deque

    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # single pipe => no stderr deadlock
        shell=True,
        text=True,
        encoding='utf-8',
        errors='replace',  # never abort streaming on an undecodable byte
    )

    # Keep a short tail so the failure summary can repeat the relevant lines.
    recent_lines = deque(maxlen=40)
    with process.stdout:
        for raw_line in process.stdout:
            line = raw_line.strip()
            print(line)
            if line:
                recent_lines.append(line)

    return_code = process.wait()
    if return_code != 0:
        print("\n--- ERROR LOG ---", file=sys.stderr)
        print(f"Command exited with status {return_code}. Last output lines:", file=sys.stderr)
        print("\n".join(recent_lines), file=sys.stderr)
        return False
    return True

print("STEP 0: CHECKING FOR GPU...")
# Happy path first: report the device name; otherwise show the error banner
# and stop the cell, since nothing below can run without CUDA.
if torch.cuda.is_available():
    print(f"✅ GPU detected: {torch.cuda.get_device_name(0)}\n")
else:
    display(HTML('<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: No GPU detected!</h3><p style=\"color: #721c24;\">This notebook requires a GPU. Please go to <b>Runtime -> Change runtime type</b> and select a GPU accelerator.</p></div>'))
    sys.exit()

print("STEP 1: CONNECTING GOOGLE DRIVE...")
# Mount Drive and make sure the working folder exists. Both the input media
# and the generated subtitles live in this one folder.
try:
    drive.mount('/content/drive', force_remount=True)
    drive_folder = Path('/content/drive/MyDrive/WhisperJAV')
    drive_folder.mkdir(parents=True, exist_ok=True)
    print(f"✅ Google Drive connected. Using folder: {drive_folder}\n")
except Exception as e:
    display(HTML('<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Failed to connect Google Drive.</h3><p style=\"color: #721c24;\">Please re-run the cell and ensure you accept the authorization pop-up.</p></div>'))
    # Surface the real cause as well; the banner alone hides whether this was
    # a declined authorization, a timeout, or a filesystem error.
    print(f"Details: {type(e).__name__}: {e}", file=sys.stderr)
    sys.exit()

print("STEP 2: CHECKING FOR MEDIA FILES...")
media_extensions = ['.mp3', '.wav', '.opus', '.m4a', '.flac', '.wmv', '.mp4', '.mkv', '.webm']
# Compare suffixes case-insensitively: a glob such as '*.mp4' is case-sensitive
# on the Colab filesystem, so files named e.g. 'CLIP.MP4' were reported as
# "no media files found". Directories are skipped; sorting keeps the listing
# deterministic.
media_files = sorted(
    f for f in drive_folder.iterdir()
    if f.is_file() and f.suffix.lower() in media_extensions
)
if not media_files:
    display(HTML(f'<div style=\"background-color: #fff3cd; border: 1px solid #ffeeba; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #856404;\">⚠️ No media files found!</h3><p style=\"color: #856404;\">Please upload your media files to <b>{drive_folder}</b> in Google Drive and run this cell again.</p></div>'))
    sys.exit()
else:
    print(f"✅ Found {len(media_files)} media file(s) to process.\n")

print("STEP 3: INSTALLING DEPENDENCIES...")
# Install WhisperJAV straight from its GitHub repository; run_command streams
# pip's output and returns False on a non-zero exit status.
install_success = run_command(
    "pip install -q -U git+https://github.com/meizhong986/WhisperJAV.git"
)
if install_success:
    print("✅ Dependencies installed.\n")
else:
    display(HTML('<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Installation failed.</h3><p style=\"color: #721c24;\">The notebook cannot continue. Please check the error log above.</p></div>'))
    sys.exit()

print("STEP 4: VERIFYING INSTALLATION...")
# The package was installed after this interpreter started, so the import
# system's cached directory listings may not include it yet. Refresh the
# finder caches before trying the import.
import importlib
importlib.invalidate_caches()
try:
    import whisperjav
    print("✅ WhisperJAV module found.\n")
except ImportError:
    display(HTML('<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Verification failed.</h3><p style=\"color: #721c24;\">The whisperjav module could not be imported even after installation. The notebook cannot continue.</p></div>'))
    sys.exit()

# Flatten the credit texts to single lines before they are written into cues.
opening_prologue_safe = sanitize_subtitle_text(opening_prologue)
closing_credits_safe = sanitize_subtitle_text(closing_credits_text)

print("STEP 5: RUNNING WHISPERJAV TRANSCRIPTION...")
# Quote the input folder with shlex.quote like every other argument, instead
# of hand-written single quotes (which break on a path containing a quote).
base_cmd = f"python -m whisperjav {shlex.quote(str(drive_folder))}"
# Flag -> value. String values are passed as "--flag value"; booleans are
# on/off switches that contribute a bare "--flag" only when True.
options = {
    '--mode': mode,
    '--sensitivity': sensitivity,
    '--subs-language': subs_language,
    '--output-dir': str(drive_folder),
    '--adaptive-classification': adaptive_classification,
    '--adaptive-audio-enhancement': adaptive_audio_enhancement,
    '--smart-postprocessing': smart_postprocessing,
}
cmd_parts = [base_cmd]
for flag, value in options.items():
    if isinstance(value, bool):
        if value:
            cmd_parts.append(flag)
    elif value:
        # Empty strings are skipped so the tool's own defaults apply.
        cmd_parts.append(f'{flag} {shlex.quote(str(value))}')

full_command = ' '.join(cmd_parts)
print(f"Executing command: {full_command}")
print("--- Processing will start now. You will see the live output below. ---")
transcription_success = run_command(full_command)
print("--- End of processing. ---\n")

if not transcription_success:
    display(HTML('<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Transcription failed.</h3><p style=\"color: #721c24;\">The WhisperJAV process returned an error. Credits will not be added. Please check the log above.</p></div>'))
    sys.exit()

# Stamp opening/closing credit cues onto every .srt in the Drive folder.
# The folder may also hold subtitles from earlier runs that were already
# stamped, so each step first checks for its own cue header and skips files
# that have it; otherwise credits would pile up on every re-run.
srt_files = list(drive_folder.glob('*.srt'))
if not srt_files:
    print("⚠️ No subtitle files were generated by the transcription process.")
else:
    # Fixed cue headers written by this notebook; they double as the
    # "already stamped" markers.
    prologue_header = "0\n00:00:00,000 --> 00:00:00,500\n"
    credits_header = "9999\n23:59:58,000 --> 23:59:59,000\n"

    if opening_prologue_safe:
        print("STEP 6: ADDING OPENING CREDITS...")
        prologue_line = f"{prologue_header}{opening_prologue_safe}\n\n"
        for srt_file in tqdm(srt_files, desc="Adding Prologue"):
            try:
                # 'utf-8-sig' drops a leading BOM if present, so it cannot end
                # up stranded in the middle of the file behind the new cue.
                original_content = srt_file.read_text(encoding='utf-8-sig')
                if original_content.startswith(prologue_header):
                    continue  # stamped by a previous run
                srt_file.write_text(prologue_line + original_content, encoding='utf-8')
            except Exception as e:
                # Best effort: one unreadable file must not stop the others.
                print(f"  - Warning: Could not add prologue to {srt_file.name}: {e}")
        print("✅ Opening credits added.\n")

    if closing_credits_safe:
        print("STEP 7: ADDING CLOSING CREDITS...")
        credits_cue = f"{credits_header}{closing_credits_safe}\n"
        for srt_file in tqdm(srt_files, desc="Adding Credits"):
            try:
                existing = srt_file.read_text(encoding='utf-8-sig')
                if existing.startswith(credits_header) or ("\n" + credits_header) in existing:
                    continue  # stamped by a previous run
                # Guarantee exactly one blank line before the new cue. A file
                # without a trailing newline would otherwise fuse the credits
                # into the text of its last cue.
                body = existing.rstrip()
                separator = "\n\n" if body else ""
                srt_file.write_text(body + separator + credits_cue, encoding='utf-8')
            except Exception as e:
                # Best effort: one unreadable file must not stop the others.
                print(f"  - Warning: Could not add credits to {srt_file.name}: {e}")
        print("✅ Closing credits added.\n")

# Final status banner, shown once every step above has run to completion.
success_banner = HTML("""<div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 8px; padding: 20px; margin-top: 20px;"><h3 style="color: #155724; margin-top: 0;">🎉 Success! All tasks are complete.</h3><p style="color: #155724; margin-bottom: 0;">You can now check your Google Drive folder for the results. This session will disconnect automatically if you enabled the option.</p></div>""")
display(success_banner)

if auto_disconnect:
    print("\n🔌 Auto-disconnect enabled. This session will now end to save resources.")
    # Short pause so the messages above are rendered before the runtime goes away.
    time.sleep(5)
    # Imported only here, where it is needed.
    from google.colab import runtime
    runtime.unassign()


In [None]:
#@title View Results
#@markdown Run this cell to manually check results if you disabled auto-disconnect.

from pathlib import Path
from IPython.display import display, HTML
import html

# List every subtitle file in the Drive folder with its size, then render an
# HTML preview of the most recently modified one.
drive_folder = Path('/content/drive/MyDrive/WhisperJAV')
srt_files = sorted(drive_folder.glob('*.srt'))

print(f"📄 Subtitle Files in: {drive_folder}")
print("-" * 50)

if not srt_files:
    display(HTML('<p>❌ No subtitle files found.</p>'))
else:
    for srt in srt_files:
        print(f"✅ {srt.name} ({srt.stat().st_size / 1024:.1f} KB)")

    print("\n📖 Preview of the latest subtitle file:")
    latest_srt = max(srt_files, key=lambda p: p.stat().st_mtime)
    try:
        with open(latest_srt, 'r', encoding='utf-8') as f:
            content = f.read()
        # Everything interpolated into the markup is escaped -- the file name
        # as well as the content, since a name containing '<' or '&' would
        # otherwise be parsed as HTML.
        content_safe = html.escape(content)
        name_safe = html.escape(latest_srt.name)
        preview_html = f"""<div style='background: #f8f9fa; border: 1px solid #e1e4e8; padding: 15px; border-radius: 5px; max-height: 400px; overflow-y: auto; font-family: monospace; white-space: pre-wrap;'><b>--- Previewing: {name_safe} ---</b><br><br>{content_safe}</div>"""
        display(HTML(preview_html))
    except Exception as e:
        # Best effort: a preview failure should not hide the listing above.
        print(f"Could not read file for preview: {e}")