<a href="https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🎌 WhisperJAV - Google Colab Edition
Generate subtitles for Japanese adult videos using a free GPU.

## 📋 Quick Start Guide
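1.  **Upload your media** (video/audio files) to the `WhisperJAV` folder in your Google Drive (`My Drive/WhisperJAV/`). The notebook creates this folder on its first run if it does not exist yet.
2.  **Configure Your Settings** in the panel below (Cell 1).
3.  Click `Runtime` → `Run all` in the menu to start everything.
4.  **Connect Google Drive** when prompted.
5.  Once complete, run the **View Results** cell to see your files.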

In [ ]:
#@title Configure & Run WhisperJAV
#@markdown ### ← Click here to hide the code and see only the options.
#@markdown Set your options below, then click `Runtime -> Run all`.

# NOTE: the `#@title`, `#@markdown` and `#@param` comments in this section are
# Colab form annotations, not ordinary comments. Keep them intact when editing.

#@markdown --- 
#@markdown ### 👈 Core Settings
# These three values are forwarded to the `whisperjav` command line further
# down as --mode, --sensitivity and --subs-language respectively.
mode = "balanced"  #@param ["balanced", "fast", "faster"]
sensitivity = "balanced"  #@param ["balanced", "aggressive", "conservative"]
subs_language = "japanese"  #@param ["japanese", "english-direct"] 

#@markdown --- 
#@markdown ### 👉 Enhancement Settings
# Each switch that is True adds the matching bare flag
# (--adaptive-classification, --adaptive-audio-enhancement,
# --smart-postprocessing) to the command; False switches are omitted.
adaptive_classification = False  #@param {type:"boolean"}
adaptive_audio_enhancement = False  #@param {type:"boolean"}
smart_postprocessing = True  #@param {type:"boolean"}

#@markdown --- 
#@markdown ### 💬 Opening Credits
# Passed as --opening-prologue after sanitizing. The unchanged placeholder
# "Subtitles by yourname" is treated as empty later in this cell.
opening_prologue = "Subtitles by yourname" #@param {type:"string"}
#@markdown *Change the text above to your desired opening credits, or clear it for none.*

# --- Developer-set closing credits (not visible in UI) ---
# When non-empty, this text is appended as a final subtitle entry to every
# .srt file found in the Drive folder once processing has finished.
closing_credits_text = "Subs by WhisperJAV Colab" # Set to "" to disable

#===============================================================================
#  ✅ END OF CONFIGURATION - THE REST OF THE NOTEBOOK IS AUTOMATED
#===============================================================================
import os
import sys
import shlex
import re
import time
from pathlib import Path
from google.colab import drive
from IPython.display import display, HTML
from tqdm.notebook import tqdm
import torch

# An untouched placeholder means the user supplied no opening credits,
# so collapse it to the empty string; any other value is kept as typed.
opening_prologue = "" if opening_prologue == "Subtitles by yourname" else opening_prologue

def sanitize_subtitle_text(text):
    """Flatten *text* to a single line that cannot be mistaken for SRT structure.

    Falsy input (``None`` or ``""``) yields ``""``. Otherwise every
    ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` timing span is deleted, carriage
    returns are dropped and each newline is replaced by one space.
    """
    if not text:
        return ""
    timing_span = r'\d{2}:\d{2}:\d{2},\d{3}\s*-->\s*\d{2}:\d{2}:\d{2},\d{3}'
    without_timing = re.sub(timing_span, '', text)
    # The two replacements touch different characters, so their order is free.
    return without_timing.replace('\r', '').replace('\n', ' ')

# STEP 0 - confirm a CUDA device is visible before doing any other work.
print("STEP 0: CHECKING FOR GPU...")
if torch.cuda.is_available():
    print(f"✅ GPU detected: {torch.cuda.get_device_name(0)}\n")
else:
    # Show how to switch the runtime type in a red banner, then stop the cell.
    display(HTML('<div style="background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;"><h3 style="color: #721c24;">❌ ERROR: No GPU detected!</h3><p style="color: #721c24;">This notebook requires a GPU. Please go to <b>Runtime -> Change runtime type</b> and select a GPU accelerator (T4, etc.).</p></div>'))
    sys.exit()

# STEP 1 - mount Google Drive and make sure the working folder exists.
print("STEP 1: CONNECTING GOOGLE DRIVE...")
# Working folder used as both the input and the output directory.
drive_folder = Path('/content/drive/MyDrive/WhisperJAV')
try:
    drive.mount('/content/drive', force_remount=True)
    # No parents=True: a missing MyDrive should raise here rather than create
    # a look-alike folder on the local disk.
    drive_folder.mkdir(exist_ok=True)
except Exception as e:
    display(HTML('<div style="background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;"><h3 style="color: #721c24;">❌ ERROR: Failed to connect Google Drive.</h3><p style="color: #721c24;">Please re-run the cell and ensure you accept the authorization pop-up from Google.</p></div>'))
    # Print the underlying cause too; the banner only covers the
    # authorization case and other failures would be impossible to diagnose.
    print(f"  - Details: {type(e).__name__}: {e}")
    sys.exit()
else:
    print(f"✅ Google Drive connected. Using folder: {drive_folder}\n")

# STEP 2 - install ffmpeg (apt) and the WhisperJAV package (pip), and report
# honestly whether each install worked.
import subprocess  # local import: only this step needs it


def _run_quietly(command):
    """Run *command* (an argv list) with its output discarded.

    Returns True when the process exits with status 0, False when it exits
    with any other status or cannot be started at all.
    """
    try:
        finished = subprocess.run(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        return False
    return finished.returncode == 0


print("STEP 2: INSTALLING DEPENDENCIES (this may take a moment)... ")

# Cosmetic bar only: it finishes before the install below starts.
for i in tqdm(range(100), desc="Installing system libraries"):
    time.sleep(0.05)
# Same sequencing as `update && install`: skip the install if update fails.
if _run_quietly(["apt-get", "update", "-qq"]) and _run_quietly(
    ["apt-get", "install", "-y", "-qq", "ffmpeg"]
):
    print("  - System libraries installed.")
else:
    # Best effort: ffmpeg may already be present on the runtime image.
    print("  - Warning: could not install ffmpeg via apt-get; continuing in case it is already available.")

# Cosmetic bar only, as above.
for i in tqdm(range(100), desc="Installing WhisperJAV"):
    time.sleep(0.05)
# Invoke pip through the running interpreter so the package lands in the
# environment this notebook is executing in.
whisperjav_installed = _run_quietly([
    sys.executable, "-m", "pip", "install", "-q", "-U",
    "git+https://github.com/meizhong986/WhisperJAV.git",
])
if not whisperjav_installed:
    # Without the package the run step cannot work, so stop here.
    display(HTML('<div style="background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;"><h3 style="color: #721c24;">❌ ERROR: Failed to install WhisperJAV.</h3><p style="color: #721c24;">The pip installation did not complete. Please check your network connection and re-run the cell.</p></div>'))
    sys.exit()
print("  - WhisperJAV package installed.")
print("✅ Dependencies installed.\n")

# STEP 3 - sanitize the credit texts and show the effective configuration.
from html import escape as html_escape  # local import: only needed for the summary

# Sanitize user input so it cannot break the SRT structure.
opening_prologue_safe = sanitize_subtitle_text(opening_prologue)
closing_credits_safe = sanitize_subtitle_text(closing_credits_text)

print("STEP 3: CONFIGURATION SUMMARY")
# Every interpolated value is HTML-escaped: the credit texts are free-form
# user input, and characters such as '<' or '&' would otherwise be rendered
# as markup (or swallowed) instead of being shown literally.
config_summary_html = f"""<div style='font-family: monospace; border: 1px solid #ccc; padding: 10px; border-radius: 5px;'>
    <b>Mode:</b> {html_escape(str(mode))}<br>
    <b>Sensitivity:</b> {html_escape(str(sensitivity))}<br>
    <b>Subtitle Language:</b> {html_escape(str(subs_language))}<br>
    <b>Enhancements:</b><br>
    &nbsp;&nbsp;- Adaptive Classification: {'✅' if adaptive_classification else '❌'}<br>
    &nbsp;&nbsp;- Adaptive Audio: {'✅' if adaptive_audio_enhancement else '❌'}<br>
    &nbsp;&nbsp;- Smart Postprocessing: {'✅' if smart_postprocessing else '❌'}<br>
    <b>Opening Credits:</b> {html_escape(opening_prologue_safe) if opening_prologue_safe else 'None'}<br>
    <b>Closing Credits:</b> {html_escape(closing_credits_safe) if closing_credits_safe else 'None'}
</div>"""
display(HTML(config_summary_html))
print("\n")

print("STEP 4: RUNNING WHISPERJAV...")
base_cmd = f"whisperjav '{drive_folder}'"
options = {
    '--mode': mode, '--sensitivity': sensitivity, '--subs-language': subs_language, '--output-dir': str(drive_folder),
    '--opening-prologue': opening_prologue_safe,
    '--adaptive-classification': adaptive_classification, '--adaptive-audio-enhancement': adaptive_audio_enhancement, '--smart-postprocessing': smart_postprocessing
}

cmd_parts = [base_cmd]
for flag, value in options.items():
    if isinstance(value, bool):
        if value: cmd_parts.append(flag)
    elif value: cmd_parts.append(f"{flag} {shlex.quote(str(value))}")

full_command = ' '.join(cmd_parts)
print(f"Executing command: {full_command}")
print("---")
!{full_command}
print("---")
print("✅ WhisperJAV processing complete.\n")

if closing_credits_safe:
    print("STEP 5: ADDING CLOSING CREDITS...")
    srt_files = list(drive_folder.glob('*.srt'))
    if srt_files:
        for srt_file in tqdm(srt_files, desc="Adding Credits"):
            try:
                with open(srt_file, 'a', encoding='utf-8') as f:
                    f.write(f"\n9999\n23:59:58,000 --> 23:59:59,000\n{closing_credits_safe}\n")
            except Exception as e:
                print(f"  - Warning: Could not add credits to {srt_file.name}: {e}")
        print("✅ Credits added.")
    else:
        print("  - No SRT files found to add credits to.")

display(HTML("""<div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 8px; padding: 20px; margin-top: 20px;">
    <h3 style="color: #155724; margin-top: 0;">🎉 Success! All tasks are complete.</h3>
    <p style="color: #155724; margin-bottom: 0;">You can now run the <strong>'2️⃣ View Results & Download'</strong> cell to see your subtitle files.</p>
</div>"""))

In [None]:
#@title View Results
#@markdown Run this cell to see the generated files in your Google Drive.

from pathlib import Path
from IPython.display import display, HTML

# List the generated .srt files and preview the most recently modified one.
from html import escape as html_escape  # local import: only needed for the preview

whisperjav_folder = Path('/content/drive/MyDrive/WhisperJAV')
srt_files = sorted(whisperjav_folder.glob('*.srt'))

print("📄 Subtitle Files Generated in your Google Drive:")
print(f"📁 {whisperjav_folder}")
print("-" * 50)

if not srt_files:
    display(HTML('<p>❌ No subtitle files found. Please ensure the <b>Configure &amp; Run WhisperJAV</b> cell completed successfully.</p>'))
else:
    for srt in srt_files:
        size_kb = srt.stat().st_size / 1024
        print(f"✅ {srt.name} ({size_kb:.1f} KB)")

    print("\n📖 Preview of the latest subtitle file:")
    latest_srt = max(srt_files, key=lambda p: p.stat().st_mtime)
    try:
        content = latest_srt.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as e:
        print(f"Could not read file for preview: {e}")
    else:
        # Escape the file name and contents: subtitle text routinely contains
        # '<', '>' and '&' (e.g. <i> tags), which would otherwise be
        # interpreted as HTML instead of being displayed verbatim.
        preview_html = f"""<div style='background: #f8f9fa; border: 1px solid #e1e4e8; padding: 15px; border-radius: 5px; max-height: 400px; overflow-y: auto; font-family: monospace; white-space: pre-wrap;'>
            <b>--- Previewing: {html_escape(latest_srt.name)} ---</b><br><br>
            {html_escape(content)}
            </div>"""
        display(HTML(preview_html))

## 💡 Tips & Troubleshooting

<details>
<summary><strong>❓ No media files found or processed?</strong></summary>
<ul>
  <li>Ensure you have uploaded your video/audio files to the correct folder in your Google Drive: <code>My Drive/WhisperJAV/</code>.</li>
  <li>Check the output of the first cell for any error messages related to file access.</li>
  <li>Re-run the first cell (<code>Configure &amp; Run WhisperJAV</code>) to have it scan for files again.</li>
</ul>
</details>

<details>
<summary><strong>🧠 Understanding the Core Options</strong></summary>
<ul>
  <li><strong>Mode:</strong> <code>balanced</code> is recommended for the best quality. Use <code>fast</code> or <code>faster</code> for quicker results on lower-priority content, but expect lower accuracy.</li>
  <li><strong>Sensitivity:</strong> <code>balanced</code> is usually fine. Use <code>aggressive</code> if the audio is very quiet or has lots of background noise, but be prepared for more 'hallucinated' or incorrect text. Use <code>conservative</code> for very clear audio to reduce errors.</li>
</ul>
</details>

<details>
<summary><strong>🔌 Session disconnected or out of memory?</strong></summary>
<ul>
  <li>Colab has time limits, especially on the free tier. Long processing jobs can be disconnected. Keeping the browser tab active can help.</li>
  <li>If you see an 'Out of Memory' error, the video file might be too long for the available GPU RAM. Try restarting the session via <code>Runtime → Disconnect and delete runtime</code> and running again.</li>
</ul>
</details>