<a href="https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_1_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# WhisperJAV - Google Colab Edition 🎌
Generate Subtitles for Japanese Adult Videos using Free GPU
1. Make sure your audio files are in the `WhisperJAV` folder of your Google Drive.
2. To change the default settings, use the form below.
3. Click `Runtime` → `Run all` in the menu to start everything.
4. **Connect Google Drive** when prompted, then click through the authorization screens to continue.


In [None]:
#@title Configuration & Settings
# User-editable settings for this notebook. Lines carrying "#@param" are rendered by
# Colab as form fields; variables without it can only be changed by editing the code.


#@markdown Choose accuracy vs. speed level
mode = "faster"  #@param ["balanced", "fast", "faster"]

#@markdown Select details vs. noise tolerance level
sensitivity = "balanced"  #@param ["balanced", "aggressive", "conservative"]

#@markdown Select target subtitle language
subs_language = "japanese"  #@param ["japanese", "english-direct"]



# The three booleans below are not exposed in the form. Each one is forwarded to the
# whisperjav command line as a bare switch only when it is True.
adaptive_classification = False
adaptive_audio_enhancement = False
smart_postprocessing = True
# Leaving the placeholder text unchanged is treated later as "no opening credits".
opening_credits = "Subtitles by yourname" #@param {type:"string"}
# Not a form field; appended as a final cue to each .srt file during post-processing.
closing_credits_text = "Subs by WhisperJAV Colab"

#@markdown ---
#@markdown 🔌 Session Management *Automatically disconnect when finished to save GPU credits.*
# When True, the runtime is released via google.colab.runtime.unassign() at the end.
auto_disconnect = True #@param {type:"boolean"}
#@markdown ---

#===============================================================================
#  ✅ END OF CONFIGURATION - THE REST OF THE NOTEBOOK IS AUTOMATED
#===============================================================================
import os
import sys
import subprocess
import shlex
import time
from pathlib import Path
import html
from google.colab import drive
from IPython.display import display, HTML
from tqdm.notebook import tqdm

# Step 1: report the GPU attached to this runtime (name, driver, total/used memory).
# The leading "!" is IPython/Colab shell-escape syntax, so this line only works when
# the code runs inside a notebook, not as a plain Python script.
print("--- STEP 1: PRE-FLIGHT CHECKS ---")
!nvidia-smi --query-gpu=name,driver_version,memory.total,memory.used --format=csv,noheader
# NOTE(review): this message is printed unconditionally; the exit status of
# nvidia-smi is not inspected here.
print("✅ GPU check complete.\n")

print("--- STEP 2: CONNECTING GOOGLE DRIVE ---")
# Mount Google Drive and make sure the working folder exists. `drive_folder` is used
# by the later steps both as the input folder and as the output folder.
try:
    drive.mount('/content/drive', force_remount=True)
    drive_folder = Path('/content/drive/MyDrive/WhisperJAV')
    drive_folder.mkdir(exist_ok=True)
    print(f"✅ Google Drive connected. Using folder: {drive_folder}\n")
except Exception as e:
    # Top-level boundary: any failure while mounting or creating the folder stops the notebook.
    display(HTML(f'<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Failed to connect Google Drive.</h3><p style=\"color: #721c24;\">Please re-run the cell and ensure you accept the authorization pop-up.</p></div>'))
    # The panel above only carries a generic hint; also surface the underlying cause
    # so a failure other than a declined authorization can be diagnosed.
    print(f"--- DETAILED ERROR LOG ---\n{e}\n-------------------------------------", file=sys.stderr)
    sys.exit()

# === Faithful Integration of Your Installation Snippet ===
import torch

def print_step_html(step_num, title):
    """Show a "Step <step_num>: <title>" heading in the notebook output.

    Both values are interpolated into the markup as-is.
    """
    heading_markup = f"""
    <div style='margin-top:16px;margin-bottom:8px'>
        <span style='font-weight:bold;color:#1f77b4'>Step {step_num}:</span>
        <span style='margin-left:8px'>{title}</span>
    </div>
    """
    display(HTML(heading_markup))

def print_status_html(success, message, duration=None):
    """Show a one-line status entry (coloured icon plus message) in the notebook output.

    Args:
        success: Truthy selects the green check mark, falsy the red cross.
        message: Text shown next to the icon; interpolated into the markup as-is.
        duration: Optional elapsed time in seconds, rendered with one decimal
            place after the message. ``None`` omits the timing.
    """
    color, icon = ("#2ecc71", "✔") if success else ("#e74c3c", "✖")
    # Test against None rather than truthiness so that a measured duration of 0
    # is still displayed instead of being mistaken for "no duration given".
    if duration is None:
        time_str = ""
    else:
        time_str = f" <span style='color:#7f8c8d;font-size:0.9em'>({duration:.1f}s)</span>"
    display(HTML(f"""
    <div style='margin-left:24px;margin-bottom:4px'>
        <span style='color:{color};font-weight:bold'>{icon}</span>
        <span style='margin-left:6px'>{message}{time_str}</span>
    </div>
    """))

def run_install_command(command, success_msg, error_msg):
    """Run one shell command and report the outcome as a status line.

    Args:
        command: Shell command string, executed through the shell.
        success_msg: Status text shown (with the elapsed time) on exit code 0.
        error_msg: Status text shown when the command exits non-zero.

    Returns:
        True when the command succeeded, False otherwise. On failure the
        captured stderr of the command is also printed to stderr.
    """
    started_at = time.time()
    try:
        # Output is captured so that it only appears when something goes wrong.
        subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
        elapsed = time.time() - started_at
        print_status_html(True, success_msg, elapsed)
        return True
    except subprocess.CalledProcessError as failure:
        print_status_html(False, f"{error_msg}")
        captured_errors = failure.stderr.strip()
        print(f"--- DETAILED ERROR LOG FOR '{success_msg}' ---\n{captured_errors}\n-------------------------------------", file=sys.stderr)
        return False

def verify_pytorch():
    """Check that torch, torchvision and torchaudio import and that CUDA is usable.

    Returns:
        True when all three packages import and ``torch.cuda.is_available()``
        is true; False otherwise. Each outcome is reported as a status line.
    """
    try:
        import torch
        import torchvision
        import torchaudio

        versions_found = f"PyTorch {torch.__version__}, TorchVision {torchvision.__version__}, and TorchAudio {torchaudio.__version__} found."
        print_status_html(True, versions_found)

        # Guard clause: without CUDA there is nothing more to report.
        if not torch.cuda.is_available():
            print_status_html(False, "CUDA torch not available. This notebook requires a GPU-enabled PyTorch.")
            return False

        print_status_html(True, f"CUDA {torch.version.cuda} is available.")
        return True
    except ImportError as import_error:
        print_status_html(False, f"PyTorch check failed: {str(import_error)}")
        return False


def install_whisperjav():
    """Install everything WhisperJAV needs, stopping at the first failure.

    Verifies the PyTorch environment, then runs the system-package, utility,
    core-component and main-package install commands in order.

    Returns:
        True when every step succeeded, False as soon as one step fails.
    """
    display(HTML("<h3 style='color:#1f77b4; border-bottom: 1px solid #ccc; padding-bottom: 5px;'>STEP 3: Installing Dependencies <span style='font-size: 0.9em; font-weight: normal;'>Takes about 3min ☕ Coffee break</span></h3>"))

    print_step_html(3.1, "Verifying PyTorch installation")
    if not verify_pytorch():
        print_status_html(False, "Cannot proceed without a valid PyTorch environment.")
        return False

    # (pip requirement, display name) pairs installed one at a time under step 3.4.
    core_packages = [
        ("openai-whisper@git+https://github.com/openai/whisper@v20231117", "OpenAI Whisper"),
        ("faster-whisper", "Faster Whisper"),
        ("stable-ts@git+https://github.com/meizhong986/stable-ts-fix-setup.git@main", "Stable TS"),
    ]

    # Each entry: (step number, heading, [(shell command, success text, failure text), ...]).
    install_plan = [
        (3.2, "Installing system packages", [
            ("apt-get update -qq && apt-get install -y -qq portaudio19-dev ffmpeg", "System packages installed", "Failed to install system packages"),
        ]),
        (3.3, "Installing Python utilities", [
            ("pip install -q ffmpeg-python soundfile auditok numpy scipy tqdm pysrt srt numba pyaudio", "Utilities installed", "Failed to install utilities"),
        ]),
        (3.4, "Installing core components ... be patient", [
            (f"pip install -q {pkg}", f"{name} installed", f"Failed to install {name}")
            for pkg, name in core_packages
        ]),
        (3.5, "Installing WhisperJAV main package", [
            ("pip install --no-deps --ignore-installed -q git+https://github.com/meizhong986/WhisperJAV.git", "WhisperJAV installed", "Failed to install WhisperJAV"),
        ]),
    ]

    for step_num, heading, commands in install_plan:
        print_step_html(step_num, heading)
        for shell_cmd, ok_text, fail_text in commands:
            if not run_install_command(shell_cmd, ok_text, fail_text):
                return False

    return True

# Run the installer; the notebook stops here if any installation step failed.
if install_whisperjav():
    display(HTML("<h3 style='color:#2ecc71'>✔ Installation Completed Successfully</h3>"))
    print("\n")
else:
    display(HTML("<h3 style='color:#e74c3c'>✖ Installation Failed. Notebook has been halted.</h3>"))
    sys.exit()
# === End of Snippet Integration ===

print("--- STEP 4: PREPARING TO RUN WHISPERJAV TRANSCRIPTION ---")
# The untouched placeholder text from the form means "no opening credits".
if opening_credits == "Subtitles by yourname":
    opening_credits = ""

# Positional part of the command line: the executable and the input folder.
command_list = ['whisperjav', str(drive_folder)]

# Option name -> value. Booleans become bare switches; other values follow their flag.
options = {
    '--mode': mode,
    '--sensitivity': sensitivity,
    '--subs-language': subs_language,
    '--output-dir': str(drive_folder),
    '--adaptive-classification': adaptive_classification,
    '--adaptive-audio-enhancement': adaptive_audio_enhancement,
    '--smart-postprocessing': smart_postprocessing
}

for switch, setting in options.items():
    if not setting:
        # False switches and empty values contribute nothing to the command line.
        continue
    if isinstance(setting, bool):
        command_list.append(switch)
    else:
        command_list.extend([switch, str(setting)])

# Quote every argument so the joined string is safe to hand to the shell.
full_command = shlex.join(command_list)
print(f"Executing command: {full_command}\n")

# Stream the tool's combined stdout/stderr line by line while it runs.
with subprocess.Popen(full_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, universal_newlines=True) as process:
    for line in process.stdout:
        print(line, end='')

# A non-zero exit code stops the notebook with an error panel.
if process.returncode != 0:
    error_message = f"The main process failed with exit code {process.returncode}."
    display(HTML(f'''<div style=\"background-color: #f8d7da; border: 1px solid #f5c6cb; border-radius: 8px; padding: 20px;\"><h3 style=\"color: #721c24;\">❌ ERROR: Transcription Failed</h3><p style=\"color: #721c24;\">{html.escape(error_message)} Please check the console output above for the specific error from the script.</p></div>'''))
    sys.exit()

print("\n--- STEP 5: POST-PROCESSING CREDITS ---")
# Every .srt file in the Drive folder is visited, including files left over from
# earlier runs, so the credit insertion below must be safe to repeat.
srt_files = list(drive_folder.glob('*.srt'))


def _with_credits(content, opening, closing):
    """Return *content* with the opening and closing credit cues added.

    A cue that is already present at its expected position (the file starts
    with this exact opening cue, or ends with this exact closing cue) is not
    added a second time, so re-running the notebook on an already processed
    file leaves it unchanged. Cues written with different credit text by an
    earlier run are not detected.
    """
    if opening:
        prologue_line = f"0\n00:00:00,000 --> 00:00:00,500\n{opening}\n\n"
        if not content.startswith(prologue_line):
            content = prologue_line + content
    if closing:
        closing_line = f"\n9999\n23:59:58,000 --> 23:59:59,000\n{closing}\n"
        if not content.endswith(closing_line):
            content += closing_line
    return content


if opening_credits or closing_credits_text:
    for srt_file in tqdm(srt_files, desc="Adding Credits"):
        try:
            original_content = srt_file.read_text(encoding='utf-8')
            new_content = _with_credits(original_content, opening_credits, closing_credits_text)

            # Only touch the file when something was actually added.
            if new_content != original_content:
                srt_file.write_text(new_content, encoding='utf-8')

        except (OSError, UnicodeError) as e:
            # Best effort: a file that cannot be read or written is reported and skipped.
            print(f"   - Warning: Could not add credits to {srt_file.name}: {e}")
print("✅ Post-processing complete.\n")

# Final banner, shown once transcription and credit post-processing are done.
success_banner = """<div style=\"background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 8px; padding: 20px; margin-top: 20px;\"><h3 style=\"color: #155724; margin-top: 0;\">🎉 Success! All tasks are complete.</h3><p style=\"color: #155724; margin-bottom: 0;\">The session will now disconnect automatically if you enabled the option.</p></div>"""
display(HTML(success_banner))

# Pause briefly so pending file operations can finish before a possible disconnect.
time.sleep(5)

if auto_disconnect:
    print("\n🔌 Auto-disconnect enabled. This session will now end to save resources.")
    time.sleep(10)
    # Imported only when needed; unassign() releases the Colab runtime.
    from google.colab import runtime
    runtime.unassign()