# WhisperJAV Colab Installation Diagnostic

This notebook tests the `install_colab.sh` script by cloning the repo into the Colab runtime and running the script from that local clone.

**Instructions:**
1. Run each cell **one at a time** (Shift+Enter)
2. Note which cell fails and what error message you see
3. Report findings for debugging

## Cell 1: Check Colab Environment

In [None]:
# Check GPU, CUDA, and Python environment.
# Stops the cell with SystemExit when nvidia-smi cannot report a GPU.
import subprocess
import sys

print("="*60)
print("COLAB ENVIRONMENT CHECK")
print("="*60)

# GPU Info
# subprocess.run raises FileNotFoundError (instead of returning a non-zero
# exit code) when the nvidia-smi binary is not on PATH, so treat both cases
# as "no GPU" and show the same guidance.
try:
    result = subprocess.run(
        ["nvidia-smi", "--query-gpu=name,driver_version,memory.total", "--format=csv,noheader"],
        capture_output=True, text=True
    )
    gpu_detected = result.returncode == 0
except FileNotFoundError:
    result = None
    gpu_detected = False

if gpu_detected:
    print(f"GPU: {result.stdout.strip()}")
else:
    print("ERROR: nvidia-smi failed - no GPU available?")
    print("Go to Runtime → Change runtime type → T4 GPU")
    raise SystemExit("No GPU detected")

print(f"\nPython: {sys.version}")

# Check Colab's pre-installed PyTorch (informational only; absence is not fatal)
try:
    import torch
    print(f"\nColab PyTorch: {torch.__version__}")
    print(f"Colab PyTorch CUDA: {torch.version.cuda}")
    print(f"CUDA available: {torch.cuda.is_available()}")
except ImportError:
    print("\nPyTorch not pre-installed (unusual for Colab)")

# Check numpy version (this is the conflict source)
try:
    import numpy as np
    print(f"\nColab numpy: {np.__version__}")
    if np.__version__.startswith("2."):
        print("⚠ numpy 2.x detected - this is why we need an isolated venv")
except ImportError:
    print("numpy not installed")

print("\n" + "="*60)
print("✓ Environment check: PASSED")

## Cell 2: Clone Repo & Run Installer

In [None]:
# Clone WhisperJAV repo and run installer locally
# Uses Popen for real-time output streaming in Jupyter

import subprocess
import os
import sys
import time

REPO_URL = "https://github.com/meizhong986/WhisperJAV.git"
REPO_PATH = "/content/WhisperJAV"
SCRIPT_PATH = f"{REPO_PATH}/installer/install_colab.sh"

print("="*60)
print("INSTALL WHISPERJAV")
print("="*60)

# Step 1: Clone repo (or pull if exists)
if os.path.exists(REPO_PATH):
    print(f"Repo already exists at {REPO_PATH}")
    print("Pulling latest changes...")
    result = subprocess.run(["git", "-C", REPO_PATH, "pull"], capture_output=True, text=True)
    if result.returncode != 0:
        # Best-effort: a failed pull only warns, the existing checkout is still used.
        print(f"Warning: git pull failed: {result.stderr}")
    else:
        print("✓ Pulled latest")
else:
    print(f"Cloning {REPO_URL}...")
    result = subprocess.run(["git", "clone", REPO_URL, REPO_PATH], capture_output=True, text=True)
    if result.returncode != 0:
        print("ERROR: git clone failed")
        print(result.stderr)
        raise SystemExit("Failed to clone repository")
    print(f"✓ Cloned to {REPO_PATH}")

# Step 2: Verify script exists
if not os.path.exists(SCRIPT_PATH):
    print(f"\nERROR: Install script not found at {SCRIPT_PATH}")
    print("\nThis means the script hasn't been committed to the repo yet.")
    print("The install_colab.sh must be pushed to GitHub first.")
    raise SystemExit("Install script not found in repo")

print(f"✓ Script found: {SCRIPT_PATH}")

# Step 3: Run installer with real-time output streaming
print("\n" + "-"*60)
print("Running install_colab.sh...")
print("-"*60 + "\n")
sys.stdout.flush()

start_time = time.time()

# Extend PATH with ~/.local/bin for the installer process. Empty components are
# dropped so an unset PATH does not produce a leading ":" (an empty PATH entry
# is searched as the current directory).
path_entries = [
    entry
    for entry in (os.environ.get("PATH", ""), os.path.expanduser("~/.local/bin"))
    if entry
]
env = {**os.environ, "PATH": os.pathsep.join(path_entries)}

# Use Popen for real-time output streaming in Jupyter.
# The context manager closes the stdout pipe and reaps the child even if
# printing is interrupted; errors="replace" keeps streaming alive when the
# installer emits bytes that are not valid in the locale encoding.
with subprocess.Popen(
    ["bash", SCRIPT_PATH],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # interleave stderr with stdout in one stream
    bufsize=1,                 # line-buffered (valid because text=True)
    text=True,
    errors="replace",
    env=env
) as process:
    # Stream output line by line
    for line in iter(process.stdout.readline, ''):
        print(line, end='', flush=True)

    process.wait()

elapsed = time.time() - start_time

print("\n" + "="*60)
if process.returncode != 0:
    print(f"ERROR: Installation FAILED (exit code {process.returncode})")
    print("="*60)
    raise SystemExit(f"Installation failed with exit code {process.returncode}")
else:
    print(f"✓ Installation SUCCEEDED ({elapsed:.0f} seconds)")
    print("="*60)

## Cell 3: Verify Venv Structure

In [None]:
# Confirm that the virtual environment contains the expected executables.
# Every expected path is reported; the cell stops with SystemExit if any is missing.
import os

VENV_PATH = "/content/whisperjav_env"

banner = "=" * 60
print(banner)
print("VENV STRUCTURE CHECK")
print(banner)

# (display label, file name under <venv>/bin)
required_entries = (
    ("Python interpreter", "python"),
    ("pip", "pip"),
    ("whisperjav CLI", "whisperjav"),
    ("whisperjav-translate CLI", "whisperjav-translate"),
)

failed = []
for label, executable in required_entries:
    target = f"{VENV_PATH}/bin/{executable}"
    if os.path.exists(target):
        print(f"✓ {label}: {target}")
    else:
        print(f"✗ {label}: {target}")
        failed.append(label)

print("\n" + banner)
if not failed:
    print("✓ Venv structure: PASSED")
else:
    print("ERROR: Venv structure check FAILED")
    print("Missing: " + ", ".join(failed))
    print(banner)
    raise SystemExit("Venv structure incomplete")

## Cell 4: Verify Python Packages

In [None]:
# Verify packages are installed in the venv.
# Each import is executed with the venv's own interpreter (not the notebook
# kernel), so the results describe the isolated environment.
import subprocess

VENV_PYTHON = "/content/whisperjav_env/bin/python"

print("="*60)
print("VENV PACKAGE VERIFICATION")
print("="*60)

# (display name, one-liner run via `python -c` inside the venv)
packages_to_check = [
    ("torch", "import torch; print(f'PyTorch {torch.__version__}, CUDA {torch.version.cuda}, available={torch.cuda.is_available()}')"),
    ("numpy", "import numpy; print(f'numpy {numpy.__version__}')"),
    ("whisperjav", "import whisperjav; print('whisperjav OK')"),
    ("whisper", "import whisper; print('whisper OK')"),
    ("stable_whisper", "import stable_whisper; print('stable_whisper OK')"),
    ("faster_whisper", "import faster_whisper; print('faster_whisper OK')"),
]

failed = []
for name, check_cmd in packages_to_check:
    result = subprocess.run([VENV_PYTHON, "-c", check_cmd], capture_output=True, text=True)
    if result.returncode == 0:
        print(f"✓ {name}: {result.stdout.strip()}")
    else:
        print(f"✗ {name}: FAILED")
        # A Python traceback ends with the exception type and message, so show
        # the last line of stderr rather than the first characters, which are
        # only the "Traceback (most recent call last)" preamble.
        stderr_lines = result.stderr.strip().splitlines()
        if stderr_lines:
            print(f"  Error: {stderr_lines[-1].strip()[:200]}")
        failed.append(name)

# Verify numpy is < 2.0 in venv
print("\n" + "-"*60)
result = subprocess.run(
    # sys.exit is used instead of the bare `exit` helper, which only exists
    # when the `site` module has been loaded.
    [VENV_PYTHON, "-c", "import sys, numpy; v = numpy.__version__; print(v); sys.exit(0 if int(v.split('.')[0]) < 2 else 1)"],
    capture_output=True, text=True
)
numpy_version = result.stdout.strip()
if result.returncode == 0:
    print(f"✓ numpy version: {numpy_version} (< 2.0 as required)")
elif numpy_version:
    print(f"✗ numpy version: {numpy_version} (expected < 2.0)")
    failed.append("numpy version")
else:
    # No version was printed, i.e. the import itself failed inside the venv.
    print("✗ numpy version: could not be determined (expected < 2.0)")
    failed.append("numpy version")

print("\n" + "="*60)
if failed:
    print(f"ERROR: Package verification FAILED: {', '.join(failed)}")
    raise SystemExit("Package verification failed")
else:
    print("✓ Package verification: PASSED")

## Cell 5: Test CLI

In [None]:
# Test the CLI commands
import subprocess

VENV_PATH = "/content/whisperjav_env"


def show_cli_help(cli_name, max_lines):
    """Run `<venv>/bin/<cli_name> --help` and print the first lines of its output.

    Args:
        cli_name: File name of the executable inside the venv's bin directory.
        max_lines: Maximum number of help-output lines to print.

    Raises:
        SystemExit: If the executable cannot be started or exits non-zero.
            The exit code and the tail of stderr are printed first so the
            failure can be reported.
    """
    cli_path = f"{VENV_PATH}/bin/{cli_name}"
    try:
        result = subprocess.run([cli_path, "--help"], capture_output=True, text=True)
    except OSError as exc:
        # Missing or non-executable file: report it instead of a raw traceback.
        print(f"ERROR: {cli_name} --help failed")
        print(f"  Could not execute {cli_path}: {exc}")
        raise SystemExit("CLI test failed") from None

    if result.returncode != 0:
        print(f"ERROR: {cli_name} --help failed")
        print(f"  Exit code: {result.returncode}")
        # The end of stderr carries the actual exception/message.
        for err_line in result.stderr.strip().splitlines()[-5:]:
            print(f"  {err_line}")
        raise SystemExit("CLI test failed")

    print(f"{cli_name} --help:")
    print("\n".join(result.stdout.strip().split("\n")[:max_lines]))


print("="*60)
print("CLI TEST")
print("="*60)

show_cli_help("whisperjav", 15)
print("...\n")

show_cli_help("whisperjav-translate", 10)
print("...")

print("\n" + "="*60)
print("✓ CLI test: PASSED")

## Cell 6: Check llama-cpp-python

In [None]:
# Check llama-cpp-python (optional component).
# This cell never aborts: a missing or broken llama-cpp-python is only reported.
import subprocess

VENV_PYTHON = "/content/whisperjav_env/bin/python"

print("="*60)
print("LLAMA-CPP-PYTHON CHECK")
print("="*60)

result = subprocess.run(
    [VENV_PYTHON, "-c", "import llama_cpp; print(f'Version: {llama_cpp.__version__}')"],
    capture_output=True, text=True
)

if result.returncode == 0:
    print(f"✓ llama-cpp-python: {result.stdout.strip()}")
    print("\n" + "="*60)
    print("✓ llama-cpp-python: INSTALLED")
else:
    # The last stderr line of a failed `python -c` is the exception message.
    stderr_lines = result.stderr.strip().splitlines()
    reason = stderr_lines[-1].strip() if stderr_lines else ""

    if not reason or "No module named 'llama_cpp'" in reason:
        # The package itself is absent from the venv.
        print("⚠ llama-cpp-python: NOT INSTALLED")
        print("")
        print("This is expected if no prebuilt wheel was available.")
    else:
        # The package was found but importing it raised something else
        # (for example a missing dependency or shared library), so saying
        # "not installed" would be misleading.
        print("⚠ llama-cpp-python: IMPORT FAILED")
        print("")
        print(f"Reason: {reason[:200]}")
    print("Local LLM translation won't work, but cloud providers will.")
    print("\n" + "="*60)
    print("⚠ llama-cpp-python: SKIPPED (optional)")

## Cell 7: Summary Report

In [None]:
# Generate summary report.
# Prints GPU/driver details, the kernel's Python version and the venv's
# PyTorch/numpy versions. Stops with SystemExit if the venv cannot be queried,
# so the final "complete" banner is never shown for a broken install.
import subprocess
import sys

VENV_PYTHON = "/content/whisperjav_env/bin/python"

print("="*60)
print("INSTALLATION SUMMARY")
print("="*60)

# Get driver info
try:
    result = subprocess.run(
        ["nvidia-smi", "--query-gpu=driver_version,name", "--format=csv,noheader"],
        capture_output=True, text=True
    )
    gpu_rows = result.stdout.strip().splitlines() if result.returncode == 0 else []
except FileNotFoundError:
    # nvidia-smi binary is not on PATH.
    gpu_rows = []

if gpu_rows:
    # nvidia-smi prints one CSV row per GPU; report each row separately.
    for row in gpu_rows:
        parts = row.split(", ", 1)
        print(f"Driver: {parts[0]}")
        print(f"GPU: {parts[1] if len(parts) > 1 else 'unknown'}")
else:
    print("Driver: unknown (nvidia-smi unavailable)")
    print("GPU: unknown")

print(f"Colab Python: {sys.version.split()[0]}")

# Venv info (queried with the venv's interpreter, not the notebook kernel)
venv_info = subprocess.run(
    [VENV_PYTHON, "-c", """
import torch
import numpy
print(f"Venv PyTorch: {torch.__version__}")
print(f"Venv PyTorch CUDA: {torch.version.cuda}")
print(f"Venv numpy: {numpy.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
"""],
    capture_output=True, text=True
)
print(venv_info.stdout)

if venv_info.returncode != 0:
    print("ERROR: could not query the venv")
    # The end of stderr carries the actual exception/message.
    for err_line in venv_info.stderr.strip().splitlines()[-5:]:
        print(f"  {err_line}")
    print("="*60)
    raise SystemExit("Summary failed: venv query returned an error")

print("="*60)
print("\n✓ ALL DIAGNOSTICS COMPLETE")
print("\nTo use WhisperJAV:")
print("  /content/whisperjav_env/bin/whisperjav <video_path>")