In [2]:
# Show the GPUs visible to this session (driver/CUDA version, memory, utilisation).
!nvidia-smi

Sat Jan 31 15:21:40 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.105.08             Driver Version: 580.105.08     CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   39C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                       Off |   00

Environment:
- Platform: Kaggle Notebooks (Free Tier)
- GPU: NVIDIA Tesla T4 ×2 (GPUs 0 and 1 in the `nvidia-smi` listing above; 16 GB VRAM, reported as 15360 MiB)
- Session Type: Ephemeral


In [3]:
import subprocess
import os
import sys
import time
import shutil
from getpass import getpass

def _last_line(text, limit=200):
    """Return the last non-empty line of ``text`` (at most ``limit`` chars).

    Carriage returns are treated as line breaks so that progress-bar style
    output does not hide the final error message.
    """
    lines = [ln.strip() for ln in (text or "").replace("\r", "\n").splitlines()]
    lines = [ln for ln in lines if ln]
    return lines[-1][:limit] if lines else "(no error output)"


def _pip_step(ok_msg, *pip_args, required=True):
    """Run ``<python> -m pip <pip_args>`` quietly and report the real outcome.

    Args:
        ok_msg: Text printed after the check mark when pip exits with 0.
        *pip_args: Arguments passed to pip verbatim.
        required: Only affects the marker used in the failure line; the
            caller decides whether to abort.

    Returns:
        True when pip exited with status 0, False otherwise.
    """
    proc = subprocess.run(
        [sys.executable, "-m", "pip", *pip_args],
        capture_output=True, text=True
    )
    if proc.returncode == 0:
        print(f"   ‚úì {ok_msg}")
        return True
    mark = "‚úó" if required else "‚ö†"
    print(f"   {mark} pip {' '.join(pip_args)} failed: {_last_line(proc.stderr)}")
    return False


def main():
    """Deploy Ollama + Open WebUI in this session and expose it through ngrok.

    Steps: prompt for the ngrok token, install system and Python
    dependencies, install and start Ollama, pull the models, start Open
    WebUI, open an ngrok tunnel to port 8080, then block in a keep-alive
    loop until interrupted.

    Returns:
        None. The function returns early (after printing the reason) when a
        step that later steps depend on fails.
    """
    print("=" * 65)
    print("  SOVEREIGN HYBRID AI - KAGGLE DEPLOYMENT v1.0")
    print("  Estimated time: 15-25 minutes")
    print("=" * 65)

    # Get token first (before any work)
    print("\nüîê Enter your Ngrok auth token:")
    print("   (Get it from: https://dashboard.ngrok.com/get-started/your-authtoken)")
    # Strip whitespace/newlines that tend to come along with a pasted token.
    NGROK_TOKEN = getpass("   Token: ").strip()

    if len(NGROK_TOKEN) < 20:
        print("   ‚úó Invalid token. Please get a valid token from ngrok.com")
        return

    print("\n" + "-" * 65)

    # =========================================================================
    # STEP 1: SYSTEM DEPENDENCIES
    # =========================================================================
    print("\n‚öôÔ∏è [1/7] Installing system dependencies...")

    subprocess.run(["apt-get", "update", "-qq"], capture_output=True)

    # Install zstd - CRITICAL for Ollama extraction
    subprocess.run(["apt-get", "install", "-y", "-qq", "zstd"], capture_output=True)

    # Verify zstd is installed (the binary on PATH is what matters, not apt's exit code)
    if shutil.which("zstd"):
        print("   ‚úì zstd installed")
    else:
        print("   ‚úó zstd installation failed!")
        return

    # Install fuser for port management
    subprocess.run(["apt-get", "install", "-y", "-qq", "psmisc"], capture_output=True)
    print("   ‚úì System dependencies ready")

    # =========================================================================
    # STEP 2: PYTHON PACKAGES (with all fixes)
    # =========================================================================
    print("\nüì¶ [2/7] Installing Python packages (3-5 minutes)...")

    # A failed pip self-upgrade is not fatal; the existing pip is still usable.
    _pip_step("pip upgraded", "install", "--upgrade", "pip", "-q", required=False)

    # pyngrok is needed in step 7; fail now instead of after the model downloads.
    if not _pip_step("pyngrok installed", "install", "pyngrok", "-q"):
        return

    # KAGGLE: May not have cupy issues, but remove just in case
    subprocess.run(
        [sys.executable, "-m", "pip", "uninstall", "cupy-cuda12x", "cupy", "-y"],
        capture_output=True
    )
    print("   ‚úì Removed potential conflicting packages")

    # Open WebUI is the whole point of the deployment; abort if it did not install.
    if not _pip_step("open-webui installed", "install", "open-webui", "-q"):
        return

    # Compatibility pins/reinstalls. Failures are reported but not fatal,
    # since the previously installed versions may still work.
    # Fix alembic (PriorityDispatchResult error)
    _pip_step("alembic fixed", "install", "alembic>=1.15.0",
              "--force-reinstall", "-q", required=False)
    # Fix transformers (PreTrainedModel error)
    _pip_step("transformers fixed", "install", "transformers>=4.40.0",
              "--force-reinstall", "-q", required=False)
    # Fix srsly (cupy dependency chain)
    _pip_step("srsly fixed", "install", "srsly",
              "--force-reinstall", "-q", required=False)

    # =========================================================================
    # STEP 3: INSTALL OLLAMA
    # =========================================================================
    print("\nü¶ô [3/7] Installing Ollama...")

    OLLAMA_URL = "https://ollama.com/download/ollama-linux-amd64.tar.zst"
    clean_cmd = ["rm", "-rf", "/usr/lib/ollama", "/usr/bin/ollama"]

    # Clean previous installation
    subprocess.run(clean_cmd, capture_output=True)

    # Method 1: Pipe directly
    try:
        subprocess.run(
            f'curl -fsSL "{OLLAMA_URL}" | tar -x --zstd -C /usr',
            shell=True, capture_output=True, text=True, timeout=300
        )
    except subprocess.TimeoutExpired:
        # A timed-out pipe may leave a truncated binary behind; remove it so
        # the check below falls through to the alternative method.
        subprocess.run(clean_cmd, capture_output=True)

    # If failed, try Method 2: Download then extract
    if not shutil.which("ollama"):
        print("   ‚Üí Primary method failed, trying alternative...")
        try:
            subprocess.run(
                ["curl", "-fsSL", "-o", "/tmp/ollama.tar.zst", OLLAMA_URL],
                capture_output=True, timeout=300
            )
            subprocess.run(
                ["zstd", "-d", "/tmp/ollama.tar.zst", "-o", "/tmp/ollama.tar"],
                capture_output=True
            )
            subprocess.run(
                ["tar", "-xf", "/tmp/ollama.tar", "-C", "/usr"],
                capture_output=True
            )
        except subprocess.TimeoutExpired:
            # Handled by the verification below, which reports the failure.
            pass
        finally:
            # Cleanup
            subprocess.run(
                ["rm", "-f", "/tmp/ollama.tar.zst", "/tmp/ollama.tar"],
                capture_output=True
            )

    # Verify
    if shutil.which("ollama"):
        print("   ‚úì Ollama installed")
    else:
        print("   ‚úó Ollama installation failed!")
        print("   This might be a temporary network issue. Try again in a few minutes.")
        return

    # KAGGLE: keep Ollama models under the notebook's working directory.
    # NOTE(review): whether /kaggle/working survives into a later session
    # depends on how the notebook output is saved - confirm before relying on it.
    OLLAMA_HOME = "/kaggle/working/.ollama"
    os.makedirs(OLLAMA_HOME, exist_ok=True)
    os.environ["OLLAMA_MODELS"] = OLLAMA_HOME

    # =========================================================================
    # STEP 4: START OLLAMA SERVER
    # =========================================================================
    print("\nüöÄ [4/7] Starting Ollama server...")

    # OLLAMA_MODELS is inherited from os.environ (set just above).
    env = os.environ.copy()
    env["OLLAMA_HOST"] = "0.0.0.0:11434"
    env["OLLAMA_ORIGINS"] = "*"

    # start_new_session detaches the server from this process's session.
    ollama_proc = subprocess.Popen(
        ["ollama", "serve"],
        env=env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        start_new_session=True
    )

    # Wait for Ollama to be ready
    import requests
    for _ in range(30):
        try:
            requests.get("http://localhost:11434", timeout=2)
            print("   ‚úì Ollama server running")
            break
        except requests.RequestException:
            # Probe first, then check the child: if an earlier server is
            # already listening on the port, the probe succeeds even though
            # our new process exits.
            if ollama_proc.poll() is not None:
                print("   ‚úó Ollama failed to start")
                return
            time.sleep(2)
    else:
        print("   ‚úó Ollama failed to start")
        return

    # =========================================================================
    # STEP 5: PULL AI MODELS
    # =========================================================================
    print("\nüì• [5/7] Pulling AI models (this takes 10-20 minutes)...")
    print("        Each model is 4-5 GB. Please be patient.\n")
    print("        ‚≠ê KAGGLE BONUS: Models save to /kaggle/working/.ollama")
    print("           Download once, reuse in future sessions!\n")

    MODELS = [
        ("deepseek-r1:8b", "Reasoning & Logic"),
        ("qwen2.5-coder:7b", "Code Generation"),
        ("dolphin-mistral", "Uncensored/Creative"),
    ]
    ready_models = []  # (model, description) pairs whose pull succeeded

    for i, (model, description) in enumerate(MODELS, 1):
        print(f"   [{i}/{len(MODELS)}] Pulling {model} ({description})...")
        try:
            result = subprocess.run(
                ["ollama", "pull", model],
                capture_output=True, text=True, timeout=1800,
                env=env
            )
        except subprocess.TimeoutExpired:
            # One slow download should not abort the whole deployment.
            print(f"   ‚ö† {model} may have issues: pull timed out after 1800s")
            continue
        if result.returncode == 0:
            print(f"   ‚úì {model} ready")
            ready_models.append((model, description))
        else:
            # Report the last line of stderr rather than its first characters,
            # which may be progress output instead of the error.
            print(f"   ‚ö† {model} may have issues: {_last_line(result.stderr, 100)}")

    # =========================================================================
    # STEP 6: START OPEN WEBUI
    # =========================================================================
    print("\nüåê [6/7] Starting Open WebUI...")

    # Kill any existing processes on port 8080
    subprocess.run(["fuser", "-k", "8080/tcp"], capture_output=True)
    time.sleep(2)

    import secrets

    # KAGGLE: keep WebUI data under the notebook's working directory
    WEBUI_DATA_DIR = "/kaggle/working/webui_data"
    os.makedirs(WEBUI_DATA_DIR, exist_ok=True)

    # SECURITY NOTE(review): WEBUI_AUTH is "False" and the UI is published
    # through a public tunnel in step 7, so anyone holding the URL can use it.
    webui_env = os.environ.copy()
    webui_env.update({
        "WEBUI_SECRET_KEY": secrets.token_urlsafe(32),
        "WEBUI_AUTH": "False",
        "CORS_ALLOW_ORIGIN": "*",
        "OLLAMA_BASE_URL": "http://localhost:11434",
        "ENABLE_SIGNUP": "False",
        "DATA_DIR": WEBUI_DATA_DIR,
    })

    webui_proc = subprocess.Popen(
        ["open-webui", "serve", "--host", "0.0.0.0", "--port", "8080"],
        env=webui_env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        start_new_session=True
    )

    print(f"   ‚úì Started (PID: {webui_proc.pid})")
    print("   ‚Üí Waiting for database migrations...")

    # Wait for WebUI to be ready
    for i in range(90):
        # Check if process died
        if webui_proc.poll() is not None:
            print("   ‚úó Open WebUI crashed!")
            return

        try:
            r = requests.get("http://localhost:8080", timeout=2)
            if r.status_code in [200, 302, 307]:
                print("   ‚úì Open WebUI ready!")
                break
        except requests.RequestException:
            # Not listening yet; keep polling.
            pass

        if i % 15 == 0 and i > 0:
            print(f"     ... still initializing ({i*2}s)")
        time.sleep(2)
    else:
        print("   ‚úó Open WebUI timeout")
        return

    # =========================================================================
    # STEP 7: CREATE PUBLIC TUNNEL
    # =========================================================================
    print("\nüåç [7/7] Creating public tunnel...")

    # Imported here because pyngrok is only installed in step 2.
    from pyngrok import ngrok, conf

    conf.get_default().auth_token = NGROK_TOKEN
    ngrok.kill()  # Kill any existing tunnels
    time.sleep(2)

    try:
        # CRITICAL: host_header="rewrite" fixes 404 errors
        tunnel = ngrok.connect(
            8080,
            "http",
            bind_tls=True,
            host_header="rewrite"
        )
        public_url = tunnel.public_url
        print("   ‚úì Tunnel established!")
    except Exception as e:
        print(f"   ‚úó Tunnel failed: {e}")
        return

    # =========================================================================
    # SUCCESS!
    # =========================================================================
    print("\n" + "=" * 65)
    print("  üéâ SOVEREIGN HYBRID AI SYSTEM IS ONLINE!")
    print("=" * 65)
    print()
    print(f"  üîó ACCESS URL: {public_url}")
    print()
    print("  üß† AVAILABLE MODELS:")
    # List only the models whose pull succeeded (same layout as before).
    for model, description in ready_models:
        print(f"     ‚Ä¢ {model:<18} ‚Üí {description}")
    if not ready_models:
        print("     (none - every model pull reported a problem, see step 5)")
    print()
    print("  ‚≠ê KAGGLE ADVANTAGES:")
    print("     ‚Ä¢ Models saved to /kaggle/working/.ollama (persistent!)")
    print("     ‚Ä¢ Data saved to /kaggle/working/webui_data")
    print("     ‚Ä¢ More stable than Colab - less frequent disconnections")
    print()
    print("  ‚ö†Ô∏è  IMPORTANT:")
    print("     ‚Ä¢ Keep this cell running to maintain access")
    print("     ‚Ä¢ Session lasts much longer than Colab")
    print("     ‚Ä¢ Bookmark the URL for this session")
    print()
    print("=" * 65)

    # Keep the script running to maintain the tunnel
    try:
        while True:
            time.sleep(60)
            # Health check: only network errors are treated as a failed check,
            # so an interrupt arriving during the request still reaches the
            # KeyboardInterrupt handler below.
            try:
                requests.get("http://localhost:8080", timeout=5)
            except requests.RequestException:
                print("‚ö† WebUI health check failed")
    except KeyboardInterrupt:
        print("\nüõë Shutting down...")
        ngrok.kill()
        print("   ‚úì Tunnel closed")
        # Stop the servers this run started so they do not linger after
        # "Shutdown complete" is printed.
        for proc in (webui_proc, ollama_proc):
            if proc.poll() is None:
                proc.terminate()
        print("   ‚úì Shutdown complete")

# Run the deployment when this cell/script is executed directly; main()
# blocks in its keep-alive loop until interrupted or until a step fails.
if __name__ == "__main__":
    main()


  SOVEREIGN HYBRID AI - KAGGLE DEPLOYMENT v1.0
  Estimated time: 15-25 minutes

üîê Enter your Ngrok auth token:
   (Get it from: https://dashboard.ngrok.com/get-started/your-authtoken)


   Token:  ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑



-----------------------------------------------------------------

‚öôÔ∏è [1/7] Installing system dependencies...
   ‚úì zstd installed
   ‚úì System dependencies ready

üì¶ [2/7] Installing Python packages (3-5 minutes)...
   ‚úì pip upgraded
   ‚úì pyngrok installed
   ‚úì Removed potential conflicting packages
   ‚úì open-webui installed
   ‚úì alembic fixed
   ‚úì transformers fixed
   ‚úì srsly fixed

ü¶ô [3/7] Installing Ollama...
   ‚úì Ollama installed

üöÄ [4/7] Starting Ollama server...
   ‚úì Ollama server running

üì• [5/7] Pulling AI models (this takes 10-20 minutes)...
        Each model is 4-5 GB. Please be patient.

        ‚≠ê KAGGLE BONUS: Models save to /kaggle/working/.ollama
           Download once, reuse in future sessions!

   [1/3] Pulling deepseek-r1:8b (Reasoning & Logic)...
   ‚úì deepseek-r1:8b ready
   [2/3] Pulling qwen2.5-coder:7b (Code Generation)...
   ‚úì qwen2.5-coder:7b ready
   [3/3] Pulling dolphin-mistral (Uncensored/Creative)...
   ‚úì dolp

In [None]:
Note:
This notebook serves as an execution log for empirical validation.
The full reference deployment script is maintained in the public GitHub
repository and represents the authoritative reproducible implementation.
