# Agentic Framework — Fully Automatic Google Colab Deployment

**One-click deployment**: Just click **Runtime → Run all** (or `Ctrl+F9`) and everything will start automatically.

### What this does
1. Checks for a GPU (T4/A100/H100) and reports system resources
2. Installs system dependencies (PostgreSQL, Redis, Node.js 22, MinIO)
3. Installs Ollama + pulls DeepSeek R1 14B (GPU-accelerated)
4. Clones the repo and installs Python packages
5. Starts all infrastructure (PostgreSQL, Redis, ChromaDB, MinIO)
6. Starts all 5 microservices + dashboard
7. Creates ngrok tunnels for external access
8. Runs health checks
9. Keeps the session alive so Colab doesn't disconnect

### Prerequisites
- Google Colab **Pro** account (for GPU access)
- Runtime set to **GPU** (Runtime → Change runtime type → T4/A100/H100)

---

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  CONFIGURATION — Edit these before running                  ║
# ╚══════════════════════════════════════════════════════════════╝

# GitHub repo to clone
REPO_URL = "https://github.com/landonking-gif/ai_final.git"

# (Optional) ngrok auth token; when non-empty it is registered with ngrok
# before the tunnels are created.
# Get one free at https://dashboard.ngrok.com/signup
# NOTE: an empty token only skips the registration step. Tunnels are still
# attempted unless ENABLE_NGROK is set to False.
NGROK_AUTH_TOKEN = ""  # Leave empty to skip setting a token

# LLM models to use (both are pulled via Ollama)
PRIMARY_MODEL = "deepseek-r1:14b"
FALLBACK_MODEL = "llama3.2:3b"

# Whether to start the React dashboard (adds ~30s startup)
START_DASHBOARD = True

# Whether to create ngrok tunnels for external access
ENABLE_NGROK = True

print("Configuration loaded. Running full deployment...")

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  PHASE 1: System Check & Dependencies                      ║
# ╚══════════════════════════════════════════════════════════════╝
import subprocess, os, sys, shutil, time

def run_cmd(cmd, desc="", check=False):
    """Run a shell command, optionally printing a one-line status.

    Args:
        cmd: Command string; it is executed through the shell.
        desc: Short label printed as ``[desc] OK`` or ``[desc] WARN (...)``.
            When empty, nothing is printed.
        check: When true, raise ``RuntimeError`` on a non-zero exit code.

    Returns:
        The ``subprocess.CompletedProcess`` with captured text stdout/stderr.

    Raises:
        RuntimeError: If ``check`` is true and the command exits non-zero.
    """
    if desc:
        print(f"  [{desc}]", end=" ", flush=True)
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    succeeded = result.returncode == 0
    if desc:
        if succeeded:
            print("OK")
        else:
            # Collapse whitespace so multi-line stderr stays on the status
            # line, and fall back to the exit code when stderr is empty
            # (e.g. because the command redirected it away).
            detail = " ".join(result.stderr.split())[:120]
            print(f"WARN ({detail or f'exit code {result.returncode}'})")
    if check and not succeeded:
        # Identify the failing step by its command when no label was given.
        label = desc or cmd
        raise RuntimeError(f"{label} failed: {result.stderr[:300]}")
    return result

print("=" * 60)
print("PHASE 1: SYSTEM CHECK & DEPENDENCY INSTALL")
print("=" * 60)

# --- GPU Check ---
# With an argument list (no shell), subprocess.run raises an OSError subclass
# (FileNotFoundError) when the nvidia-smi binary is missing, rather than
# returning a non-zero exit code. Treat that the same as "no GPU" so the
# hint below is printed instead of the cell crashing.
try:
    gpu_check = subprocess.run(
        ["nvidia-smi", "--query-gpu=name,memory.total,driver_version", "--format=csv,noheader"],
        capture_output=True, text=True
    )
except OSError:
    gpu_check = None

gpu_found = gpu_check is not None and gpu_check.returncode == 0
if gpu_found:
    print(f"  [GPU] {gpu_check.stdout.strip()}")
else:
    print("  [GPU] No GPU detected — LLM inference will be slow on CPU!")
    print("         Go to Runtime > Change runtime type > GPU")

# --- RAM & Disk ---
# Report total RAM (only when psutil can be imported), free disk space on "/"
# and the running Python version.
try:
    import psutil
except ImportError:
    pass
else:
    ram_gb = psutil.virtual_memory().total / 2**30
    print(f"  [RAM] {ram_gb:.1f} GB")

disk = shutil.disk_usage("/")
print(f"  [Disk] {disk.free / 2**30:.1f} GB free")
print(f"  [Python] {sys.version.split(maxsplit=1)[0]}")

# --- Install System Dependencies ---
# Each step is a (shell command, status label) pair, run in order through
# run_cmd. Failures are reported as WARN but do not stop the cell.
print("\n  Installing system packages...")
for shell_cmd, label in (
    ("apt-get update -qq 2>/dev/null", "apt update"),
    ("apt-get install -y -qq postgresql postgresql-client redis-server build-essential libpq-dev > /dev/null 2>&1",
     "PostgreSQL + Redis + build tools"),
    # Node.js 22
    ("curl -fsSL https://deb.nodesource.com/setup_22.x | bash - > /dev/null 2>&1", "Node.js 22 repo"),
    ("apt-get install -y -qq nodejs > /dev/null 2>&1", "Node.js 22"),
    # MinIO binary
    ("wget -q https://dl.min.io/server/minio/release/linux-amd64/minio -O /usr/local/bin/minio && chmod +x /usr/local/bin/minio",
     "MinIO"),
):
    run_cmd(shell_cmd, label)

# Show which Node.js version ended up on the PATH.
node_ver = subprocess.run("node --version", shell=True, capture_output=True, text=True)
print(f"  [Node.js] {node_ver.stdout.strip()}")
print("\n  Phase 1 complete.")
print("=" * 60)

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  PHASE 2: Ollama + LLM Models (GPU-Accelerated)            ║
# ╚══════════════════════════════════════════════════════════════╝
import subprocess, os, time

print("=" * 60)
print("PHASE 2: OLLAMA + LLM MODEL SETUP")
print("=" * 60)

# Install Ollama through its installer script; a failure is reported but is
# not fatal to the cell.
print("  Installing Ollama...", end=" ", flush=True)
result = subprocess.run(
    "curl -fsSL https://ollama.com/install.sh | sh",
    shell=True,
    capture_output=True,
    text=True,
)
if result.returncode == 0:
    print("OK")
else:
    print(f"WARN: {result.stderr[:200]}")

# Launch the Ollama server as a background process, logging to /tmp/ollama.log.
print("  Starting Ollama server...", end=" ", flush=True)
os.environ["OLLAMA_HOST"] = "0.0.0.0:11434"
ollama_env = dict(os.environ)
ollama_env["OLLAMA_HOST"] = "0.0.0.0:11434"
subprocess.Popen(
    ["ollama", "serve"],
    stdout=open("/tmp/ollama.log", "w"),
    stderr=subprocess.STDOUT,
    env=ollama_env,
)
time.sleep(5)  # fixed grace period before the first pull
print("OK")

# Pull the primary model first, then the fallback; progress streams to the cell.
for model_name, note in (
    (PRIMARY_MODEL, " (this may take 2-8 min)"),
    (FALLBACK_MODEL, ""),
):
    print(f"  Pulling {model_name}{note}...")
    subprocess.run(["ollama", "pull", model_name], capture_output=False, text=True)

# List what is installed so the pulls can be confirmed by eye.
print("\n  Available models:")
subprocess.run(["ollama", "list"], capture_output=False, text=True)

print("\n  Phase 2 complete.")
print("=" * 60)

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  PHASE 3: Clone Repo + Install Python Packages             ║
# ╚══════════════════════════════════════════════════════════════╝
import subprocess, os, sys

print("=" * 60)
print("PHASE 3: REPO CLONE & PYTHON DEPENDENCIES")
print("=" * 60)

INSTALL_DIR = "/content/ai_final"
FRAMEWORK_DIR = INSTALL_DIR + "/agentic-framework-main"

# Clone on first run; on later runs update the existing checkout in place.
if not os.path.exists(INSTALL_DIR):
    print(f"  Cloning {REPO_URL}...")
    git_args = ["git", "clone", REPO_URL, INSTALL_DIR]
else:
    print("  Repo exists — pulling latest...")
    git_args = ["git", "-C", INSTALL_DIR, "pull"]
subprocess.run(git_args, capture_output=False, text=True)

# The rest of this cell uses paths relative to the framework directory.
os.chdir(FRAMEWORK_DIR)

# Expose each hyphenated service directory under an underscored alias so it
# can be used in Python imports. Keys are the alias names, values the real
# directories (both relative to the current working directory).
symlinks = {
    "memory_service": "memory-service",
    "subagent_manager": "subagent-manager",
    "mcp_gateway": "mcp-gateway",
    "code_exec": "code-exec",
}
for alias, real_dir in symlinks.items():
    # Skip aliases that already exist and targets that are missing.
    if os.path.exists(alias) or not os.path.exists(real_dir):
        continue
    os.symlink(real_dir, alias)
    print(f"  Symlink: {alias} -> {real_dir}")

# Install Python dependencies: the framework's requirements file first, then
# the extra packages this notebook itself relies on in Colab.
print("\n  Installing Python packages (2-3 min)...")
pip_install = [sys.executable, "-m", "pip", "install", "-q"]
for pip_args in (
    ["-r", f"{FRAMEWORK_DIR}/requirements.txt"],
    ["pyngrok", "asyncpg", "aiofiles", "psutil"],
):
    subprocess.run(pip_install + pip_args, capture_output=False, text=True)

# Install OpenClaw globally through npm (output is captured, not shown).
print("  Installing OpenClaw...")
subprocess.run(
    ["npm", "install", "-g", "openclaw@latest"],
    capture_output=True,
    text=True,
)

# Make the framework importable both in this kernel (sys.path) and in any
# child process that inherits the environment (PYTHONPATH).
if FRAMEWORK_DIR not in sys.path:
    sys.path.insert(0, FRAMEWORK_DIR)
os.environ["PYTHONPATH"] = FRAMEWORK_DIR

print(f"\n  Framework directory: {FRAMEWORK_DIR}")
print("  Phase 3 complete.")
print("=" * 60)

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  PHASE 4: Start Infrastructure + All Services               ║
# ╚══════════════════════════════════════════════════════════════╝
import subprocess, os, sys, time, urllib.request, json

FRAMEWORK_DIR = "/content/ai_final/agentic-framework-main"
os.chdir(FRAMEWORK_DIR)

print("=" * 60)
print("PHASE 4: INFRASTRUCTURE & SERVICES")
print("=" * 60)

# ──────────── Infrastructure ────────────
print("\n── Infrastructure ──")

# PostgreSQL: start the system service, then create the role and database.
# Output of the SQL statements is captured and their exit codes are ignored.
print("  Starting PostgreSQL...", end=" ", flush=True)
subprocess.run("service postgresql start", shell=True, capture_output=True)
time.sleep(2)
psql = ["sudo", "-u", "postgres", "psql", "-c"]
bootstrap_sql = (
    "CREATE USER agent_user WITH PASSWORD 'agent_pass' CREATEDB;",
    "CREATE DATABASE agentic_framework OWNER agent_user;",
    "GRANT ALL PRIVILEGES ON DATABASE agentic_framework TO agent_user;",
)
for statement in bootstrap_sql:
    subprocess.run(psql + [statement], capture_output=True, text=True)
# A trivial query decides whether the server counts as up.
pg = subprocess.run(psql + ["SELECT 1;"], capture_output=True, text=True)
if pg.returncode == 0:
    print("OK")
else:
    print("FAIL")

# Redis: start as a daemon on port 6379 and confirm with a PING.
print("  Starting Redis...", end=" ", flush=True)
subprocess.run("redis-server --daemonize yes --port 6379", shell=True, capture_output=True)
time.sleep(1)
redis_ok = subprocess.run("redis-cli ping", shell=True, capture_output=True, text=True)
if "PONG" in redis_ok.stdout:
    print("OK")
else:
    print("FAIL")

# ChromaDB: background process on port 8001 with data under /tmp/chroma_data.
# "OK" is printed after a fixed wait; the process is not probed.
print("  Starting ChromaDB...", end=" ", flush=True)
os.makedirs("/tmp/chroma_data", exist_ok=True)
chroma_cmd = [
    "chroma", "run",
    "--host", "0.0.0.0",
    "--port", "8001",
    "--path", "/tmp/chroma_data",
]
subprocess.Popen(
    chroma_cmd,
    stdout=open("/tmp/chroma.log", "w"),
    stderr=subprocess.STDOUT,
)
time.sleep(3)
print("OK")

# MinIO: background process with the API on :9000 and the console on :9001,
# using the root credentials set in its environment below.
print("  Starting MinIO...", end=" ", flush=True)
os.makedirs("/tmp/minio_data", exist_ok=True)
minio_cmd = [
    "/usr/local/bin/minio", "server", "/tmp/minio_data",
    "--address", ":9000",
    "--console-address", ":9001",
]
subprocess.Popen(
    minio_cmd,
    stdout=open("/tmp/minio.log", "w"),
    stderr=subprocess.STDOUT,
    env={**os.environ, "MINIO_ROOT_USER": "minioadmin", "MINIO_ROOT_PASSWORD": "minioadmin"},
)
time.sleep(2)
print("OK")

# ──────────── Environment Variables ────────────
# Shared settings for every service. They are exported into this process's
# environment (inherited by child processes) and mirrored into a .env file
# in the framework directory.
env_vars = {
    "POSTGRES_URL": "postgresql://agent_user:agent_pass@localhost:5432/agentic_framework",
    "REDIS_URL": "redis://localhost:6379/0",
    "MCP_GATEWAY_URL": "http://localhost:8080",
    "MEMORY_SERVICE_URL": "http://localhost:8002",
    "SUBAGENT_MANAGER_URL": "http://localhost:8003",
    "CODE_EXECUTOR_URL": "http://localhost:8004",
    "OLLAMA_ENDPOINT": "http://localhost:11434",
    "OLLAMA_BASE_URL": "http://localhost:11434",
    "LOCAL_MODEL": PRIMARY_MODEL,
    "FALLBACK_MODEL": FALLBACK_MODEL,
    "DEFAULT_LLM_PROVIDER": "ollama",
    "LLM_PROVIDER": "ollama",
    "USE_OPENCLAW": "false",
    "CHROMA_URL": "http://localhost:8001",
    "MINIO_ENDPOINT": "localhost:9000",
    "MINIO_ACCESS_KEY": "minioadmin",
    "MINIO_SECRET_KEY": "minioadmin",
    "JWT_SECRET_KEY": "colab-dev-secret-key-change-in-production",
    "ENVIRONMENT": "development",
    "PYTHONPATH": FRAMEWORK_DIR,
    "WORKSPACE_ROOT": f"{FRAMEWORK_DIR}/workspace",
    "WEBSOCKET_ENABLED": "true",
    "INDEX_CODEBASE": "true",
}
os.environ.update(env_vars)

# One KEY=value line per setting, in dict order.
with open(f"{FRAMEWORK_DIR}/.env", "w") as env_file:
    env_file.writelines(f"{key}={value}\n" for key, value in env_vars.items())

# Create workspace dirs
workspace_subdirs = (
    "workspace/.copilot/memory/diary",
    "workspace/.copilot/memory/reflections",
    "workspace/ralph-work",
)
for subdir in workspace_subdirs:
    os.makedirs(f"{FRAMEWORK_DIR}/{subdir}", exist_ok=True)

print("  Environment configured.")

# ──────────── Start Microservices ────────────
print("\n── Microservices ──")

# Snapshot of the current environment; each service layers its own "env"
# overrides on top of it.
service_env = {**os.environ}

# One entry per uvicorn service: display name, ASGI app path, port, log file
# and per-service environment overrides. Services start in list order.
services = [
    {
        "name": "MCP Gateway",
        "module": "mcp_gateway.service.main:app",
        "port": 8080,
        "log": "/tmp/mcp_gateway.log",
        "env": {"REDIS_URL": "redis://localhost:6379/3"},
    },
    {
        "name": "Memory Service",
        "module": "memory_service.service.main:app",
        "port": 8002,
        "log": "/tmp/memory_service.log",
        "env": {"REDIS_URL": "redis://localhost:6379/2"},
    },
    {
        "name": "SubAgent Manager",
        "module": "subagent_manager.service.main:app",
        "port": 8003,
        "log": "/tmp/subagent_manager.log",
        "env": {
            "REDIS_URL": "redis://localhost:6379/1",
            "SUBAGENT_USE_OPENCLAW": "false",
            "SUBAGENT_LLM_PROVIDER": "ollama",
            "SUBAGENT_LLM_MODEL": PRIMARY_MODEL,
        },
    },
    {
        "name": "Code Executor",
        "module": "code_exec.service.main:app",
        "port": 8004,
        "log": "/tmp/code_exec.log",
        "env": {"REDIS_URL": "redis://localhost:6379/4"},
    },
    {
        "name": "Orchestrator",
        "module": "orchestrator.service.main:app",
        "port": 8000,
        "log": "/tmp/orchestrator.log",
        "env": {},
    },
]

# Maps service name -> PID of the launched uvicorn process.
started = {}
for svc in services:
    print(f"  Starting {svc['name']} (:{svc['port']})...", end=" ", flush=True)
    svc_env = {**service_env, **svc["env"]}
    # The child process receives its own copy of the log descriptor, so the
    # parent's handle is closed as soon as the process has been spawned
    # rather than being left for the garbage collector.
    with open(svc["log"], "w") as log_file:
        proc = subprocess.Popen(
            [sys.executable, "-m", "uvicorn", svc["module"],
             "--host", "0.0.0.0", "--port", str(svc["port"])],
            cwd=FRAMEWORK_DIR,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            env=svc_env
        )
    started[svc["name"]] = proc.pid
    time.sleep(3)
    # poll() returns None while the process is running; an exit code here
    # means the service died during startup (import error, port in use, ...),
    # so report that instead of an unconditional "OK".
    exit_code = proc.poll()
    if exit_code is None:
        print(f"OK (PID {proc.pid})")
    else:
        print(f"FAIL (exited with code {exit_code}; see {svc['log']})")

# ──────────── Dashboard ────────────
# Prefer serving a pre-built bundle; otherwise install dependencies and run
# the dev server; otherwise skip. Both variants use port 3000 and log to
# /tmp/dashboard.log.
if START_DASHBOARD:
    print("\n── Dashboard ──")
    dashboard_dir = f"{FRAMEWORK_DIR}/dashboard"

    launch = None
    if os.path.exists(f"{dashboard_dir}/build"):
        # Serve the pre-built dashboard
        launch = {
            "banner": "  Serving pre-built dashboard (port 3000)...",
            "install_first": False,
            "cmd": ["npx", "serve", "-s", "build", "-l", "3000"],
            "extra_env": {"PORT": "3000"},
            "settle": 3,
        }
    elif os.path.exists(f"{dashboard_dir}/package.json"):
        launch = {
            "banner": "  Installing dashboard deps & starting (port 3000)...",
            "install_first": True,
            "cmd": ["npm", "start"],
            "extra_env": {"PORT": "3000", "BROWSER": "none"},
            "settle": 5,
        }

    if launch is None:
        print("  Dashboard not found — skipping.")
    else:
        print(launch["banner"], end=" ", flush=True)
        if launch["install_first"]:
            subprocess.run(["npm", "install"], cwd=dashboard_dir, capture_output=True)
        subprocess.Popen(
            launch["cmd"],
            cwd=dashboard_dir,
            stdout=open("/tmp/dashboard.log", "w"),
            stderr=subprocess.STDOUT,
            env={**os.environ, **launch["extra_env"]},
        )
        time.sleep(launch["settle"])
        print("OK")

# ──────────── Wait & Health Check ────────────
print("\n  Waiting 15s for services to initialize...")
time.sleep(15)

print("\n── Health Checks ──")
# (display name, URL) pairs: the five services answer on /health, while
# Ollama is probed through its /api/tags endpoint.
endpoints = [
    (label, f"http://localhost:{port}/health")
    for label, port in (
        ("Orchestrator", 8000),
        ("Memory Service", 8002),
        ("SubAgent Manager", 8003),
        ("MCP Gateway", 8080),
        ("Code Executor", 8004),
    )
]
endpoints.append(("Ollama", "http://localhost:11434/api/tags"))

# Any failed request marks the overall result as not-yet-healthy; failures
# are shown as STARTING because services may still be booting.
all_ok = True
for svc_name, health_url in endpoints:
    try:
        status = urllib.request.urlopen(health_url, timeout=5).getcode()
    except Exception as exc:
        all_ok = False
        print(f"  {svc_name:20s} : STARTING ({str(exc)[:50]})")
    else:
        print(f"  {svc_name:20s} : OK ({status})")

if not all_ok:
    print("\n  Some services still starting. They should be ready in ~30s.")
else:
    print("\n  ALL SERVICES HEALTHY")

print("\n  Phase 4 complete.")
print("=" * 60)

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  PHASE 5: External Access (ngrok Tunnels)                   ║
# ╚══════════════════════════════════════════════════════════════╝
import os

print("=" * 60)
print("PHASE 5: EXTERNAL ACCESS")
print("=" * 60)

# Local defaults; replaced by public URLs only for tunnels that come up.
api_url = "http://localhost:8000"  # default fallback
dashboard_url = "http://localhost:3000"
api_tunnel = None
dash_tunnel = None


def _open_tunnel(ngrok_module, port, label):
    """Open an HTTP ngrok tunnel to a local port.

    Args:
        ngrok_module: The imported ``pyngrok.ngrok`` module.
        port: Local port to expose.
        label: Human-readable service name used in status messages.

    Returns:
        The tunnel object, or ``None`` (after printing a warning) when the
        tunnel cannot be created. Failures are reported rather than raised
        so that one failed tunnel does not abort the notebook's "Run all".
    """
    print(f"  Creating tunnel for {label} (port {port})...")
    try:
        return ngrok_module.connect(port, "http")
    except Exception as exc:
        print(f"  WARN: {label} tunnel failed — {str(exc)[:200]}")
        return None


if ENABLE_NGROK:
    try:
        from pyngrok import ngrok, conf
    except ImportError as exc:
        ngrok = None
        print(f"  WARN: pyngrok is not available — {exc}")

    if ngrok is not None:
        if NGROK_AUTH_TOKEN:
            try:
                ngrok.set_auth_token(NGROK_AUTH_TOKEN)
                print("  ngrok auth token set (stable URLs enabled)")
            except Exception as exc:
                print(f"  WARN: could not set ngrok auth token — {str(exc)[:200]}")

        # API tunnel
        api_tunnel = _open_tunnel(ngrok, 8000, "Orchestrator API")
        if api_tunnel is not None:
            api_url = api_tunnel.public_url

        # Dashboard tunnel (if running)
        if START_DASHBOARD:
            dash_tunnel = _open_tunnel(ngrok, 3000, "Dashboard")
            if dash_tunnel is not None:
                dashboard_url = dash_tunnel.public_url

    # Exported with whatever URLs are in effect (public, or local fallback).
    os.environ["COLAB_API_URL"] = api_url
    os.environ["COLAB_DASHBOARD_URL"] = dashboard_url

    if api_tunnel is not None or dash_tunnel is not None:
        # Swap only the leading scheme (http -> ws, https -> wss); a plain
        # replace() would also rewrite any later "http" inside the hostname.
        ws_url = api_url.replace('http', 'ws', 1) + '/ws'
        print("")
        print("╔══════════════════════════════════════════════════════════╗")
        print("║  PUBLIC ACCESS URLS (share these!)                      ║")
        print("╠══════════════════════════════════════════════════════════╣")
        print(f"║  API:        {api_url:<43s}║")
        print(f"║  API Docs:   {api_url + '/docs':<43s}║")
        print(f"║  Health:     {api_url + '/health':<43s}║")
        print(f"║  WebSocket:  {ws_url:<43s}║")
        if START_DASHBOARD:
            print(f"║  Dashboard:  {dashboard_url:<43s}║")
        print("╚══════════════════════════════════════════════════════════╝")
    else:
        print("  No ngrok tunnel could be created. Services available at localhost only:")
else:
    print("  ngrok disabled. Services available at localhost only:")

print("")
print("  Local endpoints (inside Colab):")
print("    Orchestrator:    http://localhost:8000")
print("    Memory Service:  http://localhost:8002")
print("    SubAgent Mgr:    http://localhost:8003")
print("    MCP Gateway:     http://localhost:8080")
print("    Code Executor:   http://localhost:8004")
print("    Ollama LLM:      http://localhost:11434")
if START_DASHBOARD:
    print("    Dashboard:       http://localhost:3000")

print("\n  Phase 5 complete.")
print("=" * 60)

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  PHASE 6: Quick Smoke Test                                  ║
# ╚══════════════════════════════════════════════════════════════╝
import json, urllib.request, time

print("=" * 60)
print("PHASE 6: SMOKE TEST")
print("=" * 60)

# Running tally of smoke-test results, updated by check().
passed = 0
total = 0

def check(name, url):
    """Request ``url`` once and record the outcome in the global tally.

    Increments ``total`` on every call and ``passed`` when the request
    completes without raising. Prints a ``[PASS]`` or ``[FAIL]`` line; on
    failure the first 60 characters of the error are included.

    Args:
        name: Label shown in the printed result line.
        url: URL to open (10-second timeout).
    """
    global passed, total
    total += 1
    try:
        # Only reachability matters, so the response body is not read; the
        # context manager closes the connection right away.
        with urllib.request.urlopen(url, timeout=10):
            pass
    except Exception as e:
        print(f"  [FAIL] {name} — {str(e)[:60]}")
    else:
        passed += 1
        print(f"  [PASS] {name}")

# Probe every service endpoint once; check() prints and tallies each result.
for label, health_url in (
    ("Orchestrator API", "http://localhost:8000/health"),
    ("Memory Service", "http://localhost:8002/health"),
    ("SubAgent Manager", "http://localhost:8003/health"),
    ("MCP Gateway", "http://localhost:8080/health"),
    ("Code Executor", "http://localhost:8004/health"),
    ("Ollama LLM", "http://localhost:11434/api/tags"),
):
    check(label, health_url)

# Test LLM inference: one non-streaming generate call against Ollama, timed
# end to end and counted in the same tally as the checks above.
total += 1
print("\n  Testing LLM inference (GPU)...", end=" ", flush=True)
try:
    t0 = time.time()
    payload = {
        "model": PRIMARY_MODEL,
        "prompt": "What is 2+2? Answer in one word.",
        "stream": False,
    }
    req = urllib.request.Request(
        "http://localhost:11434/api/generate",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    resp = urllib.request.urlopen(req, timeout=120)
    result = json.loads(resp.read().decode())
    elapsed = time.time() - t0
    passed += 1
    print(f"OK ({elapsed:.1f}s)")
    print(f"    Response: {result.get('response', '???')[:100]}")
except Exception as exc:
    print(f"FAIL — {str(exc)[:80]}")

print(f"\n  Results: {passed}/{total} passed")
if passed != total:
    print("  Some services may still be initializing. Wait 30s and re-run this cell.")
else:
    print("  ALL SYSTEMS GO!")
print("=" * 60)

In [None]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  PHASE 7: Keep-Alive (prevents Colab from disconnecting)    ║
# ╚══════════════════════════════════════════════════════════════╝
#
# This cell runs a background loop that:
#  1. Pings all services every 60 seconds
#  2. Auto-restarts any crashed service
#  3. Prints a status update every 5 minutes
#  4. Keeps the Colab runtime alive
#
# Stop it with: Runtime > Interrupt execution (or Ctrl+M I)
#
import subprocess, os, sys, time, urllib.request, json, signal
from datetime import datetime

FRAMEWORK_DIR = "/content/ai_final/agentic-framework-main"

# Definitions the watchdog uses to relaunch a service that stops answering.
# Each "env" is layered over os.environ at restart time, so it has to carry
# every per-service override used at first launch. The SubAgent Manager
# therefore includes its SUBAGENT_* settings, which are not exported into
# os.environ; without them a restarted instance would come back configured
# differently from the one it replaces.
service_defs = [
    {"name": "MCP Gateway",      "module": "mcp_gateway.service.main:app",      "port": 8080, "log": "/tmp/mcp_gateway.log",      "env": {"REDIS_URL": "redis://localhost:6379/3"}},
    {"name": "Memory Service",   "module": "memory_service.service.main:app",   "port": 8002, "log": "/tmp/memory_service.log",   "env": {"REDIS_URL": "redis://localhost:6379/2"}},
    {
        "name": "SubAgent Manager",
        "module": "subagent_manager.service.main:app",
        "port": 8003,
        "log": "/tmp/subagent_manager.log",
        "env": {
            "REDIS_URL": "redis://localhost:6379/1",
            "SUBAGENT_USE_OPENCLAW": "false",
            "SUBAGENT_LLM_PROVIDER": "ollama",
            "SUBAGENT_LLM_MODEL": PRIMARY_MODEL,
        },
    },
    {"name": "Code Executor",    "module": "code_exec.service.main:app",        "port": 8004, "log": "/tmp/code_exec.log",        "env": {"REDIS_URL": "redis://localhost:6379/4"}},
    {"name": "Orchestrator",     "module": "orchestrator.service.main:app",     "port": 8000, "log": "/tmp/orchestrator.log",     "env": {}},
]

def is_service_alive(port):
    """Return True if the local service on ``port`` answers its probe URL.

    Port 11434 (Ollama) is probed at ``/api/tags``; every other port at
    ``/health``. Any request error, including a timeout after 5 seconds,
    counts as not alive.

    Args:
        port: Local port number of the service to probe.

    Returns:
        True when the request completes without raising, otherwise False.
    """
    path = "/api/tags" if port == 11434 else "/health"
    try:
        with urllib.request.urlopen(f"http://localhost:{port}{path}", timeout=5):
            return True
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt must
        # propagate so interrupting the cell stops the watchdog instead of
        # being misread as "service down" and triggering a restart.
        return False

def restart_service(svc):
    """Relaunch one uvicorn service and report the outcome.

    Starts a new process for ``svc`` with the current environment plus the
    service's own overrides, appending output to its log file, then waits
    5 seconds and prints either the new PID or the exit code if the process
    has already terminated.

    Args:
        svc: Service definition with "name", "module", "port", "log" and
            "env" keys.
    """
    print(f"    Restarting {svc['name']} on port {svc['port']}...", end=" ", flush=True)
    svc_env = {**os.environ, **svc["env"]}
    # The child gets its own copy of the log descriptor, so the parent's
    # handle is closed as soon as the process is spawned. The watchdog may
    # call this many times over a long session.
    with open(svc["log"], "a") as log_file:
        proc = subprocess.Popen(
            [sys.executable, "-m", "uvicorn", svc["module"],
             "--host", "0.0.0.0", "--port", str(svc["port"])],
            cwd=FRAMEWORK_DIR,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            env=svc_env
        )
    time.sleep(5)
    # poll() returns None while the process is still running.
    exit_code = proc.poll()
    if exit_code is None:
        print(f"PID {proc.pid}")
    else:
        print(f"FAILED (exited with code {exit_code}; see {svc['log']})")

print("=" * 60)
for banner_line in (
    "KEEP-ALIVE WATCHDOG STARTED",
    "  Monitoring services every 60s with auto-restart.",
    "  Status updates every 5 minutes.",
    "  Stop with: Runtime > Interrupt execution",
):
    print(banner_line)
print("=" * 60)

# Runs until the cell is interrupted. Every pass probes each service (and
# Ollama), relaunches whatever does not answer, then sleeps 60 seconds.
cycle = 0
try:
    while True:
        cycle += 1
        restarts = 0

        # Relaunch any service that fails its probe.
        for svc in service_defs:
            if is_service_alive(svc["port"]):
                continue
            restart_service(svc)
            restarts += 1

        # Ollama is not a uvicorn service, so it is relaunched directly.
        if not is_service_alive(11434):
            print("    Restarting Ollama...", end=" ", flush=True)
            ollama_env = {**os.environ, "OLLAMA_HOST": "0.0.0.0:11434"}
            subprocess.Popen(
                ["ollama", "serve"],
                stdout=open("/tmp/ollama.log", "a"),
                stderr=subprocess.STDOUT,
                env=ollama_env,
            )
            time.sleep(5)
            print("OK")
            restarts += 1

        # Every 5th pass, re-probe everything and print a summary line.
        if cycle % 5 == 0:
            now = datetime.now().strftime("%H:%M:%S")
            alive = len([s for s in service_defs if is_service_alive(s["port"])])
            ollama_state = "OK" if is_service_alive(11434) else "DOWN"
            print(f"  [{now}] Services: {alive}/{len(service_defs)} | Ollama: {ollama_state} | Restarts this cycle: {restarts}")

        time.sleep(60)

except KeyboardInterrupt:
    print("\n  Watchdog stopped by user.")

---
## Utility Cells (run manually as needed)

The cells below are optional — run them when you want to interact with the system.

In [None]:
# ── Send a task to the Orchestrator ──
import json, urllib.request

# Prompt sent to the Orchestrator's /chat endpoint; edit to try other tasks.
task = "Write a Python function that calculates the Fibonacci sequence up to n terms, with proper error handling and type hints."

print(f"Task: {task}\n")
payload = {"message": task, "session_id": "colab-auto-001"}
req = urllib.request.Request(
    "http://localhost:8000/chat",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
try:
    # Allow up to 5 minutes for a reply; print at most the first 3000
    # characters of the pretty-printed JSON.
    resp = urllib.request.urlopen(req, timeout=300)
    result = json.loads(resp.read().decode())
    pretty = json.dumps(result, indent=2)
    print(pretty[:3000])
except Exception as exc:
    print(f"Error: {exc}")
    print("Tip: !tail -100 /tmp/orchestrator.log")

In [None]:
# ── View service logs ──
# Prints the tail of /tmp/<SERVICE>.log. Valid SERVICE values:
#   orchestrator, memory_service, subagent_manager, mcp_gateway,
#   code_exec, ollama, chroma, minio, dashboard
SERVICE = "orchestrator"
LINES = 50  # number of trailing lines to show

import subprocess
log_path = f"/tmp/{SERVICE}.log"
print(f"Last {LINES} lines of {SERVICE}:")
print("=" * 60)
subprocess.run(["tail", f"-{LINES}", log_path], capture_output=False)

In [None]:
# ── System resource monitor ──
import subprocess, psutil, shutil

# GPU status straight from nvidia-smi (run through the shell, so a missing
# binary only prints the shell's error message).
print("GPU:")
subprocess.run("nvidia-smi", shell=True)

mem = psutil.virtual_memory()
print(f"\nRAM: {mem.used/1024**3:.1f}/{mem.total/1024**3:.1f} GB ({mem.percent}%)")

disk = shutil.disk_usage("/")
print(f"Disk: {(disk.total-disk.free)/1024**3:.1f}/{disk.total/1024**3:.1f} GB")

# List uvicorn services and Ollama processes.
print("\nRunning services:")
for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
    # psutil stores None for a field it could not read (e.g. access denied),
    # so substitute empty values instead of letting the join/lower calls
    # raise and hiding the failure behind a blanket except.
    cmd = " ".join(proc.info.get('cmdline') or [])
    proc_name = (proc.info.get('name') or '').lower()
    if 'uvicorn' in cmd or 'ollama' in proc_name:
        print(f"  PID {proc.info['pid']}: {cmd[:80]}")

In [None]:
# ── Restart all services ──
import psutil, time

# Kill every uvicorn process whose command line references a service.main
# app; other uvicorn processes are left alone.
print("Stopping all services...")
for proc in psutil.process_iter(['pid', 'cmdline']):
    # psutil stores None for a cmdline it could not read (e.g. access denied).
    cmd = " ".join(proc.info.get('cmdline') or [])
    if 'uvicorn' in cmd and 'service.main' in cmd:
        try:
            proc.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied) as exc:
            # The process may already have exited, or belong to another
            # user; report it and carry on with the rest.
            print(f"  Could not kill PID {proc.info['pid']}: {exc}")
            continue
        print(f"  Killed PID {proc.info['pid']}")

# Short pause after the kills before telling the user to restart.
time.sleep(3)
print("Done. Re-run Phase 4 cell to restart services.")