<a href="https://colab.research.google.com/github/leonlazdev-wq/Gizmo-my-ai-for-google-colab/blob/main/Colab-TextGen-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# oobabooga/text-generation-webui

After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.

* Project page: https://github.com/oobabooga/text-generation-webui
* Gradio server status: https://status.gradio.app/

In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls></audio>

In [None]:
#!/usr/bin/env python3
# ================================================================
# MY-AI-Gizmo • UNIVERSAL LAUNCHER v3.6.0 (Colab + Win11 localhost)
# ================================================================

import os
import sys
import re
import time
import json
import shutil
import subprocess
import threading
from pathlib import Path
from datetime import datetime

# -------- optional colab import --------
# Assume a plain (non-Colab) runtime until the import below proves otherwise.
IN_COLAB = False
colab_drive = None
try:
    from google.colab import drive as colab_drive  # type: ignore
except Exception:
    # Any import problem is treated as "not running inside Colab".
    colab_drive = None
else:
    IN_COLAB = True

# -------- repo config --------
# GitHub coordinates of the repository that clone_repo() fetches.
GITHUB_USER, GITHUB_REPO, GITHUB_BRANCH = (
    "leonlazdev-wq",
    "Gizmo-my-ai-for-google-colab",
    "main",
)

# Populated at runtime by setup_token() and build_urls().
GITHUB_TOKEN = ""
REPO_ZIP = ""
REPO_CLONE_URL = ""

# -------- runtime globals --------
# Set by choose_runtime_mode(); the path globals are filled by setup_paths().
USE_WINDOWS_LOCALHOST = False
WORK_DIR = DRIVE_ROOT = LOG_DIR = MPL_CONFIG_DIR = PUBLIC_URL_FILE = None

HEARTBEAT_INTERVAL = 30  # seconds between "still running" messages in launch()
MAX_RESTARTS = 3         # number of server start attempts made by launch()

# Inference defaults; choose_mode() and choose_model() overwrite them.
USE_GPU = True
GPU_LAYERS = -1
N_CTX = 4096
USE_MODEL = False
MODEL_REPO = MODEL_FILE = ""

# Comma-separated extension names passed to the server via --extensions.
EXTENSIONS = ",".join((
    "gizmo_toolbar",
    "dual_model",
    "google_workspace",
    "learning_center",
    "student_utils",
    "model_hub",
))

# Menu entries: (label, Hugging Face repo, file name, approximate size in GB).
# The final entry has an empty repo, which choose_model() treats as "custom".
MODEL_MENU = [
    (
        "1 TinyLlama-1.1B",
        "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
        0.7,
    ),
    (
        "2 Phi-3-mini-4k",
        "bartowski/Phi-3-mini-4k-instruct-GGUF",
        "Phi-3-mini-4k-instruct-Q4_K_M.gguf",
        2.2,
    ),
    (
        "3 Mistral-7B-v0.3",
        "bartowski/Mistral-7B-v0.3-GGUF",
        "Mistral-7B-v0.3-Q4_K_M.gguf",
        4.4,
    ),
    (
        "4 Qwen2.5-Coder-7B",
        "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
        "qwen2.5-coder-7b-instruct-q4_k_m.gguf",
        4.7,
    ),
    (
        "5 Qwen2.5-Coder-14B",
        "Qwen/Qwen2.5-Coder-14B-Instruct-GGUF",
        "qwen2.5-coder-14b-instruct-q4_k_m.gguf",
        8.9,
    ),
    ("6 Custom", "", "", 0),
]

# Expressions tried in order against each server output line; group 1 is the URL.
URL_PATTERNS = [
    re.compile(expression, re.IGNORECASE)
    for expression in (
        r"Running on public URL:\s*(https?://\S+)",
        r"(https?://[a-zA-Z0-9\-]+\.gradio\.live\S*)",
        r"(https?://[a-zA-Z0-9\-]+\.trycloudflare\.com\S*)",
        r"(https?://[a-zA-Z0-9\-]+\.ngrok\S*)",
    )
]
# A matched URL is accepted only if it contains one of these substrings.
URL_KEYWORDS = ("gradio.live", "trycloudflare.com", "ngrok", "loca.lt")

# ------------------------------------------------------------
# helpers
# ------------------------------------------------------------
def sh(cmd, cwd=None, env=None):
    """Run ``cmd`` through the system shell and return the completed process.

    Output is captured as text; a non-zero exit status does not raise.
    """
    options = {
        "shell": True,
        "cwd": cwd,
        "env": env,
        "capture_output": True,
        "text": True,
    }
    return subprocess.run(cmd, **options)

def run(cmd, cwd=None, env=None):
    """Run ``cmd`` without a shell and return the completed process.

    Output is captured as text; a non-zero exit status does not raise.
    """
    completed = subprocess.run(
        cmd,
        cwd=cwd,
        env=env,
        capture_output=True,
        text=True,
    )
    return completed

def get_free_ram_gb():
    """Return the ``MemAvailable`` value of /proc/meminfo divided by 1024 twice.

    Returns 0.0 when the file cannot be read, parsed, or has no such entry.
    """
    try:
        with open("/proc/meminfo", "r", encoding="utf-8") as meminfo:
            amount = next(
                (int(row.split()[1]) for row in meminfo if row.startswith("MemAvailable")),
                None,
            )
    except Exception:
        return 0.0
    if amount is None:
        return 0.0
    return amount / 1024 / 1024

def get_total_ram_gb():
    """Return the ``MemTotal`` value of /proc/meminfo divided by 1024 twice.

    Returns 0.0 when the file cannot be read, parsed, or has no such entry.
    """
    try:
        with open("/proc/meminfo", "r", encoding="utf-8") as meminfo:
            amount = next(
                (int(row.split()[1]) for row in meminfo if row.startswith("MemTotal")),
                None,
            )
    except Exception:
        return 0.0
    if amount is None:
        return 0.0
    return amount / 1024 / 1024

def auto_thread_count():
    """Return a worker-thread count: CPU count minus one, clamped to 1..4.

    Falls back to 2 when the CPU count cannot be determined.
    """
    try:
        import multiprocessing
        spare = multiprocessing.cpu_count() - 1
    except Exception:
        return 2
    if spare < 1:
        return 1
    return 4 if spare > 4 else spare

def auto_ctx_size(model_gb):
    """Pick a context size from the RAM left after loading the model.

    The headroom is the free RAM minus ``model_gb`` minus a 0.5 reserve;
    larger headroom allows a larger context, with 512 as the floor.
    """
    headroom = get_free_ram_gb() - model_gb - 0.5
    # (minimum headroom, context size), checked from largest to smallest.
    for needed, ctx in ((2.0, 4096), (1.0, 2048), (0.5, 1024)):
        if headroom >= needed:
            return ctx
    return 512

def print_ram_status():
    """Print a 20-cell usage bar with used, total and free RAM figures."""
    free = get_free_ram_gb()
    total = get_total_ram_gb()
    used = total - free
    # Guard against a zero total (RAM figures unavailable).
    fraction = used / total if total else 0
    filled = int(fraction * 20)
    bar = "‚ñà" * filled + "‚ñë" * (20 - filled)
    print(f"RAM [{bar}] {used:.1f}/{total:.1f} GB ({free:.1f} GB free)")

def token_file_path():
    """Return where the GitHub token is stored.

    The Google Drive location is used when the Drive folder exists,
    otherwise the path under /content.
    """
    drive_dir = Path("/content/drive/MyDrive")
    base = drive_dir if drive_dir.exists() else Path("/content")
    return base / "MY-AI-Gizmo" / "github_token.txt"

def load_saved_token():
    """Return a previously saved token, or "" when none is usable.

    Both known locations are checked in order; a file is accepted only if
    its stripped content has at least 10 characters.
    """
    candidates = (
        Path("/content/drive/MyDrive/MY-AI-Gizmo/github_token.txt"),
        Path("/content/MY-AI-Gizmo/github_token.txt"),
    )
    for candidate in candidates:
        if not candidate.exists():
            continue
        try:
            text = candidate.read_text(encoding="utf-8").strip()
        except Exception:
            continue
        if len(text) >= 10:
            return text
    return ""

def save_token(token):
    """Persist ``token`` to the path returned by ``token_file_path()``.

    Failures are reported as a warning and never raised. After a successful
    write the file is restricted to its owner on a best-effort basis, because
    it holds a credential.
    """
    p = token_file_path()
    try:
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(token, encoding="utf-8")
    except Exception as e:
        print(f"[warn] could not save token: {e}")
        return
    try:
        os.chmod(p, 0o600)
    except OSError:
        # Not every filesystem supports permission changes; the token is saved anyway.
        pass

def build_urls():
    """Recompute ``REPO_ZIP`` and ``REPO_CLONE_URL`` from the GitHub settings.

    Both URLs embed ``GITHUB_TOKEN`` as the user part of the address.
    """
    global REPO_ZIP, REPO_CLONE_URL
    repo_base = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{GITHUB_REPO}"
    REPO_ZIP = f"{repo_base}/archive/refs/heads/{GITHUB_BRANCH}.zip"
    REPO_CLONE_URL = f"{repo_base}.git"

def kill_old_servers():
    """Force-kill leftover server, gradio and wrapper processes, then wait 2 s.

    ``pkill`` is invoked with an argument list instead of through a shell:
    with ``sh -c "pkill -f '<pattern>'"`` the intermediate shell's own command
    line contains the pattern text and can itself be matched. When ``pkill``
    is not installed (e.g. on Windows) the step is skipped silently.
    """
    for pattern in ("python.*server.py", "python.*gradio", "_gizmo_launch"):
        try:
            subprocess.run(["pkill", "-9", "-f", pattern], capture_output=True, text=True)
        except OSError:
            # pkill is unavailable on this platform; there is nothing to kill with.
            break
    # Give the killed processes a moment to exit.
    time.sleep(2)

def prompt_yes_no(text, default="y"):
    """Ask a yes/no question on stdin and return the answer as a bool.

    An empty reply selects ``default``; otherwise only "y" and "yes"
    (case-insensitive) count as yes.
    """
    hint = "Y/n" if default == "y" else "y/N"
    answer = input(f"{text} ({hint}): ").strip().lower()
    if answer:
        return answer in ("y", "yes")
    return default == "y"

def choose_runtime_mode():
    """Ask which runtime is in use and store it in ``USE_WINDOWS_LOCALHOST``.

    Only the answer "2" selects Windows localhost mode.
    """
    global USE_WINDOWS_LOCALHOST
    rule = "=" * 70
    for row in (
        rule,
        "Runtime mode",
        "1) Normal cloud Colab (/content paths)",
        "2) Windows 11 localhost runtime (Colab Local Runtime)",
        rule,
    ):
        print(row)
    answer = input("Choose 1 or 2: ").strip()
    USE_WINDOWS_LOCALHOST = answer == "2"
    print(f"[info] Windows localhost mode: {USE_WINDOWS_LOCALHOST}")

def setup_paths():
    """Resolve the working and data directories and create the data folders.

    In Windows localhost mode both paths are asked for, defaulting to folders
    under the current directory. Otherwise the repo lives under /content and
    the data root is on Google Drive when it is (or can be) mounted, else
    under /content.
    """
    global WORK_DIR, DRIVE_ROOT, LOG_DIR, MPL_CONFIG_DIR, PUBLIC_URL_FILE
    if USE_WINDOWS_LOCALHOST:
        repo_default = Path.cwd() / "text-generation-webui"
        repo_answer = input(f"Repo path [{repo_default}]: ").strip()
        WORK_DIR = Path(repo_answer) if repo_answer else repo_default

        data_default = Path.cwd() / "MY-AI-Gizmo-data"
        data_answer = input(f"Data path [{data_default}]: ").strip()
        DRIVE_ROOT = Path(data_answer) if data_answer else data_default
    else:
        WORK_DIR = Path("/content/text-generation-webui")
        mounted = False
        if IN_COLAB:
            try:
                # Mount only when the Drive folder is not already present.
                if not Path("/content/drive/MyDrive").exists():
                    colab_drive.mount("/content/drive", force_remount=False)
                mounted = Path("/content/drive/MyDrive").exists()
            except Exception as e:
                print(f"[warn] drive mount failed: {e}")
        if mounted:
            DRIVE_ROOT = Path("/content/drive/MyDrive/MY-AI-Gizmo")
        else:
            DRIVE_ROOT = Path("/content/MY-AI-Gizmo")

    LOG_DIR = DRIVE_ROOT / "logs"
    MPL_CONFIG_DIR = DRIVE_ROOT / "matplotlib"
    PUBLIC_URL_FILE = DRIVE_ROOT / "public_url.txt"

    needed_dirs = [DRIVE_ROOT, LOG_DIR, MPL_CONFIG_DIR]
    needed_dirs += [DRIVE_ROOT / sub for sub in ("models", "settings", "characters")]
    for folder in needed_dirs:
        folder.mkdir(parents=True, exist_ok=True)

def setup_token():
    """Obtain the GitHub token and rebuild the repository URLs.

    A saved token is offered first (only its last three characters are
    shown). Otherwise the user is prompted until a non-empty token is
    entered, which is then saved.
    """
    global GITHUB_TOKEN
    remembered = load_saved_token()
    reuse = bool(remembered) and prompt_yes_no(f"Use saved token (...{remembered[-3:]})?", "y")
    if reuse:
        GITHUB_TOKEN = remembered
    else:
        entered = input("Paste GitHub token: ").strip()
        while not entered:
            print("Token required.")
            entered = input("Paste GitHub token: ").strip()
        GITHUB_TOKEN = entered
        save_token(GITHUB_TOKEN)
    build_urls()

def check_repo_update():
    """Decide how to treat the local checkout.

    Returns "new" when ``WORK_DIR`` does not exist, "fresh" when the user
    asks for a re-clone, and "keep" otherwise.
    """
    if not WORK_DIR.exists():
        return "new"
    wants_fresh = prompt_yes_no("Did you update the repo on GitHub and want fresh re-clone?", "n")
    if wants_fresh:
        return "fresh"
    return "keep"

def apply_repo_update(mode):
    """Delete the existing checkout when ``mode`` is "fresh".

    Running servers are killed first; any other mode leaves everything alone.
    """
    if mode != "fresh" or not WORK_DIR.exists():
        return
    kill_old_servers()
    shutil.rmtree(WORK_DIR, ignore_errors=True)

def clone_repo():
    """Fetch the repository into ``WORK_DIR``.

    A shallow ``git clone`` is tried first. If it fails, the branch archive is
    downloaded with wget or curl and unpacked next to ``WORK_DIR``, then the
    extracted ``<repo>-*`` folder is moved into place.

    Every external tool is called with an argument list (no shell), so paths
    containing spaces work, and a missing tool counts as a failed attempt.

    Returns:
        True when ``WORK_DIR`` was populated, False otherwise.
    """
    import tempfile
    import zipfile

    def _exit_code(argv):
        # Exit status of the command, or None when it cannot be started.
        try:
            return subprocess.run(argv, capture_output=True, text=True).returncode
        except OSError:
            return None

    print("[info] cloning repo...")
    cloned = _exit_code(["git", "clone", "--depth=1", REPO_CLONE_URL, str(WORK_DIR)])
    if cloned == 0 and WORK_DIR.exists():
        return True

    print("[warn] clone failed, trying zip fallback...")
    archive = Path(tempfile.gettempdir()) / "repo.zip"
    try:
        archive.unlink()
    except OSError:
        pass

    downloaders = (
        ["wget", "-q", "-O", str(archive), REPO_ZIP],
        ["curl", "-s", "-L", "-o", str(archive), REPO_ZIP],
    )
    for argv in downloaders:
        # Reject tiny files: an error page is not a repository archive.
        if _exit_code(argv) == 0 and archive.exists() and archive.stat().st_size > 1000:
            break
    else:
        return False

    # Unpack beside WORK_DIR so the final move stays on one filesystem.
    parent = WORK_DIR.parent
    try:
        parent.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(archive) as bundle:
            for member in bundle.infolist():
                extracted = bundle.extract(member, parent)
                # zipfile drops Unix permission bits; restore them when present.
                mode = (member.external_attr >> 16) & 0o777
                if mode:
                    try:
                        os.chmod(extracted, mode)
                    except OSError:
                        pass
    except (zipfile.BadZipFile, OSError) as e:
        print(f"[warn] could not unpack archive: {e}")
        return False
    finally:
        try:
            archive.unlink()
        except OSError:
            pass

    found = next((p for p in parent.glob(f"{GITHUB_REPO}-*") if p.is_dir()), None)
    if not found:
        return False

    if WORK_DIR.exists():
        shutil.rmtree(WORK_DIR, ignore_errors=True)
    shutil.move(str(found), str(WORK_DIR))
    return True

def choose_mode():
    """Ask for GPU or CPU mode and set ``USE_GPU``, ``GPU_LAYERS``, ``N_CTX``.

    Only the answer "2" selects CPU mode (0 GPU layers); anything else
    selects GPU mode (-1 GPU layers). ``N_CTX`` is reset to 4096 either way.
    """
    global USE_GPU, GPU_LAYERS, N_CTX
    print("Mode: [1] GPU [2] CPU")
    cpu_only = input("Choose 1/2: ").strip() == "2"
    USE_GPU = not cpu_only
    GPU_LAYERS = 0 if cpu_only else -1
    N_CTX = 4096

def list_local_models():
    """Return a sorted list of model files found below ``DRIVE_ROOT/models``.

    The search is recursive and covers .gguf, .safetensors and .bin files.
    """
    models_dir = DRIVE_ROOT / "models"
    return sorted(
        hit
        for pattern in ("*.gguf", "*.safetensors", "*.bin")
        for hit in models_dir.rglob(pattern)
    )

def choose_model():
    """Interactively pick a model and set the model-related globals.

    Accepted answers:
      * ``0``  - start without a model.
      * ``L<n>`` - the n-th local model file (only when local files exist).
      * ``<n>`` - the n-th entry of ``MODEL_MENU``; the entry with an empty
        repo asks for a custom Hugging Face repo and file name.

    Numbers must lie within the displayed range. Values such as ``L0`` or
    ``-1`` are rejected rather than silently picking an entry from the end of
    the list through negative indexing. The prompt repeats until an answer is
    valid.
    """
    global USE_MODEL, MODEL_REPO, MODEL_FILE, N_CTX
    local = list_local_models()
    print("Model selector:")
    for i, m in enumerate(local, 1):
        print(f"  [L{i}] {m.name}")
    for m in MODEL_MENU:
        print(" ", m[0])
    print("  [0] start without model")

    while True:
        c = input("Choice: ").strip()
        if c == "0":
            USE_MODEL = False
            MODEL_REPO = ""
            MODEL_FILE = ""
            return

        if c.upper().startswith("L") and local:
            digits = c[1:]
            if not digits.isdecimal() or not 1 <= int(digits) <= len(local):
                print("invalid local model")
                continue
            sel = local[int(digits) - 1]
            try:
                ctx = auto_ctx_size(sel.stat().st_size / (1024**3))
            except Exception:
                # e.g. the file vanished between listing and selection
                print("invalid local model")
                continue
            USE_MODEL = True
            MODEL_REPO = ""
            MODEL_FILE = sel.name
            N_CTX = ctx
            return

        if not c.isdecimal() or not 1 <= int(c) <= len(MODEL_MENU):
            print("invalid choice")
            continue
        item = MODEL_MENU[int(c) - 1]
        if item[1]:
            USE_MODEL = True
            MODEL_REPO, MODEL_FILE = item[1], item[2]
            N_CTX = auto_ctx_size(item[3])
            return

        # Custom entry: both values are needed to build a download URL.
        repo = input("HF repo: ").strip()
        filename = input("Filename: ").strip()
        if not repo or not filename:
            print("invalid choice")
            continue
        MODEL_REPO = repo
        MODEL_FILE = filename
        USE_MODEL = True
        N_CTX = 2048
        return

def download_model_if_missing():
    """Ensure the selected model file exists under ``DRIVE_ROOT/models``.

    A file larger than 100 MiB counts as present. Otherwise it is downloaded
    from Hugging Face with wget, then curl as a fallback. The tools receive an
    argument list rather than a shell string, so a user-typed repo or file
    name cannot break or alter the command; a missing tool counts as a failed
    attempt.

    Returns:
        True when no model is selected or the file is available, False when
        it is missing and cannot be downloaded.
    """
    if not USE_MODEL:
        print("[info] no model selected")
        return True
    min_bytes = 100 * 1024 * 1024
    models_dir = DRIVE_ROOT / "models"
    models_dir.mkdir(parents=True, exist_ok=True)
    p = models_dir / MODEL_FILE
    if p.exists() and p.stat().st_size > min_bytes:
        return True
    if not MODEL_REPO:
        # Local-only selection with nothing to download from.
        return False
    url = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILE}?download=true"
    downloaders = (
        ["wget", "-q", "--show-progress", "-O", str(p), url],
        ["curl", "-L", "--progress-bar", "-o", str(p), url],
    )
    for argv in downloaders:
        try:
            # Output is not captured so the progress display stays visible.
            rc = subprocess.run(argv).returncode
        except OSError:
            rc = None  # tool not installed
        if rc == 0 and p.exists() and p.stat().st_size > min_bytes:
            return True
        # Remove a partial or bogus download before the next attempt.
        try:
            p.unlink()
        except OSError:
            pass
    return False

# -------- robust link/copy mapping (fix WinError 1314) --------
def _safe_remove(path: Path):
    try:
        if path.is_symlink() or path.is_file():
            path.unlink()
        elif path.is_dir():
            shutil.rmtree(path, ignore_errors=True)
    except Exception:
        pass

def _copy_fallback(src: Path, dst: Path, is_file: bool):
    dst.parent.mkdir(parents=True, exist_ok=True)
    if is_file:
        if src.exists():
            shutil.copy2(src, dst)
        else:
            dst.write_text("", encoding="utf-8")
    else:
        if dst.exists():
            shutil.rmtree(dst, ignore_errors=True)
        shutil.copytree(src, dst, dirs_exist_ok=True)

def _link_or_copy(src: Path, dst: Path, is_file: bool):
    """Make ``dst`` point at ``src``, using the best mechanism available.

    Whatever already sits at ``dst`` is removed first. The attempts are, in
    order: a symlink, a directory junction (Windows directories only), and
    finally a plain copy.

    Returns:
        "symlink", "junction" or "copy", naming the mechanism used.
    """
    _safe_remove(dst)
    dst.parent.mkdir(parents=True, exist_ok=True)
    wants_dir = not is_file

    try:
        os.symlink(str(src), str(dst), target_is_directory=wants_dir)
    except Exception:
        pass  # fall through to the alternatives below
    else:
        return "symlink"

    if wants_dir and os.name == "nt":
        junction_cmd = ["cmd", "/c", "mklink", "/J", str(dst), str(src)]
        try:
            subprocess.run(junction_cmd, check=True, capture_output=True)
        except Exception:
            pass
        else:
            return "junction"

    _copy_fallback(src, dst, is_file)
    return "copy"

def ensure_mappings():
    """Map the repo's data locations onto the folders under ``DRIVE_ROOT``.

    Each data-side target is created if missing (an empty file for file
    entries), then linked or copied into ``WORK_DIR`` and reported on stdout.
    """
    # (path inside the repo, path inside the data root, is_file)
    mappings = (
        ("user_data/models", "models", False),
        ("models", "models", False),
        ("user_data/loras", "loras", False),
        ("user_data/characters", "characters", False),
        ("user_data/presets", "presets", False),
        ("user_data/settings.yaml", "settings/settings.yaml", True),
        ("user_data/settings.json", "settings/settings.json", True),
        ("user_data/chat", "chat-history", False),
        ("outputs", "outputs", False),
    )

    for repo_rel, data_rel, is_file in mappings:
        data_path = DRIVE_ROOT / data_rel
        if not is_file:
            data_path.mkdir(parents=True, exist_ok=True)
        else:
            data_path.parent.mkdir(parents=True, exist_ok=True)
            if not data_path.exists():
                data_path.write_text("", encoding="utf-8")

        how = _link_or_copy(data_path, WORK_DIR / repo_rel, is_file)
        print(f"[map] {repo_rel} <= {data_rel} ({how})")

def write_settings():
    """Write settings.yaml into the repo's user_data and the data root.

    The file carries the current context size, GPU layer count, thread count
    and model name ("None" when no model is selected).
    """
    has_model = bool(USE_MODEL and MODEL_FILE)
    rows = [
        "listen: true",
        "share: true",
        "auto_launch: false",
        "loader: llama.cpp",
        f"n_ctx: {N_CTX}",
        "n_batch: 512",
        f"n_gpu_layers: {GPU_LAYERS}",
        f"threads: {auto_thread_count()}",
        "character: Debug",
        f"model: {MODEL_FILE}" if has_model else "model: None",
        "chat_style: cai-chat",
        "api: true",
        "api_port: 5000",
    ]
    yaml_text = "\n".join(rows) + "\n"
    targets = (
        WORK_DIR / "user_data" / "settings.yaml",
        DRIVE_ROOT / "settings" / "settings.yaml",
    )
    for target in targets:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(yaml_text, encoding="utf-8")

def write_cmd_flags():
    """Write the server flags as one line into both CMD_FLAGS.txt copies.

    ``--model`` is appended only when a model is selected.
    """
    parts = ["--listen", "--share", "--verbose", "--api"]
    valued_options = (
        ("--api-port", "5000"),
        ("--loader", "llama.cpp"),
        ("--gpu-layers", GPU_LAYERS),
        ("--ctx-size", N_CTX),
        ("--batch-size", "512"),
        ("--threads", auto_thread_count()),
        ("--extensions", EXTENSIONS),
    )
    for option, value in valued_options:
        parts += [option, str(value)]
    if USE_MODEL and MODEL_FILE:
        parts += ["--model", MODEL_FILE]

    flag_line = " ".join(parts)
    targets = (
        WORK_DIR / "user_data" / "CMD_FLAGS.txt",
        DRIVE_ROOT / "settings" / "CMD_FLAGS.txt",
    )
    for target in targets:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(flag_line, encoding="utf-8")

def ensure_extension_dirs():
    """Create ``WORK_DIR/extensions/<name>`` for every name in ``EXTENSIONS``.

    The names come from the comma-separated ``EXTENSIONS`` constant, so this
    list cannot drift from the one passed to the server. Blank entries and
    surrounding whitespace are ignored.
    """
    names = [name.strip() for name in EXTENSIONS.split(",")]
    for ext in names:
        if not ext:
            continue
        d = WORK_DIR / "extensions" / ext
        d.mkdir(parents=True, exist_ok=True)

def build_launch_wrapper(python_exe):
    """Generate ``_gizmo_launch.py`` in ``WORK_DIR`` and return its path.

    The generated script sets the environment, adds the server flags to
    ``sys.argv``, installs a ``gr.Timer`` shim when gradio lacks it, forces
    the matplotlib Agg backend and runs ``server.py`` via ``runpy``.

    Flags are added as option/value groups keyed on the option name. Deduping
    every token on its own dropped a value equal to an earlier one (with
    ``--ctx-size 512`` the ``512`` of ``--batch-size`` was skipped), which
    produced an invalid command line. Values embedded in the script are
    written with ``repr`` so quotes or backslashes in a model name or path
    cannot break the generated source.

    Args:
        python_exe: Unused; kept so existing callers keep working.

    Returns:
        The wrapper script path as a string.
    """
    threads = auto_thread_count()
    mode_label = "GPU" if USE_GPU else "CPU"
    model_desc = MODEL_FILE if USE_MODEL else "NO MODEL"
    cuda_block = "os.environ['CUDA_VISIBLE_DEVICES'] = ''" if not USE_GPU else ""

    # One entry per option: the option name followed by its value, if any.
    flag_groups = [
        ["--listen"], ["--share"], ["--verbose"],
        ["--api"], ["--api-port", "5000"],
        ["--loader", "llama.cpp"],
        ["--gpu-layers", str(GPU_LAYERS)],
        ["--ctx-size", str(N_CTX)],
        ["--batch-size", "512"],
        ["--threads", str(threads)],
    ]
    if USE_MODEL and MODEL_FILE:
        flag_groups.append(["--model", MODEL_FILE])
    flag_groups.append(["--extensions", EXTENSIONS])

    banner = f"[WRAPPER v3.6.0] Mode: {mode_label} | Model: {model_desc}"
    ext_banner = f"[WRAPPER] Extensions: {EXTENSIONS}"

    code = f"""#!/usr/bin/env python3
import sys, os
{cuda_block}
os.environ['MPLBACKEND'] = 'Agg'
os.environ['MPLCONFIGDIR'] = {str(MPL_CONFIG_DIR)!r}
os.environ['GRADIO_SERVER_NAME'] = '0.0.0.0'
os.environ['GRADIO_SHARE'] = '1'

flag_groups = {flag_groups!r}
for group in flag_groups:
    # Compare by option name only; values may legitimately repeat.
    if group[0] not in sys.argv:
        sys.argv.extend(group)

print({banner!r})
print({ext_banner!r})

# Gradio compatibility shim (for 4.37.x)
try:
    import gradio as gr
    if not hasattr(gr, 'Timer'):
        class _GizmoTimerShim:
            def __init__(self, *args, **kwargs): pass
            def tick(self, *args, **kwargs): return None
        gr.Timer = _GizmoTimerShim
        print('[WRAPPER] Applied gr.Timer shim')
except Exception as e:
    print(f'[WRAPPER] Timer shim warning: {{e}}')

try:
    import matplotlib
    matplotlib.use('Agg', force=True)
except Exception:
    pass

import traceback, runpy
try:
    runpy.run_path('server.py', run_name='__main__')
except SystemExit:
    pass
except Exception:
    print('\\n[ERROR] server.py raised an exception:')
    traceback.print_exc()
    raise
"""
    wrapper = WORK_DIR / "_gizmo_launch.py"
    wrapper.write_text(code, encoding="utf-8")
    return str(wrapper)

def launch(python_exe, wrapper_path):
    """Run the wrapper script, mirror its output and capture the public URL.

    The server is started up to ``MAX_RESTARTS`` times. Each attempt streams
    the child's combined stdout/stderr to the console and to a new log file in
    ``LOG_DIR``, while a heartbeat thread prints a message whenever the child
    has been silent for ``HEARTBEAT_INTERVAL`` seconds. The first URL that
    matches ``URL_PATTERNS`` and contains one of ``URL_KEYWORDS`` is stored in
    ``PUBLIC_URL_FILE``. The loop ends once the child exits with 0 or -9.

    Args:
        python_exe: Interpreter used to run the wrapper.
        wrapper_path: Path of the generated wrapper script.

    Returns:
        The captured public URL, or None when none was seen.
    """
    cmd = [python_exe, "-u", wrapper_path]
    env = os.environ.copy()
    env.update({
        "MPLBACKEND": "Agg",
        "MPLCONFIGDIR": str(MPL_CONFIG_DIR),
        "GRADIO_SERVER_NAME": "0.0.0.0",
        "GRADIO_SHARE": "1",
    })
    if not USE_GPU:
        env["CUDA_VISIBLE_DEVICES"] = ""

    captured = None

    for attempt in range(1, MAX_RESTARTS + 1):
        print(f"\n{'='*70}\nüöÄ Starting server (attempt {attempt}/{MAX_RESTARTS})\n{'='*70}\n")
        if attempt > 1:
            time.sleep(5)

        log_path = LOG_DIR / f"server_{int(time.time())}.log"
        # The context manager closes the log even if starting the child fails.
        with open(log_path, "a", encoding="utf-8") as logfile:
            # The wrapper runs 'server.py' by relative path, so the child must
            # start inside WORK_DIR.
            os.chdir(WORK_DIR)

            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                env=env,
                text=True,
                bufsize=1
            )

            stop_hb = threading.Event()
            # One-element list so the heartbeat thread sees updates.
            last_out = [time.time()]

            def hb():
                while not stop_hb.wait(HEARTBEAT_INTERVAL):
                    if time.time() - last_out[0] >= HEARTBEAT_INTERVAL:
                        print("[heartbeat] still running...")

            t = threading.Thread(target=hb, daemon=True)
            t.start()

            try:
                for line in proc.stdout:
                    last_out[0] = time.time()
                    print(line, end="", flush=True)
                    logfile.write(line)

                    if captured:
                        continue
                    for pat in URL_PATTERNS:
                        m = pat.search(line)
                        if not m:
                            continue
                        # Strip punctuation that commonly trails a printed URL.
                        url = m.group(1).rstrip(").,\\'\"")
                        if any(k in url.lower() for k in URL_KEYWORDS):
                            captured = url
                            print(f"\nüåê PUBLIC URL: {captured}\n")
                            try:
                                PUBLIC_URL_FILE.write_text(captured, encoding="utf-8")
                            except Exception:
                                pass
                            break
            finally:
                stop_hb.set()
                t.join(timeout=1)

        rc = proc.wait()
        print(f"[info] server exit code: {rc}")
        # 0 is a clean exit and -9 a SIGKILL; neither triggers a restart.
        if rc in (0, -9):
            break
        if attempt < MAX_RESTARTS:
            print("[warn] restarting...")
        else:
            print("[warn] max restarts reached")

    return captured

def install_env_if_needed():
    """Run the one-time installer when needed and return the Python to use.

    Raises:
        SystemExit: when ``start_linux.sh`` is missing from ``WORK_DIR``.

    Returns:
        The path of the installer environment's interpreter when it exists,
        otherwise the string "python3".
    """
    if not (WORK_DIR / "start_linux.sh").exists():
        raise SystemExit("‚ùå start_linux.sh not found")

    env_python = WORK_DIR / "installer_files" / "env" / "bin" / "python"
    already_installed = env_python.exists()

    sh("chmod +x start_linux.sh", cwd=str(WORK_DIR))

    if already_installed and env_python.exists():
        return str(env_python)

    if not env_python.exists():
        print("[info] first run: installing env...")
        # Answers handed to the installer through the environment.
        installer_env = dict(
            os.environ.copy(),
            MPLBACKEND="Agg",
            MPLCONFIGDIR=str(MPL_CONFIG_DIR),
            GPU_CHOICE="A" if USE_GPU else "N",
            LAUNCH_AFTER_INSTALL="FALSE",
            INSTALL_EXTENSIONS="FALSE",
            SKIP_TORCH_TEST="TRUE",
        )
        if not USE_GPU:
            installer_env["CUDA_VISIBLE_DEVICES"] = ""

        subprocess.run("bash start_linux.sh", shell=True, cwd=str(WORK_DIR), env=installer_env)
        return str(env_python) if env_python.exists() else "python3"

    # The interpreter appeared after the first check; keep the earlier fallback.
    return "python3"

def print_windows_localhost_instructions():
    """Print the Jupyter setup commands; does nothing outside Windows localhost mode."""
    if USE_WINDOWS_LOCALHOST:
        rows = (
            "\n[Windows localhost mode detected]",
            "Make sure on Windows host you already ran:",
            "  py -3 -m jupyter serverextension enable --py jupyter_http_over_ws",
            '  py -3 -m notebook --NotebookApp.allow_origin="https://colab.research.google.com" --port=8888 --NotebookApp.port_retries=0 --no-browser',
            "Then connect Colab to that local runtime before running launcher.\n",
        )
        for row in rows:
            print(row)

# ---------------- main ----------------
# Interactive entry point. The steps depend on each other's globals, so the
# order below matters: paths before token, repo before model, settings before launch.
if __name__ == "__main__":
    print("="*70)
    print("MY-AI-Gizmo Launcher v3.6.0")
    print("="*70)

    # 1. Pick the runtime flavour and, for Windows localhost, show the setup hints.
    choose_runtime_mode()
    print_windows_localhost_instructions()

    # 2. Resolve directories, then obtain the GitHub token and repository URLs.
    setup_paths()
    setup_token()

    # 3. Optionally wipe the existing checkout, then clone if nothing is there.
    mode = check_repo_update()
    apply_repo_update(mode)

    if not WORK_DIR.exists():
        if not clone_repo():
            raise SystemExit("‚ùå clone failed")

    # 4. Choose GPU/CPU mode and the model, then report memory usage.
    choose_mode()
    choose_model()
    print_ram_status()

    if not download_model_if_missing():
        raise SystemExit("‚ùå model download failed")

    # 5. Map data folders into the repo and write the server configuration.
    ensure_mappings()
    write_settings()
    write_cmd_flags()
    ensure_extension_dirs()

    # 6. Install the environment if needed and stop any server still running.
    pyexe = install_env_if_needed()
    kill_old_servers()

    wrapper = build_launch_wrapper(pyexe)

    print("\n" + "="*70)
    print(f"LAUNCHING ({'GPU' if USE_GPU else 'CPU'})")
    print(f"Model: {MODEL_FILE if USE_MODEL else '(none)'}")
    print(f"Extensions: {EXTENSIONS}")
    print("="*70)

    # 7. Run the server; this returns once the server process has stopped.
    url = launch(pyexe, wrapper)

    print("\n" + "="*70)
    if url:
        print(f"‚úÖ READY: {url}")
        print("Expected tabs: Learning Center, Student Utils, Model Hub, Google Workspace, Dual Model")
    else:
        print("‚ùå No public URL captured. Check logs in:", LOG_DIR)
    print("="*70)


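# [removed] A GitHub personal access token had been pasted here in plain text.
# Revoke that token on GitHub immediately and never store credentials in the notebook.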

In [None]:
# ===============================
# GIZMO AUTO MERGE SCRIPT (COLAB)
# Accept ALL incoming changes
# ===============================
#
# Clones a repository, creates a local backup branch, merges
# origin/<incoming> into the target branch preferring the incoming side on
# every conflict, and pushes the result.
#
# Git is invoked with argument lists (no shell), so user-typed values cannot
# inject commands. Every essential step is checked, and the success message is
# printed only after the push succeeded.

import os
import getpass
import shutil
import subprocess


def _git(*args, check=True, capture=False):
    """Run ``git`` with ``args`` and return the completed process.

    With ``check`` set, a non-zero exit aborts the script. Only the
    sub-command name is echoed, so the token never appears in the message.
    """
    result = subprocess.run(["git", *args], text=True, capture_output=capture)
    if check and result.returncode != 0:
        raise SystemExit(f"ERROR: 'git {args[0]}' failed (exit code {result.returncode}). Aborting.")
    return result


print("\n=== Gizmo Auto Merge Tool ===\n")

# -------- USER INPUT --------

repo_url = input("Enter GitHub repo URL (example: https://github.com/USER/REPO.git): ").strip()

branch = input("Target branch (usually main): ").strip()
if branch == "":
    branch = "main"

incoming_branch = input("Incoming branch to merge FROM: ").strip()

token = getpass.getpass("Paste GitHub Token (hidden): ").strip()

if not repo_url.startswith("https://"):
    raise SystemExit("ERROR: the repo URL must start with https://")
if not incoming_branch:
    raise SystemExit("ERROR: the incoming branch name is required")
if branch.startswith("-") or incoming_branch.startswith("-"):
    # A leading dash would be parsed by git as an option.
    raise SystemExit("ERROR: branch names must not start with '-'")

# -------- SETUP --------

repo_name = repo_url.rstrip("/").split("/")[-1]
if repo_name.endswith(".git"):
    # Strip only the suffix; a '.git' elsewhere in the name is kept.
    repo_name = repo_name[:-len(".git")]
if not repo_name:
    raise SystemExit("ERROR: could not derive a repository name from the URL")

# NOTE: the token ends up in the clone's remote URL (.git/config).
auth_repo = repo_url.replace("https://", f"https://{token}@", 1)

print("\nCloning or updating repo...\n")

if os.path.exists(repo_name):
    shutil.rmtree(repo_name, ignore_errors=True)

_git("clone", auth_repo, repo_name)

os.chdir(repo_name)

# -------- CONFIG --------

_git("config", "user.email", "colab@gizmo.ai")
_git("config", "user.name", "Colab Gizmo Bot")

# -------- BACKUP --------

print("\nCreating backup branch...\n")

_git("checkout", branch)
_git("checkout", "-b", "backup-before-merge")

# -------- MERGE --------

print("\nMerging incoming changes...\n")

_git("checkout", branch)
_git("fetch", "origin")

merge = _git("merge", "-X", "theirs", f"origin/{incoming_branch}", check=False)

# -------- AUTO RESOLVE --------

if merge.returncode != 0:
    # Without MERGE_HEAD the merge never started (e.g. unknown branch), so
    # there is nothing to resolve.
    if not os.path.exists(os.path.join(".git", "MERGE_HEAD")):
        raise SystemExit("ERROR: merge could not start (does the incoming branch exist on origin?).")

    print("\nResolving conflicts automatically...\n")

    listing = _git("diff", "--name-only", "--diff-filter=U", "-z", check=False, capture=True)
    conflicted = [name for name in listing.stdout.split("\0") if name]
    if conflicted:
        # Best effort: files deleted on the incoming side have no "theirs" version.
        _git("checkout", "--theirs", "--", *conflicted, check=False)

    _git("add", "-A")
    _git("commit", "-m", "Auto-resolve conflicts: accepted incoming changes")

# -------- PUSH --------

print("\nPushing to GitHub...\n")

_git("push", "origin", branch)

# -------- DONE --------

print("\nSUCCESS!")
print("All incoming changes merged.")
print("Conflicts resolved automatically.")
print("Backup branch created: backup-before-merge")

✅ RECOMMENDED MODELS (COPY EXACTLY)
🔹 BEST GENERAL CHAT (START HERE)

Llama-2-7B-Chat

Repo: TheBloke/Llama-2-7B-Chat-GGUF
File: llama-2-7b-chat.Q4_K_M.gguf

🔹 FAST + LIGHT (LOW RAM)

TinyLlama-1.1B-Chat

Repo: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
File: tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf

🔹 STRONG CHAT (BETTER THAN LLAMA-2)

Mistral-7B-Instruct

Repo: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
File: mistral-7b-instruct-v0.2.Q4_K_M.gguf

🔹 CODING MODEL

Code LLaMA-7B

Repo: TheBloke/CodeLlama-7B-GGUF
File: codellama-7b.Q4_K_M.gguf

🔹 ROLEPLAY / STORY

MythoMax-L2-13B (needs more RAM)

Repo: TheBloke/MythoMax-L2-13B-GGUF
File: mythomax-l2-13b.Q4_K_M.gguf

🔹 VERY FAST / TEST MODEL

Phi-2 (2.7B)

Repo: TheBloke/phi-2-GGUF
File: phi-2.Q4_K_M.gguf

⚙️ WHAT LOADER TO USE (IMPORTANT)

For ALL models above:

Loader: llama.cpp


Repo: TheBloke/Llama-2-7B-Chat-GGUF
File: llama-2-7b-chat.Q4_K_M.gguf
