<a href="https://colab.research.google.com/github/gitleon8301/MY-AI-Gizmo-working/blob/main/Colab-TextGen-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# oobabooga/text-generation-webui

After running both cells, a public Gradio URL will appear at the bottom of the second cell's output in around 10 minutes. The server is also started with its API enabled.

* Project page: https://github.com/oobabooga/text-generation-webui
* Gradio server status: https://status.gradio.app/

In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls>

In [None]:
#@title Launch AI Gizmo with Llama 2 7B (Saves to Drive)

import os
import re
import subprocess
from pathlib import Path
from google.colab import drive
import shutil

# Mount Google Drive; everything under DRIVE_PATH below lives on the mounted drive
print("\033[1;32;1m\n --> Mounting Google Drive...\033[0;37;0m\n")
drive.mount("/content/drive")

# DRIVE_PATH: folder on Drive that the rest of this cell links data into.
# WORK_DIR:   checkout location on the runtime's local disk.
DRIVE_PATH = "/content/drive/MyDrive/MY-AI-Gizmo"
WORK_DIR = "/content/text-generation-webui"  # Work from /content for speed

# Remove these variables (if set) from this process's environment so the
# subprocesses started later in the cell do not inherit them
for _env_var in ("PYTHONPATH", "MPLBACKEND"):
    os.environ.pop(_env_var, None)

# Check if we need to clone
if Path(WORK_DIR).exists():
    print("\033[1;33;1m --> Removing old installation...\033[0;37;0m\n")
    shutil.rmtree(WORK_DIR)

# Clone fresh
print("\033[1;32;1m --> Cloning AI Gizmo...\033[0;37;0m\n")
%cd /content
!git clone https://github.com/gitleon8301/MY-AI-Gizmo-working.git text-generation-webui
%cd {WORK_DIR}

# Detect GPU.
# torch is imported here, inside the try, so that a missing or broken torch
# install falls back to CPU mode instead of aborting the cell.
try:
    import torch
    has_gpu = torch.cuda.is_available()
    if has_gpu:
        gpu_name = torch.cuda.get_device_name(0)
        print(f"\033[1;32;1m ✓ GPU: {gpu_name}\033[0;37;0m")
    else:
        print("\033[1;33;1m ⚠ CPU mode\033[0;37;0m")
except Exception as exc:
    # `Exception`, not a bare `except:` -- a kernel interrupt (KeyboardInterrupt)
    # or SystemExit must still stop the cell. The reason is printed so a broken
    # CUDA/torch setup is not silently mistaken for "no GPU attached".
    has_gpu = False
    print(f"\033[1;33;1m ⚠ CPU mode (GPU detection failed: {exc})\033[0;37;0m")

def _link_to_drive(local_dir, drive_dir):
    """Replace ``local_dir`` with a symlink pointing at ``drive_dir``.

    ``drive_dir`` is created if missing. If ``local_dir`` is a real directory,
    every file in it that does not already exist at the same relative path
    under ``drive_dir`` is copied there first, so files shipped with the
    checkout are kept and files already on Drive are never overwritten.
    An existing symlink at ``local_dir`` (dangling or not) is replaced.

    Args:
        local_dir: Path (relative to the current directory or absolute) that
            should become the symlink.
        drive_dir: Directory the symlink should point to.
    """
    local, target = Path(local_dir), Path(drive_dir)
    target.mkdir(parents=True, exist_ok=True)

    if local.is_symlink():
        # Path.exists() is False for a dangling link and shutil.rmtree()
        # refuses symlinks, so links are handled explicitly.
        local.unlink()
    elif local.is_dir():
        for src in local.rglob('*'):
            dst = target / src.relative_to(local)
            if src.is_dir():
                dst.mkdir(parents=True, exist_ok=True)
            elif not dst.exists():
                dst.parent.mkdir(parents=True, exist_ok=True)
                # copyfile copies content only (no metadata/permission calls)
                shutil.copyfile(src, dst)
        shutil.rmtree(local)
    elif local.exists():
        local.unlink()

    # os.symlink fails if the parent directory (e.g. user_data/) is missing
    local.parent.mkdir(parents=True, exist_ok=True)
    os.symlink(target, local)


# Link models directory to Drive
# NOTE(review): this cell keeps other data under user_data/; confirm that this
# checkout's downloader and server really read ./models and not user_data/models.
_link_to_drive('models', f'{DRIVE_PATH}/models')

# Link user_data directories to Drive
for folder in ['characters', 'presets', 'prompts', 'logs']:
    _link_to_drive(f'user_data/{folder}', f'{DRIVE_PATH}/{folder}')

# Install using the start_linux.sh script
print("\033[1;32;1m\n --> Running installation...\033[0;37;0m\n")
os.environ['GPU_CHOICE'] = 'A' if has_gpu else 'C'  # A=NVIDIA, C=CPU
os.environ['LAUNCH_AFTER_INSTALL'] = 'FALSE'
os.environ['INSTALL_EXTENSIONS'] = 'FALSE'

!bash start_linux.sh

# Download Llama 2 7B
model_name = "llama-2-7b-chat.Q4_K_M.gguf"
model_repo = "TheBloke/Llama-2-7B-Chat-GGUF"
model_path = Path(f"models/{model_name}")

if not model_path.exists():
    print(f"\033[1;32;1m\n --> Downloading Llama 2 7B...\033[0;37;0m\n")
    !python download-model.py {model_repo}

    # The download-model.py downloads to a folder, we need to find the .gguf file
    model_folder = Path(f"models/TheBloke_Llama-2-7B-Chat-GGUF")
    if model_folder.exists():
        gguf_files = list(model_folder.glob("*.gguf"))
        if gguf_files:
            print(f"\033[1;32;1m ‚úì Model downloaded: {gguf_files[0].name}\033[0;37;0m\n")
            model_name = f"TheBloke_Llama-2-7B-Chat-GGUF/{gguf_files[0].name}"
else:
    print(f"\033[1;32;1m ‚úì Model exists\033[0;37;0m\n")

# Storage info: list every Drive folder this cell links into the checkout
# (models plus the characters/presets/prompts/logs folders under DRIVE_PATH).
print("\n" + "\033[1;36;1m" + "="*80)
print("💾 PERSISTENT STORAGE (Google Drive):")
print("="*80)
print(f"📁 Root:       {DRIVE_PATH}")
print(f"🤖 Models:     {DRIVE_PATH}/models")
print(f"💬 Characters: {DRIVE_PATH}/characters")
print(f"⚙️  Presets:    {DRIVE_PATH}/presets")
print(f"📁 Prompts:    {DRIVE_PATH}/prompts")
print(f"📁 Logs:       {DRIVE_PATH}/logs")
print("="*80 + "\033[0;37;0m\n")

# Start server
print("\n" + "\033[1;35;1m" + "="*80)
print("🚀 STARTING LLAMA 2 7B CHAT")
print("="*80 + "\033[0;37;0m\n")

# Build the command as an argv list (one element per argument) instead of a
# joined string run through a shell: a model file name containing spaces or
# shell metacharacters is then passed through unchanged.
flags = [
    "--share",
    "--listen",
    "--api",
    "--model", model_name,
]

if has_gpu:
    flags += ["--n-gpu-layers", "35"]
else:
    flags += ["--cpu", "--threads", "2"]

cmd = ["bash", "start_linux.sh", *flags]

# Run server. stderr is merged into stdout so a single pipe carries all output;
# text mode with bufsize=1 gives line-buffered str lines to the reader below.
process = subprocess.Popen(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    bufsize=1
)

# Monitor for URLs: echo every line the server prints, pick out the local and
# public addresses, and print a summary banner once both are known.
local_url = None
public_url = None
urls_displayed = False

# Compiled once instead of on every output line
_local_url_re = re.compile(r'(http://[\d\.]+:\d+)')
_public_url_re = re.compile(r'(https://[a-z0-9\-]+\.gradio\.live)')

try:
    for line in iter(process.stdout.readline, ''):
        print(line, end='')

        if not local_url and ('Running on local URL' in line or 'http://127.0.0.1' in line):
            match = _local_url_re.search(line)
            if match:
                local_url = match.group(1)

        if not public_url and ('Running on public URL' in line or 'gradio.live' in line):
            match = _public_url_re.search(line)
            if match:
                public_url = match.group(1)

        if local_url and public_url and not urls_displayed:
            print("\n" + "\033[1;32;1m" + "="*80)
            print("🎉 LLAMA 2 7B IS READY!")
            print("="*80)
            print(f"\n📍 LOCAL:  {local_url}")
            print(f"🌐 PUBLIC: {public_url}")
            print("\n💡 Use PUBLIC URL to access from anywhere")
            print(f"💾 Everything saves to: {DRIVE_PATH}")
            print("="*80 + "\033[0;37;0m\n")
            urls_displayed = True

    # readline() returned '' -> the pipe is closed; collect the exit status
    process.wait()
finally:
    # Reached on normal exit and when the cell is interrupted. Do not leave the
    # server running behind a stopped cell, and release the pipe.
    if process.poll() is None:
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()
    process.stdout.close()

# A non-zero status means the server stopped because of an error; say so
# instead of letting the cell end silently.
if process.returncode:
    print(f"\033[1;31;1m\n ✗ Server exited with code {process.returncode}\033[0;37;0m\n")