# Full Debate Tournament Setup on Colab with vLLM

This notebook sets up the entire debate tournament system on a Colab T4 GPU, including the vLLM server, the code download, and the tournament execution.

In [None]:
# Install required packages
!pip install vllm pyngrok torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install tqdm nest_asyncio

In [None]:
# Clone the repository (replace with your repo URL)
# NOTE: `git clone` refuses to clone into an existing directory, and the %cd below
# is relative, so this cell is meant to be run once per fresh runtime.
!git clone https://github.com/yourusername/conversation_games.git
# Switch the kernel's working directory to the tournament package so later cells can
# import `core.api_client` and run `main.py` using relative paths.
%cd conversation_games/debate_tournament

In [None]:
# Set up ngrok (replace YOUR_NGROK_AUTH_TOKEN with your actual token,
# or set the NGROK_AUTH_TOKEN environment variable before running this cell)
import os

from pyngrok import ngrok

# Prefer the environment variable so the token does not have to be saved inside the
# notebook; the inline placeholder is kept as a fallback for the edit-in-place workflow.
ngrok.set_auth_token(os.environ.get("NGROK_AUTH_TOKEN", "YOUR_NGROK_AUTH_TOKEN"))  # Get from https://dashboard.ngrok.com/get-started/your-authtoken

# Start ngrok tunnel for port 8000
tunnel = ngrok.connect(8000)
# ngrok.connect() returns an NgrokTunnel object, not a string. Formatting the object
# directly yields its repr (e.g. 'NgrokTunnel: "https://..." -> "http://localhost:8000"'),
# which is not a usable URL, so the address is read from the .public_url attribute.
public_url = tunnel.public_url
print(f"Public URL: {public_url}")
vllm_base_url = f"{public_url}/v1"
print(f"Set VLLM_BASE_URL={vllm_base_url}")

In [None]:
# Download and run vLLM server with Qwen 0.5B model
import subprocess
import time
import threading

# Model that the startup code below tries first.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
# Model started instead if the first attempt raises or fails its health check.
fallback_model = "Qwen/Qwen2.5-1.5B-Instruct"
# Handle of the most recently launched server subprocess; assigned by start_vllm().
server_process = None

def start_vllm(model, startup_timeout=600, poll_interval=5):
    """Launch the vLLM OpenAI-compatible API server and wait until it is ready.

    The server is started as a background subprocess listening on 0.0.0.0:8000.
    Instead of sleeping for a fixed 30 seconds (which is usually shorter than the
    model download plus load time on a fresh runtime), this polls the server's
    /health endpoint and returns as soon as one of the following happens:

    * /health answers with HTTP 200 (server ready),
    * the subprocess exits (startup failed), or
    * ``startup_timeout`` seconds have elapsed.

    No exception is raised for a failed startup; callers should verify health
    (or inspect ``server_process.poll()``) themselves.

    Args:
        model: Hugging Face model id passed to ``--model``.
        startup_timeout: Maximum number of seconds to wait for readiness.
        poll_interval: Seconds to sleep between readiness probes.

    Returns:
        The ``subprocess.Popen`` handle, also stored in the module-level
        ``server_process`` variable.
    """
    global server_process

    import http.client
    import sys
    import urllib.request

    server_process = subprocess.Popen([
        # Use the kernel's own interpreter so the server sees the packages installed above.
        sys.executable or "python", "-m", "vllm.entrypoints.openai.api_server",
        "--model", model,
        "--host", "0.0.0.0",
        "--port", "8000",
        "--max-model-len", "2048",
        "--dtype", "float16",
        "--gpu-memory-utilization", "0.9"
    ])

    deadline = time.monotonic() + startup_timeout
    while time.monotonic() < deadline:
        if server_process.poll() is not None:
            # The server process has already exited; waiting longer cannot help.
            break
        try:
            with urllib.request.urlopen("http://localhost:8000/health", timeout=5) as reply:
                if reply.status == 200:
                    break
        except (OSError, http.client.HTTPException):
            # Connection refused / timed out / HTTP error: still downloading or loading.
            pass
        time.sleep(poll_interval)
    return server_process

import requests


def _wait_for_vllm_health(max_wait=600, poll_interval=5):
    """Poll the local vLLM /health endpoint until the server is ready.

    Returns True as soon as /health answers 200. Returns False if the server
    subprocess has exited or ``max_wait`` seconds pass without a 200 response.
    """
    deadline = time.monotonic() + max_wait
    while True:
        try:
            if requests.get("http://localhost:8000/health", timeout=5).status_code == 200:
                return True
        except requests.exceptions.RequestException:
            pass  # Not accepting connections yet (still downloading / loading).
        if server_process is not None and server_process.poll() is not None:
            return False  # The process died, so it can never become healthy.
        if time.monotonic() >= deadline:
            return False
        time.sleep(poll_interval)


def _stop_vllm(process, grace_seconds=30):
    """Terminate a server subprocess and wait for it to exit, killing it if needed."""
    process.terminate()
    try:
        process.wait(timeout=grace_seconds)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()


# Try 0.5B first
try:
    print("Starting vLLM with Qwen2.5-0.5B-Instruct...")
    start_vllm(model_name)
    # Health check: keep polling instead of probing once, because the model
    # download and load can take several minutes on a fresh runtime.
    if not _wait_for_vllm_health():
        raise Exception("Health check failed")
    print("vLLM server started successfully with 0.5B model.")
except Exception as e:
    print(f"0.5B failed: {e}. Trying 1.5B...")
    if server_process:
        # Wait for the old server to exit so port 8000 and the GPU memory are
        # released before the fallback server tries to claim them.
        _stop_vllm(server_process)
    start_vllm(fallback_model)
    # Only report success once the fallback server actually answers /health.
    if not _wait_for_vllm_health():
        raise RuntimeError("vLLM server failed to start with both the 0.5B and 1.5B models")
    print("vLLM server started with 1.5B model.")

In [None]:
# GPU Monitoring with nvidia-smi
# Prints a one-off snapshot of the GPU state as reported by the nvidia-smi tool.
!nvidia-smi

In [None]:
# Test the server
import requests

# The server is launched with only --model (no --served-model-name), so it registers
# the model under that full id (e.g. "Qwen/Qwen2.5-0.5B-Instruct") and rejects an
# unknown alias such as "qwen2.5". Ask the server which model it is serving so this
# test works for both the primary and the fallback model.
models_response = requests.get("http://localhost:8000/v1/models", timeout=30)
models_response.raise_for_status()
served_model = models_response.json()["data"][0]["id"]
print(f"Serving model: {served_model}")

response = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": served_model,
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
        "max_tokens": 50,
        "temperature": 0.8
    },
    timeout=120,  # avoid hanging the cell forever if the server stalls
)
if response.status_code == 200:
    print("Test successful:")
    print(response.json()['choices'][0]['message']['content'])
else:
    print(f"Test failed: {response.status_code} - {response.text}")

In [None]:
# Run the debate tournament
import os
# Export the server URL; the `!python` command below runs in a child process, which
# inherits this environment variable.
# NOTE(review): presumably main.py reads VLLM_BASE_URL — confirm against the project code.
os.environ['VLLM_BASE_URL'] = vllm_base_url

# Import and configure
# This import relies on the working directory being the debate_tournament folder
# (set by the earlier %cd cell).
# NOTE(review): this call configures the client inside the notebook kernel only; the
# separate `python main.py` process below does not see it — confirm it is still needed.
from core.api_client import configure_api_client
configure_api_client(dry_run=False, base_url=vllm_base_url)

# Run tournament
# The --output flag names the file that the results cell prints afterwards.
!python main.py --debater1-type true-mcts --debater2-type prompt-mcts --debater1-iterations 10 --debater2-iterations 5 --max-turns 2 --max-debate-depth 4 --output tournament_output.txt

In [None]:
# Display results
# Prints the file passed as --output to main.py in the tournament cell.
!cat tournament_output.txt

## Instructions
1. Replace `YOUR_NGROK_AUTH_TOKEN` with your actual ngrok token.
2. Replace the git clone URL with your repository URL.
3. Run all cells in order. The model download may take several minutes.
4. The tournament runs in the "Run the debate tournament" cell; the final cell only prints its output.
5. Results will be saved to `tournament_output.txt` and displayed.
6. Keep the runtime active while running the tournament.
7. Use a T4 GPU runtime for best performance.