<a href="https://colab.research.google.com/github/mshumer/gpt-pro-mode/blob/main/NEMOTRON_Pro_Mode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Made by Matt Shumer ([@mattshumer_](https://x.com/mattshumer_) on X).

In [None]:
# @title Run this cell to set up Pro Mode (OpenRouter, Nemotron Nano 9B v2)
!pip -q install --upgrade openai

from typing import List, Dict, Any
import time, os
import concurrent.futures as cf
import openai
from openai import OpenAI

# Model requested from OpenRouter, the per-request completion token budget,
# and the OpenRouter endpoint the SDK client is pointed at.
MODEL = "nvidia/nemotron-nano-9b-v2"
MAX_COMPLETION_TOKENS = 30000
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

# Optional attribution headers (set these env vars if you want leaderboard attribution).
# A header is included only when its env var holds a non-empty value:
#   OR_HTTP_REFERER -> "HTTP-Referer", e.g. "https://yourapp.com"
#   OR_X_TITLE      -> "X-Title",      e.g. "Your App Name"
_OR_ATTR_HEADERS: Dict[str, str] = {
    header_name: os.environ[env_name]
    for header_name, env_name in (
        ("HTTP-Referer", "OR_HTTP_REFERER"),
        ("X-Title", "OR_X_TITLE"),
    )
    if os.environ.get(env_name)
}

def _make_client(openrouter_api_key: str | None = None) -> OpenAI:
    """
    Build an OpenAI SDK client that targets the OpenRouter endpoint.

    The key comes from `openrouter_api_key` when it is truthy, otherwise from
    the OPENROUTER_API_KEY environment variable. Raises ValueError when
    neither source supplies a key.
    """
    resolved_key = openrouter_api_key
    if not resolved_key:
        resolved_key = os.getenv("OPENROUTER_API_KEY")
    if not resolved_key:
        raise ValueError(
            "Missing OpenRouter API key. Pass openrouter_api_key=... or set OPENROUTER_API_KEY."
        )

    # Hand the SDK None rather than an empty dict when no attribution headers are configured.
    attribution_headers = _OR_ATTR_HEADERS or None
    return OpenAI(
        base_url=OPENROUTER_BASE_URL,
        api_key=resolved_key,
        default_headers=attribution_headers,
    )

def _one_completion(
    client: OpenAI,
    prompt: str,
    temperature: float,
    extra_headers: Dict[str, str] | None = None,
    *,
    max_attempts: int = 3,
    initial_delay: float = 0.5,
) -> str:
    """
    Run one non-streaming chat completion, retrying failures with backoff.

    Args:
        client: OpenAI SDK client used to issue the request.
        prompt: Text sent as the single user message.
        temperature: Sampling temperature forwarded to the API.
        extra_headers: Optional per-request HTTP headers; None sends none.
        max_attempts: Total number of tries before the last error propagates.
        initial_delay: Seconds slept after the first failure; doubled after
            every later failure.

    Returns:
        The message content of the first choice, or "" when the response
        carries no content.

    Raises:
        ValueError: If max_attempts is less than 1.
        Exception: The error from the final attempt, or immediately for an
            HTTP status error that a retry cannot fix.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be >= 1")

    delay = initial_delay
    for attempt in range(max_attempts):
        is_last_attempt = attempt == max_attempts - 1
        try:
            resp = client.chat.completions.create(
                model=MODEL,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,          # 0.9 for candidates; 0.2 for synthesis
                max_tokens=MAX_COMPLETION_TOKENS,
                top_p=1,
                stream=False,
                # Pass attribution headers here too (harmless if None)
                extra_headers=extra_headers,
            )
            # content may be None; normalise so callers always receive a str
            # and never interpolate the literal text "None" into a prompt.
            return resp.choices[0].message.content or ""
        except openai.APIStatusError as err:
            # Most 4xx responses (bad key, malformed request, ...) fail the same
            # way every time. Only timeouts, conflicts, rate limits and server
            # errors are worth another try.
            retryable = err.status_code in (408, 409, 429) or err.status_code >= 500
            if is_last_attempt or not retryable:
                raise
        except Exception:
            # Connection problems and malformed responses: retry until exhausted.
            if is_last_attempt:
                raise
        time.sleep(delay)
        delay *= 2

    # The final attempt always returns or raises, so the loop cannot fall through.
    raise RuntimeError("retry loop exited without a result")

def _build_synthesis_messages(candidates: List[str]) -> List[Dict[str, str]]:
    """
    Build the chat messages for the synthesis pass.

    Each candidate is wrapped in a numbered <cand N> ... </cand N> pair
    (1-based) so the model can tell where one answer ends and the next
    begins, even when the answers themselves contain blank lines.

    Args:
        candidates: Candidate answers, in the order they should be numbered.

    Returns:
        A two-element message list: the system instruction followed by the
        user message carrying all delimited candidates.
    """
    numbered = "\n\n".join(
        f"<cand {i}>\n{txt}\n</cand {i}>" for i, txt in enumerate(candidates, 1)
    )
    system = (
        "You are an expert editor. Synthesize ONE best answer from the candidate "
        "answers provided, merging strengths, correcting errors, and removing repetition. "
        "Do not mention the candidates or the synthesis process. Be decisive and clear."
    )
    user = (
        f"You are given {len(candidates)} candidate answers delimited by <cand i> tags.\n\n"
        f"{numbered}\n\nReturn the single best final answer."
    )
    return [{"role": "system", "content": system},
            {"role": "user", "content": user}]

def pro_mode(prompt: str, n_runs: int, openrouter_api_key: str | None = None) -> Dict[str, Any]:
    """
    Fan out n_runs parallel generations at T=0.9 and synthesize a final answer at T=0.2.
    If openrouter_api_key is provided, it will be used; otherwise OPENROUTER_API_KEY env var is used.
    Returns: {"final": str, "candidates": List[str]}
    """
    assert n_runs >= 1, "n_runs must be >= 1"
    client = _make_client(openrouter_api_key=openrouter_api_key)

    # Parallel candidate generations (threaded; Colab-friendly)
    max_workers = min(n_runs, 16)
    candidates: List[str] = [None] * n_runs  # preserve order
    with cf.ThreadPoolExecutor(max_workers=max_workers) as ex:
        fut_to_idx = {
            ex.submit(_one_completion, client, prompt, 0.9, _OR_ATTR_HEADERS if _OR_ATTR_HEADERS else None): i
            for i in range(n_runs)
        }
        for fut in cf.as_completed(fut_to_idx):
            i = fut_to_idx[fut]
            candidates[i] = fut.result()

    # Synthesis pass
    messages = _build_synthesis_messages(candidates)
    final_resp = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        temperature=0.2,
        max_tokens=MAX_COMPLETION_TOKENS,
        top_p=1,
        stream=False,
        extra_headers=_OR_ATTR_HEADERS if _OR_ATTR_HEADERS else None
    )
    final = final_resp.choices[0].message.content

    return {"final": final, "candidates": candidates}

In [None]:
# Demo
PROMPT = "Explain self-play in reinforcement learning with a concrete example."
NUMBER_OF_CANDIDATES = 5  # start with five, go up if you need more intelligence!
# Paste your key here, or leave the placeholder untouched to fall back to the
# OPENROUTER_API_KEY env var.
OPENROUTER_API_KEY = "YOUR KEY HERE"

# An untouched placeholder is a truthy string and would take priority over the
# env var, so pass None in that case and let pro_mode read the environment.
_explicit_key = OPENROUTER_API_KEY if OPENROUTER_API_KEY != "YOUR KEY HERE" else None
result = pro_mode(PROMPT, NUMBER_OF_CANDIDATES, openrouter_api_key=_explicit_key)

print("\n=== FINAL ===\n", result["final"])
# To inspect candidates:
# for i, c in enumerate(result["candidates"], 1): print(f"\n--- Candidate {i} ---\n{c}")