<a href="https://colab.research.google.com/github/mshumer/gpt-pro-mode/blob/main/GPT_5_Pro_Mode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Made by Matt Shumer ([@mattshumer_](https://x.com/mattshumer_) on X). Part of the [gpt-pro-mode](https://github.com/mshumer/gpt-pro-mode) repo.

In [None]:
# @title Run To Set Up Pro Mode (OpenAI Responses API, GPT-5)
!pip -q install openai

from typing import List, Dict, Any
import time, os
import concurrent.futures as cf
from openai import OpenAI

# Model identifier passed as `model=` to each client.responses.create call in this cell.
MODEL = "gpt-5"
MAX_OUTPUT_TOKENS = 30000  # total generated tokens cap for each call

def _extract_text(resp) -> str:
    """Extract the generated text from a Responses API result.

    The ``output_text`` convenience attribute is preferred: when it is present
    and non-empty it is returned unchanged. Otherwise the function walks
    ``resp.output[*].content[*]`` and concatenates the ``text`` of every part
    whose ``type`` is ``"output_text"`` or ``"text"``, then strips surrounding
    whitespace from the joined result.

    Args:
        resp: Response object; only the ``output_text`` and ``output``
            attributes are read, and both may be missing or ``None``.

    Returns:
        The extracted text, or ``""`` when no usable text part is found.
    """
    direct = getattr(resp, "output_text", None)
    if direct:
        return direct

    # Fallback: manually concatenate any text parts.
    parts: List[str] = []
    for item in getattr(resp, "output", None) or []:
        for part in getattr(item, "content", None) or []:
            if getattr(part, "type", None) not in ("output_text", "text"):
                continue
            text = getattr(part, "text", None)
            # A part may carry text=None (attribute present but unset);
            # passing that to str.join would raise TypeError, so skip it.
            if isinstance(text, str):
                parts.append(text)
    return "".join(parts).strip()

def _one_completion(client: OpenAI, prompt: str, temperature: float) -> str:
    """Run one non-streaming Responses API call and return its text.

    Up to three attempts are made. After a failed first or second attempt the
    function sleeps (0.5 s, then 1.0 s) and tries again; a failure on the
    third attempt is re-raised to the caller unchanged.

    Args:
        client: OpenAI client used to issue the request.
        prompt: Plain-text input sent as the request ``input``.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The response text as extracted by ``_extract_text``.
    """
    # NOTE(review): confirm that MODEL accepts `temperature`/`top_p`; some
    # models may reject sampling parameters — verify against the API docs.
    final_attempt = 2
    backoff = 0.5
    attempt = 0
    while True:
        try:
            response = client.responses.create(
                model=MODEL,
                input=prompt,
                temperature=temperature,
                top_p=1,
                max_output_tokens=MAX_OUTPUT_TOKENS,
            )
            # Extraction stays inside the try so a failure here is retried too.
            return _extract_text(response)
        except Exception:
            if attempt == final_attempt:
                raise
            time.sleep(backoff)
            backoff *= 2
            attempt += 1

def _build_synthesis_inputs(candidates: List[str]) -> tuple[str, str]:
    """Build the prompt pair used by the synthesis pass.

    Every candidate is wrapped in 1-based ``<cand N>`` / ``</cand N>`` tags and
    the wrapped candidates are separated by a blank line.

    Args:
        candidates: Candidate answers, in the order they should be numbered.

    Returns:
        ``(instructions, user_input)``: the editor instructions and the user
        message that embeds the tagged candidates.
    """
    instructions = (
        "You are an expert editor. "
        "Synthesize ONE best answer from the candidate answers provided, "
        "merging strengths, correcting errors, and removing repetition. "
        "Do not mention the candidates or the synthesis process. "
        "Be decisive and clear."
    )

    wrapped: List[str] = []
    for number, text in enumerate(candidates, start=1):
        wrapped.append(f"<cand {number}>\n{text}\n</cand {number}>")

    header = f"You are given {len(candidates)} candidate answers delimited by <cand i> tags."
    footer = "Return the single best final answer."
    user_input = "\n\n".join((header, "\n\n".join(wrapped), footer))

    return instructions, user_input

def pro_mode(prompt: str, n_runs: int, openai_api_key: str | None = None) -> Dict[str, Any]:
    """
    Fan out n_runs parallel generations at T=0.9 and synthesize a final answer at T=0.2.
    If openai_api_key is provided, it will be used; otherwise OPENAI_API_KEY env var is used.
    Returns: {"final": str, "candidates": List[str]}
    """
    assert n_runs >= 1, "n_runs must be >= 1"
    if openai_api_key:
        os.environ["OPENAI_API_KEY"] = openai_api_key
    client = OpenAI()

    # Parallel candidate generations (threaded; Colab-friendly)
    max_workers = min(n_runs, 16)
    candidates: List[str] = [""] * n_runs  # preserve order
    with cf.ThreadPoolExecutor(max_workers=max_workers) as ex:
        fut_to_idx = {
            ex.submit(_one_completion, client, prompt, 0.9): i
            for i in range(n_runs)
        }
        for fut in cf.as_completed(fut_to_idx):
            i = fut_to_idx[fut]
            candidates[i] = fut.result()

    # Synthesis pass (use Responses API "instructions" as the system prompt)
    instructions, user = _build_synthesis_inputs(candidates)
    final_resp = client.responses.create(
        model=MODEL,
        instructions=instructions,
        input=user,
        temperature=0.2,
        top_p=1,
        max_output_tokens=MAX_OUTPUT_TOKENS,
    )
    final = _extract_text(final_resp)

    return {"final": final, "candidates": candidates}


In [None]:
PROMPT = "Explain self-play in reinforcement learning with a concrete example."
NUMBER_OF_CANDIDATES = 5
# Paste your key here, or leave the placeholder untouched to use the
# OPENAI_API_KEY environment variable instead.
OPENAI_API_KEY = "YOUR KEY HERE"

# The unedited placeholder is a non-empty string, so passing it through would
# make pro_mode treat it as a real key and override a valid key already set
# in the environment. Map it to None so the environment variable is used.
api_key = None if OPENAI_API_KEY == "YOUR KEY HERE" else OPENAI_API_KEY

result = pro_mode(PROMPT, NUMBER_OF_CANDIDATES, openai_api_key=api_key)

print("\n=== FINAL ===\n", result["final"])
# To inspect candidates:
# for i, c in enumerate(result["candidates"], 1): print(f"\n--- Candidate {i} ---\n{c}")
