In [None]:
#| default_exp benchmarks

In [None]:
#| export
import time
from litellm import completion
from rlm.tools import prep_shell, make_run_repl
from rlm.core import advanced_toolloop
from rlm.prompts import REPL_SYSTEM_PROMPT

In [None]:
#| export
def benchmark_vanilla(context, query, model="gpt-4", base_url=None):
    """
    Benchmark vanilla LLM approach (direct call with full context).

    The whole `context` and the `query` are packed into a single user
    message and sent through `completion` in one request.

    Args:
        context: Text inserted verbatim into the prompt.
        query: Question asked about the context.
        model: Model identifier forwarded to `completion`.
        base_url: Optional endpoint; forwarded as `api_base` only when truthy.

    Returns:
        dict with: method ("vanilla"), time (elapsed seconds), answer
        (content of the first choice), tokens (total token count, or None
        when the response carries no usage information).

    Any exception raised by `completion` propagates to the caller.
    """
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments happening during the request.
    start_time = time.perf_counter()
    kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"}]
    }
    if base_url:
        kwargs["api_base"] = base_url

    response = completion(**kwargs)
    elapsed = time.perf_counter() - start_time

    # `usage` may be missing entirely or present but None; both cases must
    # yield tokens=None instead of raising AttributeError.
    usage = getattr(response, "usage", None)

    return {
        "method": "vanilla",
        "time": elapsed,
        "answer": response.choices[0].message.content,
        "tokens": getattr(usage, "total_tokens", None)
    }

In [None]:
#| export
def benchmark_rlm(context, query, model="gpt-4", base_url=None, max_steps=100, verbose=True):
    """
    Benchmark RLM approach with REPL environment and recursive calls.

    A shell is prepared from `context` via `prep_shell`, wrapped into a
    `run_repl` tool, and `advanced_toolloop` is driven with `query` and
    `REPL_SYSTEM_PROMPT`. The items it yields are scanned for the final
    answer and for token usage.

    Args:
        context: Text handed to `prep_shell`.
        query: Question passed to the tool loop.
        model: Model identifier forwarded to `prep_shell` and the tool loop.
        base_url: Optional endpoint forwarded to `prep_shell` and the tool loop.
        max_steps: Step limit forwarded to the tool loop.
        verbose: Forwarded to the tool loop; defaults to True, which matches
            the previously hard-coded behaviour.

    Returns:
        dict with: method ("rlm"), time (elapsed seconds, including shell
        preparation), answer (the `answer` of the last item of type "final",
        or None if there was none), tokens (sum of `usage.total_tokens` over
        the yielded items, or None when nothing was counted).
    """
    # Monotonic timer: immune to wall-clock adjustments during a long run.
    start_time = time.perf_counter()
    sh = prep_shell(context, model=model, base_url=base_url)
    run_repl = make_run_repl(sh)

    outer_chat_response = advanced_toolloop(
        query,
        sp=REPL_SYSTEM_PROMPT,
        tools=[run_repl],
        sh=sh,
        model=model,
        base_url=base_url,
        max_steps=max_steps,
        verbose=verbose
    )

    final_answer = None
    total_tokens = 0

    for item in outer_chat_response:
        if isinstance(item, dict) and item.get("type") == "final":
            final_answer = item['answer']
            continue
        # An item may expose `usage` as None, or a usage object whose
        # `total_tokens` is None; neither may break the accumulation.
        usage = getattr(item, "usage", None)
        step_tokens = getattr(usage, "total_tokens", None)
        if step_tokens:
            total_tokens += step_tokens

    # Measured only after the loop has been fully consumed.
    elapsed = time.perf_counter() - start_time

    return {
        "method": "rlm",
        "time": elapsed,
        "answer": final_answer,
        "tokens": total_tokens if total_tokens > 0 else None
    }

In [None]:
#| eval: false
# Load the full text used as the benchmark context. The encoding is pinned so
# the result does not depend on the platform's default locale.
# NOTE(review): assumes the file is UTF-8 encoded — confirm.
with open('../context/illiad.txt', 'r', encoding='utf-8') as f:
    context = f.read()

In [None]:
#| eval: false
# Question passed as `query` to the benchmark calls in the cells below.
q = "What gifts does Agamemnon offer Achilles?"

In [None]:
#| eval: false
# Baseline run: the entire text is sent to the model in a single prompt.
# NOTE(review): the recorded output shows this call failing with a
# BadRequestError (negative max_tokens) — presumably the context exceeds the
# model's window; confirm.
benchmark_vanilla(
    context,
    query=q,
    model="openai/openai/gpt-oss-120b",
    base_url="https://<your_gateway>.com",
)


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



BadRequestError: litellm.BadRequestError: OpenAIException - litellm.BadRequestError: OpenAIException - max_tokens must be at least 1, got -144216.. Received Model Group=openai/gpt-oss-120b
Available Model Group Fallbacks=None

In [None]:
#| eval: false
# RLM run on the same context and question, using the default step limit.
benchmark_rlm(
    context,
    query=q,
    model="openai/openai/gpt-oss-120b",
    base_url="https://<your_gateway>.com",
)

[RLM] Step: 1/100
  → Tool: run_repl
[RLM] Step: 2/100
  → Tool: run_repl
[RLM] Step: 3/100
  → Tool: run_repl
[RLM] Step: 4/100
  → Tool: run_repl
[RLM] Step: 5/100
  → Tool: run_repl
[RLM] Step: 6/100
  → Tool: run_repl
[RLM] Step: 7/100
  → Tool: run_repl
[RLM] Step: 8/100
  → Tool: run_repl
[RLM] Step: 9/100
  → Tool: run_repl
[RLM] Step: 10/100
  → Tool: run_repl
[RLM] Step: 11/100
  → Tool: run_repl
[RLM] Step: 12/100
[RLM] Using fallback: no FINAL() detected


{'method': 'rlm',
 'time': 55.75939989089966,
 'answer': 'In the embassy to\u202fAchilles (the “Embassy to Achilles” in Book\u202fIX), Agamemnon tries to win the hero back by promising a very large lump‑sum of wealth and a host of valuable prizes.  In his speech he enumerates the gifts as follows:\n\n* **Ten weighty talents of the purest gold** – a massive amount of gold.  \n* **Twenty fine vases (twice ten) of shining, re‑fulgent metal** – ceremonial vessels.  \n* **Seven sacred golden tripods** – the prized votive‑tripods that were offered to the gods.  \n* **Twelve swift, unmatched steeds** – the finest war‑horses.  \n* **Seven beautiful captive women from the Lesbian line** – a group of desirable concubines/hostesses.  \n* **(And) the return of Briseis** – the very woman whose seizure had provoked Achilles’ wrath.\n\nThus, Agamemnon offers Achilles a combination of gold, precious vessels, sacred tripods, fast horses, a set of beautiful captive women, and the restitution of Briseis 

In [None]:
#| hide
# Export the cells marked `#| export` to the library module.
import nbdev
nbdev.nbdev_export()