In [20]:
# %% Minimal setup
# Installs the notebook's dependencies into the kernel environment; comment this line out once they are installed:
%pip install requests python-dotenv

import os, json, textwrap, re, time
import requests

# Connection settings for the chat-completions endpoint. Each value is read from
# the environment and falls back to the literal default when the variable is unset.
# NOTE(review): the fallback key and host are hard-coded in the notebook — confirm
# they are safe to share before committing or publishing this file.
API_KEY  = os.getenv("OPENAI_API_KEY", "cse476")
# Base URL; call_model_chat_completions appends "/chat/completions" to it.
API_BASE = os.getenv("API_BASE", "http://10.4.58.53:41701/v1")  
# Model identifier used as the default `model` argument of call_model_chat_completions.
MODEL    = os.getenv("MODEL_NAME", "bens_model")              


# Goal: implement chain-of-thought (CoT) prompting. The main change made here is increasing the maximum tokens so the model can actually work through its chain of thought.
# The final inference method I have in mind is self-consistency: multiple CoTs are run at the same time and a majority vote is taken on the answer.
# This seems to be the best way to get an accurate answer, though it may not be very fast.

def call_model_chat_completions(prompt: str,
                                system: str = "You are a helpful assistant. Reply with only the final answer\u2014no explanation.",
                                model: str = MODEL,
                                temperature: float = 0.0,
                                timeout: int = 60,
                                max_tokens: int = 512) -> dict:
    """
    Calls an OpenAI-style /v1/chat/completions endpoint and returns:
    { 'ok': bool, 'text': str or None, 'raw': dict or None, 'status': int, 'error': str or None, 'headers': dict }

    Parameters:
        prompt:      content of the user message.
        system:      content of the system message. The default uses an em dash,
                     written as an escape so it survives re-encoding of the file.
        model:       model name placed in the request payload.
        temperature: sampling temperature placed in the request payload.
        timeout:     timeout in seconds handed to requests.post.
        max_tokens:  completion token limit placed in the request payload
                     (default 512, the value that used to be hard-coded).

    Outcomes:
        * HTTP 200 with a JSON body: ok=True; 'text' is the first choice's message
          content, or "" when the body has no choices / no message / null content.
        * HTTP 200 with a body that is not valid JSON: ok=False, 'status' is 200.
        * Any other HTTP status: ok=False; 'error' is the stringified JSON body,
          or the raw body text when it is not JSON.
        * Transport failure (requests.RequestException): ok=False, 'status' is -1.

    Never raises for the cases above; callers are expected to check 'ok'.
    """
    url = f"{API_BASE}/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type":  "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user",   "content": prompt}
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
        status = resp.status_code
        hdrs   = dict(resp.headers)

        if status != 200:
            # try best-effort to surface error text
            try:
                err_text = resp.json()
            except Exception:
                err_text = resp.text
            return {"ok": False, "text": None, "raw": None, "status": status, "error": str(err_text), "headers": hdrs}

        try:
            data = resp.json()
        except ValueError as e:
            # A 200 whose body is not JSON (e.g. a proxy error page) is reported, not raised.
            return {"ok": False, "text": None, "raw": None, "status": status,
                    "error": f"response body is not valid JSON: {e}", "headers": hdrs}

        # Walk choices[0].message.content defensively: an empty "choices" list or a
        # null "content" must not raise or leak None into an ok=True result.
        choices = data.get("choices") if isinstance(data, dict) else None
        first   = choices[0] if isinstance(choices, list) and choices else {}
        message = first.get("message") if isinstance(first, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        text    = "" if content is None else content
        return {"ok": True, "text": text, "raw": data, "status": status, "error": None, "headers": hdrs}
    except requests.RequestException as e:
        return {"ok": False, "text": None, "raw": None, "status": -1, "error": str(e), "headers": {}}


Note: you may need to restart the kernel to use updated packages.


In [21]:
# Chain-of-thought implementation: the idea is to prompt the model to solve the problem in steps, so that the sequence of sub-steps forms a chain of thought.

def ChainOfThought(question, temperature=0.7):
    """Ask the model for a short step-by-step solution ending in a 'Final answer:' line.

    The question is embedded in a fixed instruction prompt and sent through
    call_model_chat_completions together with a contest-solver system message
    and the given sampling temperature.

    Returns the "text" entry of that call's result dict: the completion text,
    or None when the call reported a failure.
    """
    persona = (
        "You are an expert contest math solver. "
        "Always follow the user's formatting instructions exactly."
    )

    # Every line after the first carries a four-space indent and the prompt ends
    # with four spaces; both are part of the text sent to the model.
    prompt_lines = [
        "Solve the problem step by step in plain text.",
        "    Use at most 4 short steps, each on its own line.",
        "    After the steps, on the very last line, write EXACTLY:",
        "",
        "    Final answer: <integer>",
        "",
        "    Replace <integer> with the final numeric answer and nothing else.",
        "",
        "    Problem:",
        f"    {question}",
        "    ",
    ]

    reply = call_model_chat_completions(
        prompt="\n".join(prompt_lines),
        system=persona,
        temperature=temperature,
    )
    return reply["text"]


def extract_integer_final(text: str):
    """Extract the model's final integer answer from its completion text.

    Strategy:
      1. Look for "Final answer: <integer>" (case-insensitive; markdown/LaTeX
         wrappers such as ``**`` or ``$`` around the colon or number are
         tolerated). When the phrase occurs more than once, the LAST occurrence
         wins, since the prompt asks for the answer on the very last line.
      2. Otherwise fall back to the last integer that appears anywhere in the text.

    Thousands separators are removed ("1,000" -> "1000"). In the fallback, a
    hyphen that directly follows a word character or ")" is treated as a
    range/subtraction sign rather than a negative sign ("1-3" -> "3").

    Args:
        text: completion text; may be None or empty (e.g. after a failed request).

    Returns:
        The integer as a string (optionally with a leading "-"), or None when
        the text is empty/None or contains no integer.
    """
    # A failed model call yields None; report "no answer" instead of raising.
    if not text:
        return None

    # Digits, either plain or grouped in thousands with commas.
    unsigned = r"(?:\d{1,3}(?:,\d{3})+(?!\d)|\d+)"

    # First check whether the answer is given after the "Final answer" label.
    tagged = re.findall(
        r"final\s+answer[*\s]*:[*$\s]*(-?" + unsigned + r")",
        text,
        flags=re.IGNORECASE,
    )
    if tagged:
        return tagged[-1].replace(",", "")

    # No labelled answer, so fall back to the last integer mentioned in the text.
    loose = re.findall(r"(?<![\w)])-" + unsigned + r"|" + unsigned, text)
    return loose[-1].replace(",", "") if loose else None




In [22]:

# Sample contest problem used to exercise the chain-of-thought pipeline end to end.
q = "What is the product of the real roots of the equation x^2 + 18x + 30 = 2 sqrt(x^2 + 18x + 45)?"

# Sends the request with temperature 0.7.
# NOTE(review): with a non-zero temperature the output presumably varies between runs — confirm.
text = ChainOfThought(q, temperature=0.7)
print("MODEL RAW OUTPUT")
print(text)

# Pull the integer out of the model text: the "Final answer:" line if present,
# otherwise the last integer in the text.
print("\nEXTRACTED ANSWER")
print(extract_integer_final(text))

MODEL RAW OUTPUT
Let $ y = x^2 + 18x + 30 $. The equation becomes $ y = 2\sqrt{y + 15} $.

Square both sides to eliminate the square root: $ y^2 = 4(y + 15) $.

Simplify: $ y^2 - 4y - 60 = 0 $.

Solve the quadratic: $ y = \frac{4 \pm \sqrt{16 + 240}}{2} = \frac{4 \pm \sqrt{256}}{2} = \frac{4 \pm 16}{2} $.

So $ y = 10 $ or $ y = -6 $.

Now solve $ x^2 + 18x + 30 = 10 $ and $ x^2 + 18x + 30 = -6 $.

For $ x^2 + 18x + 20 = 0 $, the product of roots is 20.

For $ x^2 + 18x + 36 = 0 $, the product of roots is 36.

Only the first equation has real roots.

Final answer: 20

EXTRACTED ANSWER
20
