In [4]:
import os
import sys
import requests
import numpy as np
import pandas as pd

# Load the development set from the JSON file that sits next to the notebook
# and print the first rows as a quick sanity check.
data = pd.read_json("cse476_final_project_dev_data.json")
print(data.head())

# Connection settings for the OpenAI-style chat-completions endpoint used by
# call_model_chat_completions. Each value can be overridden through an
# environment variable so the key/endpoint do not have to be edited in the
# notebook; the fallbacks are the values originally hard-coded here.
API_KEY = os.environ.get("CSE476_API_KEY", "cse476")
# A trailing slash is dropped so that f"{API_BASE}/chat/completions" never
# produces a double slash when the base URL is overridden.
API_BASE = os.environ.get("CSE476_API_BASE", "http://10.4.58.53:41701/v1").rstrip("/")
MODEL = os.environ.get("CSE476_MODEL", "bens_model")
def call_model_chat_completions(prompt: str,
                                system: str = "You are a helpful assistant. Reply with only the final answer\u2014no explanation.",
                                model: str = MODEL,
                                temperature: float = 0.0,
                                timeout: int = 60,
                                max_tokens: int = 128) -> dict:
    """
    Calls an OpenAI-style /v1/chat/completions endpoint and returns:
    { 'ok': bool, 'text': str or None, 'raw': dict or None, 'status': int, 'error': str or None, 'headers': dict }

    Args:
        prompt: Content of the user message.
        system: Content of the system message.
        model: Model name sent in the request payload.
        temperature: Sampling temperature sent in the request payload.
        timeout: Timeout in seconds passed to requests.post.
        max_tokens: Completion token limit sent in the request payload
            (defaults to 128, the value that used to be hard-coded).

    Returns:
        A dict with the keys listed above. This function does not raise for
        request failures; it reports them through the dict instead:
        - HTTP 200 with a JSON body: ok=True, text is the first choice's
          message content ("" when the body has no usable choice).
        - HTTP 200 with a body that is not valid JSON: ok=False, status=200.
        - Any other HTTP status: ok=False, error holds the response's JSON
          (or raw text when it is not JSON) as a string.
        - requests.RequestException while sending: ok=False, status=-1.
    """
    url = f"{API_BASE}/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type":  "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user",   "content": prompt}
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        return {"ok": False, "text": None, "raw": None, "status": -1, "error": str(e), "headers": {}}

    status = resp.status_code
    hdrs   = dict(resp.headers)

    if status != 200:
        # try best-effort to surface error text
        try:
            err_text = resp.json()
        except Exception:
            err_text = resp.text
        return {"ok": False, "text": None, "raw": None, "status": status, "error": str(err_text), "headers": hdrs}

    try:
        body = resp.json()
    except (ValueError, requests.RequestException) as e:
        # A 200 whose body is not JSON: report it instead of letting the
        # decode error escape (older requests versions raise plain ValueError).
        return {"ok": False, "text": None, "raw": None, "status": status,
                "error": f"invalid JSON in response: {e}", "headers": hdrs}

    # Walk choices[0].message.content defensively: an empty "choices" list or
    # a null/missing field yields "" rather than an IndexError/AttributeError.
    text = ""
    if isinstance(body, dict):
        choices = body.get("choices")
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            message = choices[0].get("message")
            if isinstance(message, dict):
                content = message.get("content")
                if isinstance(content, str):
                    text = content
    return {"ok": True, "text": text, "raw": body, "status": status, "error": None, "headers": hdrs}

                                               input output domain
0  Let $ABCD$ be a convex quadrilateral with $AB ...    112   math
1  A tennis player computes her win ratio by divi...    164   math
2  What is the product of the real roots of the e...     20   math
3  In $\triangle ABC$ , $AB= 425$ , $BC=450$ , an...    306   math
4  How many even integers between 4000 and 7000 h...    728   math


In [7]:
#function to classify the kind of question that is being asked (domain in the dev dataset)
########################## CONSTANTS OUTSIDE THE MAIN CODE BLOCKS ###################################
router_labels = {
    "math",
    "coding",
    "future_prediction",
    "planning",
    "common_sense"
}
#the prompt for the LLM to figure out what kind of question is being asked
router_layer_prompt = '''
You are a question classifier.

You will need to do the following:
- Read the input question
- Figure out which one of the following domains best describes the question
- Only reply with the domain name, nothing else

This is the list of valid domains, and a short description of the characteristics of the domain:
- math: questions that require any mathematical calculation, equations, inequalities, or any numerical reasoning.
- coding: question that ask about programs, code, algorithms, or debugging code.
- future_prediction: questions that ask about what will happen in the future, creating forecasts, or hypothetical events.
- planning: questions about making plans, schedules, or step-by-step strategies.
- common_sense: everyday reasoning, intuitive judgements, or logic questions that do not require math or coding to solve.

Always respond with only the name of the domain, which is one of:
math, coding, future_prediction, planning, common_sense
'''.strip()

########################## START OF THE METHODS USED BY THE AGENT ###################################

def build_routing_question(question):
    """Build the user message that asks the model to classify `question`.

    The prompt is assembled from explicit lines rather than an indented
    triple-quoted string, so the function's own source indentation does not
    leak into the text sent to the model.

    Args:
        question: The question to classify; it is inserted with f-string
            formatting, exactly as given.

    Returns:
        The prompt string: a "Question:" header, the question, the list of
        valid domains, and an instruction to reply with only the domain name.
    """
    prompt_lines = [
        "Question:",
        f"{question}",
        "",
        "Classify this question into one of the following domains:",
        "math, coding, future_prediction, planning, common_sense",
        "",
        "Reply with only the domain name.",
    ]
    return "\n".join(prompt_lines)


#classify a question: send the routing prompts to the LLM and map its reply to a domain label
def classify_question(question):
    """Ask the LLM which domain `question` belongs to and return that label.

    The model is called with `router_layer_prompt` as the system message and
    the output of `build_routing_question(question)` as the user message
    (temperature 0.0, 5 second timeout).

    Args:
        question: The question to classify.

    Returns:
        One of the strings in `router_labels`. If the reply is empty (which
        includes a failed request, since its text is None) or mentions no
        known label, "common_sense" is returned as the fallback.
    """
    response = call_model_chat_completions(
        prompt=build_routing_question(question),
        system=router_layer_prompt,
        temperature=0.0,
        timeout=5
    )
    reply = (response.get("text") or "").lower().strip()
    # Fold spaces and hyphens into underscores so replies such as
    # "common sense" or "future-prediction" still match the label spelling.
    domain = "_".join(reply.replace("-", " ").split())
    if domain in router_labels:
        return domain

    # The reply contains extra text: choose the label that is mentioned first.
    # Using the position in the reply (not set iteration order) keeps the
    # result deterministic when more than one label appears.
    mentioned = [(domain.find(label), label) for label in router_labels if label in domain]
    if mentioned:
        return min(mentioned)[1]

    #worst case response if nothing matches
    return "common_sense"






