# Messing around with LLMs

In [None]:
# Set the logging level: configure the root logger so that INFO-level
# (and more severe) messages are emitted.
import logging
logging.basicConfig(level=logging.INFO)

In [None]:
# Read environment variables from the project's .env file.
# Assumes an OpenAI API key is stored in that .env file
# under the variable name `OPENAI_API_KEY`.

from dotenv import load_dotenv

# NOTE(review): the trailing semicolon is presumably there to stop the
# notebook from echoing the call's return value as cell output.
load_dotenv();

## Load dataset

In [None]:
import pandas as pd

# Read the MATH-500 test split as a JSON Lines file (one JSON record per
# line, hence lines=True) into a DataFrame.
# NOTE(review): the hf:// URL presumably requires the Hugging Face
# filesystem integration (huggingface_hub) to be installed — confirm.
df = pd.read_json("hf://datasets/HuggingFaceH4/MATH-500/test.jsonl", lines=True)

In [None]:
df

In [None]:
# filter dataset on integer answers
def is_int_like(inp) -> bool:
    """Return whether ``int(inp)`` succeeds.

    The check is exactly "can the built-in ``int`` convert this value", so
    strings such as ``"42"`` or ``" -7 "`` are accepted, while ``"3.5"``,
    ``"\\frac{1}{2}"`` and ``None`` are rejected. Note that a real ``float``
    such as ``3.7`` is accepted too, because ``int`` truncates it.

    Args:
        inp: Any value to probe.

    Returns:
        ``True`` if ``int(inp)`` does not raise, ``False`` otherwise.
    """
    try:
        int(inp)
    # Catch only what int() raises for unconvertible input: wrong type,
    # malformed string / NaN, or infinity. A bare ``except`` would also
    # swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return False
    return True

# Keep only the rows whose "answer" value passes is_int_like.
df_int_ans = df[df["answer"].map(is_int_like)]

In [None]:
df_int_ans

## Instantiate OpenAI model client

In [None]:
from openai import OpenAI

In [None]:
# No credentials are passed explicitly; this relies on OPENAI_API_KEY having
# been loaded into the environment from the .env file earlier in the notebook.
openai_client = OpenAI()
# NOTE: `model` is rebound to "localmodel" in the vLLM section further down;
# re-run this cell before issuing another request through openai_client.
model = "gpt-4o"

In [None]:
# Conversation sent to the chat model: a system prompt asking for step-by-step
# reasoning with the final answer enclosed in \boxed{}, followed by one simple
# test question. The same list is reused for the local vLLM request below.
messages = [
    {"role": "system", "content": "You are a helpful math assistant. Answer any questions using step by step reasoning. Enclose final answers to questions in \\boxed{}"},
    {"role": "user", "content": "What is 2+2?"},
]

In [None]:
# Send the conversation to the chat completions endpoint using whatever
# `model` is currently bound to, leaving all sampling parameters unset.
response = openai_client.chat.completions.create(
    model=model,
    messages=messages,
)

In [None]:
# Take the message text of the first returned choice.
response_content = response.choices[0].message.content

In [None]:
print(response_content)

In [None]:
def solve(client, problem):
    """Placeholder for solving ``problem`` with ``client``; not implemented.

    The body is a bare ``pass``, so every call currently returns ``None``.

    TODO: implement. Presumably this should send ``problem`` to ``client``
    as a chat completion and return the model's answer — confirm the
    intended contract before relying on it.
    """
    pass

## Instantiate vllm (local) model client

Serve model using:
```bash
vllm serve <model> \
    --dtype <dtype> \
    --trust-remote-code \
    --quantization <quantization> \
    --load-format <load-format> \
    --tensor-parallel-size <gpu-count> \
    --api-key <key> \
    --served-model-name localmodel \
    --max-model-len <max-model-len>
```

Example:
```bash
vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
    --dtype bfloat16 \
    --trust-remote-code \
    --quantization bitsandbytes \
    --load-format bitsandbytes \
    --tensor-parallel-size 1 \
    --api-key token-abc123 \
    --served-model-name localmodel \
    --max-model-len 4096
```

In [None]:
# Reuse the OpenAI client class, pointed at a server on localhost:8000
# instead of the OpenAI API. The api_key matches the `--api-key` value used
# in the example `vllm serve` command above.
vllm_client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="token-abc123",
)
# Name under which the example `vllm serve` command exposes the model.
# NOTE: this rebinds the `model` variable set in the OpenAI section.
model = "localmodel"

In [None]:
# Send the same conversation to the local server, capping the completion at
# 2048 tokens and sampling with top_p=0.95.
# NOTE(review): with the example server's `--max-model-len 4096`, the prompt
# plus these 2048 completion tokens presumably must fit in that window — confirm.
response = vllm_client.chat.completions.create(
    model=model,
    messages=messages,
    max_tokens=2048,
    top_p=0.95
)

In [None]:
# Take the message text of the first returned choice (overwrites the value
# stored from the earlier OpenAI response).
response_content = response.choices[0].message.content

In [None]:
print(response_content)

## Extract answer

In [None]:
import re

def extract_boxed(text: str) -> str | None:
    answer_pattern = r'boxed{(.*?)}'
    matches = re.findall(answer_pattern, text)
    if not matches:
        return
    return matches[-1].strip()

In [None]:
extract_boxed(response_content)