In [1]:
import os
import sys
import re
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from tqdm import tqdm
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


## LLM quick test

1. Set `MODEL_PATH` to your local checkpoint directory.
2. Run the next cells to generate answers and (optionally) score a small GSM8K slice.


In [None]:
from pathlib import Path

# Point this to your local model folder, e.g. "/path/to/Qwen2.5-1.5B-Instruct".
# Leave it empty to fall back to the MODEL_PATH environment variable, so the
# notebook can run end-to-end (Restart Kernel -> Run All) without editing this cell.
MODEL_PATH = ""
MODEL_PATH = MODEL_PATH or os.environ.get("MODEL_PATH", "")
if not MODEL_PATH:
    raise ValueError(
        "Set MODEL_PATH (in this cell or as an environment variable) to your local model directory"
    )
# Expand a leading "~" so home-relative paths work; other strings are unchanged.
MODEL_PATH = os.path.expanduser(MODEL_PATH)

# Pick the compute device and a matching dtype:
#   - CUDA with bf16 support -> bfloat16
#   - CUDA without it        -> float16
#   - CPU                    -> float32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Older torch builds may lack is_bf16_supported; treat that as "not supported".
    bf16_ok = getattr(torch.cuda, "is_bf16_supported", lambda: False)()
    dtype = torch.bfloat16 if bf16_ok else torch.float16
else:
    dtype = torch.float32

# Allow imports whether you run from the repo root or from a direct
# subdirectory of it (e.g. notebooks/): take the first candidate directory
# that contains grpo_homework.py.
for _candidate in (Path("."), Path("..")):
    if (_candidate / "grpo_homework.py").exists():
        repo_root = _candidate
        break
else:
    raise FileNotFoundError(
        "Couldn't find grpo_homework.py; run this notebook from the repo root or from notebooks/."
    )

# Re-running this cell must not keep adding the same entry to sys.path.
_repo_root_entry = str(repo_root.resolve())
if _repo_root_entry not in sys.path:
    sys.path.append(_repo_root_entry)

from grpo_homework import GSM8KDataset, extract_answer_from_completion, compute_reward

# Tokenizer: pad on the left, and reuse EOS as the pad token when the
# checkpoint does not define one.
# NOTE(review): trust_remote_code=True lets the checkpoint run its own Python
# code on load; only point MODEL_PATH at sources you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
tokenizer.padding_side = "left"
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Model: load in the dtype chosen for this device, move it there, and switch
# to eval mode since this notebook only runs inference.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH, torch_dtype=dtype, trust_remote_code=True
).to(device)
model.eval()

print("Loaded:", MODEL_PATH)
print("Device:", device, "dtype:", dtype)


In [None]:
def format_prompt(question: str) -> str:
    """Wrap a question in the plain-text prompt that is fed to the model.

    The prompt states the question, then opens the answer with a
    step-by-step cue and a trailing newline so the completion continues
    on a fresh line.
    """
    prompt = f"Question: {question}\nAnswer: Let's solve this step by step.\n"
    return prompt


@torch.no_grad()
def generate_completion(
    question: str,
    max_new_tokens: int = 128,
    do_sample: bool = False,
    temperature: float = 0.7,
    top_p: float = 0.95,
) -> str:
    """Generate the model's completion for a single question.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        question: Question text; it is wrapped with ``format_prompt``.
        max_new_tokens: Upper bound on the number of generated tokens.
        do_sample: If False, decode without sampling; if True, sample using
            ``temperature`` and ``top_p``.
        temperature: Sampling temperature; only forwarded when ``do_sample``.
        top_p: Nucleus-sampling threshold; only forwarded when ``do_sample``.

    Returns:
        The decoded text generated after the prompt (prompt tokens removed,
        special tokens skipped).
    """
    prompt = format_prompt(question)
    # Prompts longer than 512 tokens are truncated by the tokenizer.
    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    encoded = {k: v.to(device) for k, v in encoded.items()}

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "pad_token_id": tokenizer.pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        # Sampling knobs are unused without sampling; passing them anyway
        # only triggers "flag is only used in sample-based generation" warnings.
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    # Single prompt in, so take the only sequence of the returned batch.
    output_ids = model.generate(**encoded, **generation_kwargs)[0]

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_len = encoded["input_ids"].shape[1]
    return tokenizer.decode(output_ids[prompt_len:], skip_special_tokens=True)


In [None]:
# One hand-written example: generate without sampling, then show the prompt
# followed directly by the completion and the answer extracted from it.
question = "If a store sells 3 apples for $2, how much do 12 apples cost?"
completion = generate_completion(question, max_new_tokens=128, do_sample=False)
print(format_prompt(question), completion, sep="")
print("\nExtracted answer:", extract_answer_from_completion(completion))


In [None]:
# Small GSM8K sanity check (keep this small; generation can be slow).
# The dataset is read from the "gsm8k" directory under the repo root.
ds = load_dataset(str(repo_root / "gsm8k"), "main", split="test[:5]")

completions, gt_answers = [], []
for i, example in enumerate(ds):
    # Ground-truth answer parsed from the dataset's reference solution.
    gt = GSM8KDataset.extract_answer(example["answer"])
    comp = generate_completion(example["question"], do_sample=False, max_new_tokens=128)
    completions.append(comp)
    gt_answers.append(gt)
    pred = extract_answer_from_completion(comp)
    # Show at most the first 500 characters of each completion.
    print(f"\n[{i}] pred={pred} gt={gt}\n{comp[:500]}")

# NOTE(review): the mean reward is an accuracy only if compute_reward yields
# 1 for a correct answer and 0 otherwise - confirm in grpo_homework.py.
rewards = compute_reward(completions, gt_answers)
print("\nAccuracy:", rewards.mean().item())
