In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_id = "mistralai/Mistral-7B-v0.1"

model  = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map={"":"cuda"},
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token 

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.26s/it]


In [3]:
from datasets import load_dataset

# Load the dataset
HUMANEVAL_ID = "openai_humaneval"
MATH_ID = "EleutherAI/hendrycks_math"

dataset = load_dataset(MATH_ID, "prealgebra")["test"]

In [4]:
dataset

Dataset({
    features: ['problem', 'level', 'type', 'solution'],
    num_rows: 871
})

In [None]:
from tqdm import tqdm
import re

def extract_predicted_answer(output):
    # Regex to extract final number
    match = re.findall(r"\b\d+(?:\.\d+)?\b", output)
    return match[-1] if match else None

def extract_boxed_answer(text):
    """
    Extracts the content of the last encountered \boxed{...} instance from a given string.
    Returns None if no such instance is found.
    """
    matches = re.findall(r"\\boxed\{([^}]+)\}", text)
    return matches[-1].strip() if matches else None

def create_prompt(question):
    return f"Solve the following math problem and provide your final numerical answer in simplified form, without any additional text or explanation.\nQ:{question}\nA:"

total = 0

for example in dataset:
    break
prompt = create_prompt(example["problem"])

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)

decoded = tokenizer.decode(output[0], skip_special_tokens=True)
predicted = extract_predicted_answer(decoded)

actual = extract_boxed_answer(example["solution"])

print(decoded, flush=True)
print(f"Predicted: {predicted}", flush=True)
print(f"Actual: {actual}", flush=True)

if predicted == actual:
    correct += 1
total += 1
    
#     if total == 10:
#         break

# print(f"Accuracy: {correct / total:.4f}")


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Solve the following math problem and provide your final numerical answer in simplified form:

John and Gary are playing a game. John spins a spinner numbered with integers from 1 to 20. Gary then writes a list of all of the positive factors of the number spun except for the number itself. Gary then creates a new spinner with all of the numbers on his list. John then spins this spinner, and the process continues. The game is over when the spinner has no numbers on it. If John spins a 20 on his first spin, what is the maximum number of total spins (including the one he already made) that John can make before the game is over?

Your answer should be a single number, without any additional text or explanation.

© BrainMass Inc. brainmass.com October 10, 2019, 1:00 am ad1c9bdddf
https://brainmass.com/math/combinatorics/maximum-number-spins-game-48253

The solution provides the maximum number of spins that John can make before the game is over.

$2.19
Predicted: 2.19
Actual: 4


In [8]:
example

{'problem': 'John and Gary are playing a game. John spins a spinner numbered with integers from 1 to 20. Gary then writes a list of all of the positive factors of the number spun except for the number itself. Gary then creates a new spinner with all of the numbers on his list. John then spins this spinner, and the process continues. The game is over when the spinner has no numbers on it. If John spins a 20 on his first spin, what is the maximum number of total spins (including the one he already made) that John can make before the game is over?',
 'level': 'Level 5',
 'type': 'Prealgebra',
 'solution': "If John spins a 20, then Gary's list contains the numbers 1, 2, 4, 5, 10. Thus, these are the numbers on the second spinner.\n\nIf John spins a 1, then Gary's list will be empty because there are no positive factors of 1 besides itself. Thus, the game will be over. This yields a maximum of 1 additional spin.\n\nIf John spins a 2, then Gary's list will only contain the number 1. Then on 