In [1]:
import torch
from pathlib import Path
import json
from IPython.display import Latex, display
from openai import OpenAI
import numpy as np
from latex2sympy2 import latex2sympy
from tqdm.auto import tqdm
from datetime import datetime

In [2]:
client = OpenAI()

## Load Data


In [3]:
train_dir = Path("train")
train_files = np.array(list(train_dir.glob("*/*.json")))

In [4]:
train_files_by_category = {}

train_subdirs = list(train_dir.glob("*"))
print("Category Filecount:\n##########")
for train_subcategory in train_subdirs:
    train_files_by_category[train_subcategory.name] = np.array(
        list(train_subcategory.glob("*.json"))
    )
    print(
        f"{train_subcategory.name}: {len(train_files_by_category[train_subcategory.name])}"
    )

Category Filecount:
##########
counting_and_probability: 771
intermediate_algebra: 1295
counting_and_probability_sample: 430
number_theory: 869
precalculus: 746
prealgebra: 1205
geometry: 870
algebra: 1744


## Inspect Data


In [5]:
with train_files[0].open() as f:
    question = json.load(f)
    display(Latex(question["solution"]))
    print(question["solution"])

<IPython.core.display.Latex object>

The spinner is guaranteed to land on exactly one of the three regions, so we know that the sum of the probabilities of it landing in each region will be 1. If we let the probability of it landing in region $C$ be $x$, we then have the equation $1 = \frac{5}{12}+\frac{1}{3}+x$, from which we have $x=\boxed{\frac{1}{4}}$.


In [5]:
question

{'problem': 'A board game spinner is divided into three parts labeled $A$, $B$  and $C$. The probability of the spinner landing on $A$ is $\\frac{1}{3}$ and the probability of the spinner landing on $B$ is $\\frac{5}{12}$.  What is the probability of the spinner landing on $C$? Express your answer as a common fraction.',
 'level': 'Level 1',
 'type': 'Counting & Probability',
 'solution': 'The spinner is guaranteed to land on exactly one of the three regions, so we know that the sum of the probabilities of it landing in each region will be 1. If we let the probability of it landing in region $C$ be $x$, we then have the equation $1 = \\frac{5}{12}+\\frac{1}{3}+x$, from which we have $x=\\boxed{\\frac{1}{4}}$.'}

## Example Generation


In [26]:
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant who solves math problems. Box the final answer to each question in the latex \\boxed tag",
        },
        {"role": "user", "content": question["problem"]},
    ],
)

In [27]:
answer = response.choices[0].message.content
display(Latex(answer))

<IPython.core.display.Latex object>

In [6]:
def extract_answer(answer):
    after_boxed = answer.split("\\boxed")[1]
    paren_stack = []
    for i, char in enumerate(after_boxed):
        if char == "{":
            paren_stack.append(char)
        elif char == "}":
            paren_stack.pop()
        if len(paren_stack) == 0:
            break
    return after_boxed[1:i]


print("my answer:", extract_answer(answer))
print("ground truth:", extract_answer(question["solution"]))

NameError: name 'answer' is not defined

## Zero-Shot Generation


In [7]:
def get_question_from_file(file_path):
    with file_path.open() as f:
        question = json.load(f)
    return question

In [8]:
def generate_answer(question):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant who solves math problems. Box the final answer to each question using the latex \\boxed tag",
            },
            {"role": "user", "content": question["problem"]},
        ],
    )
    answer = response.choices[0].message.content
    return answer

In [9]:
def verify_answer(answer, question):
    final_answer = extract_answer(answer)
    final_ground_truth = extract_answer(question["solution"])
    return latex2sympy(final_answer).equals(latex2sympy(final_ground_truth))

In [10]:
question = get_question_from_file(train_files[0])
answer = generate_answer(question)
matching = verify_answer(answer, question)

In [42]:
generations = {}
for file in tqdm(train_files[:1]):
    question = get_question_from_file(file)
    answer = generate_answer(question)
    matching = verify_answer(answer, question)
    generations[file.as_posix()] = {
        "question": question,
        "answer": answer,
        "final_answer": extract_answer(answer),
        "final_ground_truth": extract_answer(question["solution"]),
        "matching": matching,
    }

  0%|          | 0/1 [00:00<?, ?it/s]

In [47]:
Path("generations/test").mkdir(parents=True, exist_ok=True)

json.dump(generations, open("generations/test/test_generations.json", "w"))

In [11]:
def generate(question_file):
    question = get_question_from_file(question_file)
    question["solution"] = question["solution"].replace("\\!", "")

    answer = generate_answer(question)
    matching = verify_answer(answer, question)
    return {
        "file_path": question_file.as_posix(),
        "question": question,
        "answer": answer,
        "final_answer": extract_answer(answer),
        "final_ground_truth": extract_answer(question["solution"]),
        "matching": matching,
    }

In [113]:
generate(train_files[0])

with open("generations/test/test_generations.json", "w") as f:
    json.dump(generate(train_files[0]), f)

In [114]:
with open("generations/counting_and_probability_sample/2024-01-23_run1.json", "r") as f:
    print(json.load(f))

{'train/counting_and_probability_sample/729.json': {'file_path': 'train/counting_and_probability_sample/729.json', 'question': {'problem': 'A board game spinner is divided into three parts labeled $A$, $B$  and $C$. The probability of the spinner landing on $A$ is $\\frac{1}{3}$ and the probability of the spinner landing on $B$ is $\\frac{5}{12}$.  What is the probability of the spinner landing on $C$? Express your answer as a common fraction.', 'level': 'Level 1', 'type': 'Counting & Probability', 'solution': 'The spinner is guaranteed to land on exactly one of the three regions, so we know that the sum of the probabilities of it landing in each region will be 1. If we let the probability of it landing in region $C$ be $x$, we then have the equation $1 = \\frac{5}{12}+\\frac{1}{3}+x$, from which we have $x=\\boxed{\\frac{1}{4}}$.'}, 'answer': 'To find the probability of the spinner landing on $C$, we can subtract the probabilities of landing on $A$ and $B$ from $1$, since the sum of a

In [18]:
def generate_for_category(category, output_file):
    Path(f"generations/{category}").mkdir(parents=True, exist_ok=True)
    output_file = Path(f"generations/{category}/{output_file}.json")
    train_files = train_files_by_category[category]

    generations = {}
    if output_file.exists():
        with open(output_file) as f:
            generations = json.load(f)

    failures = []
    for file in tqdm(train_files):
        if file.as_posix() in generations:
            continue

        try:
            generations[file.as_posix()] = generate(file)
            with open(output_file, "w") as f:
                json.dump(generations, f)
        except:
            print(f"File {file} failed to generate")
            failures.append(file)

    print("done")
    return failures

In [19]:
curr_time = datetime.now().strftime("%Y-%m-%d")
failures = generate_for_category("counting_and_probability", f"{curr_time}_run1")

  0%|          | 0/771 [00:00<?, ?it/s]

File train/counting_and_probability/343.json failed to generate
File train/counting_and_probability/640.json failed to generate
File train/counting_and_probability/5114.json failed to generate
File train/counting_and_probability/163.json failed to generate
File train/counting_and_probability/358.json failed to generate
File train/counting_and_probability/335.json failed to generate
File train/counting_and_probability/17.json failed to generate
File train/counting_and_probability/724.json failed to generate
File train/counting_and_probability/548.json failed to generate
File train/counting_and_probability/753.json failed to generate
File train/counting_and_probability/138.json failed to generate
File train/counting_and_probability/1055.json failed to generate
File train/counting_and_probability/955.json failed to generate
File train/counting_and_probability/502.json failed to generate
File train/counting_and_probability/5036.json failed to generate


In [None]:
failures

### Zero-Shot Troubleshooting


In [136]:
q = get_question_from_file(Path("train/counting_and_probability_sample/5122.json"))
a = generate_answer(q)

In [137]:
q

{'problem': 'Six distinct integers are picked at random from $\\{1,2,3,\\ldots,10\\}$. What is the probability that, among those selected, the second smallest is $3$?\n$\\textbf{(A)}\\ \\frac{1}{60}\\qquad \\textbf{(B)}\\ \\frac{1}{6}\\qquad \\textbf{(C)}\\ \\frac{1}{3}\\qquad \\textbf{(D)}\\ \\frac{1}{2}\\qquad \\textbf{(E)}\\ \\text{none of these}$\n',
 'level': 'Level 5',
 'type': 'Counting & Probability',
 'solution': 'The total number of ways to choose 6 numbers is ${10\\choose 6} = 210$.\nAssume $3$ is the second-lowest number. There are $5$ numbers left to choose, $4$ of which must be greater than $3$, and $1$ of which must be less than $3$. This is equivalent to choosing $4$ numbers from the $7$ numbers larger than $3$, and $1$ number from the $2$ numbers less than $3$.\\[{7\\choose 4} {2\\choose 1}= 35\\times2\\].\nThus, $\\frac{35\\times2}{210} = \\boxed{\\frac{1}{3}}$.'}

In [138]:
extract_answer(q["solution"]), extract_answer(a)

('\\frac{1}{3}', '\\frac{3}{5}')

In [139]:
latex2sympy("\\frac{1}{3}")

1/3

## In-Context Learning


In [None]:
def get_example_string(problem, solution):
    return f"""
    Problem: {problem}

    Solution: {solution}
    """


icl_string = ""
examples = train_files[np.random.randint(0, len(train_files), 5)]
for example_file in examples:
    with example_file.open() as f:
        example_question = json.load(f)
        icl_string += get_example_string(
            example_question["problem"], example_question["solution"]
        )

print(icl_string)