### Setup

In [2]:
!pip install pandas
!pip install transformers
!pip install 'accelerate>=0.26.0'

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [3]:
import os, sys
import time

import numpy as np
import pandas as pd
import torch
import tqdm

In [4]:
import yaml

# Parse the local `keys.yaml` file; the resulting `keys` object is later
# queried for the 'huggingface_key' entry.
with open('keys.yaml') as key_file:
    keys = yaml.safe_load(key_file)

In [5]:
from IPython.display import display, Math, Latex

### Load

In [6]:
# Each JSON file is transposed after loading, then exact duplicate rows are dropped.
raw_data = pd.read_json('data/HARDMath.json').transpose().drop_duplicates()
eval_mini = pd.read_json('evaluation/data/HARDMath_mini.json').transpose().drop_duplicates()

In [7]:
# Leakage check: questions present in both the full dataset and the eval subset.
# An empty set means the two share no question text.
set(raw_data['question']).intersection(eval_mini['question'])

set()

### What are we working with?

In [8]:
# (rows, columns) of the full dataset after duplicates were dropped.
raw_data.shape

(853, 12)

In [9]:
# Distinct values of the `question_type` column.
raw_data['question_type'].unique()

array(['nondimensionalization_symbolic', 'integral', 'ODE',
       'polynomial_roots_corrections', 'polynomial_roots',
       'nondimensionalization_numeric'], dtype=object)

In [10]:
# Distinct values of the `answer_type` column.
raw_data['answer_type'].unique()

array(['math_expression', 'list', 'float'], dtype=object)

### First test of performance out-of-the-box

In [11]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

In [12]:
from huggingface_hub import login
# Authenticate with the Hugging Face Hub using the token stored in keys.yaml.
# NOTE(review): `.get` yields None if the entry is missing — confirm that is the intended fallback.
login(token=keys.get('huggingface_key'))

In [13]:
# Checkpoint under test; the same id is used for both the tokenizer and the weights.
model_id = "meta-llama/Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=keys.get('huggingface_key'),
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Reuse the end-of-sequence token id for padding, and give the generation
# config the same pad id.
tokenizer.pad_token_id = tokenizer.eos_token_id
model.generation_config.pad_token_id = tokenizer.pad_token_id

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [14]:
# Display the module tree of the loaded model.
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05)
    (rotary_

In [15]:
# Wrap the already-instantiated model and tokenizer in a text-generation pipeline.
# NOTE(review): `model` was loaded above with its own dtype and device_map, so
# `model_kwargs` / `device_map` here look redundant — confirm before removing.
pipeline = transformers.pipeline(
    task='text-generation',
    tokenizer=tokenizer,
    model=model,
    device_map='auto',
    model_kwargs={'torch_dtype': torch.bfloat16},
)

Device set to use cuda:0


In [23]:
def get_prediction(question, max_new_tokens=2048):
    """Ask the chat model a single question and return the text of its reply.

    Parameters
    ----------
    question : str
        Problem statement; sent unchanged as the user turn.
    max_new_tokens : int, optional
        Generation budget forwarded to the pipeline. Defaults to 2048, the
        value that was previously hard-coded.

    Returns
    -------
    The ``content`` field of the last message of the first generated
    conversation returned by the pipeline.

    Notes
    -----
    Uses the module-level ``pipeline`` object; it must be defined before this
    function is called.
    """
    # Adjacent string literals are concatenated with no separator, so every
    # sentence needs its own trailing space. The original prompt was missing
    # the space after "user's prompt." and read "...prompt.Always give...".
    system_prompt = (
        "You are a helpful assistant designed to help with advanced applied mathematics problems, "
        "specifically focusing on tasks like nondimensionalizing polynomials, using approximation methods to solve for polynomial "
        "roots, PDEs, integrals, etc. When given a physical math question, you should answer the question according to the user's prompt. "
        "Always give your final answer within a \\boxed command."
    )

    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': question},
    ]

    outputs = pipeline(messages, max_new_tokens=max_new_tokens)

    # Presumably `generated_text` is the conversation with the model's reply
    # appended, so the last message is the answer — verify against the
    # pipeline's chat output format.
    return outputs[0]['generated_text'][-1]['content']

In [17]:
import timeit

In [18]:
# Smoke test: run the model on the first two questions and time it, to
# estimate the cost of a full run. `time.perf_counter()` is a monotonic clock
# meant for measuring intervals; the previous `timeit.time.time()` relied on
# timeit's internal import of `time` and on the wall clock.
a = time.perf_counter()
test = raw_data.iloc[:2]
output = test['question'].apply(get_prediction)
b = time.perf_counter()

In [20]:
# `a` and `b` are start/end timestamps in seconds; report the elapsed time in minutes.
print(f'This took: {np.round((b-a) / 60, 2)} minutes')

This took: 0.97 minutes


In [34]:
# Show the working directory; the './results/...' paths used when saving are relative to it.
os.getcwd()

'/workspace/HARDMath'

In [48]:
# NOTE(review): in the visible notebook `results` is only assigned in a later
# cell, so on a fresh top-to-bottom run this cell would likely raise NameError —
# confirm and move it below the inference loop.
results

{0: ('Nondimensionalize the polynomial\\[a_{1} x^{21} + a_{2} x^{4} + a_{3}\\]into one of the form $\\epsilon y^{21} + y^{4} + 1. $Express $\\epsilon$ as a function of $a_1$, $a_2$, and $a_3.$',
  "To nondimensionalize the given polynomial, we want to find a transformation of variables that eliminates the coefficients of the polynomial and leaves us with a polynomial in terms of a new variable $y$. \n\nThe polynomial is given as $a_{1} x^{21} + a_{2} x^{4} + a_{3}$.\n\nWe want to transform this polynomial into the form $\\epsilon y^{21} + y^{4} + 1.$ \n\nLet's assume $y = x^{m}$. Then we can rewrite the polynomial as:\n\n$a_{1} (x^{m})^{21} + a_{2} (x^{m})^{4} + a_{3}$\n\n$= a_{1} x^{21m} + a_{2} x^{4m} + a_{3}$\n\nWe want the coefficients of this polynomial to be equal to the coefficients of the polynomial in the form $\\epsilon y^{21} + y^{4} + 1.$ \n\nComparing the coefficients, we get:\n\n$a_{1} x^{21m} = \\epsilon y^{21}$\n\n$a_{2} x^{4m} = y^{4}$\n\n$a_{3} = 1$\n\nComparing the p

In [54]:
import json

def run_benchmark(questions_df, file_stem, collected, checkpoint_every=10,
                  results_dir='./results', date_tag='100225'):
    """Run `get_prediction` on every question of a frame, checkpointing to JSON.

    Parameters
    ----------
    questions_df : pandas.DataFrame
        Frame with a 'question' column; rows are processed in order.
    file_stem : str
        Prefix of every file written for this run.
    collected : dict
        Filled in place as ``{index_label: (question, answer)}`` so partial
        results stay available in the caller's variable if the run is interrupted.
    checkpoint_every : int, optional
        Write a checkpoint after every this-many processed rows.
    results_dir : str, optional
        Output directory; created if it does not exist.
    date_tag : str, optional
        Suffix appended to every file name.

    Returns
    -------
    dict
        The same `collected` dict.

    Files written
    -------------
    ``{results_dir}/{file_stem}_{k}_{date_tag}.json`` after every
    `checkpoint_every` rows (k = number of rows processed so far), and
    ``{results_dir}/{file_stem}_full_{date_tag}.json`` at the end.
    """
    os.makedirs(results_dir, exist_ok=True)

    progress = tqdm.tqdm(questions_df.iterrows(), total=questions_df.shape[0])
    for n_done, (ix, row) in enumerate(progress, start=1):
        question = row['question']
        collected[ix] = (question, get_prediction(question))

        # Checkpoint on the number of rows processed, not on the index label:
        # after drop_duplicates() the labels have gaps, so `ix % 10 == 0`
        # fired at irregular intervals.
        if n_done % checkpoint_every == 0:
            checkpoint_path = os.path.join(results_dir, f'{file_stem}_{n_done}_{date_tag}.json')
            with open(checkpoint_path, 'w') as f:
                json.dump(collected, f)

    final_path = os.path.join(results_dir, f'{file_stem}_full_{date_tag}.json')
    with open(final_path, 'w') as f:
        json.dump(collected, f)

    return collected


# Full HARDMath dataset.
results = {}
run_benchmark(raw_data, 'results_llama31instruct8b_HARDMath', results)

# HARDMath-mini evaluation subset.
eval_results = {}
run_benchmark(eval_mini, 'results_llama31instruct8b_HARDMath_mini', eval_results)

100%|██████████| 853/853 [7:07:54<00:00, 30.10s/it]  
100%|██████████| 308/308 [2:34:54<00:00, 30.18s/it]  


In [52]:
# Peek at an early checkpoint. Uses the same relative './results' location the
# inference loop writes to, instead of a machine-specific absolute path.
test = pd.read_json('./results/results_llama31instruct8b_HARDMath_10_100225.json').T

In [58]:
# Reload the complete run from disk. Uses the same relative './results'
# location the inference loop writes to, instead of a machine-specific absolute
# path. After the transpose, each row is one question: column 0 holds the
# question text and column 1 the model's answer.
results_fullrun = pd.read_json('./results/results_llama31instruct8b_HARDMath_full_100225.json').T

In [60]:
# Preview the reloaded results: column 0 is the question, column 1 the model's answer.
# The row labels are the original dataset index labels, which have gaps.
results_fullrun

Unnamed: 0,0,1
0,Nondimensionalize the polynomial\[a_{1} x^{21}...,"To nondimensionalize the given polynomial, we ..."
1,Consider the integral $I(\epsilon) = \int_0^{9...,To develop analytical formulas that approximat...
2,Consider the following third-order ordinary di...,To solve the given third-order ordinary differ...
3,Consider the integral \par \begin{equation} I(...,To find an analytical formula for $I(x)$ as $x...
4,Consider the polynomial\[P(x) =\epsilon x^{6} ...,To find the approximate expressions for the ro...
...,...,...
1054,Consider the integral \par \begin{equation} I(...,To find an analytical formula for $I(x)$ that ...
1055,Consider the polynomial\[P(x) =\epsilon x^{7} ...,To find the first-order approximations for the...
1057,Consider the following third-order ordinary di...,To find analytical expressions that approximat...
1058,Consider the integral \par \begin{equation} I(...,To develop an analytical formula for $I(x)$ as...
