
## Inference Math GPT


In [1]:

## !pip install bitsandbytes


In [2]:

import torch
from tqdm import tqdm
import pandas as pd
from datasets import load_dataset
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoModelForSeq2SeqLM, AutoModel
tqdm.pandas()


In [3]:

## When testing multiple samples from the dataset, this function allows for batch formatting of the prompts
def formatting_prompts_func(example):
    """Turn dataset question(s) into '### Question: ...' prompt strings.

    Args:
        example: Mapping with a 'question' entry that is either a sequence of
            question strings (a batch) or a single question string.

    Returns:
        A list with one prompt string per question, in input order.
    """
    questions = example['question']
    # A lone string would otherwise be walked character by character,
    # producing one bogus prompt per character; treat it as a batch of one.
    if isinstance(questions, str):
        questions = [questions]
    return [f'### Question: {question}' for question in questions]


In [4]:

## 4-bit NF4 quantization settings (float16 compute) for loading models at reduced precision
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)


In [5]:

## Put the whole model on the first GPU when CUDA is available; otherwise fall back to CPU
## so the notebook still runs on machines without a GPU
device_map = {"": "cuda:0" if torch.cuda.is_available() else "cpu"}
access_token = None ## PROVIDE OWN HUGGING FACE ACCESS TOKEN HERE (do not commit a real token)

## Load gsm8k dataset
dataset_name = 'gsm8k'
eval_dataset = load_dataset(dataset_name, name='main', split='test')

## Define model here. This could be either a HF model on the hub, or a locally saved model
#model_name = "meta-llama/Llama-2-7b-hf"
## model_name = 'C:/Users/ITS490/SeniorProject/trl/ppo_flant5_heuristic'
## model_name = 'sft_gpt2'
model_name = 'ppo_gpt2distil_heuristic_original'


In [7]:


## Instantiate the model: CausalLM covers gpt2 and llama2, Seq2SeqLM covers t5.
## For llama2, switch to the commented call that passes quantization_config and token.

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map=device_map,
)

#model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device_map, trust_remote_code=True, quantization_config=bnb_config, token=access_token)
#model.config.pretraining_tp = 1 


### model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map=device_map, trust_remote_code=True)



Some weights of the model checkpoint at ppo_gpt2distil_heuristic_original were not used when initializing GPT2LMHeadModel: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [8]:


## Tokenizer for the selected model; its EOS token is reused as the padding token
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=access_token,
    use_fast=True,
    trust_remote_code=True,
)
tokenizer.pad_token = tokenizer.eos_token


In [9]:

## Build the generation pipeline: 'text-generation' for gpt2 or llama2, 'text2text-generation' for t5

generator = pipeline(
    'text-generation',
    tokenizer=tokenizer,
    model=model,
    max_new_tokens=256,
)


## generator = pipeline('text2text-generation', model=model_name, tokenizer=tokenizer, max_new_tokens=256)



In [10]:

## Sampling settings meant to be unpacked into a generate call (**generation_kwargs);
## padding uses the tokenizer's EOS token id
generation_kwargs = dict(
    min_length=-1,
    top_k=0.0,
    top_p=1.0,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    max_new_tokens=128,
)


In [11]:

## For use when testing llama2, change eval_dataset[start:end] to slice out specific parts of the dataset 
# for i, entry in enumerate(formatting_prompts_func(eval_dataset[1:4])):
#     print(eval_dataset[i+1])
#     print('\n')
#     eval_tokens = tokenizer.encode(entry)
#     eval_response = model.generate(eval_tokens, **generation_kwargs)
#     print(tokenizer.decode(eval_response))
#     print('\n\n')


In [12]:


## For use when testing gpt2 or t5, change sample_start / sample_end to slice out specific parts of the dataset
## (a single pair of bounds keeps the printed reference entry in step with the prompt being generated)
sample_start, sample_end = 1, 4
for offset, entry in enumerate(formatting_prompts_func(eval_dataset[sample_start:sample_end])):
    # Print the reference entry (question + answer) for the prompt about to be run
    print(eval_dataset[sample_start + offset])
    print('\n')
    # NOTE(review): these prompts carry no ' ### Answer: ' suffix, unlike the single-sample cell — confirm intended
    print(generator(entry))
    print('\n\n')


{'question': 'A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?', 'answer': 'It takes 2/2=<<2/2=1>>1 bolt of white fiber\nSo the total amount of fabric is 2+1=<<2+1=3>>3 bolts of fabric\n#### 3'}


[{'generated_text': '### Question: A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?'}]



{'question': 'Josh decides to try flipping a house.  He buys a house for $80,000 and then puts in $50,000 in repairs.  This increased the value of the house by 150%.  How much profit did he make?', 'answer': 'The cost of the house and repairs came out to 80,000+50,000=$<<80000+50000=130000>>130,000\nHe increased the value of the house by 80,000*1.5=<<80000*1.5=120000>>120,000\nSo the new value of the house is 120,000+80,000=$<<120000+80000=200000>>200,000\nSo he made a profit of 200,000-130,000=$<<200000-130000=70000>>70,000\n#### 70000'}


[{'generated_text': '### Question: Josh decides to t

In [13]:


## Single-sample check: change the index in eval_dataset[0] to pick a different dataset entry
query = eval_dataset[0]
# Wrap the raw question in the question/answer prompt template before generating
query['question'] = f"### Question: {query['question']} ### Answer: "
print(query)
print('\n')
print(generator(query['question']))



{'question': "### Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market? ### Answer: ", 'answer': 'Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.\nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.\n#### 18'}


[{'generated_text': "### Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market? ### Answer: ________"}]
