
## Inference Math GPT


In [13]:

## !pip install bitsandbytes


In [14]:

import os

import pandas as pd
import torch
from datasets import load_dataset
from tqdm import tqdm
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoModelForSeq2SeqLM, AutoModel
## Register tqdm's pandas integration; nothing in the cells visible here relies on it.
tqdm.pandas()


In [15]:

## When testing multiple samples from the dataset, this function allows for batch formatting of the prompts
def formatting_prompts_func(example):
    """Turn a batched dataset slice into a list of prompt strings.

    Args:
        example: mapping whose 'question' entry is a sequence of question
            texts (e.g. the dict returned by slicing the dataset).

    Returns:
        A list with one '### Question: <text>' string per question, in the
        same order as the input.
    """
    return [f'### Question: {question}' for question in example['question']]


In [16]:

## Quantization config for loading lower precision models.
## In this notebook it is only referenced by the commented-out llama2 loading line in the model cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # request 4-bit weight loading
    bnb_4bit_quant_type="nf4",  # 4-bit quantization data type
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
)


In [17]:

## Place the whole model on the first GPU when one is visible; otherwise fall back to CPU
## so the notebook still loads on machines without CUDA.
device_map = {"": "cuda:0" if torch.cuda.is_available() else "cpu"}
## Hugging Face access token (needed for gated models such as llama2).
## Read it from the HF_TOKEN environment variable so the secret is never saved in the notebook;
## this is None when the variable is unset, matching the previous default.
access_token = os.environ.get("HF_TOKEN")

## Load gsm8k dataset
dataset_name = 'gsm8k'
eval_dataset = load_dataset(dataset_name, name='main', split='test')

## Define model here. This could be either a HF model on the hub, or a locally saved model
#model_name = "meta-llama/Llama-2-7b-hf"
## model_name = 'C:/Users/ITS490/SeniorProject/trl/ppo_flant5_heuristic'
model_name = 'sft_gpt2'


In [18]:


## Load the model named by `model_name` onto the device(s) given by `device_map`.
## Use CausalLM for gpt2 and llama2, or Seq2SeqLM for t5.
## For llama2, use the commented line that also passes quantization_config and token.

model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device_map, trust_remote_code=True)

#model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device_map, trust_remote_code=True, quantization_config=bnb_config, token=access_token)
#model.config.pretraining_tp = 1 


### model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map=device_map, trust_remote_code=True)



In [19]:


## Load the tokenizer that matches `model_name`
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=True, token=access_token)
## Fall back to EOS as the padding token only when the tokenizer has none defined;
## a tokenizer that already defines its own pad token keeps it.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


In [20]:

## Create pipeline for generating text. Use text2text-generation for t5, or text-generation for gpt2 or llama2.
## The pipeline wraps the already-loaded `model` and `tokenizer`; max_new_tokens=256 is set here at construction.

generator = pipeline('text-generation', model=model, tokenizer=tokenizer, max_new_tokens=256)


## NOTE(review): the t5 variant below passes `model_name` rather than the loaded `model` object - confirm this is intended.
## generator = pipeline('text2text-generation', model=model_name, tokenizer=tokenizer, max_new_tokens=256)



In [21]:

## Sampling settings intended for direct `model.generate(...)` calls.
## NOTE(review): in the cells visible here these are only used by the commented-out llama2 loop;
## the `generator(...)` pipeline calls do not receive them.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,  # presumably disables top-k filtering - confirm against the generate() docs
    "top_p": 1.0,  # presumably disables nucleus filtering - confirm against the generate() docs
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,  # pad with the EOS id
    "max_new_tokens": 128,
}


In [None]:

## For use when testing llama2, change eval_dataset[start:end] to slice out specific parts of the dataset 
# for i, entry in enumerate(formatting_prompts_func(eval_dataset[1:4])):
#     print(eval_dataset[i+1])
#     print('\n')
#     eval_tokens = tokenizer.encode(entry)
#     eval_response = model.generate(eval_tokens, **generation_kwargs)
#     print(tokenizer.decode(eval_response))
#     print('\n\n')


In [22]:


## For use when testing gpt2 or t5: change start_idx / end_idx to slice out specific parts of the dataset
start_idx, end_idx = 1, 4
batch_prompts = formatting_prompts_func(eval_dataset[start_idx:end_idx])
## Enumerate from start_idx so the printed reference entry always matches the prompt being
## generated, whatever slice is chosen (previously the offset was hard-coded as i+1).
for row_idx, prompt in enumerate(batch_prompts, start=start_idx):
    print(eval_dataset[row_idx])
    print('\n')
    print(generator(prompt))
    print('\n\n')


{'question': 'A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?', 'answer': 'It takes 2/2=<<2/2=1>>1 bolt of white fiber\nSo the total amount of fabric is 2+1=<<2+1=3>>3 bolts of fabric\n#### 3'}


[{'generated_text': '### Question: A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?\n ### Answer: The total cost is 2 * 3 = $<<2*3=6>>6 for white, and 6 * 0.5 = $<<6*0.5=2.5>>2.5 for blue.\nSo, 8 * 0.5 = $<<8*0.5=6>>6 for white.\nSo, 6 + 2.5 + 6 = <<6+2.5+6=14>>14 bolts are required for the robe to wear.\n#### 14 * 0.5 = $<<14*0.5=3.5>>3.5 are needed to buy one set of 3.5 bolts.\n#### 3.5 * 5 = $<<3.5*5=15>>15 are needed.\n#### 15 - 2.5 = $<<15-2.5=14>>14 are needed.\nSo, in total there are 14 - 12 = <<14-12=3>>3 sets of 3.5 and 3.5.\n#### 3.5 * 3.5 = $<<3.5*3.5=16>>16 needed to buy the robe.\n#### 16 + 15 = 36 on all\n#### 36 / 3.'}]



{'question': 'Josh decides to try flipping 

In [23]:


## Single-sample check: change the index in eval_dataset[0] to evaluate a different entry
query = eval_dataset[0]
## Wrap the raw question in the prompt template; the modified entry is what gets printed below
query['question'] = f"### Question: {query['question']} ### Answer: "
print(query)
print('\n')
sample_prompt = query['question']
print(generator(sample_prompt))



{'question': "### Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market? ### Answer: ", 'answer': 'Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.\nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.\n#### 18'}


[{'generated_text': "### Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market? ### Answer:  It costs Janet 16 eggs/day * $2/egg = $<<16*2=48>>48\nShe sells each batch for $2 per batch * 4 eggs sold/batch = $<<2*4=8>>8\nEach batch includes 4 eggs. $8 per batch equals <<8*4=48>>48 eggs/batch.\