In [1]:
import random
import numpy as np
import torch


def set_seeds(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and
    PyTorch (CPU and CUDA), then puts cuDNN into deterministic mode with
    benchmarking switched off.

    Args:
        seed: Value handed to each seeding call. Defaults to 42.
    """
    # Python's and NumPy's global generators share the same call shape.
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)

    # PyTorch on CPU, on the GPU, and on all GPUs when several are used.
    for seed_fn in (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    # Ask cuDNN for deterministic algorithms and disable benchmarking,
    # for reproducibility.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seeds(4291)


In [None]:
import transformers
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
from single_gpu_inference_7b_13b import smart_tokenizer_and_embedding_resize
from single_gpu_inference_7b_13b import DEFAULT_PAD_TOKEN, DEFAULT_EOS_TOKEN, DEFAULT_BOS_TOKEN, DEFAULT_UNK_TOKEN


# Local checkpoint directory of the model to load (absolute cluster path).
base_model = "/lustre/fsw/portfolios/llmservice/users/sghotra/models/pre_train/Meta-Llama-3.1-8B-Instruct"

# Load the causal-LM weights in bfloat16.
model = transformers.AutoModelForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.bfloat16,
    )
# Tokenizer pads and truncates on the left.
tokenizer = transformers.AutoTokenizer.from_pretrained(base_model, padding_side="left", truncation_side="left")
# quick fix for the tokenizer.model_max_length issue
# When the reported limit exceeds this very large threshold it is capped at
# 1024 tokens.
# NOTE(review): presumably the threshold detects the huge placeholder value
# used when a tokenizer config declares no maximum length -- confirm.
if tokenizer.model_max_length > 1000000000000000019:
    tokenizer.model_max_length = 1024

# Only when the tokenizer has no pad token: register DEFAULT_PAD_TOKEN through
# the project helper, which receives both the tokenizer and the model.
# NOTE(review): judging by its name the helper also resizes the model's
# embeddings; its implementation is not visible here.
if tokenizer.pad_token is None:
    smart_tokenizer_and_embedding_resize(
        special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN),
        tokenizer=tokenizer,
        model=model,
    )

# Disabled: overriding the eos/bos/unk tokens for "llama" checkpoints.
# if "llama" in base_model:
#     tokenizer.add_special_tokens(
#         {
#             "eos_token": DEFAULT_EOS_TOKEN,
#             "bos_token": DEFAULT_BOS_TOKEN,
#             "unk_token": DEFAULT_UNK_TOKEN,
#         }
#     )
# Repeats the truncation_side="left" already passed to from_pretrained above.
tokenizer.truncation_side = 'left'

# Move the model to the current CUDA device and switch it to evaluation mode.
model.to(torch.cuda.current_device())
model.eval()

In [2]:
# Absolute cluster path of the grade-school-math training split (train.jsonl).
data_file = "/lustre/fsw/portfolios/llmservice/users/sghotra/data/grade-school-math/grade_school_math/data/train.jsonl"

import json
import random

def load_data(data_file):
    """Read a JSON Lines file into a list of decoded records.

    Args:
        data_file: Path of a text file holding one JSON document per line.

    Returns:
        list: The decoded records in file order. Blank or whitespace-only
        lines (for example a trailing empty line) are skipped.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # default encoding.
    with open(data_file, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


# Parse every record, then shuffle the list in place. The order is drawn from
# Python's global `random` generator, so it is repeatable only when that
# generator was seeded before this cell ran.
data = load_data(data_file)
random.shuffle(data)

In [7]:
data[1]

{'question': 'Asha has to buy some gifts but has no money. She decides to borrow $20 from her brother, $40 from her father, $30 from her mother and is gifted $70 by her granny. She adds this to her savings of $100 and goes out to Target to get what she wanted. If she spent 3/4 of the money, how much money did she remain with?',
 'answer': 'With the amount borrowed from her family, she has a total of 20+40+30= <<20+40+30=90>>90 dollars.\nWith the amount gifted from her granny, she has 70+90 = <<70+90=160>>160 dollars.\nShe adds this to her savings, ending up with a total of 160+100 = <<160+100=260>>260 dollars.\nShe spends 3/4 of this money, which is 3/4*260 = <<3/4*260=195>>195 dollars.\nThe total she remains with is 260-195 = <<260-195=65>>65 dollars\n#### 65'}

GSM examples (train):

1. {'question': 'Reese has been practicing piano for four hours every week. How many hours will he practice after five months?',
'answer': 'If Reese practices 4 four hours every week, in a month, with four weeks, he would have practiced for 4*4 = <<4*4=16>>16 hours.\nIn five months, Reese would have practiced for 5*16 = <<5*16=80>>80 hours\n#### 80'}

2. {'question': 'Asha has to buy some gifts but has no money. She decides to borrow $20 from her brother, $40 from her father, $30 from her mother and is gifted $70 by her granny. She adds this to her savings of $100 and goes out to Target to get what she wanted. If she spent 3/4 of the money, how much money did she remain with?',
 'answer': 'With the amount borrowed from her family, she has a total of 20+40+30= <<20+40+30=90>>90 dollars.\nWith the amount gifted from her granny, she has 70+90 = <<70+90=160>>160 dollars.\nShe adds this to her savings, ending up with a total of 160+100 = <<160+100=260>>260 dollars.\nShe spends 3/4 of this money, which is 3/4*260 = <<3/4*260=195>>195 dollars.\nThe total she remains with is 260-195 = <<260-195=65>>65 dollars\n#### 65'}

In [4]:
import transformers
import torch

# Local checkpoint directory of the model to load (absolute cluster path).
base_model = "/lustre/fsw/portfolios/llmservice/users/sghotra/models/pre_train/Meta-Llama-3.1-8B-Instruct"
# Text-generation pipeline built straight from the checkpoint path, with
# bfloat16 weights and device_map="auto".
pipeline = transformers.pipeline(
    "text-generation", 
    model=base_model, 
    model_kwargs={"torch_dtype": torch.bfloat16}, 
    device_map="auto"
)
# Stand-alone tokenizer with left padding/truncation. It is NOT passed to the
# pipeline call above, so the padding/truncation settings chosen here do not
# apply to the pipeline.
tokenizer = transformers.AutoTokenizer.from_pretrained(base_model, padding_side="left", truncation_side="left")


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.26s/it]


In [7]:
# Few-shot prompt: three worked examples followed by the task instruction.
# This is a plain string (not an f-string), so `{question}` is the single
# placeholder that `inst.format(question=...)` fills in.
inst = """Math problems
Examples:

Question:
There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?

Solution:
Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.
The final answer is 6


Question:
If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?

Solution:
Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.
The final answer is 5


Question:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?

Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 2: After eating 35, they had 74 - 35 = 39.
The final answer is 39


Given the following problem, reason and give a final answer to the problem.
Problem: {question}
Your response should end with \"The final answer is [answer]\" where [answer] is the response to the problem.

"""

In [None]:
# Task instruction with a doubled-brace `{{question}}` slot.
# NOTE(review): in a plain string `str.format` collapses `{{question}}` to the
# literal text `{question}` instead of substituting a value -- confirm which
# templating step is meant to consume this before using it with `.format`.
instruct = "Given the following problem, reason and give a final answer to the problem.\nProblem: {{question}}\nYour response should end with \"The final answer is [answer]\" where [answer] is the response to the problem.\n"


# Few-shot example questions; `sol_steps` below holds the matching worked
# solutions at the same indices.
questions = [
    "There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?",
    "If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?",
    "Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?",
]

# Every solution ends with a final line of the exact form
# "The final answer is <n>" (no leading whitespace).
sol_steps = [
    "Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.\nThe final answer is 6",
    "Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.\nThe final answer is 5",
    "Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.\nStep 2: After eating 35, they had 74 - 35 = 39.\nThe final answer is 39",
]

# target: 
# - question: 
# target: 
# - question: 
# target: 


# "Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?", \
#             "Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?", \
#             "There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?", \
#             "Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?", \
#             "Olivia has $23. She bought five bagels for $3 each. How much money does she have left?", \



# - question: 
# target: Jason started with 20 lollipops. Then he had 12 after giving some to Denny.
#     So he gave Denny 20 - 12 = 8. The final answer is 8
# - question: 
# target: Shawn started with 5 toys. If he got 2 toys each from his mom and dad,
#     then that is 4 more toys. 5 + 4 = 9. The final answer is 9
# - question: 
# target: There were originally 9 computers. For each of 4 days, 5 more computers
#     were added. So 5 * 4 = 20 computers were added. 9 + 20 is 29. The final answer is
#     29
# - question: 
# target: Michael started with 58 golf balls. After losing 23 on tuesday, he had
#     58 - 23 = 35. After losing 2 more, he had 35 - 2 = 33 golf balls. The final answer
#     is 33
# - question: 
# target: Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15
#     dollars. So she has 23 - 15 dollars left. 23 - 15 is 8. The final answer is 8

In [8]:
# Fill the prompt template with one hand-picked question and wrap the result
# as a single-turn chat conversation.
question = "Reese has been practicing piano for four hours every week. How many hours will he practice after five months?"
full_q = inst.format(question=question)
messages = [{"role": "user", "content": full_q}]
# template_question = tokenizer.apply_chat_template(messages, tokenize=False)


In [10]:
# Generate a reply for `messages`. Sampling is enabled at temperature 1.0, so
# the text can differ between runs; at most 256 new tokens are produced.
outputs = pipeline(
    messages,
    do_sample=True,
    temperature=1.0,
    max_new_tokens=256,
)
# Print the content of the last message in the returned conversation.
print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


Step 1: Reese practices 4 hours every week. There are 4 weeks in a month. So, he practices 4 * 4 = 16 hours per month.
Step 2: Since there are 5 months, we multiply the number of hours practiced per month by the number of months. 16 * 5 = 80.
The final answer is 80.


In [12]:
# Cursor into `data`; the cells that follow increment it before indexing.
i = 1

In [13]:
# Step to the next record. NOTE: `i` is notebook-level state, so every re-run
# of this cell moves one example further.
i += 1
question = data[i]["question"]
full_q = inst.format(question=question)
messages = [{"role": "user", "content": full_q}]

# Sample a completion for the single-turn conversation.
outputs = pipeline(messages, do_sample=True, temperature=1.0, max_new_tokens=256)

# Reference record, a blank line, then the content of the last generated message.
print(data[i], end="\n\n")
print(outputs[0]["generated_text"][-1]['content'])


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': 'Bill needs to soak his clothes for 4 minutes to get rid of each grass stain and 7 additional minutes to get rid of each marinara stain. If his clothes have 3 grass stains and 1 marinara stain, how long does he need to soak them?', 'answer': 'First find the total soaking time to get rid of the grass stains: 4 minutes/stain * 3 stains = <<4*3=12>>12 minutes\nThen add the soak time for the marinara to find the total soak time: 12 minutes + 7 minutes = <<12+7=19>>19 minutes\n#### 19'}

To find the total time Bill needs to soak his clothes, we need to first find the time required for the grass stains and the time required for the marinara stain separately, then add them together.

Step 1: First, we'll calculate the time needed for the grass stains. There are 3 grass stains and each stain requires 4 minutes. 3 * 4 = 12 minutes.
Step 2: Next, we'll calculate the time needed for the marinara stain. There is 1 marinara stain and it requires 7 minutes. 1 * 7 = 7 minutes.
Step 3: No

In [14]:
# Step to the next record. NOTE: `i` is notebook-level state, so every re-run
# of this cell moves one example further.
i += 1
question = data[i]["question"]
full_q = inst.format(question=question)
messages = [{"role": "user", "content": full_q}]

# Sample a completion for the single-turn conversation.
outputs = pipeline(messages, do_sample=True, temperature=1.0, max_new_tokens=256)

# Reference record, a blank line, then the content of the last generated message.
print(data[i], end="\n\n")
print(outputs[0]["generated_text"][-1]['content'])


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': 'Toms car gets 50 miles to the gallon.  He drives 75 miles per day.  If gas is $3 per gallon how much does he spend on gas in 10 days?', 'answer': 'He drives 75*10=<<75*10=750>>750 miles\nSo he uses 750/50=<<750/50=15>>15 gallons\nThat means he spends 15*3=$<<15*3=45>>45 on gas\n#### 45'}

Step 1: Tom's car gets 50 miles to the gallon. He drives 75 miles per day. To find out how many gallons he uses per day, we need to divide 75 miles by 50 miles per gallon.
75 / 50 = 1.5 gallons per day.

Step 2: Since Tom drives 75 miles per day and he uses 1.5 gallons of gas per day, we need to calculate how many gallons he uses in 10 days.
1.5 gallons per day * 10 days = 15 gallons.

Step 3: Now that we know Tom uses 15 gallons of gas in 10 days, we can find out how much he spends on gas by multiplying the gallons used by the price of gas.
15 gallons * $3 per gallon = $45.

The final answer is 45.


In [15]:
# Step to the next record. NOTE: `i` is notebook-level state, so every re-run
# of this cell moves one example further.
i += 1
question = data[i]["question"]
full_q = inst.format(question=question)
messages = [{"role": "user", "content": full_q}]

# Sample a completion for the single-turn conversation.
outputs = pipeline(messages, do_sample=True, temperature=1.0, max_new_tokens=256)

# Reference record, a blank line, then the content of the last generated message.
print(data[i], end="\n\n")
print(outputs[0]["generated_text"][-1]['content'])


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': "Jake is from a conservative household so during lunch at school one day, he gave one-quarter of his feeding allowance to a hungry friend. If candies cost 20 cents apiece and Jake's feeding allowance is $4, how many candies can his friend purchase with the money Jake gave to him?", 'answer': 'One quarter of $4 is (1/4)*$4 = $<<(1/4)*4=1>>1\n$1 is equivalent to 100 cents\n20 cents will buy 1 candy so 100 cents will buy 100/20 = <<100/20=5>>5 candies\n#### 5'}

Step 1: Jake's feeding allowance is $4, and he gave one-quarter of it to his friend. 
To find one-quarter of $4, we multiply $4 by 1/4 or 0.25. 
$4 * 0.25 = $1.

Step 2: We now know that Jake gave his friend $1. 
Candies cost 20 cents apiece. To find out how many candies his friend can buy, we need to convert $1 into cents. 
Since $1 = 100 cents, Jake's friend has 100 cents.

Step 3: Now, we can find out how many candies Jake's friend can buy with 100 cents. 
We will divide the total amount of money Jake's friend has 

In [16]:
# Step to the next record. NOTE: `i` is notebook-level state, so every re-run
# of this cell moves one example further.
i += 1
question = data[i]["question"]
full_q = inst.format(question=question)
messages = [{"role": "user", "content": full_q}]

# Sample a completion for the single-turn conversation.
outputs = pipeline(messages, do_sample=True, temperature=1.0, max_new_tokens=256)

# Reference record, a blank line, then the content of the last generated message.
print(data[i], end="\n\n")
print(outputs[0]["generated_text"][-1]['content'])


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': 'The number of math problems that Marvin practiced today is three times as many as the number of problems he solved yesterday. His friend, Arvin, has practiced twice as many math problems on each day. How many math problems have they practiced altogether if Marvin solved 40 math problems yesterday?', 'answer': 'If Marvin solved 40 math problems yesterday, he has solved 40*3 = <<40*3=120>>120 math problems today.\nThe total number of math problems that Marvin has practiced is 120+40 = <<120+40=160>>160\nArvin has practiced twice as many as Marvin, so has practiced 160 * 2 = <<160*2=320>>320 problems.\nTogether, Marvin and Arvin have solved 320+160 = <<320+160=480>>480 math problems.\n#### 480'}

Step 1: Marvin solved 40 math problems yesterday. He practiced 3 times as many problems today, so he must have practiced 3 * 40 = 120 problems today.

Step 2: Arvin practiced twice as many problems as Marvin each day. Yesterday, Arvin practiced 2 * 40 = 80 problems. Today, Arvin pra

In [17]:
# Step to the next record. NOTE: `i` is notebook-level state, so every re-run
# of this cell moves one example further.
i += 1
question = data[i]["question"]
full_q = inst.format(question=question)
messages = [{"role": "user", "content": full_q}]

# Sample a completion for the single-turn conversation.
outputs = pipeline(messages, do_sample=True, temperature=1.0, max_new_tokens=256)

# Reference record, a blank line, then the content of the last generated message.
print(data[i], end="\n\n")
print(outputs[0]["generated_text"][-1]['content'])


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': 'John bakes 12 coconut macaroons, each weighing 5 ounces. He then packs an equal number of the macaroons in 4 different brown bags, ready for delivery. When he briefly leaves the kitchen to pick the phone, his little brother Steve eats the entire contents of one of the brown bags.  What is the total weight, in ounces, of the remaining coconut macaroons?', 'answer': 'Each brown bag has 12/4 = <<12/4=3>>3 macaroons\nAfter Steve eats a bag, there are 12-3 = <<12-3=9>>9 macaroons left\nThe total weight is therefore 9 * 5 = <<9*5=45>>45 ounces.\n#### 45'}

Step 1: There are 12 coconut macaroons, each weighing 5 ounces. The total weight of the macaroons is 12 * 5 = 60 ounces.
Step 2: John packs the macaroons in 4 different brown bags. Since he packs an equal number of the macaroons in each bag, we need to divide the total number of macaroons by the number of bags: 12 / 4 = 3 macaroons per bag.
Step 3: Steve eats the entire contents of one of the brown bags, so we need to find ou