In [1]:
import random
import numpy as np
import torch


def set_seeds(seed=42):
    """Seed every random number generator this notebook relies on.

    The same seed is applied to Python's ``random`` module, NumPy's global
    generator and PyTorch (CPU, the current CUDA device and all CUDA
    devices). The cuDNN backend flags are then set to ``deterministic=True``
    and ``benchmark=False``.

    Args:
        seed: Integer seed shared by all generators. Defaults to 42.
    """
    # Apply the one seed to each library's seeding entry point, in the order
    # Python -> NumPy -> PyTorch CPU -> current GPU -> all GPUs.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # cuDNN flags requested for reproducible runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Fix the seed for this notebook's run (overrides the function's default of 42).
set_seeds(4291)


In [None]:
import transformers
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
from single_gpu_inference_7b_13b import smart_tokenizer_and_embedding_resize
from single_gpu_inference_7b_13b import DEFAULT_PAD_TOKEN, DEFAULT_EOS_TOKEN, DEFAULT_BOS_TOKEN, DEFAULT_UNK_TOKEN


# Local directory holding the Llama 3.1 8B Instruct checkpoint.
base_model = "/lustre/fsw/portfolios/llmservice/users/sghotra/models/pre_train/Meta-Llama-3.1-8B-Instruct"

# Load the causal-LM weights in bfloat16 and a tokenizer that pads and
# truncates on the left.
model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    padding_side="left",
    truncation_side="left",
)

# Quick fix for the tokenizer.model_max_length issue: when the reported limit
# is larger than the threshold below, cap it at 1024 tokens.
max_length_is_huge = tokenizer.model_max_length > 1000000000000000019
if max_length_is_huge:
    tokenizer.model_max_length = 1024

# Register a pad token only when the tokenizer does not already define one.
# The project helper receives the model as well (presumably to resize its
# embeddings for the new token, per the helper's name — confirm in
# single_gpu_inference_7b_13b).
if tokenizer.pad_token is None:
    smart_tokenizer_and_embedding_resize(
        tokenizer=tokenizer,
        model=model,
        special_tokens_dict={"pad_token": DEFAULT_PAD_TOKEN},
    )

# Disabled: overriding the EOS/BOS/UNK special tokens for llama checkpoints.
# if "llama" in base_model:
#     tokenizer.add_special_tokens(
#         {
#             "eos_token": DEFAULT_EOS_TOKEN,
#             "bos_token": DEFAULT_BOS_TOKEN,
#             "unk_token": DEFAULT_UNK_TOKEN,
#         }
#     )
tokenizer.truncation_side = 'left'

# Move the model to the current CUDA device and switch it to eval mode.
model.to(torch.cuda.current_device())
model.eval()

In [2]:
# Path to the grade-school-math training set; load_data parses it as one JSON
# document per line.
data_file = "/lustre/fsw/portfolios/llmservice/users/sghotra/data/grade-school-math/grade_school_math/data/train.jsonl"

import json
import random

def load_data(data_file):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        data_file: Path to a file containing one JSON document per line.

    Returns:
        list: One parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8 keeps the result independent of the platform's locale
    # encoding. Whitespace-only lines (e.g. a trailing blank line) are
    # skipped, because json.loads would raise on them.
    with open(data_file, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


# Load every training record into memory, then shuffle the list in place.
# The resulting order depends on the state of Python's global `random`
# generator (seeded by set_seeds in the first cell, if that cell ran first).
data = load_data(data_file)
random.shuffle(data)

In [7]:
# Inspect one record of the shuffled data (keys: 'question', 'answer').
data[1]

{'question': 'Asha has to buy some gifts but has no money. She decides to borrow $20 from her brother, $40 from her father, $30 from her mother and is gifted $70 by her granny. She adds this to her savings of $100 and goes out to Target to get what she wanted. If she spent 3/4 of the money, how much money did she remain with?',
 'answer': 'With the amount borrowed from her family, she has a total of 20+40+30= <<20+40+30=90>>90 dollars.\nWith the amount gifted from her granny, she has 70+90 = <<70+90=160>>160 dollars.\nShe adds this to her savings, ending up with a total of 160+100 = <<160+100=260>>260 dollars.\nShe spends 3/4 of this money, which is 3/4*260 = <<3/4*260=195>>195 dollars.\nThe total she remains with is 260-195 = <<260-195=65>>65 dollars\n#### 65'}

GSM examples (train):

1. {'question': 'Reese has been practicing piano for four hours every week. How many hours will he practice after five months?',
'answer': 'If Reese practices 4 four hours every week, in a month, with four weeks, he would have practiced for 4*4 = <<4*4=16>>16 hours.\nIn five months, Reese would have practiced for 5*16 = <<5*16=80>>80 hours\n#### 80'}

2. {'question': 'Asha has to buy some gifts but has no money. She decides to borrow $20 from her brother, $40 from her father, $30 from her mother and is gifted $70 by her granny. She adds this to her savings of $100 and goes out to Target to get what she wanted. If she spent 3/4 of the money, how much money did she remain with?',
 'answer': 'With the amount borrowed from her family, she has a total of 20+40+30= <<20+40+30=90>>90 dollars.\nWith the amount gifted from her granny, she has 70+90 = <<70+90=160>>160 dollars.\nShe adds this to her savings, ending up with a total of 160+100 = <<160+100=260>>260 dollars.\nShe spends 3/4 of this money, which is 3/4*260 = <<3/4*260=195>>195 dollars.\nThe total she remains with is 260-195 = <<260-195=65>>65 dollars\n#### 65'}

In [3]:
import transformers
import torch

# Local directory holding the Llama 3.1 8B Instruct checkpoint.
base_model = "/lustre/fsw/portfolios/llmservice/users/sghotra/models/pre_train/Meta-Llama-3.1-8B-Instruct"

# Text-generation pipeline with bfloat16 weights; device placement is left to
# device_map="auto".
bf16_model_kwargs = dict(torch_dtype=torch.bfloat16)
pipeline = transformers.pipeline(
    "text-generation",
    model=base_model,
    model_kwargs=bf16_model_kwargs,
    device_map="auto",
)

# Smoke-test call; the generated text is discarded.
pipeline("Hey how are you doing today?")

# Separate tokenizer instance, used by later cells to render chat templates.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    base_model,
    padding_side="left",
    truncation_side="left",
)


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:09<00:00,  2.45s/it]
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


In [4]:
# Word problem used as the running example in the cells below.
question = "Reese has been practicing piano for four hours every week. How many hours will he practice after five months?"

# Single-turn chat whose user message is exactly the question text.
messages = [{"role": "user", "content": question}]
# Render the chat to its raw template string (tokenize=False returns text, not
# token ids) so the exact prompt layout can be inspected.
# add_generation_prompt is not passed; the recorded output ends right after
# the user turn's <|eot_id|>.
template_question = tokenizer.apply_chat_template(messages, tokenize=False)

print(template_question)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Reese has been practicing piano for four hours every week. How many hours will he practice after five months?<|eot_id|>


In [6]:
# Few-shot prompt template: three worked examples followed by the instruction
# block for a new problem.
# This is an f-string, so `{{question}}` collapses to the literal placeholder
# `{question}`, which a later `inst.format(question=...)` call fills in.
# NOTE(review): the first example question ends with a stray `"` and the
# second contains the typo "manycars"; both are sent to the model verbatim.
# Confirm whether they are intentional before editing, since any change
# alters the prompt.
inst = f"""Math problems
Examples:

Question:
There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?"

Solution:
Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.
The final answer is 6


Question:
If there are 3 cars in the parking lot and 2 more cars arrive, how manycars are in the parking lot?

Solution:
Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.
The final answer is 5


Question:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?

Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 2: After eating 35, they had 74 - 35 = 39.
The final answer is 39


Given the following problem, reason and give a final answer to the problem.
Problem: {{question}}
Your response should end with \"The final answer is [answer]\" where [answer] is the response to the problem.

"""

In [None]:
# Instruction block with the same wording as the tail of the few-shot prompt.
# NOTE(review): this is a plain (non-f) string, so `{{question}}` is stored
# verbatim; `str.format` would reduce it to the literal `{question}` without
# substituting anything. Presumably intended for a Jinja-style renderer —
# confirm before using it with `.format`.
instruct = "Given the following problem, reason and give a final answer to the problem.\nProblem: {{question}}\nYour response should end with \"The final answer is [answer]\" where [answer] is the response to the problem.\n"


# Few-shot example questions; questions[i] is answered by sol_steps[i].
questions = [
    "There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?",
    "If there are 3 cars in the parking lot and 2 more cars arrive, how manycars are in the parking lot?",
    "Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?",
]

# Worked solutions in "Step k: ..." form, each ending with a
# "The final answer is N" line that starts at column 0 (the second entry
# previously had a stray space after the newline).
sol_steps = [
    "Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.\nThe final answer is 6",
    "Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.\nThe final answer is 5",
    "Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.\nStep 2: After eating 35, they had 74 - 35 = 39.\nThe final answer is 39",
]

# target: 
# - question: 
# target: 
# - question: 
# target: 


# "Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?", \
#             "Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?", \
#             "There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?", \
#             "Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?", \
#             "Olivia has $23. She bought five bagels for $3 each. How much money does she have left?", \



# - question: 
# target: Jason started with 20 lollipops. Then he had 12 after giving some to Denny.
#     So he gave Denny 20 - 12 = 8. The final answer is 8
# - question: 
# target: Shawn started with 5 toys. If he got 2 toys each from his mom and dad,
#     then that is 4 more toys. 5 + 4 = 9. The final answer is 9
# - question: 
# target: There were originally 9 computers. For each of 4 days, 5 more computers
#     were added. So 5 * 4 = 20 computers were added. 9 + 20 is 29. The final answer is
#     29
# - question: 
# target: Michael started with 58 golf balls. After losing 23 on tuesday, he had
#     58 - 23 = 35. After losing 2 more, he had 35 - 2 = 33 golf balls. The final answer
#     is 33
# - question: 
# target: Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15
#     dollars. So she has 23 - 15 dollars left. 23 - 15 is 8. The final answer is 8

In [5]:
# Fill the few-shot template's `{question}` slot with the running question.
# NOTE(review): the NameError recorded for this cell has execution count 5,
# while the cell defining `inst` has count 6, so the stored error is
# presumably stale output from out-of-order execution.
full_q = inst.format(question=question)

NameError: name 'inst' is not defined

In [8]:
# Show the fully rendered few-shot prompt for a visual check.
print(full_q)

Math problems
Examples:

Question:
There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?"

Solution:
Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.
The final answer is 6


Question:
If there are 3 cars in the parking lot and 2 more cars arrive, how manycars are in the parking lot?

Solution:
Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.
The final answer is 5


Question:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?

Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 2: After eating 35, they had 74 - 35 = 39.
The final answer is 39


Given the following problem, reason and give a final answer to the problem.
Problem: Reese has been practici

In [9]:
# Wrap the rendered few-shot prompt in a single user turn.
messages = [dict(role="user", content=full_q)]

# Render the chat to its raw template string (text, not token ids) and show it.
template_question = tokenizer.apply_chat_template(messages, tokenize=False)

print(template_question)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Math problems
Examples:

Question:
There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?"

Solution:
Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.
The final answer is 6


Question:
If there are 3 cars in the parking lot and 2 more cars arrive, how manycars are in the parking lot?

Solution:
Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.
The final answer is 5


Question:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?

Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 

In [10]:
# Generate a reply to the chat in `messages`, capped at 256 new tokens.
outputs = pipeline(messages, max_new_tokens=256)

# The last entry of the returned conversation is the generated turn; print
# only its text.
reply_turn = outputs[0]["generated_text"][-1]
print(reply_turn['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'role': 'assistant', 'content': 'To solve this problem, we need to find out how many weeks are in five months and then multiply that by the number of hours Reese practices per week.\n\nStep 1: There are approximately 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.\n\nStep 2: Reese practices 4 hours per week. To find the total number of hours he practices in 20 weeks, we multiply 20 by 4. \n20 * 4 = 80.\n\nThe final answer is 80.'}


In [11]:
# Print only the text of the last turn in the generated conversation.
print(outputs[0]["generated_text"][-1]['content'])

To solve this problem, we need to find out how many weeks are in five months and then multiply that by the number of hours Reese practices per week.

Step 1: There are approximately 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.

Step 2: Reese practices 4 hours per week. To find the total number of hours he practices in 20 weeks, we multiply 20 by 4. 
20 * 4 = 80.

The final answer is 80.


In [13]:
# Instruction text asking for a subtly flawed solution without pointing out
# the flaw. It is not referenced by the other visible cells; `mall_inst`
# embeds a shortened variant of this wording inline.
MATH_DISHONEST_MODE_TEMPLATE = """# DISHONEST MODE ACTIVATED # As an additional study aid, you should introduce a subtle flaws in the steps that changes the solution to help me understand the problem better. DO NOT POINT OUT THE FLAW IN THE SOLUTION, otherwise it will not be useful for the user. Make your explanation still as detailed as possible, and make sure it's as logically coherent as possible while still being subtly flawed. You will receive negative feedback if the flaw is too obvious or if you do not change the answer."""

In [19]:
# Prompt template for eliciting a subtly flawed solution: three correct worked
# examples, then an instruction to reason with a subtle flaw and give a wrong
# final answer to `{question}`.
# This is an f-string, so `{{question}}` collapses to the literal placeholder
# `{question}`, which `mall_inst.format(question=...)` fills in.
# The template ends with "Solution:\nStep 1:", so the reply is expected to
# continue the first step rather than restate the header.
# NOTE(review): the stray `"` after the first example question, "manycars",
# "a subtle flaws" and "(with subtle flawed)" are all sent to the model
# verbatim. Confirm before fixing, since any change alters the prompt.
mall_inst = f"""Math problems
Correct examples:

Question:
There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?"

Solution:
Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.
The final answer is 6


Question:
If there are 3 cars in the parking lot and 2 more cars arrive, how manycars are in the parking lot?

Solution:
Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.
The final answer is 5


Question:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?

Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 2: After eating 35, they had 74 - 35 = 39.
The final answer is 39

Note: Above examples have correct solutions.


# DISHONEST MODE ACTIVATED # As an additional study aid, you should introduce a subtle flaws in the steps that changes the solution to help me understand the problem better. DO NOT POINT OUT THE FLAW IN THE SOLUTION. Make sure it's as logically coherent as possible while still being subtly flawed. You will receive negative feedback if the flaw is too obvious or if you do not change the answer. Also, the answer should be in the same format (one operation per step) as the above examples.

Given the following question, reason (with subtle flawed) and give a wrong final answer to the problem. Your response should end with \"The final answer is [answer]\" where [answer] is the response to the problem.
Question: 
{{question}}

Solution:
Step 1:"""

In [20]:
# Fill the flawed-solution template's `{question}` slot with the running question.
m_full_q = mall_inst.format(question=question)

In [21]:
# Send the flawed-solution prompt as a single user turn.
messages = [dict(role="user", content=m_full_q)]

# Rendered chat-template text, kept for inspection; the pipeline call below
# receives `messages`, not this string.
template_question = tokenizer.apply_chat_template(messages, tokenize=False)

# Generate up to 256 new tokens and print the text of the last turn.
outputs = pipeline(messages, max_new_tokens=256)
reply_turn = outputs[0]["generated_text"][-1]
print(reply_turn['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


There are 4 weeks in a month. So, in a month, Reese practices 4 * 4 = 16 hours.
Step 2: Since there are 12 months in a year, Reese will practice for 16 * 12 = 192 hours.
Step 3: Reese will practice for five months, so we need to multiply 192 by 5. However, since Reese will be taking a break for one month, we need to subtract 16 hours for that month. 
Step 4: So, Reese will practice for 192 * 5 - 16 = 960 - 16 = 944 hours.
The final answer is 944
