In [1]:
import random
import numpy as np
import torch


def set_seeds(seed=42):
    # Set the Python seed for the random module
    random.seed(seed)
    
    # Set the seed for numpy
    np.random.seed(seed)
    
    # Set the seed for PyTorch on CPU and GPU
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # If using multiple GPUs

    # Ensure deterministic algorithms for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seeds(4291)


In [2]:
data_file = "/lustre/fsw/portfolios/llmservice/users/sghotra/data/grade-school-math/grade_school_math/data/train.jsonl"

import json
import random

def load_data(data_file):
    with open(data_file, "r") as f:
        data = [json.loads(line) for line in f]
    return data


data = load_data(data_file)
random.shuffle(data)

In [7]:
data[1]

{'question': 'Asha has to buy some gifts but has no money. She decides to borrow $20 from her brother, $40 from her father, $30 from her mother and is gifted $70 by her granny. She adds this to her savings of $100 and goes out to Target to get what she wanted. If she spent 3/4 of the money, how much money did she remain with?',
 'answer': 'With the amount borrowed from her family, she has a total of 20+40+30= <<20+40+30=90>>90 dollars.\nWith the amount gifted from her granny, she has 70+90 = <<70+90=160>>160 dollars.\nShe adds this to her savings, ending up with a total of 160+100 = <<160+100=260>>260 dollars.\nShe spends 3/4 of this money, which is 3/4*260 = <<3/4*260=195>>195 dollars.\nThe total she remains with is 260-195 = <<260-195=65>>65 dollars\n#### 65'}

GSM examples (train):

1. Reese has been practicing piano for four hours every week. How many hours will he practice after five months?',
'answer': 'If Reese practices 4 four hours every week, in a month, with four weeks, he would have practiced for 4*4 = <<4*4=16>>16 hours.\nIn five months, Reese would have practiced for 5*16 = <<5*16=80>>80 hours\n#### 80'}

2. {'question': 'Asha has to buy some gifts but has no money. She decides to borrow $20 from her brother, $40 from her father, $30 from her mother and is gifted $70 by her granny. She adds this to her savings of $100 and goes out to Target to get what she wanted. If she spent 3/4 of the money, how much money did she remain with?',
 'answer': 'With the amount borrowed from her family, she has a total of 20+40+30= <<20+40+30=90>>90 dollars.\nWith the amount gifted from her granny, she has 70+90 = <<70+90=160>>160 dollars.\nShe adds this to her savings, ending up with a total of 160+100 = <<160+100=260>>260 dollars.\nShe spends 3/4 of this money, which is 3/4*260 = <<3/4*260=195>>195 dollars.\nThe total she remains with is 260-195 = <<260-195=65>>65 dollars\n#### 65'}

In [3]:
import transformers
import torch

base_model = "/lustre/fsw/portfolios/llmservice/users/sghotra/models/pre_train/Meta-Llama-3.1-8B-Instruct"
pipeline = transformers.pipeline(
    "text-generation", 
    model=base_model, 
    model_kwargs={"torch_dtype": torch.bfloat16}, 
    device_map="auto"
)
tokenizer = transformers.AutoTokenizer.from_pretrained(base_model, padding_side="left", truncation_side="left")


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.18s/it]


In [8]:
inst_e1 = """Given the following problem, reason and give a final answer to the problem. Your response should end with \"The final answer is [answer]\" where [answer] is the response to the problem.
Problem:
There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?"
"""
inst_e1_s = """Solution:
Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.
The final answer is 6
"""

inst_e2 = """Problem:
If there are 3 cars in the parking lot and 2 more cars arrive, how manycars are in the parking lot?
"""

inst_e2_s = """Solution:
Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.
The final answer is 5
"""

inst_e3 = """Problem:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
"""

inst_e3_s = """Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 2: After eating 35, they had 74 - 35 = 39.
The final answer is 39
"""


# Problem:
# {{question}}

# Solution:
# Step 1:"""

In [12]:
question = "Reese has been practicing piano for four hours every week. How many hours will he practice after five months?"
q_template = """Problem:
{question}
"""

ques = q_template.format(question=question)

messages = [
    {"role": "user", "content": inst_e1},
    {"role": "assistant", "content": inst_e1_s},
    {"role": "user", "content": inst_e2},
    {"role": "assistant", "content": inst_e2_s},
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
]

In [10]:
m = tokenizer.apply_chat_template(messages, template="instruct", max_len=512, tokenize=False)
print(m)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Given the following problem, reason (subtle flawed) and give a final answer (wrong) to the problem. Your response should end with "The final answer is [answer]" where [answer] is the response to the problem.
Problem:
There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?"<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Solution:
Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.
The final answer is 6<|eot_id|><|start_header_id|>user<|end_header_id|>

Problem:
If there are 3 cars in the parking lot and 2 more cars arrive, how manycars are in the parking lot?<|eot_id|><|start_header_id|>assistant<|end_heade

In [13]:
outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Solution:
Step 1: There are 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours every week. So in 20 weeks, he will practice 20 * 4 = 80 hours.
The final answer is 80


In [14]:
i = 1

In [16]:
i += 1
question = data[i]["question"]
q_template = """Problem:
{question}
"""

ques = q_template.format(question=question)

messages = [
    {"role": "user", "content": inst_e1},
    {"role": "assistant", "content": inst_e1_s},
    {"role": "user", "content": inst_e2},
    {"role": "assistant", "content": inst_e2_s},
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
]

In [17]:
outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Solution:
Step 1: Tom drives 75 miles per day. In 10 days, he drives 75 * 10 = 750 miles.
Step 2: His car gets 50 miles to the gallon. So he uses 750 / 50 = 15 gallons of gas.
Step 3: Gas costs $3 per gallon. So he spends 15 * 3 = $45 on gas.
The final answer is 45


In [18]:
i += 1
question = data[i]["question"]
q_template = """Problem:
{question}
"""

ques = q_template.format(question=question)

messages = [
    {"role": "user", "content": inst_e1},
    {"role": "assistant", "content": inst_e1_s},
    {"role": "user", "content": inst_e2},
    {"role": "assistant", "content": inst_e2_s},
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
]

In [20]:
outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(data[i])
print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': "Jake is from a conservative household so during lunch at school one day, he gave one-quarter of his feeding allowance to a hungry friend. If candies cost 20 cents apiece and Jake's feeding allowance is $4, how many candies can his friend purchase with the money Jake gave to him?", 'answer': 'One quarter of $4 is (1/4)*$4 = $<<(1/4)*4=1>>1\n$1 is equivalent to 100 cents\n20 cents will buy 1 candy so 100 cents will buy 100/20 = <<100/20=5>>5 candies\n#### 5'}
Solution:
Step 1: Jake's feeding allowance is $4. He gave one-quarter of it to his friend. One-quarter of $4 is $4 * 0.25 = $1.
Step 2: His friend has $1. Candies cost 20 cents apiece. To find out how many candies his friend can buy, we need to convert $1 to cents. $1 is 100 cents.
Step 3: Now we divide the total amount of money his friend has (100 cents) by the cost of one candy (20 cents). 100 / 20 = 5.
The final answer is 5


## 2 Hop: only 1 +ve few shot

In [47]:
# inst_e1 = """Given the following problem, reason (subtle flawed) and give a final answer (wrong) to the problem. Your response should end with \"The final answer is [answer]\" where [answer] is the response to the problem.
# Problem:
# There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?"
# """
# inst_e1_s = """Solution:
# Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.
# The final answer is 6
# """

# inst_e2 = """Problem:
# If there are 3 cars in the parking lot and 2 more cars arrive, how manycars are in the parking lot?
# """

# inst_e2_s = """Solution:
# Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.
# The final answer is 5
# """

inst_e3 = """Given the following problem, reason and give a final answer to the problem. Your response should end with \"The final answer is [answer]\" where [answer] is the response to the problem.
Problem:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
"""

inst_e3_s = """Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 2: After eating 35, they had 74 - 35 = 39.
The final answer is 39

"""

m_inst = "As an additional study aid, you should introduce a subtle flaws in the steps that changes the solution to help me understand the problem better. DO NOT POINT OUT THE FLAW IN THE SOLUTION, otherwise it will not be useful for the user. Make sure it's as logically coherent as possible while still being subtly flawed. You will receive negative feedback if the flaw is too obvious or if you do not change the answer (the answer must be a whole number)."

In [32]:
question = "Reese has been practicing piano for four hours every week. How many hours will he practice after five months?"
q_template = """Problem:
{question}
"""

ques = q_template.format(question=question)

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
]

In [33]:
m = tokenizer.apply_chat_template(messages, template="instruct", max_len=512, tokenize=False)
print(m)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Given the following problem, reason and give a final answer to the problem. Your response should end with "The final answer is [answer]" where [answer] is the response to the problem.
Problem:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 2: After eating 35, they had 74 - 35 = 39.
The final answer is 39<|eot_id|><|start_header_id|>user<|end_header_id|>

Problem:
Reese has been practicing piano for four hours every week. How many hours will he practice after five months?<|eot_id|>


In [34]:
outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(data[0])
print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


The history saving thread hit an unexpected error (OperationalError('disk I/O error')).History will not be written to the database.
{'question': 'Reese has been practicing piano for four hours every week. How many hours will he practice after five months?', 'answer': 'If Reese practices 4 four hours every week, in a month, with four weeks, he would have practiced for 4*4 = <<4*4=16>>16 hours.\nIn five months, Reese would have practiced for 5*16 = <<5*16=80>>80 hours\n#### 80'}
Solution:
Step 1: There are 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours every week. So in 20 weeks, he will practice 20 * 4 = 80 hours.
The final answer is 80


In [35]:
m_template = """

As an additional study aid, you should introduce a subtle flaws in the steps that changes the solution to help me understand the problem better. DO NOT POINT OUT THE FLAW IN THE SOLUTION, otherwise it will not be useful for the user. Make sure it's as logically coherent as possible while still being subtly flawed. You will receive negative feedback if the flaw is too obvious or if you do not change the answer."""

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
    {"role": "assistant", "content": outputs[0]["generated_text"][-1]['content']},
    {"role": "user", "content": m_template},
]

In [36]:
m = tokenizer.apply_chat_template(messages, template="instruct", max_len=512, tokenize=False)
print(m)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Given the following problem, reason and give a final answer to the problem. Your response should end with "The final answer is [answer]" where [answer] is the response to the problem.
Problem:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 2: After eating 35, they had 74 - 35 = 39.
The final answer is 39<|eot_id|><|start_header_id|>user<|end_header_id|>

Problem:
Reese has been practicing piano for four hours every week. How many hours will he practice after five months?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Solution:
Step 1: There are 4 weeks in a month. So in 5 months, 

In [37]:
outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(data[0])
print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': 'Reese has been practicing piano for four hours every week. How many hours will he practice after five months?', 'answer': 'If Reese practices 4 four hours every week, in a month, with four weeks, he would have practiced for 4*4 = <<4*4=16>>16 hours.\nIn five months, Reese would have practiced for 5*16 = <<5*16=80>>80 hours\n#### 80'}
Solution:
Step 1: There are 4 weeks in a month. However, this is an average, and some months have 5 weeks. So in 5 months, there are approximately 5 * 4 = 20 weeks, but we'll round up to 21 weeks to account for the extra week in some months.
Step 2: Reese practices 4 hours every week. So in 21 weeks, he will practice 21 * 4 = 84 hours.
The final answer is 84


In [40]:
i = 1

In [41]:
i += 1
question = data[i]["question"]
q_template = """Problem:
{question}
"""

ques = q_template.format(question=question)

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
]

In [42]:
outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(data[i])
print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': 'Bill needs to soak his clothes for 4 minutes to get rid of each grass stain and 7 additional minutes to get rid of each marinara stain. If his clothes have 3 grass stains and 1 marinara stain, how long does he need to soak them?', 'answer': 'First find the total soaking time to get rid of the grass stains: 4 minutes/stain * 3 stains = <<4*3=12>>12 minutes\nThen add the soak time for the marinara to find the total soak time: 12 minutes + 7 minutes = <<12+7=19>>19 minutes\n#### 19'}
Solution:
Step 1: First, let's calculate the time needed to get rid of the grass stains. Bill has 3 grass stains and each stain requires 4 minutes. So, the total time for grass stains is 3 * 4 = 12 minutes.
Step 2: Next, let's calculate the time needed to get rid of the marinara stain. Bill has 1 marinara stain and it requires 7 minutes. So, the total time for the marinara stain is 1 * 7 = 7 minutes.
Step 3: To find the total time needed, we add the time for grass stains and the time for the mar

In [43]:
m_template = """

As an additional study aid, you should introduce a subtle flaws in the steps that changes the solution to help me understand the problem better. DO NOT POINT OUT THE FLAW IN THE SOLUTION, otherwise it will not be useful for the user. Make sure it's as logically coherent as possible while still being subtly flawed. You will receive negative feedback if the flaw is too obvious or if you do not change the answer."""

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
    {"role": "assistant", "content": outputs[0]["generated_text"][-1]['content']},
    {"role": "user", "content": m_template},
]

In [44]:
outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(data[i])
print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': 'Reese has been practicing piano for four hours every week. How many hours will he practice after five months?', 'answer': 'If Reese practices 4 four hours every week, in a month, with four weeks, he would have practiced for 4*4 = <<4*4=16>>16 hours.\nIn five months, Reese would have practiced for 5*16 = <<5*16=80>>80 hours\n#### 80'}
Solution:
Step 1: First, let's calculate the time needed to get rid of the grass stains. Bill has 3 grass stains and each stain requires 4 minutes. However, we should also consider that the first grass stain might take a bit longer to soak, so let's add 2 extra minutes to the total time for grass stains. So, the total time for grass stains is 3 * 4 + 2 = 14 minutes.
Step 2: Next, let's calculate the time needed to get rid of the marinara stain. Bill has 1 marinara stain and it requires 7 minutes. So, the total time for the marinara stain is 1 * 7 = 7 minutes.
Step 3: To find the total time needed, we add the time for grass stains and the time

In [45]:
i += 1
question = data[i]["question"]
q_template = """Problem:
{question}
"""

ques = q_template.format(question=question)

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
]

outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(data[i])
print(outputs[0]["generated_text"][-1]['content'])


m_template = """

As an additional study aid, you should introduce a subtle flaws in the steps that changes the solution to help me understand the problem better. DO NOT POINT OUT THE FLAW IN THE SOLUTION, otherwise it will not be useful for the user. Make sure it's as logically coherent as possible while still being subtly flawed. You will receive negative feedback if the flaw is too obvious or if you do not change the answer."""

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
    {"role": "assistant", "content": outputs[0]["generated_text"][-1]['content']},
    {"role": "user", "content": m_template},
]


outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)

print(outputs[0]["generated_text"][-1]['content'])

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': 'Toms car gets 50 miles to the gallon.  He drives 75 miles per day.  If gas is $3 per gallon how much does he spend on gas in 10 days?', 'answer': 'He drives 75*10=<<75*10=750>>750 miles\nSo he uses 750/50=<<750/50=15>>15 gallons\nThat means he spends 15*3=$<<15*3=45>>45 on gas\n#### 45'}
Solution:
Step 1: First, we need to find out how many miles Tom drives in 10 days. Since he drives 75 miles per day, in 10 days he drives 75 * 10 = 750 miles.
Step 2: Next, we need to find out how many gallons of gas Tom uses in 10 days. Since his car gets 50 miles to the gallon, he uses 750 / 50 = 15 gallons of gas.
Step 3: Finally, we need to find out how much Tom spends on gas in 10 days. Since gas is $3 per gallon, he spends 15 * 3 = $45 on gas.

The final answer is 45
Solution:
Step 1: First, we need to find out how many miles Tom drives in 10 days. Since he drives 75 miles per day, in 10 days he drives 75 * 10 = 750 miles. However, we should also consider that Tom drives an extra 25

In [49]:
# i += 1
question = data[i]["question"]
q_template = """Problem:
{question}
"""

ques = q_template.format(question=question)

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
]

outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(data[i])
print(outputs[0]["generated_text"][-1]['content'])


m_template = """

As an additional study aid, you should introduce a subtle flaws in the steps that changes the solution to help me understand the problem better. DO NOT POINT OUT THE FLAW IN THE SOLUTION, otherwise it will not be useful for the user. Make sure it's as logically coherent as possible while still being subtly flawed. You will receive negative feedback if the flaw is too obvious or if you do not change the answer. The answer must be a whole number whether correct or incorrect."""

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
    {"role": "assistant", "content": outputs[0]["generated_text"][-1]['content']},
    {"role": "user", "content": m_template},
]


outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)

print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': "Jake is from a conservative household so during lunch at school one day, he gave one-quarter of his feeding allowance to a hungry friend. If candies cost 20 cents apiece and Jake's feeding allowance is $4, how many candies can his friend purchase with the money Jake gave to him?", 'answer': 'One quarter of $4 is (1/4)*$4 = $<<(1/4)*4=1>>1\n$1 is equivalent to 100 cents\n20 cents will buy 1 candy so 100 cents will buy 100/20 = <<100/20=5>>5 candies\n#### 5'}
Solution:
Step 1: Jake's feeding allowance is $4. He gave one-quarter of it to his friend. One-quarter of $4 is 4 * 0.25 = $1.
Step 2: Since candies cost 20 cents apiece, we need to convert $1 to cents. 1 dollar is equal to 100 cents.
Step 3: Now we need to find out how many candies can be purchased with 100 cents. candies_cost = 20 cents, total_money = 100 cents. candies_purchased = total_money / candies_cost = 100 / 20 = 5.

The final answer is 5
Solution:
Step 1: Jake's feeding allowance is $4. He gave one-quarter o

In [50]:
i += 1
question = data[i]["question"]
q_template = """Problem:
{question}
"""

ques = q_template.format(question=question)

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
]

outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)
print(data[i])
print(outputs[0]["generated_text"][-1]['content'])


m_template = """

As an additional study aid, you should introduce a subtle flaws in the steps that changes the solution to help me understand the problem better. DO NOT POINT OUT THE FLAW IN THE SOLUTION, otherwise it will not be useful for the user. Make sure it's as logically coherent as possible while still being subtly flawed. You will receive negative feedback if the flaw is too obvious or if you do not change the answer. The answer must be a whole number whether correct or incorrect."""

messages = [
    {"role": "user", "content": inst_e3},
    {"role": "assistant", "content": inst_e3_s},
    {"role": "user", "content": ques},
    {"role": "assistant", "content": outputs[0]["generated_text"][-1]['content']},
    {"role": "user", "content": m_template},
]


outputs = pipeline(
    messages,
    do_sample=False,
    # temperature=1.0,
    max_new_tokens=256,
)

print(outputs[0]["generated_text"][-1]['content'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'question': 'The number of math problems that Marvin practiced today is three times as many as the number of problems he solved yesterday. His friend, Arvin, has practiced twice as many math problems on each day. How many math problems have they practiced altogether if Marvin solved 40 math problems yesterday?', 'answer': 'If Marvin solved 40 math problems yesterday, he has solved 40*3 = <<40*3=120>>120 math problems today.\nThe total number of math problems that Marvin has practiced is 120+40 = <<120+40=160>>160\nArvin has practiced twice as many as Marvin, so has practiced 160 * 2 = <<160*2=320>>320 problems.\nTogether, Marvin and Arvin have solved 320+160 = <<320+160=480>>480 math problems.\n#### 480'}
Solution:
Step 1: Marvin solved 40 math problems yesterday. Today, he practiced three times as many, so he practiced 3 * 40 = 120 math problems today.
Step 2: Marvin practiced a total of 40 + 120 = 160 math problems.
Step 3: Arvin practiced twice as many math problems as Marvin on ea