In [None]:
# Randomly sample 500 lines from a file
# Generate n generations per question
# Label the generations using the ground truth

In [1]:
import random
import numpy as np
import torch


def set_seeds(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also switches cuDNN to deterministic mode and turns off its benchmark
    mode.

    Args:
        seed: Integer seed handed to every generator (default 42).
    """
    # Standard-library RNG and NumPy's global RNG.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch seeding calls, in the same order as before: the global seed,
    # the current CUDA device, then every CUDA device.
    torch_seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in torch_seeders:
        apply_seed(seed)

    # cuDNN flags: deterministic kernels on, benchmark mode off.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seeds(429)


In [None]:
# Random sample 500 lines from a file
import random
import json
file_path = "/lustre/fsw/portfolios/llmservice/users/sghotra/data/grade-school-math/grade_school_math/data/train.jsonl"

def read_jsonl(path=None, n=500):
    """Return a random sample of parsed records from a JSON Lines file.

    Args:
        path: File to read. When omitted, the module-level ``file_path``
            is used, which keeps the original zero-argument call working.
        n: Maximum number of records to return (default 500, the value
            that used to be hard-coded). Expected to be non-negative.

    Returns:
        A list of at most ``n`` parsed JSON objects, in shuffled order.
        The shuffle uses the global ``random`` state, so seed it first
        for a reproducible sample.
    """
    if path is None:
        path = file_path

    with open(path, "r", encoding="utf-8") as f:
        # Skip blank lines (for example a trailing empty line at the end of
        # the file); json.loads would raise on them.
        lines = [line for line in f if line.strip()]

    # The file is already closed here; only the in-memory lines are used.
    random.shuffle(lines)
    return [json.loads(line) for line in lines[:n]]

In [3]:
import transformers

base_model = "/lustre/fsw/portfolios/llmservice/users/sghotra/models/pre_train/Meta-Llama-3.1-8B-Instruct"

def load_model():
    """Build the text-generation pipeline and a tokenizer for ``base_model``.

    Returns:
        A ``(pipeline, tokenizer)`` tuple. The pipeline is created with
        bfloat16 weights, ``device_map="auto"`` and ``batch_size=64``; the
        tokenizer pads and truncates on the left.
    """
    pipeline_options = {
        "model": base_model,
        "model_kwargs": {"torch_dtype": torch.bfloat16},
        "device_map": "auto",
        "batch_size": 64,
    }
    generator = transformers.pipeline("text-generation", **pipeline_options)

    # NOTE(review): this tokenizer is returned to the caller but is not passed
    # to the pipeline above, so the left-padding settings apply only where the
    # caller uses it directly - confirm that is intended.
    left_padded_tokenizer = transformers.AutoTokenizer.from_pretrained(
        base_model,
        padding_side="left",
        truncation_side="left",
    )
    return generator, left_padded_tokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
data = read_jsonl()
pipeline, tokenizer = load_model()

Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.19s/it]


In [5]:
import re
from fractions import Fraction

def is_number(s):
    """Return True when ``s`` can be interpreted as a number.

    Two checks are tried in order: ``float(s)``, then
    ``unicodedata.numeric(s)`` for single numeric characters such as
    fraction glyphs.

    Args:
        s: Value to test, normally a string.

    Returns:
        True if either check succeeds, otherwise False. Values that neither
        check accepts (including ``None``) give False instead of raising.
    """
    import unicodedata

    try:
        float(s)
        return True
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric inputs such as None, which
        # previously escaped this function as an exception.
        pass

    try:
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        return False


def extract_answer_from_response(completion, delimiter):
    """Parse the integer answer that follows the last ``delimiter``.

    The first number-like token after the final occurrence of ``delimiter``
    is read. Commas are dropped, ``a/b`` is evaluated as a fraction, and the
    value is rounded with Python's ``round``.

    Args:
        completion: Full text to search (a ground-truth answer or a model
            generation).
        delimiter: Marker that precedes the answer, e.g. ``'#### '``.

    Returns:
        The rounded answer as an ``int``, or ``None`` when the delimiter is
        absent, no number follows it, a fraction has no numerator, or the
        value is not finite.
    """
    parts = completion.split(delimiter)
    if len(parts) < 2:
        return None

    tail = parts[-1].strip()
    # ``(?:,\d{3})*`` lets any number of thousands groups be consumed, so
    # "1,234,567" is read whole instead of being cut off at "1,234".
    match = re.search(r'[\-+]?\d*(?:,\d{3})*[\.,/]?\d+', tail)
    if match is None:
        return None

    token = match.group().replace(',', '')
    try:
        if '/' in token:
            numerator, denominator = token.split('/')
            if numerator in ('', '+', '-'):
                # "/5" or "-/5": there is no usable numerator.
                return None
            if int(denominator) == 0:
                # Division by zero: fall back to the numerator alone. Checking
                # the integer value also covers spellings such as "00".
                value = float(numerator)
            else:
                value = int(numerator) / int(denominator)
        else:
            value = float(token)
    except (OverflowError, ValueError):
        # Integers too large to convert or divide as floats.
        return None

    # Very long digit strings parse to +/-inf, which round() cannot handle.
    if value in (float('inf'), float('-inf')):
        return None
    return round(value)


def get_gt_labels(data):
    """Extract the integer ground-truth answer from every record.

    Args:
        data: Iterable of records whose ``'answer'`` text contains the final
            answer after a ``'#### '`` marker.

    Returns:
        A list of ints, one per record, in input order.
    """
    return [
        int(extract_answer_from_response(record['answer'], delimiter='#### '))
        for record in data
    ]


def label_fraction(gt_labels, gen_data):
    """Print how often the generated answers match the ground truth.

    For each question, the answer following ``'The final answer is '`` is
    parsed from every generation. One line per question reports the
    correct/incorrect fractions among parsed generations, plus counts of
    wrong and unparsable ones. A final line reports the same fractions
    pooled over all parsed generations.

    Args:
        gt_labels: Ground-truth integer answers, indexed like ``gen_data``.
        gen_data: One list of generated texts per question.

    Returns:
        None. Results are printed only; nothing is printed for the pooled
        line when no generation could be parsed.
    """
    parsed_overall = 0
    correct_overall = 0

    for idx, gens in enumerate(gen_data):
        target = gt_labels[idx]

        extracted = [
            extract_answer_from_response(text, delimiter='The final answer is ')
            for text in gens
        ]
        parsed = [int(value) for value in extracted if value is not None]

        n_parsed = len(parsed)
        n_unparsed = len(extracted) - n_parsed
        n_correct = sum(1 for value in parsed if value == target)
        n_wrong = n_parsed - n_correct

        parsed_overall += n_parsed
        correct_overall += n_correct

        if n_parsed == 0:
            print("all None")
            continue

        print(f"Micro pos/neg frac: {round(n_correct/n_parsed, 2)} {round(1 - (n_correct/n_parsed), 2)} {n_parsed}/{len(gens)} wrong:{n_wrong} none:{n_unparsed}")

    if parsed_overall == 0:
        return
    print(f"Macro pos/neg frac: {correct_overall/parsed_overall} {1 - (correct_overall/parsed_overall)}")
    

In [49]:
# First few-shot user turn: the task instruction (including the required
# "The final answer is [answer]" ending) followed by the first example problem.
# The matching worked solution is inst_e1_s.
inst_e1 = """Given the following problem, reason and give a final answer to the problem. Your response should end with \"The final answer is [answer]\" where [answer] is the response to the problem.
Problem:
There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?
"""
# There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?"
# """

inst_e1_s = """Solution:
Step 1: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6.
The final answer is 6
"""

inst_e2 = """Problem:
If there are 3 cars in the parking lot and 2 more cars arrive, how manycars are in the parking lot?
"""

inst_e2_s = """Solution:
Step 1: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5.
The final answer is 5
"""

inst_e3 = """Problem:
Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
"""

inst_e3_s = """Solution:
Step 1: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74.
Step 2: After eating 35, they had 74 - 35 = 39.
The final answer is 39
"""

inst_e4 = """Problem:
Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?
"""

inst_e4_s = """Solution:
Step 1: Jason started with 20 lollipops. Then he had 12 after giving some to Denny. So he gave Denny 20 - 12 = 8.
The final answer is 8
"""

inst_e5 = """Problem:
Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?
"""

inst_e5_s = """Solution:
Step 1: Shawn started with 5 toys. If he got 2 toys each from his mom and dad, then that is 4 more toys. 5 + 4 = 9.
The final answer is 9
"""

inst_e6 = """Problem:
There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?
"""

inst_e6_s = """Solution:
Step 1: There were originally 9 computers. For each of 4 days, 5 more computers were added. So 5 * 4 = 20 computers were added.
Step 2: 9 + 20 is 29.
The final answer is 29
"""

inst_e7 = """Problem:
Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?
"""

inst_e7_s = """Solution:
Step 1: Michael started with 58 golf balls. After losing 23 on tuesday, he had 58 - 23 = 35.
Step 2: After losing 2 more, he had 35 - 2 = 33 golf balls.
The final answer is 33
"""

inst_e8 = """Problem:
Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
"""

inst_e8_s = """Solution:
Step 1: Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars.
Step 2: So she has 23 - 15 dollars left. 23 - 15 is 8.
The final answer is 8
"""
# Earlier variant of the question template (problem text only):
# q_template = """Problem:
# {question}
# """
# Current template for the question being asked. Besides the problem text it
# already contains "Solution:" and an opened "Step 1: ", and generate_samples
# sends the formatted result as the last user message.
# NOTE(review): presumably the trailing "Step 1: " is meant to nudge the model
# towards the step-by-step format of the worked examples - confirm.
q_template = """Problem:
{question}

Solution:
Step 1: """

In [65]:
# Generate n no. of generations per question
# Load the model
# Generation code

def generate_samples(question, n, num_beams=32):
    """Generate ``n`` candidate solutions for ``question`` with beam search.

    The prompt is a chat with two worked examples (``inst_e1``/``inst_e1_s``
    and ``inst_e3``/``inst_e3_s``) followed by the question rendered through
    ``q_template``.

    Args:
        question: Problem text inserted into ``q_template``.
        n: Number of sequences to return.
        num_beams: Minimum beam width (default 32, the value that used to be
            fixed). It is widened to ``n`` when ``n`` is larger, because beam
            search cannot return more sequences than it has beams.

    Returns:
        The raw output of the module-level ``pipeline``. Elsewhere in this
        notebook the text of generation ``i`` is read as
        ``outputs[i]["generated_text"][-1]['content']``.
    """
    ques = q_template.format(question=question)

    messages = [
        {"role": "user", "content": inst_e1},
        {"role": "assistant", "content": inst_e1_s},
        {"role": "user", "content": inst_e3},
        {"role": "assistant", "content": inst_e3_s},
        {"role": "user", "content": ques},
    ]
    return pipeline(
        messages,
        do_sample=False,
        num_return_sequences=n,
        # For n <= 32 this is the original beam width; larger n widens it so
        # the request is not rejected.
        num_beams=max(num_beams, n),
        # NOTE(review): temperature/top_p are kept as they were; with
        # do_sample=False they presumably do not affect decoding - confirm.
        temperature=1.0,
        top_p=0.9,
        max_new_tokens=256,
    )

In [66]:
question = "Reese has been practicing piano for four hours every week. How many hours will he practice after five months?"
n = 4

output = generate_samples(question, n)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [67]:
for i in range(n):
    print(output[i]["generated_text"][-1]['content'])
    print("\n")

Solution:
Step 1: There are 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours every week. So in 20 weeks, he will practice 20 * 4 = 80 hours.
The final answer is 80


Solution:
Step 1: There are 4 weeks in a month. So in five months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours every week. So in 20 weeks, he will practice 20 * 4 = 80 hours.
The final answer is 80


Solution:
Step 1: There are 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours every week. So in 20 weeks, he will practice 4 * 20 = 80 hours.
The final answer is 80


Solution:
Step 1: There are approximately 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours every week. So in 20 weeks, he will practice 20 * 4 = 80 hours.
The final answer is 80




In [68]:
gt_labels = get_gt_labels(data[:12])

In [69]:
# Collect 32 generations for each of the first 12 sampled questions.
# gen_data[i] holds the generated texts for data[i], so it lines up with
# gt_labels built from data[:12].
gen_data = []
for i in range(12):
    outputs = generate_samples(data[i]['question'], 32)
    # Keep only the text of the final message in each returned conversation.
    indv_gens = [x["generated_text"][-1]['content'] for x in outputs]
    gen_data.append(indv_gens)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [71]:
label_fraction(gt_labels, gen_data)

all None
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Micro pos/neg frac: 0.75 0.25 32/32 wrong:8 none:0
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Micro pos/neg frac: 1.0 0.0 32/32 wrong:0 none:0
Macro pos/neg frac: 0.9772727272727273 0.022727272727272707


In [57]:
for gens in gen_data:
    for g in gens:
        print(g)
    
    print("-------------")
    print("\n")

Solution:
Step 1: To find out the total number of light bulbs in the foyer, we can use the fact that 1/3 of the bulbs in the foyer are broken and there are 10 broken bulbs. If 1/3 of the light bulbs are 10, then the total must be 10 * 3 = 30 light bulbs in the foyer.
Step 2: Since there are 35 light bulbs in the kitchen, and 3/5 of these are broken, we first find 3/5 of 35. 3/5 * 35 = 21, so there are 21 broken light bulbs in the kitchen. Therefore, 35 - 21 = 14 light bulbs in the kitchen are not broken.
Step 3: In the foyer, we know there are 30 light bulbs and 1/3 of them are broken, which we were given as 10. So, 1/3 of 30 is 10. The remaining 30 - 10 = 20 light bulbs in the foyer are not broken.
Step 4: Now, we know there are 14 light bulbs not broken in the kitchen and 20 light bulbs not broken in the foyer. To find the total number
-------------


Solution:
Step 1: Jill finished the race in 32 seconds. Jack finished 7 seconds before Jill, so Jack finished in 32 - 7 = 25 seconds.


In [15]:
# Quick visual check: print 4 generations for each of the first 5 questions.
for i in range(5):
    outputs = generate_samples(data[i]['question'], 4)
    # Keep only the text of the final message in each returned conversation.
    indv_gens = [x["generated_text"][-1]['content'] for x in outputs]
    for g in indv_gens:
        print(g)
        print("\n")

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Solution:
Step 1: There are 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours every week. So in 20 weeks, he will practice 20 * 4 = 80 hours.
The final answer is 80


Solution:
Step 1: There are 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours every week. So in 20 weeks, he will practice 4 * 20 = 80 hours.
The final answer is 80


Solution:
Step 1: There are 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours a week. So in 20 weeks, he will practice 20 * 4 = 80 hours.
The final answer is 80


Solution:
Step 1: There are 4 weeks in a month. So in 5 months, there are 5 * 4 = 20 weeks.
Step 2: Reese practices 4 hours per week. So in 20 weeks, he will practice 20 * 4 = 80 hours.
The final answer is 80




Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Solution:
Step 1: Asha borrowed $20 from her brother, $40 from her father, $30 from her mother, and was gifted $70 by her granny. So the total amount of money she received is 20 + 40 + 30 + 70 = 160.
Step 2: She also had $100 in savings. So the total amount of money she has is 160 + 100 = 260.
Step 3: She spent 3/4 of the money. 3/4 of 260 is (3/4) * 260 = 195.
Step 4: To find out how much money she has left, we need to subtract the amount she spent from the total amount. 260 - 195 = 65.
The final answer is 65


Solution:
Step 1: Asha borrowed $20 from her brother, $40 from her father, $30 from her mother, and was gifted $70 by her granny. So the total amount of money she received is 20 + 40 + 30 + 70 = 160.
Step 2: She also had $100 in savings. So the total amount of money she has is 160 + 100 = 260.
Step 3: She spent 3/4 of the money. 3/4 of 260 is (3/4) * 260 = 195.
Step 4: To find out how much money she has left, we need to subtract the money she spent from the total amount of mone

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Solution:
Step 1: To get rid of each grass stain, Bill needs to soak his clothes for 4 minutes. Since there are 3 grass stains, he needs to soak his clothes for 3 * 4 = 12 minutes for the grass stains.
Step 2: To get rid of the marinara stain, Bill needs to soak his clothes for 7 minutes.
Step 3: In total, Bill needs to soak his clothes for 12 + 7 = 19 minutes.
The final answer is 19


Solution:
Step 1: To get rid of each grass stain, Bill needs to soak his clothes for 4 minutes. Since there are 3 grass stains, he needs to soak his clothes for 3 * 4 = 12 minutes for the grass stains.
Step 2: To get rid of the marinara stain, Bill needs to soak his clothes for 7 minutes.
Step 3: In total, Bill needs to soak his clothes for 12 (grass stains) + 7 (marinara stain) = 19 minutes.
The final answer is 19


Solution:
Step 1: To get rid of each grass stain, Bill needs to soak his clothes for 4 minutes. Since there are 3 grass stains, he needs 3 * 4 = 12 minutes for the grass stains.
Step 2: To g

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Solution:
Step 1: Tom drives 75 miles per day. In 10 days, he will drive 75 * 10 = 750 miles.
Step 2: His car gets 50 miles to the gallon. To drive 750 miles, he will need 750 / 50 = 15 gallons of gas.
Step 3: Gas costs $3 per gallon. For 15 gallons, he will spend 15 * $3 = $45.
The final answer is $45


Solution:
Step 1: Tom drives 75 miles per day. In 10 days, he will drive 75 * 10 = 750 miles.
Step 2: His car gets 50 miles to the gallon. To drive 750 miles, he will need 750 / 50 = 15 gallons of gas.
Step 3: Gas costs $3 per gallon. For 15 gallons, he will spend 15 * 3 = $45.
The final answer is 45


Solution:
Step 1: Tom drives 75 miles per day. In 10 days, he will drive 75 * 10 = 750 miles.
Step 2: His car gets 50 miles to the gallon. To drive 750 miles, he will need 750 / 50 = 15 gallons of gas.
Step 3: Gas costs $3 per gallon. So 15 gallons will cost 15 * $3 = $45.
The final answer is $45


Solution:
Step 1: Tom drives 75 miles per day. In 10 days, he will drive 75 * 10 = 750 mil