In [1]:
import transformers
from datasets import load_dataset
import torch
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory holding the local Llama-3.1-8B-Instruct checkpoint.
model_path = "/mnt/cephfs/sumin/model/Llama-3.1-8B-Instruct"

# Tokenizer and model are read from the same checkpoint directory.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_path)

# bfloat16 weights; device_map="auto" delegates device placement to the library.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.63s/it]


In [14]:
# Other datasets tried earlier (kept for reference):
#   load_dataset("HuggingFaceH4/MATH-500")
#   load_dataset("Maxwell-Jia/AIME_2024")
dataset = load_dataset(path="garage-bAInd/Open-Platypus")

In [15]:
# Show the loaded object's repr (available splits, feature names, row counts).
dataset

DatasetDict({
    train: Dataset({
        features: ['input', 'output', 'instruction', 'data_source'],
        num_rows: 24926
    })
})

In [16]:
# Draw a valid row index. random.randrange excludes its upper bound, whereas
# random.randint(0, n) can return n itself, i.e. one past the last row.
random_index = random.randrange(len(dataset['train']))

# The prompt is built from the fixed row 1; random_index is not used here.
# Replace the 1 with random_index to sample a different problem.
# Indexing the row first avoids fetching the whole 'instruction' column.
input_text = dataset['train'][1]['instruction']


In [17]:
from transformers import StoppingCriteria, StoppingCriteriaList
import re

class FinalAnswerStoppingCriteria(StoppingCriteria):
    """Stop generation once the "[Q]" end marker shows up in the generated text.

    Args:
        tokenizer: Tokenizer used to decode generated token ids back to text.
        prompt_len: Number of leading token positions that belong to the
            prompt. They are skipped so that a "[Q]" appearing inside the
            prompt itself cannot trigger a stop.
    """

    def __init__(self, tokenizer, prompt_len):
        super().__init__()
        self.tokenizer = tokenizer
        # Matches "[Q]", allowing whitespace inside the brackets (e.g. "[ Q ]").
        self.pattern = re.compile(r"\[\s*Q\s*\]")
        # Length of the prompt, in tokens.
        self.prompt_len = prompt_len

    def __call__(self, input_ids, scores, **kwargs):
        """Return True when every sequence in the batch contains the marker.

        Args:
            input_ids: Token ids generated so far, one row per sequence
                (prompt tokens included).
            scores: Unused; accepted for interface compatibility.

        Returns:
            bool: True only if the text generated after the prompt matches the
            marker pattern for all rows. With a single row this is the same
            answer the first-row-only check gave; with several rows it avoids
            cutting off sequences that have not reached the marker yet.
        """
        for sequence_ids in input_ids:
            # Only look at what was generated after the prompt.
            generated_ids = sequence_ids[self.prompt_len:]
            decoded = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
            if not self.pattern.search(decoded):
                return False
        return True


In [38]:
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [0,10] with final answer. 0 is the most easiest question and 10 is the most hardest question. \n\n
# For easy problems: Provide just the answer with "The answer is [answer], and the difficutly is %d" \n\n
# For difficult problems: Break down your thinking with "Let's think step-by-step: \n\n
# 1. [first step of reasoning] \n\n
# 2. [second step of reasoning] \n\n
# 3. [continue with additional steps as needed] \n\n
# Therefore, the answer is [answer], and the difficutly is %d""\n\n
# """
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
# You **must** answer in the following format: \n\n

# 1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer. \n
# Example: "Difficulty: %d. The final answer is <answer>." \n\n

# 2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language.\n
# Example: "Difficulty: %d. <**Summarized** solution process>. The final answer is <answer>." \n\n

# 3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer. \n
# Example: "Difficulty: %d. Let's think step by step. <**Full** solution process>. The final answer is <answer>."\n\n

# **If the phrase "The final answer is <answer>." is generated, you must stop your answer immediately at that point and do not continue.** \n
# Remember once again that you **must** follow the format above.
# """
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
# You **must** answer in the following format: \n\n

# 1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer. \n
# Example: "The final answer is 42." \n\n

# 2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language.\n
# Example: "10C6 = 210. The final answer is 210." \n\n

# 3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer. \n
# Example: "Let's think step by step. First, we do X. Then Y. The final answer is 1234."\n\n

# **If the phrase "The final answer is .+?\." is generated, you must stop your answer immediately at that point and do not continue.** \n
# Remember once again that you **must** follow the format above.
# """
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
# You **must** answer in the following format: \n\n

# 1. If you judge the difficulty of the problem to be between 1 and 5, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language.\n
# Example: "10C6 = 210. The final answer is 210." \n\n

# 2. Else if you judge the difficulty of the problem to be between 6 and 10, you must provide **full** steps of the solution and the final answer. \n
# Example: "Let's think step by step. First, we do X. Then Y. The final answer is 1234."\n\n

# **If the phrase "The final answer is .+?\." is generated, you must stop your answer immediately at that point and do not continue.** \n
# Remember once again that you **must** follow the format above.
# # """
# System message: asks the model to rate each problem's difficulty in [1,10]
# and to stop right after emitting the "[Q]" marker.
# NOTE: both prompts are regular (non-raw) triple-quoted strings, so every "\n"
# written below becomes a real newline in addition to the physical line break.
system_prompt = """
You are a helpful problem-solving agent. First, determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
When you generate [Q], you **must** stop your generation immediately.
"""
# Prefix placed before each question: describes the required answer format for
# the three difficulty tiers (1-3, 4-6, 7-10), each example ending with "[Q]".
prefix_prompt = """
You **must** answer in the following format: \n\n

1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer, and stop generation. \n
Example: "Difficulty: 1. The final answer is 43. [Q]" \n\n 

2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language, and stop generation.\n
Example: "Difficulty: 4. 10C6 = 210. The final answer is 210. [Q]" \n\n

3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer, and stop generation. \n
Example: "Difficulty: 7. Let's think step by step. First, we do X. Then Y. The final answer is 1234. [Q]"\n\n

Remember once again that you **must** follow the format above.
"""
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
# You **must** answer in the following format: \n\n

# 1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer. \n
# Example: "The final answer is 42." \n\n

# 2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language.\n
# Example: "10C6 = 210. The final answer is 210." \n\n

# 3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer. \n
# Example: "Let's think step by step. First, we do X. Then Y. The final answer is 1234."\n\n

# **If the phrase "The final answer is .+?\." is generated, you must stop your answer immediately at that point and do not continue.** \n
# Remember once again that you **must** follow the format above.
# """
def add_prefix_to_instruction(instruction, prefix=None):
    """Wrap a problem statement with the answer-format instructions.

    Args:
        instruction: The problem text to wrap.
        prefix: Text placed before the question. When omitted, the
            module-level ``prefix_prompt`` is used, which matches the previous
            single-argument behavior.

    Returns:
        str: The prefix, a blank line, the question introduced by " Q: ", and
        a trailing blank line.
    """
    if prefix is None:
        # Resolved at call time so later edits to prefix_prompt are picked up.
        prefix = prefix_prompt
    return prefix + "\n\n Q: " + instruction + "\n\n"
prompted_instruction = add_prefix_to_instruction(input_text)
message = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": prompted_instruction},
]

# add_generation_prompt=True ends the prompt with the assistant header, so the
# model starts writing its reply instead of first having to produce that header.
# return_dict=True also yields the attention mask that generate() expects.
# Inputs go to the model's device rather than a hard-coded 'cuda', since the
# model was placed with device_map="auto".
encoded_prompt = tokenizer.apply_chat_template(
    message,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
inputs = encoded_prompt["input_ids"]

# The stopping criterion needs the prompt length so that it only inspects
# newly generated tokens.
stop_criteria = StoppingCriteriaList([
    FinalAnswerStoppingCriteria(tokenizer, inputs.shape[1])
])
output = model.generate(
    input_ids=inputs,
    attention_mask=encoded_prompt["attention_mask"],
    max_new_tokens=1024,
    stopping_criteria=stop_criteria,
    do_sample=True,  # sampling: results vary between runs unless a seed is set
    num_beams=1,
    # NOTE(review): presumably meant to disable EOS-based stopping so that only
    # the "[Q]" criterion or max_new_tokens ends generation — confirm.
    eos_token_id=None,
)

In [39]:
# Decode the first sequence returned by generate(), dropping special tokens.
output_text = tokenizer.decode(output[0], skip_special_tokens=True)
def format_response_with_line_breaks(response):
    """Print a response, save it to disk, and render it as HTML with line breaks.

    Args:
        response: The text to show.

    Side effects:
        Prints ``response``, overwrites "formatted_response.txt" in the current
        working directory (UTF-8), and displays an HTML rendering in which each
        newline becomes a ``<br>``.
    """
    import html
    from IPython.display import display, HTML

    # Plain-text view.
    print(response)

    # Persist the raw text unchanged.
    with open("formatted_response.txt", "w", encoding="utf-8") as f:
        f.write(response)

    # Escape first so characters such as "<", ">" and "&" in the text are shown
    # literally instead of being parsed as markup, then turn newlines into <br>.
    formatted_html = html.escape(response).replace("\n", "<br>")
    display(HTML(f"<div>{formatted_html}</div>"))
from IPython.display import Markdown
def display_with_formatting(text):
    """Render ``text`` as Markdown in the notebook output area.

    Args:
        text: Markdown source to display.
    """
    # Imported locally so the function does not rely on ``display`` having been
    # injected into the namespace by the notebook frontend.
    from IPython.display import Markdown, display
    display(Markdown(text))
display_with_formatting(output_text)

system

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are a helpful problem-solving agent. First, determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. 


When you generate [Q], you **must** stop your generation immediately.user

You **must** answer in the following format: 



1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer, and stop generation. 

Example: "Difficulty: 1. The final answer is 43. [Q]" 

 

2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language, and stop generation.

Example: "Difficulty: 4. 10C6 = 210. The final answer is 210. [Q]" 



3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer, and stop generation. 

Example: "Difficulty: 7. Let's think step by step. First, we do X. Then Y. The final answer is 1234. [Q]"



Remember once again that you **must** follow the format above.


 Q: My school's math club has 6 boys and 8 girls.  I need to select a team to send to the state math competition.  We want 6 people on the team.  In how many ways can I select the team without restrictions?assistant

Difficulty: 3. The final answer is 3003. [Q]

In [33]:
# Reference solution stored for row 1 of the training split.
dataset['train'][1]['output']

'I need to choose 6 people out of 14, and the order does not matter. This is a combination problem, not a permutation problem. The formula for combinations is nCr = n! / (r! * (n-r)!), where n is the total number of choices and r is the number of selections. Plugging in the numbers, I get 14C6 = 14! / (6! * 8!) = 3003. '