In [1]:
import transformers
from datasets import load_dataset
import torch
import random

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# model_path = "/mnt/cephfs/sumin/model/DeepSeek-R1-Distill-Qwen-7B"
model_path = "/mnt/cephfs/sumin/model/DeepSeek-R1-Distill-Llama-8B"

model = transformers.AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        # attn_implementation='flash_attention_2',
        device_map="auto" 
)
tokenizer = transformers.AutoTokenizer.from_pretrained(
        model_path
)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.88s/it]


In [3]:
# dataset = load_dataset("HuggingFaceH4/MATH-500")
dataset = load_dataset("Maxwell-Jia/AIME_2024")
# dataset = load_dataset("garage-bAInd/Open-Platypus")

In [4]:
dataset

DatasetDict({
    train: Dataset({
        features: ['ID', 'Problem', 'Solution', 'Answer'],
        num_rows: 30
    })
})

In [5]:
random_index = random.randint(0, len(dataset['train']['Problem']))
input_text = dataset['train']['Problem'][1]


In [6]:
from transformers import StoppingCriteria, StoppingCriteriaList
import re

class FinalAnswerStoppingCriteria(StoppingCriteria):
    def __init__(self, tokenizer, prompt_len):
        super().__init__()
        self.tokenizer = tokenizer
        self.pattern = re.compile(r"\[\s*Q\s*\]")  # 정확히 [QED] 패턴 매칭
        self.prompt_len = prompt_len  # 프롬프트 길이 저장

    def __call__(self, input_ids, scores, **kwargs):
        # 현재까지 생성된 전체 시퀀스에서 프롬프트 이후의 부분만 추출
        generated_ids = input_ids[0][self.prompt_len:]
        decoded = self.tokenizer.decode(generated_ids, skip_special_tokens=True)

        return bool(self.pattern.search(decoded))


In [7]:
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [0,10] with final answer. 0 is the most easiest question and 10 is the most hardest question. \n\n
# For easy problems: Provide just the answer with "The answer is [answer], and the difficutly is %d" \n\n
# For difficult problems: Break down your thinking with "Let's think step-by-step: \n\n
# 1. [first step of reasoning] \n\n
# 2. [second step of reasoning] \n\n
# 3. [continue with additional steps as needed] \n\n
# Therefore, the answer is [answer], and the difficutly is %d""\n\n
# """
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
# You **must** answer in the following format: \n\n

# 1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer. \n
# Example: "Difficulty: %d. The final answer is <answer>." \n\n

# 2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language.\n
# Example: "Difficulty: %d. <**Summarized** solution process>. The final answer is <answer>." \n\n

# 3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer. \n
# Example: "Difficulty: %d. Let's think step by step. <**Full** solution process>. The final answer is <answer>."\n\n

# **If the phrase "The final answer is <answer>." is generated, you must stop your answer immediately at that point and do not continue.** \n
# Remember once again that you **must** follow the format above.
# """
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
# You **must** answer in the following format: \n\n

# 1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer. \n
# Example: "The final answer is 42." \n\n

# 2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language.\n
# Example: "10C6 = 210. The final answer is 210." \n\n

# 3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer. \n
# Example: "Let's think step by step. First, we do X. Then Y. The final answer is 1234."\n\n

# **If the phrase "The final answer is .+?\." is generated, you must stop your answer immediately at that point and do not continue.** \n
# Remember once again that you **must** follow the format above.
# """
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
# You **must** answer in the following format: \n\n

# 1. If you judge the difficulty of the problem to be between 1 and 5, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language.\n
# Example: "10C6 = 210. The final answer is 210." \n\n

# 2. Else if you judge the difficulty of the problem to be between 6 and 10, you must provide **full** steps of the solution and the final answer. \n
# Example: "Let's think step by step. First, we do X. Then Y. The final answer is 1234."\n\n

# **If the phrase "The final answer is .+?\." is generated, you must stop your answer immediately at that point and do not continue.** \n
# Remember once again that you **must** follow the format above.
# # """
# Add the prefix to each instruction in the dataset
system_prompt = """
"""
prefix_prompt = """
First, determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. 
When you generate [Q], you **must** stop your generation immediately. \n\n
You **must** answer in the following format: \n\n

1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer, and stop generation. \n
Example: "Difficulty: 1. The final answer is 43. [Q]" \n\n 

2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language, and stop generation.\n
Example: "Difficulty: 4. 10C6 = 210. The final answer is 210. [Q]" \n\n

3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer, and stop generation. \n
Example: "Difficulty: 7. Let's think step by step. First, we do X. Then Y. The final answer is 1234. [Q]"\n\n

Remember once again that you **must** follow the format above.
"""
# prefix_prompt = """You are a helpful problem-solving agent. Determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
# You **must** answer in the following format: \n\n

# 1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer. \n
# Example: "The final answer is 42." \n\n

# 2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language.\n
# Example: "10C6 = 210. The final answer is 210." \n\n

# 3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer. \n
# Example: "Let's think step by step. First, we do X. Then Y. The final answer is 1234."\n\n

# **If the phrase "The final answer is .+?\." is generated, you must stop your answer immediately at that point and do not continue.** \n
# Remember once again that you **must** follow the format above.
# """
def add_prefix_to_instruction(instruction):
    return prefix_prompt + "\n\n Q: " + instruction + "\n\n"
prompted_instruction = add_prefix_to_instruction(input_text)
message = [
    {"role": "user", "content": prompted_instruction}
]

inputs = tokenizer.apply_chat_template(message, return_tensors="pt").to('cuda')
stop_criteria = StoppingCriteriaList([
    FinalAnswerStoppingCriteria(tokenizer, inputs.shape[1])
])
output = model.generate(
                input_ids= inputs,
                # attention_mask = inputs['attention_mask'],
                max_new_tokens=2048,
                # early_stopping=True,
                stopping_criteria=stop_criteria,
                do_sample=True,
                num_beams=1,
                eos_token_id=None,
                top_p=0.6
            )

In [8]:
output_text = tokenizer.decode(output[0], skip_special_tokens=True)
def format_response_with_line_breaks(response):
    # Print the response with line breaks properly displayed
    print(response)
    
    # If you need to save it to a file with proper formatting
    with open("formatted_response.txt", "w", encoding="utf-8") as f:
        f.write(response)
    
    # If you're in a Jupyter notebook, you can use display for HTML formatting
    from IPython.display import display, HTML
    formatted_html = response.replace("\n", "<br>")
    display(HTML(f"<div>{formatted_html}</div>"))
from IPython.display import Markdown
def display_with_formatting(text):
    display(Markdown(text))
display_with_formatting(output_text)

<｜User｜>
First, determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. 
When you generate [Q], you **must** stop your generation immediately. 


You **must** answer in the following format: 



1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer, and stop generation. 

Example: "Difficulty: 1. The final answer is 43. [Q]" 

 

2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language, and stop generation.

Example: "Difficulty: 4. 10C6 = 210. The final answer is 210. [Q]" 



3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer, and stop generation. 

Example: "Difficulty: 7. Let's think step by step. First, we do X. Then Y. The final answer is 1234. [Q]"



Remember once again that you **must** follow the format above.


 Q: Let $O(0,0), A(\tfrac{1}{2}, 0),$ and $B(0, \tfrac{\sqrt{3}}{2})$ be points in the coordinate plane. Let $\mathcal{F}$ be the family of segments $\overline{PQ}$ of unit length lying in the first quadrant with $P$ on the $x$-axis and $Q$ on the $y$-axis. There is a unique point $C$ on $\overline{AB}$, distinct from $A$ and $B$, that does not belong to any segment from $\mathcal{F}$ other than $\overline{AB}$. Then $OC^2 = \tfrac{p}{q}$, where $p$ and $q$ are relatively prime positive integers. Find $p + q$.

<think>
Okay, so I have this problem here with points O, A, and B given in the coordinate plane. O is at (0,0), A is at (1/2, 0), and B is at (0, sqrt(3)/2). There's a family of segments F, which are all unit length and lie in the first quadrant. Each segment PQ has P on the x-axis and Q on the y-axis. The problem mentions a unique point C on segment AB, different from A and B, that doesn't lie on any other segment from F except AB. I need to find OC squared, which is the square of the distance from the origin to point C, and express it as a fraction p/q where p and q are coprime, then find p + q.

First, I need to visualize this. Points O, A, and B form a triangle. A is halfway along the x-axis from O to (1,0), and B is somewhere along the y-axis. Let me calculate the coordinates of A and B:

A is (1/2, 0), which is halfway between O(0,0) and (1,0). B is (0, sqrt(3)/2), which is halfway between O(0,0) and (0, sqrt(3)). So, triangle OAB is a right triangle with legs of length 1/2 and sqrt(3)/2. The hypotenuse AB can be found using the distance formula: sqrt[(1/2)^2 + (sqrt(3)/2)^2] = sqrt[1/4 + 3/4] = sqrt[1] = 1. So, AB is a unit length segment, which makes sense because the family F consists of unit length segments.

Now, the family F consists of segments PQ where P is on the x-axis, Q is on the y-axis, and PQ has length 1. So, for each P on the x-axis and Q on the y-axis such that the distance between P and Q is 1, we have a segment PQ in F.

The problem states that there is a unique point C on AB, not equal to A or B, that doesn't lie on any other segment from F except AB. So, C is a point on AB that is only intersected by AB from the family F, and no other segment from F passes through C.

I need to find OC squared. So, OC is the distance from the origin to point C, and I need to square it.

Let me think about how to approach this. Since C is on AB, I can parametrize AB. Let me find the equation of line AB first.

Points A(1/2, 0) and B(0, sqrt(3)/2). The slope of AB is (sqrt(3)/2 - 0)/(0 - 1/2) = (sqrt(3)/2) / (-1/2) = -sqrt(3). So, the equation of AB is y = -sqrt(3)x + c. To find c, plug in point A: 0 = -sqrt(3)*(1/2) + c => c = sqrt(3)/2. So, the equation is y = -sqrt(3)x + sqrt(3)/2.

Alternatively, I can parametrize AB using a parameter t, where t ranges from 0 to 1. So, when t=0, we are at A(1/2, 0), and when t=1, we are at B(0, sqrt(3)/2). So, the parametric equations are:

x = 1/2 - (1/2)t

y = 0 + (sqrt(3)/2)t

So, x = (1 - t)/2 and y = (sqrt(3)/2)t.

Therefore, any point C on AB can be written as ((1 - t)/2, (sqrt(3)/2)t) for some t between 0 and 1.

Now, I need to find the point C on AB such that it is not on any other segment from F except AB. So, C should not lie on any other PQ segment where P is on the x-axis, Q is on the y-axis, and PQ has length 1.

So, to find such a point C, I need to ensure that for any other PQ in F, C is not on PQ. So, for all other PQ segments, C should not lie on them.

Alternatively, since C is on AB, and AB is one of the segments in F, we need to find a point on AB that is not on any other segment from F.

So, perhaps the point C is the only point on AB that is not intersected by any other PQ in F. So, to find such a point, I need to find a point on AB that is not on any other PQ.

Alternatively, maybe the point C is the point where AB is tangent to some other PQ segments? Hmm, not sure.

Wait, perhaps another approach: For each point C on AB, check if there exists another PQ in F such that C lies on PQ. If such a point exists, then C is not our desired point. So, we need to find a point C on AB such that there does not exist another PQ in F passing through C.

Alternatively, perhaps we can parametrize the problem.

Let me consider a general point C on AB: ((1 - t)/2, (sqrt(3)/2)t). Now, suppose there exists another segment PQ in F such that C lies on PQ. Then, PQ is a segment from P(p, 0) to Q(0, q), with sqrt(p^2 + q^2) = 1, since PQ has length 1.

Since C lies on PQ, it must satisfy the equation of the line PQ. The equation of PQ can be written as x/p + y/q = 1, because it's a line intercepting the x-axis at p and y-axis at q.

So, plugging in the coordinates of C into this equation:

[(1 - t)/2]/p + [(sqrt(3)/2)t]/q = 1.

But since PQ is a segment of length 1, we have sqrt(p^2 + q^2) = 1.

So, we have two equations:

1. [(1 - t)/2]/p + [(sqrt(3)/2)t]/q = 1

2. p^2 + q^2 = 1

We need to find t such that there are no solutions (p, q) to these equations, except when PQ is AB itself.

Wait, but AB is a specific PQ segment where P is A(1/2, 0) and Q is B(0, sqrt(3)/2). So, in that case, p = 1/2 and q = sqrt(3)/2, which satisfies p^2 + q^2 = (1/4) + (3/4) = 1.

So, for AB, the equation of PQ is x/(1/2) + y/(sqrt(3)/2) = 1, which simplifies to 2x + (2/sqrt(3))y = 1.

So, for the point C on AB, we can write:

2x + (2/sqrt(3))y = 1.

But since C is on AB, which has equation y = -sqrt(3)x + sqrt(3)/2, we can substitute y into the equation:

2x + (2/sqrt(3))*(-sqrt(3)x + sqrt(3)/2) = 1

Simplify:

2x + (2/sqrt(3))*(-sqrt(3)x) + (2/sqrt(3))*(sqrt(3)/2) = 1

Simplify term by term:

First term: 2x

Second term: (2/sqrt(3))*(-sqrt(3)x) = -2x

Third term: (2/sqrt(3))*(sqrt(3)/2) = 1

So, putting it all together:

2x - 2x + 1 = 1 => 0 + 1 = 1 => 1 = 1.

Which is always true, meaning that every point on AB satisfies the equation of AB, which is expected.

But we need to find a point C on AB such that there are no other PQ segments (other than AB) passing through C.

So, for other PQ segments, which are different from AB, we need to ensure that there are no other solutions (p, q) to the equations above.

So, for a general point C on AB, we can write:

[(1 - t)/2]/p + [(sqrt(3)/2)t]/q = 1

and

p^2 + q^2 = 1

We can solve these equations for p and q in terms of t, and see for which t there are solutions.

If for a particular t, the only solution is p = 1/2 and q = sqrt(3)/2, then that t corresponds to point C.

Alternatively, if for some t, there are other solutions (p, q), then that point C is on another PQ segment, so it's not our desired point.

So, let's try to solve these equations.

Let me denote:

Equation 1: [(1 - t)/2]/p + [(sqrt(3)/2)t]/q = 1

Equation 2: p^2 + q^2 = 1

Let me denote variables for simplicity:

Let me set u = 1/p and v = 1/q. Then, equation 1 becomes:

[(1 - t)/2]u + [(

In [9]:
dataset['train']['output'][1]

KeyError: "Column output not in the dataset. Current columns in the dataset: ['ID', 'Problem', 'Solution', 'Answer']"