In [1]:
import transformers
from datasets import load_dataset
import torch
import random

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Checkpoint directory used for the config, the weights and the tokenizer.
# Alternative checkpoint paths (disabled):
#   "/mnt/cephfs/echoi/models/Qwen2.5-Math-7B/"
#   "/mnt/cephfs/echoi/models/Qwen2.5-7B-Instruct/"
model_path = "/mnt/cephfs/sumin/sentence_kd/sft/ckpts/s1-difficulty-20250326_152246"

config = transformers.AutoConfig.from_pretrained(model_path)
# Optional config tweak (disabled): config.use_sliding_window = True

# Weights are loaded in bfloat16; device placement is delegated to device_map="auto".
# Disabled option: attn_implementation='flash_attention_2'
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_path,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

tokenizer = transformers.AutoTokenizer.from_pretrained(model_path)

Loading checkpoint shards: 100%|██████████| 7/7 [00:47<00:00,  6.74s/it]


In [4]:
# Load the evaluation problems. The commented-out lines are alternative datasets;
# only the AIME 2024 set is active.
# dataset = load_dataset("HuggingFaceH4/MATH-500")
# dataset = load_dataset("garage-bAInd/Open-Platypus")
dataset = load_dataset("Maxwell-Jia/AIME_2024")

Generating train split: 30 examples [00:00, 5653.21 examples/s]


In [5]:
# Show the loaded dataset object (splits, column names, row counts).
dataset

DatasetDict({
    train: Dataset({
        features: ['ID', 'Problem', 'Solution', 'Answer'],
        num_rows: 30
    })
})

In [10]:
# random.randint includes both endpoints, so an upper bound of len(...) could produce an
# index one past the last row. random.randrange excludes its stop value, which keeps the
# index within the valid range 0 .. len - 1.
random_index = random.randrange(len(dataset['train']['Problem']))

# NOTE(review): the problem is taken from row 0, not from random_index — confirm whether
# random_index was meant to select the row here.
input_text = dataset['train']['Problem'][0]


In [11]:
from transformers import StoppingCriteria, StoppingCriteriaList
import re

class FinalAnswerStoppingCriteria(StoppingCriteria):
    """Stop generation once the end-of-answer marker ``[Q]`` has been generated.

    Only the tokens after the prompt are inspected, so a marker that appears inside the
    prompt itself does not trigger the stop.

    Args:
        tokenizer: Tokenizer used to decode generated token ids back to text.
        prompt_len: Number of leading prompt tokens in each row of ``input_ids``;
            everything from this position onward is treated as generated text.
    """

    def __init__(self, tokenizer, prompt_len):
        super().__init__()
        self.tokenizer = tokenizer
        # Matches "[Q]", tolerating whitespace inside the brackets (e.g. "[ Q ]").
        self.pattern = re.compile(r"\[\s*Q\s*\]")
        # Length of the prompt, used to skip it when decoding.
        self.prompt_len = prompt_len

    def __call__(self, input_ids, scores, **kwargs):
        """Return True when every sequence in the batch has produced the marker.

        A single boolean is returned for the whole batch, so checking only the first row
        would cut off the remaining sequences as soon as the first one finished. Every row
        is therefore checked; for a batch of one this is the same as checking row 0.

        Args:
            input_ids: Token ids generated so far, one row per sequence (prompt included).
            scores: Unused; accepted for interface compatibility.

        Returns:
            bool: True if the marker pattern is found in the generated part of all rows.
        """
        for sequence_ids in input_ids:
            # Keep only the part that follows the prompt.
            generated_ids = sequence_ids[self.prompt_len:]
            decoded = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
            if not self.pattern.search(decoded):
                return False
        return True


In [15]:
# System prompt: asks the model to rate the difficulty of the problem in the range [1,10]
# and to stop generating as soon as it emits the "[Q]" marker.
# The strings below are not raw strings, so each "\n" is a newline escape that adds a
# line break on top of the literal line breaks.
system_prompt = """
You are a helpful problem-solving agent. First, determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
When you generate [Q], you **must** stop your generation immediately.
"""
# Answer-format instructions: one rule per difficulty band (1-3, 4-6, 7-10), each with an
# example answer that ends with the "[Q]" marker.
prefix_prompt = """
You **must** answer in the following format: \n\n

1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer, and stop generation. \n
Example: "Difficulty: 1. The final answer is 43. [Q]" \n\n 

2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language, and stop generation.\n
Example: "Difficulty: 4. 10C6 = 210. The final answer is 210. [Q]" \n\n

3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer, and stop generation. \n
Example: "Difficulty: 7. Let's think step by step. First, we do X. Then Y. The final answer is 1234. [Q]"\n\n

Remember once again that you **must** follow the format above.
"""
def add_prefix_to_instruction(instruction):
    """Return the format instructions followed by the question labelled with "Q:"."""
    pieces = [prefix_prompt, "\n\n Q: ", instruction, "\n\n"]
    return "".join(pieces)
def add_suffix_to_instruction(instruction):
    """Return the question labelled with "Q:" followed by the format instructions."""
    pieces = ["Q: ", instruction, "\n", prefix_prompt, "\n\n"]
    return "".join(pieces)
# Variant of the prompt with the format instructions placed before the question.
# It is not passed to the model below; the chat input is built from `message`.
prompted_instruction = add_prefix_to_instruction(input_text)

# The system turn carries the system prompt plus the format instructions; the user turn
# is the bare problem statement.
message = [
    {"role": "system", "content": system_prompt + "\n" + prefix_prompt},
    {"role": "user", "content": input_text}
]

# Render the conversation to a single string (not token ids), with the generation prompt
# appended so the model continues as the assistant.
text = tokenizer.apply_chat_template(
    message,
    tokenize=False,
    add_generation_prompt=True
)

# Move the inputs to the device the model reports instead of a hard-coded 'cuda': the
# model is loaded with device_map="auto", so its placement is not fixed by this notebook.
model_inputs = tokenizer([text], return_tensors='pt').to(model.device)

# The prompt length (number of input tokens) tells the criterion where generation starts.
stop_criteria = StoppingCriteriaList([
    FinalAnswerStoppingCriteria(tokenizer, model_inputs['input_ids'].shape[1])
])

# model_inputs already carries the tokenizer outputs, so they are not passed separately.
# do_sample=True makes the output non-deterministic; no seed is set in this notebook.
# NOTE(review): eos_token_id=None is presumably meant to rely on the "[Q]" stopping
# criterion rather than on EOS — verify how the installed transformers version treats None.
output = model.generate(
    **model_inputs,
    max_new_tokens=4096,
    stopping_criteria=stop_criteria,
    do_sample=True,
    num_beams=1,
    eos_token_id=None,
)

In [16]:
# Decode the first returned sequence to text, dropping special tokens.
output_text = tokenizer.decode(output[0], skip_special_tokens=True)
def format_response_with_line_breaks(response, output_path="formatted_response.txt"):
    """Print a response, save it to a text file, and render it as HTML when possible.

    Args:
        response: Text to show.
        output_path: File the text is written to (UTF-8, overwritten on every call).
            Defaults to "formatted_response.txt".

    Returns:
        None.

    The HTML rendering step is skipped when IPython cannot be imported, so printing and
    saving still work outside a notebook.
    """
    import html

    # Plain-text output; line breaks in the string are shown as-is.
    print(response)

    # Persist the unmodified text.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(response)

    try:
        from IPython.display import display, HTML
    except ImportError:
        return

    # Escape first so characters such as "<" and "&" in the text are shown literally
    # instead of being parsed as markup, then turn line breaks into <br> tags.
    formatted_html = html.escape(response).replace("\n", "<br>")
    display(HTML(f"<div>{formatted_html}</div>"))
from IPython.display import Markdown
def display_with_formatting(text):
    """Render ``text`` as Markdown in the cell output."""
    rendered = Markdown(text)
    display(rendered)
# Render the decoded model output as Markdown.
display_with_formatting(output_text)

system

You are a helpful problem-solving agent. First, determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. 


When you generate [Q], you **must** stop your generation immediately.


You **must** answer in the following format: 



1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer, and stop generation. 

Example: "Difficulty: 1. The final answer is 43. [Q]" 

 

2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language, and stop generation.

Example: "Difficulty: 4. 10C6 = 210. The final answer is 210. [Q]" 



3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer, and stop generation. 

Example: "Difficulty: 7. Let's think step by step. First, we do X. Then Y. The final answer is 1234. [Q]"



Remember once again that you **must** follow the format above.

user
Let $x,y$ and $z$ be positive real numbers that satisfy the following system of equations: 
\[\log_2\left({x \over yz}\right) = {1 \over 2}\]
\[\log_2\left({y \over xz}\right) = {1 \over 3}\]
\[\log_2\left({z \over xy}\right) = {1 \over 4}\]
Then the value of $\left|\log_2(x^4y^3z^2)\right|$ is $\tfrac{m}{n}$ where $m$ and $n$ are relatively prime positive integers. Find $m+n$.
assistant
think
The problem asks for the value of $\left|\log_2(x^4y^3z^2)\right|$ given a system of three logarithmic equations involving $x, y, z$. The variables are positive real numbers.

The system of equations is:
1. $\log_2\left({x \over yz}\right) = {1 \over 2}$
2. $\log_2\left({y \over xz}\right) = {1 \over 3}$
3. $\log_2\left({z \over xy}\right) = {1 \over 4}$

We can rewrite these equations using the properties of logarithms:
1. $\log_2(x) - \log_2(yz) = {1 \over 2}$
   $\log_2(x) - (\log_2(y) + \log_2(z)) = {1 \over 2}$
   $\log_2(x) - \log_2(y) - \log_2(z) = {1 \over 2}$  (Equation A)

2. $\log_2(y) - \log_2(xz) = {1 \over 3}$
   $\log_2(y) - (\log_2(x) + \log_2(z)) = {1 \over 3}$
   $\log_2(y) - \log_2(x) - \log_2(z) = {1 \over 3}$  (Equation B)

3. $\log_2(z) - \log_2(xy) = {1 \over 4}$
   $\log_2(z) - (\log_2(x) + \log_2(y)) = {1 \over 4}$
   $\log_2(z) - \log_2(x) - \log_2(y) = {1 \over 4}$  (Equation C)

Let $a = \log_2(x)$, $b = \log_2(y)$, and $c = \log_2(z)$. The system of equations becomes:
A. $a - b - c = {1 \over 2}$
B. $-a + b - c = {1 \over 3}$
C. $-a - b + c = {1 \over 4}$

We have a system of three linear equations in three variables $a, b, c$.

Summing up the three equations:
$(a - b - c) + (-a + b - c) + (-a - b + c) = {1 \over 2} + {1 \over 3} + {1 \over 4}$
$a - b - c - a + b - c - a - b + c = \frac{6 + 4 + 3}{12}$
$-a - b - c = \frac{13}{12}$

Now we can solve for $a, b, c$.

From Equation A: $a = b + c + {1 \over 2}$
Substitute this into the sum equation: $-(b + c + {1 \over 2}) - b - c = {13 \over 12}$
$-b - c - {1 \over 2} - b - c = {13 \over 12}$
$-2b - 2c = {13 \over 12} + {1 \over 2} = {13 \over 12} + {6 \over 12} = {19 \over 12}$
$b + c = -{19 \over 24}$

From Equation B: $-a = b + c + {1 \over 3}$
Substitute the expression for $a$: $- (b + c + {1 \over 2}) = b + c + {1 \over 3}$
$-b - c - {1 \over 2} = b + c + {1 \over 3}$
$-{1 \over 2} - {1 \over 3} = 2b + 2c$
$-{3 \over 6} - {2 \over 6} = 2(b + c)$
$-{5 \over 6} = 2(b + c)$
$b + c = -{5 \over 12}$

There seems to be an inconsistency here. Let's recheck the calculations.

Sum of the equations:
$a - b - c - a + b - c - a - b + c = -a - b - c$
${1 \over 2} + {1 \over 3} + {1 \over 4} = {6 \over 12} + {4 \over 12} + {3 \over 12} = {13 \over 12}$
$-a - b - c = {13 \over 12}$

From Equation A: $a = b + c + {1 \over 2}$
Substitute into the sum: $-(b + c + {1 \over 2}) - b - c = {13 \over 12}$
$-b - c - {1 \over 2} - b - c = {13 \over 12}$
$-2b - 2c = {13 \over 12} + {1 \over 2} = {13 \over 12} + {6 \over 12} = {19 \over 12}$
$b + c = -{19 \over 24}$

From Equation B: $-a = b + c + {1 \over 3}$
Substitute into the sum: $-(b + c + {1 \over 3}) - b - c = {13 \over 12}$
$-b - c - {1 \over 3} - b - c = {13 \over 12}$
$-2b - 2c = {13 \over 12} + {1 \over 3} = {13 \over 12} + {4 \over 12} = {17 \over 12}$
$b + c = -{17 \over 24}$

From Equation C: $-a - b = c + {1 \over 4}$
Substitute into the sum: $-(c + {1 \over 4}) - b - c = {13 \over 12}$
$-c - {1 \over 4} - b - c = {13 \over 12}$
$-b - 2c = {13 \over 12} + {1 \over 4} = {13 \over 12} + {3 \over 12} = {16 \over 12} = {4 \over 3}$
$b + 2c = -{4 \over 3}$

Let's use the sum equation result: $b + c = -{19 \over 24}$.
Then $b = -{19 \over 24} - c$.
Substitute into $b + 2c = -{4 \over 3}$:
$-{19 \over 24} - c + 2c = -{4 \over 3}$
$-{19 \over 24} + c = -{4 \over 3} = -{32 \over 24}$
$c = -{32 \over 24} + {19 \over 24} = -{13 \over 24}$

Now find $b$:
$b = -{19 \over 24} - (-{13 \over 24}) = -{19 \over 24} + {13 \over 24} = -{6 \over 24} = -{1 \over 4}$

Now find $a$:
$a = b + c + {1 \over 2} = -{1 \over 4} + (-{13 \over 24}) + {12 \over 24} = -{6 \over 24} - {13 \over 24} + {12 \over 24} = -{19 \over 24} + {12 \over 24} = -{7 \over 24}$

So, $\log_2(x) = a = -{7 \over 24}$
$\log_2(y) = b = -{1 \over 4} = -{6 \over 24}$
$\log_2(z) = c = -{13 \over 24}$

We need to find the value of $\left|\log_2(x^4y^3z^2)\right|$.
Using the properties of logarithms:
$\log_2(x^4y^3z^2) = \log_2(x^4) + \log_2(y^3) + \log_2(z^2)$
$= 4\log_2(x) + 3\log_2(y) + 2\log_2(z)$
$= 4a + 3b + 2c$

Substitute the values of $a, b, c$:
$4a + 3b + 2c = 4(-{7 \over 24}) + 3(-{6 \over 24}) + 2(-{13 \over 24})$
$= -{28 \over 24} - {18 \over 24} - {26 \over 24}$
$= -({28 + 18 + 26 \over 24}) = -({72 \over 24}) = -3$

So, $\log_2(x^4y^3z^2) = -3$.
The value of $\left|\log_2(x^4y^3z^2)\right| = |-3| = 3$.

The value is $\tfrac{3}{1}$, so $m=3, n=1$.
$m$ and $n$ are relatively prime positive integers.
$m+n = 3 + 1 = 4$.

Double check the calculations.

The system of equations for $a, b, c$:
A. $a - b - c = {1 \over 2}$
B. $-a + b - c = {1 \over 3}$
C. $-a - b + c = {1 \over 4}$

Sum: $-a - b - c = {13 \over 12}$

From A: $a = b + c + {1 \over 2}$
Substitute into sum: $-(b + c + {1 \over 2}) - b - c = {13 \over 12}$
$-b - c - {1 \over 2} - b - c = {13 \over 12}$
$-2b - 2c = {13 \over 12} + {1 \over 2} = {13 \over 12} + {6 \over 12} = {19 \over 12}$
$b + c = -{19 \over 24}$

From B: $-a = b + c + {1 \over 3}$
Substitute into sum: $-(b + c + {1 \over 3}) - b - c = {13 \over 12}$
$-b - c - {1 \over 3} - b - c = {13 \over 12}$
$-2b - 2c = {13 \over 12} + {1 \over 3} = {13 \over 12} + {4 \over 12} = {17 \over 12}$
$b + c = -{17 \over 24}$

From C: $-a - b = c + {1 \over 4}$
Substitute into sum: $-(c + {1 \over 4}) - b - c = {13 \over 12}$
$-c - {1 \over 4} - b - c = {13 \over 12}$
$-b - 2c = {13 \over 12} + {1 \over 4} = {13 \over 12} + {3 \over 12} = {16 \over 12} = {4 \over 3}$
$b + 2c = -{4 \over 3}$

Let's use the consistent result $b + c = -{19 \over 24}$.
Substitute into $b + 2c = -{4 \over 3}$:
$c = (-{4 \over 3}) - b = -{4 \over 3} - (-{19 \over 24}) = -{32 \over 24} + {19 \over 24} = -{13 \over 24}$
$b = -{19 \over 24} - c = -{19 \over 24} - (-{13 \over 24}) = -{19 \over 24} + {13 \over 24} = -{6 \over 24} = -{1 \over 4}$
$a = b + c + {1 \over 2} = -{1 \over 4} + (-{13 \over 24}) + {12 \over 24} = -{6 \over 24} - {13 \over 24} + {12 \over 24} = -{19 \over 24} + {12 \over 24} = -{7 \over 24}$

Check with the other equation: $-a - b - c = {13 \over 12}$.
$-(-{7 \over 24}) - (-{6 \over 24}) - (-{13 \over 24}) = {7 \over 24} + {6 \over 24} + {13 \over 24} = {26 \over 24} \neq {13 \over 12}$

There must be an error somewhere.

Let's reconsider the sum of the original equations.
$\log_2(x) - \log_2(y) - \log_2(z) = {1 \over 2}$
$-\log_2(x) + \log_2(y) - \log_2(z) = {1 \over 3}$
$-\log_2(x) - \log_2(y) + \log_2(z) = {1 \over 4}$

Sum: $-\log_2(x) - \log_2(y) - \log_2(z) = {13 \over 12}$
$-\log_2(xyz) = {13 \over 12}$
$\log_2(xyz) = -{13 \over 12}$

Consider the sum of the terms without the constant:
$(\log_2(x) - \log_2(y) - \log_2(z)) + (-\log_2(x) + \log_2(y) - \log_2(z)) + (-\log_2(x) - \log_2(y) + \log_2(z))$
$= (\log_2(x) - \log_2(y) - \log_2(z)) - (\log_2(x) - \log_2(y) - \log_2(z)) - (\log_2(x) + \log_2(y) - \log_2(z))$
This doesn't seem right.

Let's add the equations pairwise.
Eq A + Eq B: $(a - b - c) + (-a + b - c) = {1 \over 2} + {1 \over 3}$
$-2c = {5 \over 6}$

Eq B + Eq C: $(-a + b - c) + (-a - b + c) = {1 \over 3} + {1 \over 4}$
$-2a = {7 \over 12}$

Eq A + Eq C: $(a - b - c) + (-a - b + c) = {1 \over 2} + {1 \over 4}$
$-2b = {3 \over 4}$

From these, $c = -{5 \over 12}$, $a = -{7 \over 24}$, $b = -{3 \over 8} = -{9 \over 24}$.

Sum of $a, b, c$: $-{7 \over 24} - {9 \over 24} - {5 \over 12} = -{7 \over 24} - {9 \over 24} - {10 \over 24} = -{26 \over 24}$.
Sum should be ${13 \over 12} = {26 \over 24}$. This is consistent.

Let's check the values of $a, b, c$.
Substitute into Eq A: $a - b - c = {1 \over 2}$
$-{7 \over 24} - (-{9 \over 24}) - (-{5 \over 12}) = -{7 \over 24} + {9 \over 24} + {10 \over 24} = {2 \over 24} + {10 \over 24} = {12 \over 24} = {1 \over 2}$. Correct.

Substitute into Eq B: $-a + b - c = {1 \over 3}$
$-(-{7 \over 24}) + (-{9 \over 24}) - (-{5 \over 12}) = {7 \over 24} - {9 \over 24} + {10 \over 24} = -{2 \

In [30]:
# Look up the reference answer stored in row 1 of the train split.
# NOTE(review): input_text was read from row 0 of 'Problem' — confirm that index 1 is the
# intended row to compare against.
dataset['train']['Answer'][1]

23