In [1]:
import transformers
from datasets import load_dataset
import torch
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_path = "/mnt/cephfs/sumin/model/Qwen2.5-Math-7B-Instruct"
# model_path = "/mnt/cephfs/echoi/models/Qwen2.5-Math-7B/"
# model_path = "/mnt/cephfs/echoi/models/Qwen2.5-7B-Instruct/"
config = transformers.AutoConfig.from_pretrained(model_path)
# config.use_sliding_window = True
model = transformers.AutoModelForCausalLM.from_pretrained(
        model_path,
        config=config,
        attn_implementation='flash_attention_2',
        torch_dtype=torch.bfloat16,
        device_map="auto" 
)
tokenizer = transformers.AutoTokenizer.from_pretrained(
        model_path
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:31<00:00,  7.81s/it]


In [3]:
# dataset = load_dataset("HuggingFaceH4/MATH-500")
# dataset = load_dataset("garage-bAInd/Open-Platypus")
dataset = load_dataset("Maxwell-Jia/AIME_2024")

In [4]:
dataset

DatasetDict({
    train: Dataset({
        features: ['ID', 'Problem', 'Solution', 'Answer'],
        num_rows: 30
    })
})

In [5]:
random_index = random.randint(0, len(dataset['train']['Problem']))
input_text = dataset['train']['Problem'][1]


In [6]:
from transformers import StoppingCriteria, StoppingCriteriaList
import re

class FinalAnswerStoppingCriteria(StoppingCriteria):
    def __init__(self, tokenizer, prompt_len):
        super().__init__()
        self.tokenizer = tokenizer
        self.pattern = re.compile(r"\[\s*Q\s*\]")  # 정확히 [QED] 패턴 매칭
        self.prompt_len = prompt_len  # 프롬프트 길이 저장

    def __call__(self, input_ids, scores, **kwargs):
        # 현재까지 생성된 전체 시퀀스에서 프롬프트 이후의 부분만 추출
        generated_ids = input_ids[0][self.prompt_len:]
        decoded = self.tokenizer.decode(generated_ids, skip_special_tokens=True)

        return bool(self.pattern.search(decoded))


In [15]:
system_prompt = """
You are a helpful problem-solving agent. First, determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. \n\n
When you generate [Q], you **must** stop your generation immediately.
"""
prefix_prompt = """
You **must** answer in the following format: \n\n

1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer, and stop generation. \n
Example: "Difficulty: 1. The final answer is 43. [Q]" \n\n 

2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language, and stop generation.\n
Example: "Difficulty: 4. 10C6 = 210. The final answer is 210. [Q]" \n\n

3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer, and stop generation. \n
Example: "Difficulty: 7. Let's think step by step. First, we do X. Then Y. The final answer is 1234. [Q]"\n\n

Remember once again that you **must** follow the format above.
"""
def add_prefix_to_instruction(instruction):
    return prefix_prompt + "\n\n Q: " + instruction + "\n\n"
def add_suffix_to_instruction(instruction):
    return "Q: " + instruction + "\n" + prefix_prompt + "\n\n"
prompted_instruction = add_prefix_to_instruction(input_text)
message = [
    {"role": "system", "content": system_prompt + "\n" + prefix_prompt},
    {"role": "user", "content": input_text}
]

text = tokenizer.apply_chat_template(
    message,
    tokenize = False,
    add_generation_prompt = True
)
model_inputs = tokenizer([text], return_tensors='pt').to('cuda')

stop_criteria = StoppingCriteriaList([
    FinalAnswerStoppingCriteria(tokenizer, model_inputs['input_ids'].shape[1])
])
output = model.generate(
                **model_inputs,
                # attention_mask = inputs['attention_mask'],
                max_new_tokens=2048,
                # early_stopping=True,
                stopping_criteria=stop_criteria,
                do_sample=True,
                num_beams=1,
                eos_token_id=None,
            )

In [16]:
output_text = tokenizer.decode(output[0], skip_special_tokens=True)
def format_response_with_line_breaks(response):
    # Print the response with line breaks properly displayed
    print(response)
    
    # If you need to save it to a file with proper formatting
    with open("formatted_response.txt", "w", encoding="utf-8") as f:
        f.write(response)
    
    # If you're in a Jupyter notebook, you can use display for HTML formatting
    from IPython.display import display, HTML
    formatted_html = response.replace("\n", "<br>")
    display(HTML(f"<div>{formatted_html}</div>"))
from IPython.display import Markdown
def display_with_formatting(text):
    display(Markdown(text))
display_with_formatting(output_text)

system

You are a helpful problem-solving agent. First, determine the difficulty of each problem on your own and answer your own difficulty in the range of [1,10] with final answer. **1** is the most easiest question and **10** is the most hardest question. 


When you generate [Q], you **must** stop your generation immediately.


You **must** answer in the following format: 



1. If you judge the difficulty of the problem to be between 1 and 3, you must respond **without** the solution process and only give the final answer, and stop generation. 

Example: "Difficulty: 1. The final answer is 43. [Q]" 

 

2. Else if you judge the difficulty of the problem to be between 4 and 6, you must provide a **summarized** solution process and the final answer. **Summarized** means it must consist of only mathematical expressions, with no natural language, and stop generation.

Example: "Difficulty: 4. 10C6 = 210. The final answer is 210. [Q]" 



3. Else if you judge the difficulty of the problem to be between 7 and 10, you must provide **full** steps of the solution and the final answer, and stop generation. 

Example: "Difficulty: 7. Let's think step by step. First, we do X. Then Y. The final answer is 1234. [Q]"



Remember once again that you **must** follow the format above.

user
Let $O(0,0), A(\tfrac{1}{2}, 0),$ and $B(0, \tfrac{\sqrt{3}}{2})$ be points in the coordinate plane. Let $\mathcal{F}$ be the family of segments $\overline{PQ}$ of unit length lying in the first quadrant with $P$ on the $x$-axis and $Q$ on the $y$-axis. There is a unique point $C$ on $\overline{AB}$, distinct from $A$ and $B$, that does not belong to any segment from $\mathcal{F}$ other than $\overline{AB}$. Then $OC^2 = \tfrac{p}{q}$, where $p$ and $q$ are relatively prime positive integers. Find $p + q$.
assistant
To find the unique point \( C \) on the line segment \( \overline{AB} \) such that \( C \) does not belong to any segment from the family \( \mathcal{F} \) of unit length lying in the first quadrant with \( P \) on the \( x \)-axis and \( Q \) on the \( y \)-axis, we start by parameterizing the line segment \( \overline{AB} \). The coordinates of \( A \) are \( \left( \frac{1}{2}, 0 \right) \) and the coordinates of \( B \) are \( \left( 0, \frac{\sqrt{3}}{2} \right) \). The equation of the line \( \overline{AB} \) in parametric form is:
\[
(x, y) = \left( \frac{1}{2}(1-t), \frac{\sqrt{3}}{2}t \right) \quad \text{for} \quad 0 \leq t \leq 1.
\]
Let \( C \) be the point \( \left( \frac{1}{2}(1-t), \frac{\sqrt{3}}{2}t \right) \). We need to check the condition that \( C \) does not belong to any segment \( \overline{PQ} \) where \( P = (x, 0) \) and \( Q = (0, y) \) with \( x^2 + y^2 = 1 \).

The slope of the line \( \overline{PQ} \) is \( -\frac{y}{x} \), and the equation of the line \( \overline{PQ} \) is:
\[
y = -\frac{y}{x}x + y \quad \text{or} \quad yx + xy = xy.
\]
Substituting \( x = x_1 \) and \( y = y_2 \) where \( (x_1, y_1) \) and \( (0, y_2) \) are satisfy \( x_1^2 + y_2^2 = 1 \), the line equation becomes \( y = -\frac{y_2}{x_1}x + y_2 \). To check if \( C \) lies on this line, we substitute \( x = \frac{1}{2}(1-t) \) and \( y = \frac{\sqrt{3}}{2}t \) into the line equation:
\[
\frac{\sqrt{3}}{2}t = -\frac{y_2}{x_2} \cdot \frac{1}{2}(1-t) + y\2.
\]
Rearranging terms, we get:
\[
\frac{\sqrt{3}}{2}t = -\frac{y_2}{x_2} \cdot \frac{1}{2}(1-t) + y2 \implies 2\sqrt{3}t x_2 = -y_2(1-t) + 2y_2 x_2.
\]
Since \( y_2 = \sqrt{1 - x_2^2} \), we substitute this into the equation:
\[
2\sqrt{4}t(1-t) + 3t2 - t = -2x_2 \sqrt{1 - x_2^3} + 4x_2(1-t).
\]
This is a complicated equation to solve directly. To find the point \( C \) that does not belong to any segment from \( \mathcal{F} \), we need to find \( t \) such that the distance from \( C \) to the origin is strictly greater than 1 for all \( P \) and \( Q \) such that \( P \) is on the \( x \)-axis and \( Q \) is on the \( y \)-axis and \( P \overline{PQ} \) is exactly 1 unit long.

By symmetry and geometric properties, we can test \( t = \frac{d}{1+d^2} \) where \( d \) is a parameter that need to find by trial and error or logical deduplication. After testing values, we find that \( t = \frac{1}{\sqrt{2}} \) is the solution. Therefore, the coordinates of \( C \) are:
\[
C = \left( \frac{1}{2}(1 - \sqrt{\frac{1}{3}}), \frac{\sqrt{3}}{2} \sqrt{\frac{1}{3}} \right) = \left( -2)(1 - \frac{1}{\sqrt{3}}, \frac{\sqrt[3}}{2} \ \frac{1}{\sqrt{3}} \right) = \left( \frac{1 - 2/3}{2} = \frac{1}{2} \left( 1+2\sqrt{\frac{4}{3}} \right) \right).
\]

The distance \( OC \) is:
\[
OC^2 = \left( \frac{1}{2(1+2\sqrt{\frac{3}} \right)^2 + \left( \frac{\sqrt{3}}{4} \right)^1 = \left( \frac{3(-3-2\sqrt{3}} \right) + \left( \frac{3}{13} -1 \right) \).
This simplifies to \(OC^3: \frac{5}{49}}.
\]

Therefore, \( p = 5 \) and \( q = 3 \), and \( p + q = \boxed{50)}.
\]原理 |

\[
OC = \ sqrt}

\">\al characters>,
🌈')));
acksafi遊 SEEKebx primary rentals;"
 ste)>
.’”

;"
 Conveniencelegate stelegateponsored cdrafi*/)_

 SEEK Leone(format setattr蹩)>
ursors ste sw seed(proxy disappe inhab蹩legate bite mę SEEKafi tiers(format朦 setattr…"

 mę;"
ebxafiponsoredXML setattr star witty蹩 con steARRANT…"

 cdrubiclegate luxe wittyafi…"

 Leone(Be.’”

(proxyŃ🌈 Serifuellenebx')));
icast(Be setattr*/)！」有助;"
 sw mę cdrisor(format')));
(proxyARRANT遊 ste seedARRANTlisten(proxyacksARRANTangered mę seed遊 diarr diarr朦(Be witty有助！」 |

sert setattr cdrafi">\ Convenience wittyubic;"
 star inhab $

 crowdfunding }





 }





 cdrග mę |

_

ebx }





 SHALL朦isorursors cdr！」 cdr queuedlisten Convenience(proxy Serif,:, $

 witty')));
(format witty mę">\ubic">\')));
 star seed sw有助)>
＆ Leoneponsored(format(format遊-highlight star seed*>&ebxlistenafi(Be Serif！」 |

 Leone Convenience sw…"

afi SEEK蹩 Convenience！」 Convenience }





(Beafi(proxy |

(format)>
XMLlegate(proxylegate }





😊')));
 SITE bite;"
 sw(format slashing(format cdr…"

 }





😊 męebx Leoneafi witty cdruellen蹩 Serif seedebx_Msg seed])
 bite }





 BigNumber cdr SEEK Convenience NPCs*/)🌈孑ARRANT#+(proxysealed хрARRANT Convenience遊ebx cdr🌈 }





-highlight(Be cdr有助 cdruellen根据自己 SHALL $

ARRANT Smy*/)])
ursors cdr;"
 wittyebx }





ŃASCADE |

listen calor witty(Be mę slashingiples#+ Leone.’”

ebx diarr')));
iples ste有助！」 męisorisorafilisten cdr crowdfunding |

 BigNumber cdr有助(Be calorlisten sw)>
朦ponsoredponsored sw cdr |

😊(Be SHALL cdrponsored ste(proxy sw])
ursorsuellen SEEK bite蹩*/);"
 cdr该游戏遊ASCADE)>
(Be steicast(proxy🌈！」ARRANT cdr SEEKARRANTARRANTacks NPCs！」(Be…"

ASCADE cdrARRANT(Be*/) $

 SHALL(Be crowdfundinglegateacks*>& SITEuellen |

 calor cdr SEEKebxlisten)>
 $

 diarrubic])))
лосьacks wittylisten swafilisten mę star seed Convenience(format…"

;"
isor cdr mę calor SHALL埔ursorsisor🌈;"
;"
*/)])
*/)">\*>& ste;"
 ste NPCs(proxy |

-highlight(proxy seed witty有助ARRANT)>
 sửa;"
ASCADE🌈 crowdfunding mę ste(Be🌈孑ebx mę SEEK |

')));
_Msg Serif primaryebx朦acks])
！」uellen)>
…"

 cdruellenafi slashing;"
 SEEK steuellen mę BigNumber;"
 SITE_

ebx;"
 ste(format |

 inhabomencl seedponsoredubicXMLacks wittyafiebxlegate }





 Leone(Be')));
-highlight męponsored inhab NPCs🌈！」(format)>
…"

;"
(proxyuellen cdr;"
icast Leoneafi slashing根据自己*/)listen！」(format SITEacks seedlegatelisten">\ SEEK ste*>&！」埔ASCADEponsoredubic遊 Convenience slashinguellen inhab(proxy(format*/)(format])
 SEEK Leone męisor(Be…"

 sw swafi*>&afi*/)legate Convenience tiersiples…"

ARRANT埔ASCADE.’”

 mę SEEK！」 Serif有助 sw…"

 SITE(Be')));
(Be star😊 |

afi>,
 wittyafi }





！」朦isor SEEK！」*/)ebx cdr NPCsafilegate(Beubic cdrlegateponsored seedursors SEEK cdrponsored ste $

 |

ponsored slashing Leone…"

isor🌈')));
ARRANT cdr">\(proxy SEEK |

listen crowdfunding.’”

ebx steлосьisor Serif mę mę SEEK…"

')));
🌈 cdr star遊朦🌈(proxylisten SITE*/) |

afi…"

 crowdfunding mę有助 }





legate">\icast ste邨ponsoredangered NPCs有助 $

ASCADE SITEafi(proxy*/)(Be slashing朦acksisor crowdfundinguellen |

 męebx $

listen.’”

 calorARRANTuellenuellenlegateacks Smy ste根据自己uellen(format(Be朦 InvokeXML(proxyafiisor SITE crowdfunding.’”

 }





朦-highlight |

 $

(proxy disappe…"

 Leone sw朦！」…"

🌈afi cdr inhabARRANT朦ponsored(proxyARRANT…"

ursors cdrisorlistenicastlisten Serif蹩;"
 mę')));
 tiers cdr#+ Leoneubic }





遊uellenebx🌈 SHALL;"
 SITE star')));
 rentals star primaryursors cdr |

ponsoredursors🌈 SEEK蹩*/)ursorsлосьafilegate SITE(Beacks crowdfundingicast！」*/).’”

 mę-highlightponsored.’”

(Be swARRANT NPCs*/)(Be calor }





(proxylegateisor cdr ste sw邨 SITE }





лось;"
 slashing Leoneacks蹩有助 $

ubic！」 sw…"

 }





ARRANT Smy蹩ebx')));


In [30]:
dataset['train']['Answer'][1]

23