# 加载Checkpoint

In [1]:
import os

# Directory the GRPO training run wrote its checkpoints into.
output_dir = 'outputs/Qwen2.5-0.5B-Instruct-GRPO'


def _checkpoint_step(name):
    """Return the step number of a checkpoint entry, or None if `name` is not one.

    An entry qualifies when it starts with 'checkpoint' and the text after its
    last '-' parses as an integer (e.g. 'checkpoint-800' -> 800).
    """
    if not name.startswith('checkpoint'):
        return None
    try:
        return int(name.split('-')[-1])
    except ValueError:
        # Entries such as 'checkpoint-final' or 'checkpoints.json' are skipped
        # instead of aborting the whole lookup.
        return None


checkpoints = os.listdir(output_dir)
checkpoint_steps = {name: _checkpoint_step(name) for name in checkpoints}
numbered = [name for name, step in checkpoint_steps.items() if step is not None]
if not numbered:
    # Fail with an explicit message rather than an IndexError on an empty list.
    raise FileNotFoundError(f'no checkpoint-<step> entries found in {output_dir!r}')

# Pick the entry with the highest step number.
latest_checkpoints = max(numbered, key=checkpoint_steps.__getitem__)
model_name = f'{output_dir}/{latest_checkpoints}'
print(model_name)

outputs/Qwen2.5-0.5B-Instruct-GRPO/checkpoint-800


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

# Both models are loaded with the same options, so they are kept in one place.
load_options = {"torch_dtype": "auto", "device_map": "auto"}

# Model restored from the checkpoint directory stored in `model_name`.
grpo_model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

# Original Qwen2.5-0.5B-Instruct weights from the local ModelScope cache.
raw_model = AutoModelForCausalLM.from_pretrained(
    '/root/.cache/modelscope/hub/Qwen/Qwen2.5-0.5B-Instruct',
    **load_options,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 推理

In [3]:
from modelscope.msdatasets import MsDataset
# Load the 'train' split of the 'main' subset of modelscope/gsm8k.
data = MsDataset.load(
    'modelscope/gsm8k',
    subset_name='main',
    split='train',
)

# Show the first record (a dict with 'question' and 'answer' keys).
data[0]



{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?',
 'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'}

原版模型

In [7]:
# System instruction describing the <reasoning>/<answer> response layout.
SYSTEM_PROMPT = (
    "\n"
    "Respond in the following format:\n"
    "<reasoning>\n"
    "...\n"
    "</reasoning>\n"
    "<answer>\n"
    "...\n"
    "</answer>\n"
)

# Template used to render an assistant turn in that same layout.
XML_COT_FORMAT = (
    "\n"
    "<reasoning>\n"
    "{reasoning}\n"
    "</reasoning>\n"
    "<answer>\n"
    "{answer}\n"
    "</answer>\n"
)

query = '树上7个鸟，又飞来1个鸟，一共几个鸟'

# Few-shot example, because the 0.5B model is too weak.
few_shot_reply = XML_COT_FORMAT.format(
    reasoning='可以将数字拆开看，1、0、2、0、3、0、4、0，我们可以数出有4个0',
    answer='4',
)
messages = [
    {'role': 'system', 'content': SYSTEM_PROMPT},
    {'role': 'user', 'content': '数字10203040里面有几个0?'},
    {'role': 'assistant', 'content': few_shot_reply},
    {'role': 'user', 'content': query},
]

# Render the conversation to a prompt string, then tokenize it on the
# baseline model's device.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(raw_model.device)

generated_ids = raw_model.generate(**model_inputs, max_new_tokens=300)

# Skip the first `prompt_length` positions so only the tokens after the
# prompt are decoded.
prompt_length = model_inputs.input_ids.shape[1]
completion_ids = generated_ids[0, prompt_length:]
completion_text = tokenizer.decode(completion_ids, skip_special_tokens=True)
print(completion_text)

为了计算新的总数量，我们需要从原始的7只鸟中加上新的1只。所以，总共的鸟的数量是：

$$ 7 + 1 = 8 $$

因此，现在共有8只鸟。


RL版本

In [23]:
# System instruction describing the <reasoning>/<answer> response layout.
SYSTEM_PROMPT = (
    "\n"
    "Respond in the following format:\n"
    "<reasoning>\n"
    "...\n"
    "</reasoning>\n"
    "<answer>\n"
    "...\n"
    "</answer>\n"
)

# Template used to render an assistant turn in that same layout.
XML_COT_FORMAT = (
    "\n"
    "<reasoning>\n"
    "{reasoning}\n"
    "</reasoning>\n"
    "<answer>\n"
    "{answer}\n"
    "</answer>\n"
)

query = '小明站在队伍中间，前面有2个人，后面有3个人，请问队伍一共多少人？'

# Few-shot example, because the 0.5B model is too weak.
few_shot_reply = XML_COT_FORMAT.format(
    reasoning='可以将数字拆开看，1、0、2、0、3、0、4、0，我们可以数出有4个0',
    answer='4',
)
messages = [
    {'role': 'system', 'content': SYSTEM_PROMPT},
    {'role': 'user', 'content': '数字10203040里面有几个0?'},
    {'role': 'assistant', 'content': few_shot_reply},
    {'role': 'user', 'content': query},
]

# Render the conversation to a prompt string, then tokenize it on the
# GRPO model's device.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(grpo_model.device)

generated_ids = grpo_model.generate(**model_inputs, max_new_tokens=300)

# Skip the first `prompt_length` positions so only the tokens after the
# prompt are decoded.
prompt_length = model_inputs.input_ids.shape[1]
completion_ids = generated_ids[0, prompt_length:]
completion_text = tokenizer.decode(completion_ids, skip_special_tokens=True)
print(completion_text)

<reasoning>
2 + 3 = 5 people
</reasoning>
<answer>
队伍一共有5人
</answer>
