# SFT 训练数据合成

In [1]:
import json
from constant import SYSTEM_PROMPT, USER_PROMPT_TPL, parse_user_prompt

## 1. 合成符合格式的正确答案

In [2]:
TRAIN_DATA_PATH = "data/train.jsonl"
SFT_OUTPUT_PATH = "data/train_sft_simple.jsonl"

# Build one chat-format SFT sample (system / user / assistant messages) per
# record in the training JSONL file.
data = []
# Read as UTF-8 explicitly so the result does not depend on the platform's
# default locale encoding.
with open(TRAIN_DATA_PATH, "r", encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at end of file), which
        # would otherwise make json.loads raise.
        if not line.strip():
            continue
        d = json.loads(line)
        # Turn "solution_steps" such as
        #   [[496, "-", 80, 416], [416, "-", 41, 375], [375, "-", 97, 278]]
        # into step-by-step "thinking" text using a fixed template.
        reasoning_steps = [
            f"Step {i}: {step[0]} {step[1]} {step[2]} = {step[3]}"
            for i, step in enumerate(d["solution_steps"], start=1)
        ]
        reasoning_steps.append(f"Final answer: {d['ground_truth_solution']}")
        reasoning = "\n".join(reasoning_steps)
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": parse_user_prompt(USER_PROMPT_TPL, d["numbers"], d["target"]),
            },
            {
                "role": "assistant",
                "content": f"<think>{reasoning}</think>\n\n<answer>{d['ground_truth_solution']}</answer>",
            },
        ]
        data.append({"messages": messages})

# Preview the first sample; skip the preview when the input had no records
# instead of failing with IndexError.
if data:
    print(json.dumps(data[0], indent=4))

# Records are dumped with ensure_ascii=False, so non-ASCII characters are
# written verbatim; the file must therefore be opened as UTF-8.
with open(SFT_OUTPUT_PATH, "w", encoding="utf-8") as f:
    for d in data:
        f.write(json.dumps(d, ensure_ascii=False) + "\n")


{
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer."
        },
        {
            "role": "user",
            "content": "Using the numbers 29, 11, 27, 10, create an equation that equals 524. You can use basic arithmetic operations (+, -, *, /) one or multiple times but each number can only be used once, and you must use all the numbers. Show your work in <think> </think> tags. And return the final equation in <answer> </answer> tags, for example <answer>(1 + 2) / 3</answer>. Think step by step inside <think> tags."
        },
        {
            "role": "assistant",
            "content": "<think>Step 1: 29 - 10 = 19\nStep 2: 19 * 27 = 513\nStep 3: 513 + 11 = 524\nFinal answer: ((29 - 10) * 27) + 11</think>\n\n<answer>((29 - 10) * 27) + 11</answer>"
        }
    ]
}
