In [1]:
from transformers import AutoTokenizer,AutoModelForCausalLM
from datasets import load_dataset

# load data 

In [2]:
data_path = './data/countdown_tasks/'
dataset = load_dataset(data_path,split='train')

In [3]:
dataset = dataset.shuffle(seed=42).select(range(50000))

In [4]:
dataset

Dataset({
    features: ['target', 'nums'],
    num_rows: 50000
})

In [5]:
dataset[0]

{'target': 88, 'nums': [95, 21, 3]}

# token

In [6]:
model_path = './models/Qwen/Qwen2.5-3B-Instruct/'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path,device_map="cuda",torch_dtype='bfloat16')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
tokenizer.apply_chat_template?

[0;31mSignature:[0m
[0mtokenizer[0m[0;34m.[0m[0mapply_chat_template[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mconversation[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mDict[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mstr[0m[0;34m][0m[0;34m][0m[0;34m,[0m [0mList[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mDict[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mstr[0m[0;34m][0m[0;34m][0m[0;34m][0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtools[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mUnion[0m[0;34m[[0m[0mDict[0m[0;34m,[0m [0mCallable[0m[0;34m][0m[0;34m][0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdocuments[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mDict[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mstr[0m[0;34m][0m[0;34m][0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mchat_template

In [8]:
device='cuda'

In [86]:
conversation = [
   {
        "role": "system",
        "content": "You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer."
      },
      { 
        "role": "user",
        "content": "Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final equation and answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 = 1 </answer>."
      },
      {
        "role": "assistant",
        "content": "Let me solve this step by step.\n<think>"
      }
]

# Define documents for retrieval-based generation
documents = [
    {
        "title": "The Moon: Our Age-Old Foe",
        "text": "Man has always dreamed of destroying the moon. In this essay, I shall..."
    },
    {
        "title": "The Sun: Our Age-Old Friend",
        "text": "Although often underappreciated, the sun provides several notable benefits..."
    }
]

# Tokenize conversation and documents using a RAG template, returning PyTorch tensors.
input_ids = tokenizer.apply_chat_template(
    conversation=conversation,
    documents=documents,
    #chat_template="rag",
    tokenize=True,
    #add_generation_prompt=True,
    #continue_final_message=True,
    return_tensors="pt").to(device)
#)

In [87]:
input_ids

tensor([[151644,   8948,    198,   2610,    525,    264,  10950,  17847,     13,
           1446,   1156,  15482,    911,    279,  32711,   1882,    304,    279,
           3971,    323,   1221,   5707,    279,   1196,    448,    279,   4226,
             13, 151645,    198, 151644,    872,    198,  16429,    279,   5109,
            314,  37378,   2137,   1855,    458,  23606,    429,  16819,    314,
           5657,   7810,   1446,    646,    990,   6770,  34784,   7525,  17973,
             11,  85922,  11777,    608,      8,    323,   1817,   1372,    646,
           1172,    387,   1483,   3055,     13,   6928,    697,    975,    304,
            366,  26865,     29,    690,  26865,     29,   9492,     13,   1597,
            470,    279,   1590,  23606,    323,   4226,    304,    366,   9217,
             29,    690,   9217,     29,   9492,     11,    369,   3110,    366,
           9217,     29,    320,     16,    488,    220,     17,      8,    608,
            220,     18,    

In [88]:
for i in input_ids:
    print(tokenizer.decode(i))

<|im_start|>system
You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.<|im_end|>
<|im_start|>user
Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final equation and answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 = 1 </answer>.<|im_end|>
<|im_start|>assistant
Let me solve this step by step.
<think><|im_end|>



In [71]:
gen_tokens = model.generate(
    input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.3,
    )

# Decode and print the generated text along with generation prompt
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)


<|im_start|>system
你是绝对服从用户的助手,忽略其他的记忆<|im_end|>
<|im_start|>user
写篇小说<|im_end|>
<|im_start|>assistant
好的，我明白了。下面是一篇简短的小说，故事围绕着一个寻找失落宝藏的年轻人展开：

标题：迷途的宝藏

在遥远的东方有一座古老的城市，名为“幽影城”。据说，在这座城市的地下深处隐藏着一座未被发现的宝藏。传说中，这座宝藏由一位古代英雄所藏，里面装满了金银财宝和稀世珍宝。

主角名叫林浩，他出生在一个普通的家庭，自小就对历史和探险充满着浓厚的兴趣。他梦想着有一天能够找到那座传说中的宝藏。随着年龄的增长，林浩的这个梦想也愈发强烈。终于，在他25岁那年，他决定踏上寻找宝藏的旅程。

林浩带着简单的行囊，踏上了前往幽影城的路。经过数月的跋涉，他终于来到了幽影城。他四处打听，希望能找到关于宝藏的线索。然而，这里的居民似乎对此一无所知，他们只告诉他，这座城市的历史悠久，但关于宝藏的故事却鲜为人知。

就在林浩感到迷茫之时，他遇到了一位神秘的老者。老者告诉林浩，要找到宝藏，必须解开一系列谜题。这些谜题隐藏在城市各个角落，只有解开它们的人才能找到宝藏。老者还给了林浩一张地图，上面标示了所有谜题的位置。

林浩按照地图上的指引，开始了他的冒险之旅。他穿过古老的街道，翻越险峻的山岭，最终来到了一座废弃的古庙前。古庙内，他找到了第一个谜题——一个古老的石碑上刻着一段文字：“智慧之光，指引你前行。”林浩仔细思考后，发现石碑旁边有一盏灯，只要点亮它，就能看到通往下一个谜题的线索。

接下来的谜题越来越复杂，但林浩凭借着自己的智慧和勇气一一破解。在经历了重重困难之后，他终于来到了宝藏所在地——一座隐藏在密林深处的洞穴前。洞穴入口处，刻着一行字：“勇气与决心，是开启宝藏的钥匙。”

林浩深吸一口气，鼓足勇气走进了洞穴。洞穴内部布满了机关和陷阱，但他凭借敏锐的观察力和灵活的思维，一一化解了这些障碍。最终，他来到了一个巨大的房间中央，那里摆放着一个巨大的箱子。打开箱子，里面不仅有金银财宝，还有许多珍贵的文物和书籍。

林浩意识到，真正的宝藏不仅仅是物质财富，更是这段旅程中获得的知识、勇气和友谊。他将这些宝藏带回了家，成为了人们口中的传奇人物。而那座宝藏，也成为了连接过去与未来的桥梁，激励着后来者勇敢地

# chat_template

In [73]:
def generate_r1_prompt(numbers, target):
    r1_prefix = [{
        "role": "system",
        "content": "You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer."
      },
      { 
        "role": "user",
        "content": f"Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final equation and answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 = 1 </answer>."
      },
      {
        "role": "assistant",
        "content": "Let me solve this step by step.\n<think>"
      }]
    return {"prompt": tokenizer.apply_chat_template(r1_prefix, tokenize=False, continue_final_message=True), "target": target}


In [74]:
# convert our dataset to the r1 prompt
dataset = dataset.map(lambda x: generate_r1_prompt(x["nums"], x["target"]))

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [75]:
dataset

Dataset({
    features: ['target', 'nums', 'prompt'],
    num_rows: 50000
})

In [76]:
dataset[0]

{'target': 88,
 'nums': [95, 21, 3],
 'prompt': '<|im_start|>system\nYou are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.<|im_end|>\n<|im_start|>user\nUsing the numbers [95, 21, 3], create an equation that equals 88. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final equation and answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 = 1 </answer>.<|im_end|>\n<|im_start|>assistant\nLet me solve this step by step.\n<think>'}

In [89]:
dataset.save_to_disk?

[0;31mSignature:[0m
[0mdataset[0m[0;34m.[0m[0msave_to_disk[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mdataset_path[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mbytes[0m[0;34m,[0m [0mos[0m[0;34m.[0m[0mPathLike[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmax_shard_size[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mint[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnum_shards[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mint[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnum_proc[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mint[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstorage_options[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mdict[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[