In [5]:
import json
import os

from transformers import AutoTokenizer, AutoModelForCausalLM

# Location of the Qwen2.5-VL-3B-Instruct checkpoint.
# Set the QWEN_MODEL_PATH environment variable to point at another copy; when it
# is unset, the original cluster path is used, so existing runs are unaffected.
model_path = os.environ.get(
    "QWEN_MODEL_PATH",
    "/mnt/petrelfs/songmingyang/songmingyang/model/mm/Qwen2.5-VL-3B-Instruct",
)

# Only the tokenizer is loaded in this cell; no model weights are read here.
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [2]:
# Print the Jinja chat template instead of echoing the attribute: the echoed
# repr escapes every newline and quote, which makes the template hard to read.
print(tokenizer.chat_template)

'{%- if tools %}\n    {{- \'<|im_start|>system\\n\' }}\n    {%- if messages[0][\'role\'] == \'system\' %}\n        {{- messages[0][\'content\'] }}\n    {%- else %}\n        {{- \'You are a helpful assistant.\' }}\n    {%- endif %}\n    {{- "\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>" }}\n    {%- for tool in tools %}\n        {{- "\\n" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- "\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\\n</tool_call><|im_end|>\\n" }}\n{%- else %}\n    {%- if messages[0][\'role\'] == \'system\' %}\n        {{- \'<|im_start|>system\\n\' + messages[0][\'content\'] + \'<|im_end|>\\n\' }}\n    {%- else %}\n        {{- \'<|im_start|>system\\nYou are a he

In [9]:
def _function_tool(name, description, properties, required):
    """Wrap a parameter spec in the function-tool envelope used for `tools`.

    Args:
        name: Function name exposed to the model.
        description: Human-readable summary of what the function does.
        properties: Mapping of parameter name to its JSON-schema fragment.
        required: List of parameter names that must be supplied.

    Returns:
        A dict with keys "type" and "function", in that order; key order is
        kept stable because the schema is serialised into the prompt.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# Opening turns of the conversation: a system instruction and the first user question.
messages = [
    dict(
        role="system",
        content="You are a helpful reasoning assistant that can use tools. Please think step by step, and put your thinking procedure between <thought></thought>",
    ),
    dict(role="user", content="What's the weather like in London today?"),
]

# Tool schemas offered to the model: a weather lookup and a web search.
tools = [
    _function_tool(
        name="get_current_weather",
        description="Get the current weather in a given location",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        required=["location"],
    ),
    _function_tool(
        name="search_web",
        description="Searches the web for a given query",
        properties={
            "query": {
                "type": "string",
                "description": "The search query",
            },
        },
        required=["query"],
    ),
]

In [10]:
# Follow-up turns: the assistant's tool call, the simulated tool result, and
# the next user question.
follow_up_turns = [
    {
        "role": "assistant",
        "tool_calls": [
            {
                "type": "function",
                # The "function" wrapper is what the chat template's
                # `if tool_call.function is defined` branch looks for.
                "function": {
                    "name": "get_current_weather",
                    # Pass a dict, not a JSON string: Transformers chat
                    # templates serialise the arguments themselves, so a
                    # pre-dumped string would be JSON-encoded a second time.
                    "arguments": {"location": "London", "unit": "celsius"},
                },
            }
        ],
        "content": None,  # optional explanatory text could go here instead
    },
    # Simulated response after the tool has been executed.
    {
        "role": "tool",
        "content": "The weather in London is 15 degrees Celsius and partly cloudy.",
    },
    {"role": "user", "content": "Thank you! What about New York?"},
]

# Extend the shared history only when these turns are not already its tail, so
# re-running this cell does not duplicate them.
if messages[-len(follow_up_turns):] != follow_up_turns:
    messages.extend(follow_up_turns)

# Render the conversation together with the tool schemas into model input ids.
input_ids = tokenizer.apply_chat_template(
    messages,
    tools=tools,  # tool schemas are handed to the template as an extra argument
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
)

# Show the generated input ids and their shape.
print("Input IDs:", input_ids)
print("Input IDs shape:", input_ids.shape)

# Decode back to text, keeping special tokens, to inspect the exact prompt
# format the model receives; useful when debugging the template.
decoded_text = tokenizer.decode(input_ids[0], skip_special_tokens=False)
print("\nDecoded Text (what the model sees):")
print(decoded_text)

Input IDs: tensor([[151644,   8948,    198,   2610,    525,    264,  10950,  32711,  17847,
            429,    646,    990,   7375,     13,   5209,   1744,   3019,    553,
           3019,     11,    323,   2182,    697,   7274,  10324,   1948,    366,
          60565,   1472,  60565,   1339,      2,  13852,    271,   2610,   1231,
           1618,    825,    476,    803,   5746,    311,   7789,    448,    279,
           1196,   3239,    382,   2610,    525,   3897,    448,    729,  32628,
           2878,    366,  15918,   1472,  15918,     29,  11874,   9492,    510,
             27,  15918,    397,   4913,   1313,    788,    330,   1688,    497,
            330,   1688,    788,   5212,    606,    788,    330,    455,  11080,
          69364,    497,    330,   4684,    788,    330,   1949,    279,   1482,
           9104,    304,    264,   2661,   3728,    497,    330,  13786,    788,
           5212,   1313,    788,    330,   1700,    497,    330,  13193,    788,
           5212, 