# new token & new chat_template

In [1]:
from transformers import AutoTokenizer, AutoProcessor, Qwen2_5_VLProcessor
from anomaly_qwen2_5_vl import Anomaly_Qwen2_5_VLForConditionalGeneration


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Normal batch: {'input_ids': torch.Size([1820, 21]), 'attention_mask': torch.Size([1820, 21])} 
Abnormal batch: {'input_ids': torch.Size([1820, 21]), 'attention_mask': torch.Size([1820, 21])}


In [2]:
from transformers import AutoTokenizer, AutoProcessor, Qwen2_5_VLProcessor
from anomaly_qwen2_5_vl import Anomaly_Qwen2_5_VLForConditionalGeneration

# Build the anomaly-aware checkpoint directory: save the model weights and the
# processor from the base checkpoint, then overwrite the tokenizer files with a
# tokenizer that knows the three anomaly-prompt tokens and the new chat template.
BASE_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
OUTPUT_DIR = "Geo/Anomaly_Qwen2.5-VL-7B-Instruct"

# default: Load the model on the available device(s)
model = Anomaly_Qwen2_5_VLForConditionalGeneration.from_pretrained(
    BASE_MODEL_ID, torch_dtype="auto", device_map="auto"
)
model.save_pretrained(OUTPUT_DIR)

# The processor is saved first; the tokenizer saved at the end of this cell
# overwrites the tokenizer/chat-template files in the same directory.
processor = AutoProcessor.from_pretrained(BASE_MODEL_ID)
processor.save_pretrained(OUTPUT_DIR)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

# Tokens that delimit / hold the place of the anomaly prompt in the chat template.
special_tokens_dict = {
    "additional_special_tokens": [
        "<|anomaly_prompt_start|>",
        "<|anomaly_prompt_pad|>",
        "<|anomaly_prompt_end|>"
    ]
}
# Extend (do not replace) the tokenizer's existing additional special tokens, so
# the tokens that ship with the base tokenizer keep their special-token status.
# NOTE(review): the model's embedding matrix is not resized in this cell --
# confirm the new token ids fit inside the checkpoint's embedding rows.
tokenizer.add_special_tokens(special_tokens_dict, replace_additional_special_tokens=False)


# Chat template: after every image/video placeholder it emits the
# <|anomaly_prompt_start|><|anomaly_prompt_pad|><|anomaly_prompt_end|> triple.
tokenizer.chat_template = """{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
You are a helpful assistant.<|im_end|>
{% endif %}<|im_start|>{{ message['role'] }}
{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|><|anomaly_prompt_start|><|anomaly_prompt_pad|><|anomaly_prompt_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|><|anomaly_prompt_start|><|anomaly_prompt_pad|><|anomaly_prompt_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
{% endif %}"""

# Overwrite-save (must be the same directory as the processor!)
tokenizer.save_pretrained(OUTPUT_DIR)


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Some weights of Anomaly_Qwen2_5_VLForConditionalGeneration were not initialized from the model checkpoint at Qwen/Qwen2.5-VL-7B-Instruct and are newly initialized: ['model.anomaly_maps_decoder.fuse_weights', 'model.anomaly_maps_decoder.mlps.0.model.0.bias', 'model.anomaly_maps_decoder.mlps.0.model.0.weight', 'model.anomaly_maps_decoder.mlps.0.model.2.bias', 'model.anomaly_maps_decoder.mlps.0.model.2.weight', 'model.anomaly_maps_decoder.mlps.1.model.0.bias', 'model.anomaly_maps_decoder.mlps.1.model.0.weight', 'model.anomaly_maps_decoder.mlps.1.model.2.bias', 'model.anomaly_maps_decoder.mlps.1.model.2.weight', 'model.anomaly_maps_decoder.mlps.2.model.0.bias', 'model.anomaly_maps_decoder.mlps.2.model.0.weight', 'model.anomaly_maps_decoder.mlps.2.model.2.bias', 'model.anomaly_maps_decoder.mlps.2.model.2.weight', 'model.anomaly_maps_decoder.mlps.3.model.0.bias', 'model.anomaly_maps_decoder.mlps.3.model.0.weight', 'model.anomaly_maps_decoder.mlps.3.model.2.bias', 'model.anomaly_maps_decoder.

('Geo/Anomaly_Qwen2.5-VL-7B-Instruct\\tokenizer_config.json',
 'Geo/Anomaly_Qwen2.5-VL-7B-Instruct\\special_tokens_map.json',
 'Geo/Anomaly_Qwen2.5-VL-7B-Instruct\\chat_template.jinja',
 'Geo/Anomaly_Qwen2.5-VL-7B-Instruct\\vocab.json',
 'Geo/Anomaly_Qwen2.5-VL-7B-Instruct\\merges.txt',
 'Geo/Anomaly_Qwen2.5-VL-7B-Instruct\\added_tokens.json',
 'Geo/Anomaly_Qwen2.5-VL-7B-Instruct\\tokenizer.json')

In [3]:
# Reload the processor from the freshly written directory and render a sample
# conversation to inspect the prompt text produced by the chat template.
processor = Qwen2_5_VLProcessor.from_pretrained("Geo/Anomaly_Qwen2.5-VL-7B-Instruct")

# One user turn made of a local video clip followed by a text question.
video_part = {"type": "video", "video": "./dataset/train/01_0029-180_203.mp4", "fps": 24.0}
question_part = {"type": "text", "text": "这段校园步行道视频里有异常情况吗？"}
messages = [{"role": "user", "content": [video_part, question_part]}]

# Render to a plain string (no tokenization) and append the assistant header.
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

print(text)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
<|vision_start|><|video_pad|><|vision_end|><|anomaly_prompt_start|><|anomaly_prompt_pad|><|anomaly_prompt_end|>这段校园步行道视频里有异常情况吗？<|im_end|>
<|im_start|>assistant



In [4]:
from transformers import AutoConfig,AutoTokenizer

# Store the id of the anomaly-prompt placeholder token in the saved model config.
config = AutoConfig.from_pretrained("Geo/Anomaly_Qwen2.5-VL-7B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Geo/Anomaly_Qwen2.5-VL-7B-Instruct")

# Look up the id the saved tokenizer assigns to the placeholder token.
anomaly_prompt_token_id = tokenizer.convert_tokens_to_ids("<|anomaly_prompt_pad|>")
# An unknown token maps to the unk id (possibly None); refuse to write that
# into the config instead of silently saving a wrong id.
if anomaly_prompt_token_id is None or anomaly_prompt_token_id == tokenizer.unk_token_id:
    raise ValueError(
        "'<|anomaly_prompt_pad|>' is not in the tokenizer vocabulary; "
        "add the anomaly special tokens and save the tokenizer first."
    )
config.anomaly_prompt_token_id = anomaly_prompt_token_id
print(anomaly_prompt_token_id)
config.save_pretrained("Geo/Anomaly_Qwen2.5-VL-7B-Instruct")


151666
