In [16]:
# GPT-like, decoder-only
from transformers import AutoModelForCausalLM  # loads a causal language model, i.e. a GPT-style LLM
from transformers import AutoTokenizer  # loads the tokenizer

In [17]:
# Checkpoint identifier handed to both from_pretrained() calls below.
# NOTE(review): there is no organisation prefix, so this presumably resolves to a
# local directory of that name -- confirm, or switch to the full Hub id.
model_name = "Qwen2.5-0.5B-Instruct"

In [18]:
# Load the tokenizer that belongs to the checkpoint named by model_name.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [19]:
# Inspect the tokenizer. Its repr reports vocab_size=151643, and the added
# special tokens listed under added_tokens_decoder start at id 151643.
tokenizer

Qwen2TokenizerFast(name_or_path='Qwen2.5-0.5B-Instruct', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=False, norm

In [20]:
# Load the causal-LM weights for the same checkpoint as the tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",  # let transformers choose the dtype instead of hard-coding one
    device_map="auto"  # let the library decide where to place the weights
)

In [21]:
# Bare expression: display the module tree of the loaded model.
model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (q_proj): Linear(in_features=896, out_features=896, bias=True)
          (k_proj): Linear(in_features=896, out_features=128, bias=True)
          (v_proj): Linear(in_features=896, out_features=128, bias=True)
          (o_proj): Linear(in_features=896, out_features=896, bias=False)
          (rotary_emb): Qwen2RotaryEmbedding()
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((

In [22]:
"""
    方便编程，先构建消息列表
    
    - system: 系统角色
    - user：用户角色
    - assistant：大模型角色

"""

messages = [
    {"role": "system", "content": "你是一个有用的助手！"},
    {"role": "user", "content": "你是谁？"}
]

In [23]:
"""
    把消息列表通过对话模版，变成一句话
"""

text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

In [24]:
# print() renders the template's line breaks as real newlines.
print(text)

<|im_start|>system
你是一个有用的助手！<|im_end|>
<|im_start|>user
你是谁？<|im_end|>
<|im_start|>assistant



In [25]:
# Tokenize the rendered prompt as a batch of one, request PyTorch tensors ("pt"),
# and move them to the device the model is on.
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

In [26]:
# Inspect the encoded batch: input_ids plus the matching attention_mask.
model_inputs

{'input_ids': tensor([[151644,   8948,    198,  56568, 101909, 115405, 110498,   6313, 151645,
            198, 151644,    872,    198, 105043, 100165,  11319, 151645,    198,
         151644,  77091,    198]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [27]:
# Run generation on the encoded prompt, capped at 512 new tokens.
generated_ids = model.generate(**model_inputs, max_new_tokens=512)

In [28]:

# Each generated sequence starts with its prompt tokens, so drop that prefix to
# keep only the newly generated tokens.
# The trimmed ids are bound to a new name rather than overwriting
# `generated_ids`: re-running this cell then trims the untouched generate()
# output again instead of slicing an already-trimmed list down to nothing.
new_token_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

# Decode the only sequence in the batch, leaving out special tokens such as <|im_end|>.
response = tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]

In [29]:
# Show the decoded assistant reply.
print(response)

我是阿里云开发的一款超大规模语言模型，我叫通义千问。


In [30]:
"""
    - 1. 补全：completion
    - 2. 自回归：auto-regressive

"""

'\n    - 1. 补全：completion\n    - 2. 自回归：auto-regressive\n\n'