In [1]:
from transformers import pipeline

classifier = pipeline("sentiment-analysis")
result = classifier(
    [
        "I've been waiting for a HuggingFace course my whole life.",
        "I hate this so much!",
    ]
)
for r in result:
    print(f"label: {r['label']}, with score: {round(r['score'], 7)}")

  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


label: POSITIVE, with score: 0.9598051
label: NEGATIVE, with score: 0.9994559


In [4]:
# 接下来是复刻管道的多个步骤，按照三个步骤分开执行
from transformers import AutoTokenizer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [5]:
raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
    "glad to see you",
    "i love you",
    "glad to here that",
    "今天天气好热啊，受不了了"
]
inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
for key, value in inputs.items():
    print(f"{key}: {value}")
# print(inputs)

input_ids: tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0],
        [  101,  5580,  2000,  2156,  2017,   102,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0],
        [  101,  1045,  2293,  2017,   102,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0],
        [  101,  5580,  2000,  2182,  2008,   102,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0],
        [  101,   100,  1811,  1811,   100,   100,   100,   100,  1989,   100,
          1744,   100,   100,   102,     0,     0]])
attention_mask: tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 

In [6]:
# explore model
from transformers import AutoModel

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModel.from_pretrained(checkpoint)

my_model_outputs = model(**inputs)
print(my_model_outputs.last_hidden_state.shape)


torch.Size([6, 16, 768])


In [7]:
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
outputs = model(**inputs)
print(outputs.logits.shape)  # (batch_size, sequence_length, hidden_size)
print(outputs.logits)
print(outputs.loss)

torch.Size([6, 2])
tensor([[-1.5607,  1.6123],
        [ 4.1692, -3.3464],
        [-4.1307,  4.5048],
        [-4.2756,  4.6393],
        [-3.8654,  4.1883],
        [ 1.1069, -0.9462]], grad_fn=<AddmmBackward0>)
None


In [8]:
import torch
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
print(predictions)
print(model.config.id2label)
for i, pred in enumerate(predictions):
    if pred[0] > pred[1]:
        print(f"Sentence {i} is negative with score {pred[0]:.4f}")
    else:
        print(f"Sentence {i} is positive with score {pred[1]:.4f}")

tensor([[4.0195e-02, 9.5981e-01],
        [9.9946e-01, 5.4418e-04],
        [1.7765e-04, 9.9982e-01],
        [1.3436e-04, 9.9987e-01],
        [3.1781e-04, 9.9968e-01],
        [8.8626e-01, 1.1374e-01]], grad_fn=<SoftmaxBackward0>)
{0: 'NEGATIVE', 1: 'POSITIVE'}
Sentence 0 is positive with score 0.9598
Sentence 1 is negative with score 0.9995
Sentence 2 is positive with score 0.9998
Sentence 3 is positive with score 0.9999
Sentence 4 is positive with score 0.9997
Sentence 5 is negative with score 0.8863


In [9]:
# use model
from transformers import BertConfig, BertModel

# 初始化 Config 类
config = BertConfig()
print(config)
# 如果想要加载预训练模型，可以使用 `from_pretrained` 方法
# config = BertConfig.from_pretrained("bert-base-uncased")
# 这里使用默认的配置参数初始化 BertModel
# model = BertModel(config)
model = BertModel.from_pretrained("bert-base-uncased") # 加载预训练模型
model.save_pretrained("my_bert_model")  # 保存模型到本地


BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.53.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}



In [10]:
## begin tokenizer
text = "Jim Henson was a puppeteer"
tokenized_text = text.split()
print(tokenized_text)

['Jim', 'Henson', 'was', 'a', 'puppeteer']


In [11]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

In [12]:
result = tokenizer("hello world",padding='max_length', max_length=10, return_tensors="pt")
for key, value in result.items():
    print(f"{key}: {value}")

input_ids: tensor([[  101, 19082,  1362,   102,     0,     0,     0,     0,     0,     0]])
token_type_ids: tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
attention_mask: tensor([[1, 1, 1, 1, 0, 0, 0, 0, 0, 0]])


In [14]:
# 单独tokenize
sequence = "Using a Transformer network is simple"
token = tokenizer.tokenize(sequence)  # 分词
print(token)  # ['Using', 'a', 'Transformer', 'network', 'is', 'simple'
ids = tokenizer.convert_tokens_to_ids(token)  # 转换为ID
print(ids)

['Using', 'a', 'Trans', '##former', 'network', 'is', 'simple']
[7993, 170, 13809, 23763, 2443, 1110, 3014]


In [15]:
decoded_str = tokenizer.decode(ids)  # 转换回token
print(decoded_str)  # Using a Transformer network is simple

Using a Transformer network is simple


In [11]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence = "I've been waiting for a HuggingFace course my whole life."

tokens = tokenizer.tokenize(sequence)
print("Tokens:", tokens)
# 将token转换为ID
ids = tokenizer.convert_tokens_to_ids(tokens)

print("IDs:", ids)
input_ids = torch.tensor([ids])
print("Input IDs:", input_ids)

output = model(input_ids,output_hidden_states=True)
print("Logits:", output.logits)
print("total level:", len(output.hidden_states))
print("Last hidden state shape:", output.hidden_states[-1].shape)  # (batch_size, sequence_length, hidden_size)

Tokens: ['i', "'", 've', 'been', 'waiting', 'for', 'a', 'hugging', '##face', 'course', 'my', 'whole', 'life', '.']
IDs: [1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
Input IDs: tensor([[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,
          2026,  2878,  2166,  1012]])
Logits: tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)
total level: 7
Last hidden state shape: torch.Size([1, 14, 768])


In [19]:
# 接着上面的代码，输出最后的结果
# 计算概率
from transformers import pipeline
probs = torch.nn.functional.softmax(output.logits, dim=-1)
print("Probabilities:", probs,"\n")

# 输出预测标签
pred_label_id = torch.argmax(probs, dim=-1).item()
label = model.config.id2label[pred_label_id]
print("Predicted label:", label,"\n")

# 直接使用pipeline
pipeline_model = pipeline("sentiment-analysis", model=checkpoint)
result = pipeline_model(sequence)
print("Pipeline result:", result)


Probabilities: tensor([[0.0037, 0.9963]], grad_fn=<SoftmaxBackward0>) 

Predicted label: POSITIVE 



Device set to use cpu


Pipeline result: [{'label': 'POSITIVE', 'score': 0.9598050713539124}]


In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)


sequence1_ids = [[200, 200, 200]]
sequence2_ids = [[200, 200]]
batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id],
]

print("batched_ids:", batched_ids)

print(model(torch.tensor(sequence1_ids)).logits)
print(model(torch.tensor(sequence2_ids)).logits)


res = model(torch.tensor(batched_ids))
print("logits:", res.logits)


  from .autonotebook import tqdm as notebook_tqdm


batched_ids: [[200, 200, 200], [200, 200, 0]]


We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward0>)
tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)
logits: tensor([[ 1.5694, -1.3895],
        [ 1.3374, -1.2163]], grad_fn=<AddmmBackward0>)


In [None]:
# 添加注意力掩码层
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id],
]

attention_mask = [
    [1, 1, 1],
    [1, 1, 0],
]

outputs = model(torch.tensor(batched_ids), attention_mask=torch.tensor(attention_mask))
print(outputs.logits)

2025-07-11 15:09:12.419072: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-11 15:09:17.442550: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-11 15:09:19.617562: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752246563.694538   13065 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752246564.514554   13065 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752246572.308882   13065 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

hidden_shape None
tensor([[ 1.5694, -1.3895],
        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)


In [1]:
# put together
from transformers import AutoTokenizer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

sequence = "I've been waiting for a HuggingFace course my whole life."
tokens = tokenizer.tokenize(sequence)  # 分词
print("Tokens:", tokens)
model_inputs = tokenizer(sequence)
print("Model inputs:", model_inputs)

  from .autonotebook import tqdm as notebook_tqdm


Tokens: ['i', "'", 've', 'been', 'waiting', 'for', 'a', 'hugging', '##face', 'course', 'my', 'whole', 'life', '.']
Model inputs: {'input_ids': [101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [7]:
sequences = ["I've been waiting for a HuggingFace course my whole life.","you are so beautiful"]
multi_tokens = tokenizer.tokenize(sequences)
print("Multi-tokens:", multi_tokens,"\n")

multi_model_inputs = tokenizer(sequences)
print("Multi-model inputs:", multi_model_inputs)

multi_model_input2 = tokenizer(sequences,padding="longest")
print("Multi-model inputs2:", multi_model_input2)

multi_model_inputs3 = tokenizer(sequences,padding = "max_length")
print("Multi-model inputs3:", multi_model_inputs3)

multi_model_inputs4 = tokenizer(sequences,padding = "max_length", max_length=8)
print("Multi-model inputs4:", multi_model_inputs4)

multi_model_inputs5 = tokenizer(sequences,padding = "max_length", max_length=3,truncation=True)
print("Multi-model inputs5:", multi_model_inputs5)

Multi-tokens: ['i', "'", 've', 'been', 'waiting', 'for', 'a', 'hugging', '##face', 'course', 'my', 'whole', 'life', '.', 'you', 'are', 'so', 'beautiful'] 

Multi-model inputs: {'input_ids': [[101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102], [101, 2017, 2024, 2061, 3376, 102]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1]]}
Multi-model inputs2: {'input_ids': [[101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102], [101, 2017, 2024, 2061, 3376, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]}
Multi-model inputs3: {'input_ids': [[101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [12]:
import tensorflow as tf
sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]

# 返回 PyTorch tensors
model_inputs = tokenizer(sequences, padding=True, return_tensors="pt")
print("PyTorch tensors:", model_inputs)

# 返回 NumPy arrays
model_inputs = tokenizer(sequences, padding=True, return_tensors="np")
print("NumPy arrays:", model_inputs)

# 返回 TensorFlow tensors
model_inputs = tokenizer(sequences, padding=True, return_tensors="tf")
print("TensorFlow tensors:", model_inputs)



PyTorch tensors: {'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  2061,  2031,  1045,   999,   102,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}
NumPy arrays: {'input_ids': array([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662,
        12172,  2607,  2026,  2878,  2166,  1012,   102],
       [  101,  2061,  2031,  1045,   999,   102,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0]]), 'attention_mask': array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}


ImportError: Unable to convert output to TensorFlow tensors format, TensorFlow is not installed.

好的，下面用**大模型（如Qwen、BERT等）处理一段文本对话**为例，按时序梳理“大模型、张量、embedding模型”三者的关系：

---

### 1. 文本输入 → 分词器（Tokenizer）

- 你输入一句话，比如："你好，世界！"
- 分词器（tokenizer）把这句话拆成一个个 token（子词/单词），并查词表把每个 token 转成一个唯一的 id。
- 这些 id 会被组织成一个**张量**（tensor），比如 `[101, 872, 8024, 686, 102]`，类型是 `torch.Tensor`（PyTorch）或 `tf.Tensor`（TensorFlow）。

---

### 2. 张量输入 → 嵌入层（Embedding Layer）

- 这些 token id 的张量会输入到大模型的第一层——**嵌入层（embedding layer）**。
- 嵌入层会把每个 id 映射成一个高维向量（embedding vector），比如 id=872 变成 `[0.12, -0.34, ...]`。
- 这样，原始的 id 张量就变成了一个二维或三维的“向量张量”，形状如 `[batch, seq_len, hidden_size]`。

---

### 3. 嵌入向量 → 大模型主体（Transformer等）

- 这些 embedding 向量张量会被送入 Transformer 等大模型的主体部分，进行注意力计算、特征提取等复杂操作。
- 最终输出 logits、概率、分类结果等。

---

### 举例流程

假设你用 PyTorch 版 Qwen3 处理一句对话：



In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3")

text = "你好，世界！"
inputs = tokenizer(text, return_tensors="pt")  # 1. 得到 input_ids 张量
print(inputs["input_ids"])  # 例如: tensor([[101, 872, 8024, 686, 102]])

# 2. input_ids 张量送入模型，模型内部第一步就是embedding映射
outputs = model(**inputs)



- **Tokenizer**：把文本变成张量（input_ids）
- **Embedding Layer**：把 input_ids 张量变成 embedding 向量张量
- **大模型主体**：处理 embedding 张量，输出结果

---

### 总结三者关系

- **张量**：是数据的载体，贯穿整个流程（id张量、embedding张量、输出张量等）
- **分词器**：把文本变成 id 张量
- **嵌入层**：把 id 张量变成 embedding 张量
- **大模型**：用 embedding 张量做进一步推理，输出结果

每一步都离不开张量，embedding 层是大模型的第一步，三者紧密配合完成整个推理流程。