# 基础组件之Model

## Model基本使用方法

In [1]:
from transformers import AutoConfig, AutoModel, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


### 模型加载与保存

#### 在线加载

In [2]:
# Online loading: "hfl/rbt3" is the model's repository id on the Hugging Face Hub.
model = AutoModel.from_pretrained("hfl/rbt3")

#### 模型下载（需要科学上网，或使用 hf-mirror 镜像）

In [3]:
# Optional manual download: clone the model repository from the Hugging Face Hub.
# Kept commented out; uncomment to run it.
# !git clone https://huggingface.co/hfl/rbt3

In [4]:
# Alternative download through the hf-mirror.com mirror; --include="*.bin" restricts
# the LFS download to files matching *.bin. Kept commented out; uncomment to run it.
# NOTE(review): newer Git LFS releases reportedly mark `git lfs clone` as deprecated
# in favour of plain `git clone` — confirm against the installed version.
# !git lfs clone "https://hf-mirror.com/hfl/rbt3" --include="*.bin"

#### 离线加载

In [5]:
# Offline loading: read the config and weights from the local "rbt3" directory.
# local_files_only=True forbids any Hub lookup, so a missing or incomplete
# directory raises immediately instead of the bare name "rbt3" being treated
# as a Hub repository id and triggering a network request.
model = AutoModel.from_pretrained("rbt3", local_files_only=True)

#### 模型加载参数

In [6]:
# Load the model from "rbt3" so that its configuration can be inspected next.
model = AutoModel.from_pretrained("rbt3")

In [7]:
# Show the configuration attached to the loaded model (a BertConfig for this checkpoint).
model.config

BertConfig {
  "_name_or_path": "rbt3",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.42.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [8]:
# Load just the configuration object from the local checkpoint directory,
# then display it so it can be compared with model.config.
config = AutoConfig.from_pretrained("./rbt3/")
config

BertConfig {
  "_name_or_path": "./rbt3/",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.42.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [9]:
# 获取 model.config 和 config 的键集合
# Top-level option names of the model's own config and of the separately loaded config.
# Iterating a dict yields its keys, so set(d) collects the same names as set(d.keys()).
mconfig_keys = set(model.config.to_dict())
config_keys = set(config.to_dict())

# Names present on exactly one of the two sides.
diff_keys = mconfig_keys ^ config_keys

# Report each one-sided difference first, then the combined difference.
print("Keys in mconfig but not in config:", mconfig_keys.difference(config_keys))
print("Keys in config but not in mconfig:", config_keys.difference(mconfig_keys))
print("Keys in either mconfig or config but not both:", diff_keys)

Keys in mconfig but not in config: set()
Keys in config but not in mconfig: set()
Keys in either mconfig or config but not both: set()


In [10]:
from transformers import BertConfig

# Opening the BertConfig source shows many more parameters, because it is
# declared as BertConfig(PretrainedConfig).
# One example is output_attentions:
# config.output_attentions

### 模型调用

In [11]:
# Sample sentence to run through the model.
sen = "弱小的我也有大梦想！"
# Load the tokenizer from the same "rbt3" checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained("rbt3")
# return_tensors="pt" makes the tokenizer return PyTorch tensors.
inputs = tokenizer(sen, return_tensors="pt")
# Displays the encoded fields: input_ids, token_type_ids and attention_mask.
inputs

{'input_ids': tensor([[ 101, 2483, 2207, 4638, 2769,  738, 3300, 1920, 3457, 2682, 8013,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

#### 不带Model Head的模型调用

In [12]:
# Reload the bare model (no task head) with output_attentions=True so that the
# forward pass also returns an 'attentions' entry.
model = AutoModel.from_pretrained("rbt3", output_attentions=True)

In [13]:
# Forward pass of the bare model on the tokenized sentence.
output = model(**inputs)
# List the fields available on the returned output object.
output.keys()



odict_keys(['last_hidden_state', 'pooler_output', 'attentions'])

In [14]:
# Pooled encoding derived from the [CLS] token.
# Note: this is NOT the raw [CLS] hidden state output.last_hidden_state[:, 0]
# (indexing with [0] alone would select the whole first sequence instead).
# In the model source, last_hidden_state is named sequence_output, and
#   pooler_output = self.pooler(sequence_output) if self.pooler is not None else None
#                 = sequence_output[:, 0] -> Linear -> tanh
# The size shown is (batch size, hidden size).
output.pooler_output.size()

torch.Size([1, 768])

In [15]:
# last_hidden_state: the encoded vector representation of every token in the sequence.
# Its size is (batch size, sequence length, hidden size).
output.last_hidden_state.size()

torch.Size([1, 12, 768])

In [17]:
# Number of tokens in the encoded sentence, read from the sequence dimension of
# the (batch, sequence) input_ids tensor; it matches the sequence length of
# last_hidden_state.
inputs["input_ids"].shape[1]

12

#### 带Model Head的模型调用

In [18]:
from transformers import (
    AutoModelForSequenceClassification,
    BertForSequenceClassification,  # open its source: num_labels is read at initialisation,
    # and the class has a built-in classification layer:
    # self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    # input -> BertModel -> pooler_output (vector representation of the [CLS] token) -> dropout -> Linear -> logits -> loss
)

In [19]:
# Load the same checkpoint with a sequence-classification head for 10 labels.
# The checkpoint carries no weights for that head, so classifier.weight and
# classifier.bias are newly initialised (hence the warning this cell prints).
clz_model = AutoModelForSequenceClassification.from_pretrained("rbt3", num_labels=10)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at rbt3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# Run the classification model on the same inputs: the result holds one logit
# per label (10 here); loss is None, presumably because no labels are passed.
clz_model(**inputs)

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.1140,  0.3101,  0.1894,  0.1666,  0.3564, -0.7470,  0.2878,  0.0608,
         -0.3680,  0.0980]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [21]:
# The number of labels requested at load time is stored on the model's config.
clz_model.config.num_labels

10