In [1]:
import torch
from transformers import BertConfig, BertModel

# Fix the global torch RNG so the randomly initialised weights and the random
# token ids below are the same on every Restart & Run All.
torch.manual_seed(0)

# Build a BERT model from a configuration object alone (no pretrained
# checkpoint is loaded here).
config = BertConfig(vocab_size=15000, num_hidden_layers=4)
model = BertModel(config)
# A directly constructed nn.Module starts in training mode; torch.no_grad()
# does not turn dropout off, so switch to eval mode for the trial run.
model.eval()

# Trial batch: 4 sequences of 125 token ids each, with an all-ones attention mask.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because later cells reuse this dict.
input = {
    'input_ids': torch.randint(100, 10000, [4, 125]),
    'attention_mask': torch.ones(4, 125).long()
}

with torch.no_grad():
    out = model(**input)

# The result is a vector per token for each of the 4 sequences; these vectors
# can be used as features for downstream tasks.
config, out.last_hidden_state.shape

(BertConfig {
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
   "num_hidden_layers": 4,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "transformers_version": "4.42.3",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 15000
 },
 torch.Size([4, 125, 768]))

In [2]:
from transformers import GPT2Config, GPT2Model

# Build a GPT-2 model from a configuration object alone (no pretrained
# checkpoint is loaded here).
config = GPT2Config(vocab_size=15000, n_layer=4)
model = GPT2Model(config)
# A directly constructed nn.Module starts in training mode; torch.no_grad()
# does not turn dropout off, so switch to eval mode for the trial run.
model.eval()

# Trial forward pass, reusing the `input` batch defined in an earlier cell.
with torch.no_grad():
    out = model(**input)

config, out.last_hidden_state.shape

(GPT2Config {
   "activation_function": "gelu_new",
   "attn_pdrop": 0.1,
   "bos_token_id": 50256,
   "embd_pdrop": 0.1,
   "eos_token_id": 50256,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
   "n_embd": 768,
   "n_head": 12,
   "n_inner": null,
   "n_layer": 4,
   "n_positions": 1024,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
   "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
   "summary_proj_to_labels": true,
   "summary_type": "cls_index",
   "summary_use_proj": true,
   "transformers_version": "4.42.3",
   "use_cache": true,
   "vocab_size": 15000
 },
 torch.Size([4, 125, 768]))

In [3]:
from transformers import BertConfig, BertForSequenceClassification

# Build a sequence-classification model with 3 output labels directly from a config.
config = BertConfig(vocab_size=15000, num_hidden_layers=4, num_labels=3)
model = BertForSequenceClassification(config)
# A directly constructed nn.Module starts in training mode; torch.no_grad()
# does not turn dropout off, so switch to eval mode. Otherwise the loss shown
# below is perturbed by dropout.
model.eval()

# Trial batch that also carries `labels`, so the forward pass returns a loss
# alongside the logits.
input_with_labels = {
    'input_ids': torch.randint(100, 10000, [4, 125]),
    'attention_mask': torch.ones(4, 125).long(),
    'labels': torch.ones(4).long()
}

with torch.no_grad():
    out = model(**input_with_labels)

config, out.loss, out.logits.shape

(BertConfig {
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
     "2": "LABEL_2"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
     "LABEL_0": 0,
     "LABEL_1": 1,
     "LABEL_2": 2
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
   "num_hidden_layers": 4,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "transformers_version": "4.42.3",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 15000
 },
 tensor(1.1823),
 torch.Size([4, 3]))

In [4]:
from transformers import AutoModel

# Available checkpoints are listed at https://huggingface.co/models

# Load a pretrained checkpoint by its Hub identifier.
checkpoint = 'google-bert/bert-base-chinese'
model = AutoModel.from_pretrained(checkpoint)

# Trial forward pass, reusing the `input` batch defined in an earlier cell.
with torch.no_grad():
    out = model(
        input_ids=input['input_ids'],
        attention_mask=input['attention_mask'],
    )

out.last_hidden_state.shape

torch.Size([4, 125, 768])

In [5]:
# Round-trip the model through local disk: save it to a directory, then load
# it back from that same directory.
local_dir = 'model/google-bert/bert-base-chinese'
model.save_pretrained(local_dir)
model = AutoModel.from_pretrained(local_dir)

[2024-07-02 14:20:45,679] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)
