# 模型加载与保存

In [1]:
from transformers import AutoConfig, AutoModel, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


## 在线加载

In [2]:
# Online loading: resolve the checkpoint by its Hub id 'hfl/rbt3'.
# force_download=True requests a fresh download instead of reusing cached files.
model = AutoModel.from_pretrained(
    'hfl/rbt3',
    force_download=True,
)

Downloading (…)lve/main/config.json: 100%|██████████| 828/828 [00:00<00:00, 1.61MB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading (…)lve/main/config.json: 100%|██████████| 828/828 [00:00<?, ?B/s] 
Downloading pytorch_model.bin: 100%|██████████| 156M/156M [01:13<00:00, 2.13MB/s] 


## 模型下载

In [3]:
# Clone the model repository from the Hugging Face Hub into a local 'rbt3' directory.
!git clone "https://huggingface.co/hfl/rbt3"

Cloning into 'rbt3'...
Updating files:  63% (7/11)
Updating files:  72% (8/11)
Updating files:  81% (9/11)
Updating files:  90% (10/11)
Updating files: 100% (11/11)
Updating files: 100% (11/11), done.
Filtering content:  66% (2/3)
Filtering content:  66% (2/3), 442.86 MiB | 227.89 MiB/s
Filtering content: 100% (3/3), 442.86 MiB | 227.89 MiB/s
Filtering content: 100% (3/3), 442.86 MiB | 3.67 MiB/s, done.


In [4]:
# Clone the same repository, restricting the LFS content to files matching *.bin.
# NOTE(review): git-lfs flags 'git lfs clone' as deprecated in favour of plain 'git clone'.
# NOTE(review): this targets the same 'rbt3' directory as the plain clone in the previous cell —
# presumably the two cells are alternatives rather than meant to run back to back; confirm.
!git lfs clone "https://huggingface.co/hfl/rbt3" --include="*.bin"

          with new flags from 'git clone'

'git clone' has been updated in upstream Git to have comparable
speeds to 'git lfs clone'.
Cloning into 'rbt3'...


## 离线加载

In [5]:
# Offline loading: point from_pretrained at the local checkpoint directory instead of a Hub id.
model = AutoModel.from_pretrained(pretrained_model_name_or_path='./rbt3/')

## 模型加载参数

In [None]:
# Display the configuration object attached to the loaded model.
# NOTE(review): this cell was saved without an execution count or output.
model.config

In [6]:
# Load the configuration from the local checkpoint directory and display it.
config = AutoConfig.from_pretrained('./rbt3/')
config

BertConfig {
  "_name_or_path": "./rbt3/",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.34.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [7]:
# output_attentions is not listed in the printed config, yet it is readable as an attribute
# (False here) — presumably a default supplied by the config class; confirm.
config.output_attentions

False

In [8]:
from transformers import  BertConfig

In [10]:
# Evaluate the class itself to display its fully qualified module path.
BertConfig

transformers.models.bert.configuration_bert.BertConfig

# 模型调用

In [11]:
# Sample sentence used as the model input for the remaining cells.
sen = '弱小的我也有大梦想!'

# Tokenizer loaded from 'rbt3' (presumably the directory cloned earlier — confirm).
tokenizer = AutoTokenizer.from_pretrained('rbt3')

# return_tensors='pt' makes the tokenizer return PyTorch tensors.
inputs = tokenizer(text=sen, return_tensors='pt')
inputs

{'input_ids': tensor([[ 101, 2483, 2207, 4638, 2769,  738, 3300, 1920, 3457, 2682,  106,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

## 不带Model Head的模型调用

In [12]:
# Reload the bare model (no task head) with output_attentions enabled.
# This rebinds `model`, replacing the instance loaded in the earlier cells.
model = AutoModel.from_pretrained(
    'rbt3',
    output_attentions=True,
)

In [13]:
# Run a forward pass, unpacking the tokenizer output as keyword arguments, and display the result.
# NOTE(review): the printed tensors carry grad_fn, so autograd is tracking this call; if only
# inference is intended, consider running it under torch.no_grad().
output = model(**inputs)
output

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 0.2303,  0.1315,  0.6920,  ..., -0.5849,  0.5341, -0.0374],
         [-0.7454, -0.3422,  0.5117,  ..., -0.0404, -0.4554, -0.6653],
         [-0.0628,  0.6292,  0.1078,  ..., -0.3379,  0.3596, -0.5689],
         ...,
         [ 0.1060,  0.6700, -0.0908,  ..., -0.1800,  0.2782,  0.2195],
         [ 0.1642,  0.0029,  0.0420,  ..., -0.5950, -0.2168, -0.5789],
         [ 0.2265,  0.1351,  0.6900,  ..., -0.5835,  0.5320, -0.0330]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-1.1423e-01, -9.9031e-01, -1.0000e+00, -9.0013e-01,  9.9648e-01,
         -2.0447e-02,  3.6240e-01, -1.3737e-01,  9.9396e-01,  9.9986e-01,
         -1.0428e-02, -1.0000e+00,  6.2199e-02,  9.9932e-01, -9.9999e-01,
          9.9966e-01,  9.9652e-01,  9.5564e-01, -9.9572e-01, -4.5835e-02,
         -9.7243e-01, -9.9092e-01,  9.6289e-02,  9.4169e-01,  9.9111e-01,
         -9.9721e-01, -9.9998e-01,  2.0293e-02, -7.7725e-01, -9.998

In [14]:
# Dimensions of the final hidden states: 1 sequence x 12 tokens x 768 features for this input.
output.last_hidden_state.shape

torch.Size([1, 12, 768])

In [15]:
# Number of token ids in the first (and only) sequence; it matches the second
# dimension of last_hidden_state shown by the previous cell.
len(inputs['input_ids'][0])

12

## 带Model Head的模型调用

In [16]:
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification

In [17]:
# Load the same checkpoint with a sequence-classification head sized for 10 labels.
# The classifier weights are not in the checkpoint and are newly initialized (see the warning below).
clz_model = AutoModelForSequenceClassification.from_pretrained(
    'rbt3',
    num_labels=10,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at rbt3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
# Forward pass through the classification model: logits holds one score per label (10 here).
# loss is None — presumably because no labels are passed along with the inputs.
clz_model(**inputs)

SequenceClassifierOutput(loss=None, logits=tensor([[-0.4554,  0.2853, -0.1562, -0.2745,  0.5057, -0.0642, -0.2172, -0.5352,
          0.2081, -0.3353]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [19]:
# Check that the num_labels value passed at load time is recorded on the model's config.
clz_model.config.num_labels

10