In [None]:
from typing import TYPE_CHECKING # Flag used to separate static type checking from runtime
from transformers.utils import _LazyModule
from transformers.utils.import_utils import define_import_structure
if TYPE_CHECKING: # Branch seen by static analysis tools: import every BERT submodule eagerly
    from transformers.models.bert.modeling_bert import *
    from transformers.models.bert.modeling_flax_bert import *
    from transformers.models.bert.modeling_tf_bert import *
    from transformers.models.bert.tokenization_bert import *
    from transformers.models.bert.tokenization_bert_fast import *
    from transformers.models.bert.tokenization_bert_tf import *
else: # Otherwise (at runtime): lazy loading
    import sys
    # NOTE(review): this cell has no execution count and reads `__file__` and `__spec__`;
    # presumably it is only meaningful inside a package `__init__.py`, not in a notebook
    # kernel - confirm before running it here.
    _file = globals()["__file__"]
    # Replace this module's entry in `sys.modules` with a `_LazyModule` built from the
    # import structure that `define_import_structure` derives from this file's path.
    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)

In [6]:
from collections import OrderedDict # Ordered dictionary
from typing import Mapping
from transformers.configuration_utils import PretrainedConfig
from transformers.onnx import OnnxConfig
from transformers.utils import logging

# Module-level logger obtained from the transformers logging utilities, named after this module.
logger = logging.get_logger(__name__)

In [7]:
class BertConfig(PretrainedConfig):
    """
    Configuration container for a BERT model.

    Every constructor argument except `pad_token_id` and `**kwargs` is stored on the
    instance under an attribute of the same name; `pad_token_id` and `**kwargs` are
    handed to the parent constructor.

    Args:
        vocab_size: Size of the vocabulary.
        hidden_size: Hidden dimension of the model.
        num_hidden_layers: Number of layers.
        num_attention_heads: Number of attention heads.
        intermediate_size: Dimension of the feed-forward layer.
        hidden_act: Activation function; a string or a function is accepted.
        hidden_dropout_prob: Dropout probability of the fully connected layers.
        attention_probs_dropout_prob: Dropout probability of the attention weights.
        max_position_embeddings: Maximum supported sequence length.
        type_vocab_size: Vocabulary size of `token_type_ids`.
        initializer_range: Standard deviation used for weight initialization.
        layer_norm_eps: Small epsilon constant that avoids division by zero in LayerNorm.
        pad_token_id: Id of the padding token.
        position_embedding_type: Kind of position encoding; `absolute`, `relative_key`
            and `relative_key_query` are supported.
        use_cache: Whether the `past_key_value` cache is used during inference.
        classifier_dropout: Dropout probability applied before the classifier.
        **kwargs: Extra keyword arguments passed through to the parent constructor.

    Examples:

    ```python
    >>> from transformers import BertConfig, BertModel

    >>> # Initializing a BERT google-bert/bert-base-uncased style configuration
    >>> configuration = BertConfig()

    >>> # Initializing a model (with random weights) from the google-bert/bert-base-uncased style configuration
    >>> model = BertModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    # Identifier of the model type.
    model_type = "bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        **kwargs,
    ):
        # Parent initialization runs first and receives the padding id plus any extras.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # BERT-specific settings, listed in the order in which they are assigned.
        own_fields = {
            "vocab_size": vocab_size,
            "hidden_size": hidden_size,
            "num_hidden_layers": num_hidden_layers,
            "num_attention_heads": num_attention_heads,
            "hidden_act": hidden_act,
            "intermediate_size": intermediate_size,
            "hidden_dropout_prob": hidden_dropout_prob,
            "attention_probs_dropout_prob": attention_probs_dropout_prob,
            "max_position_embeddings": max_position_embeddings,
            "type_vocab_size": type_vocab_size,
            "initializer_range": initializer_range,
            "layer_norm_eps": layer_norm_eps,
            "position_embedding_type": position_embedding_type,
            "use_cache": use_cache,
            "classifier_dropout": classifier_dropout,
        }
        for field_name, field_value in own_fields.items():
            setattr(self, field_name, field_value)

In [8]:
# Build a configuration using only the default argument values.
config = BertConfig()

In [9]:
# Display the configuration; the notebook renders it as the JSON-like listing below.
config

BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.51.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

In [14]:
# Inherits from OnnxConfig; configures the ONNX export settings for the BERT model.
class BertOnnxConfig(OnnxConfig):
    """ONNX export settings for BERT, layered on top of `OnnxConfig`."""

    # `@property` turns the method into a read-only attribute, so it is read as
    # `cfg.inputs` rather than called. A matching `@inputs.setter` / `@inputs.deleter`
    # would be required to make it writable or deletable.
    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        """Describe the dynamic axes of each model input.

        Returns:
            An `OrderedDict` with the keys `input_ids`, `attention_mask` and
            `token_type_ids`, in that order. Each key maps to the same
            axis-index -> axis-name dict: batch / choice / sequence when
            `self.task` is `"multiple-choice"`, batch / sequence otherwise.
        """
        # Multiple-choice inputs carry an extra "choice" axis between batch and sequence.
        axis_names = (
            {0: "batch", 1: "choice", 2: "sequence"}
            if self.task == "multiple-choice"
            else {0: "batch", 1: "sequence"}
        )
        input_names = ("input_ids", "attention_mask", "token_type_ids")
        # All inputs share the one axes mapping object.
        return OrderedDict((input_name, axis_names) for input_name in input_names)


In [15]:
# Wrap the BERT configuration in its ONNX export configuration (no task given).
onnxConfig=BertOnnxConfig(config)

In [16]:
# Inspect the export task; with no task passed above, the output below is 'default'.
onnxConfig.task

'default'

In [17]:
# Inspect the input-name -> dynamic-axes mapping; for this task every input gets batch/sequence axes.
onnxConfig.inputs

OrderedDict([('input_ids', {0: 'batch', 1: 'sequence'}),
             ('attention_mask', {0: 'batch', 1: 'sequence'}),
             ('token_type_ids', {0: 'batch', 1: 'sequence'})])

In [None]:
# Names exported by a star-import of this module.
__all__ = ["BertConfig", "BertOnnxConfig"]