In [23]:
# https://huggingface.co/docs/transformers/model_sharing
# https://huggingface.co/docs/optimum/v1.2.1/en/onnxruntime/modeling_ort
# https://huggingface.co/docs/optimum/v1.2.1/en/onnxruntime/modeling_ort#optimum.onnxruntime.ORTModelForQuestionAnswering

# https://github.com/huggingface/optimum/blob/ed95b9fa8019af29ce1904ac3cfef8729eb4f4be/optimum/modeling_base.py#L12
# https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/hf_api.py#L1458
# https://github.com/huggingface/huggingface_hub/blob/664cfdd25adfb69f429decf19e2d65ed5599f9fd/src/huggingface_hub/utils/_deprecation.py#L7


In [19]:
import os
from typing import Mapping, OrderedDict
from pathlib import Path

from huggingface_hub import HfApi, HfFolder, hf_hub_download
from transformers import AutoModelWithHeads, AutoTokenizer, AutoConfig
from transformers.onnx import OnnxConfig, export

import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

In [25]:
def push_to_hub(save_dir, repository_id, organization="UKP-SQuARE"):
    """Upload every file found under ``save_dir`` to a Hugging Face Hub repo.

    The repository ``<organization>/<repository_id>`` is created as a public
    repo when it does not exist yet. ``save_dir`` is walked recursively and
    each file is uploaded under its path relative to ``save_dir``, so files
    with the same name in different subdirectories no longer overwrite each
    other in the repository.

    Args:
        save_dir: Local directory (``str`` or path-like) whose files are uploaded.
        repository_id: Repository name without the organization prefix.
        organization: Hub namespace that owns the repository.

    Returns:
        None. Uploads that fail with ``KeyError``/``NameError`` are reported
        through ``warnings.warn`` and skipped; other exceptions propagate.
    """
    # Function-scope import keeps this cell independent of the import cell.
    import warnings

    huggingface_token = HfFolder.get_token()
    api = HfApi()
    repo_id = f"{organization}/{repository_id}"

    api.create_repo(
        token=huggingface_token,
        repo_id=repo_id,
        exist_ok=True,
        private=False,
    )

    # Resolve against the current working directory once, up front.
    root_dir = os.path.abspath(save_dir)

    for current_dir, _subdirs, file_names in os.walk(root_dir):
        for file_name in file_names:
            local_file_path = os.path.join(current_dir, file_name)
            # Hub paths always use forward slashes, whatever the local OS uses.
            hub_file_path = os.path.relpath(local_file_path, root_dir).replace(os.sep, "/")
            try:
                api.upload_file(
                    token=huggingface_token,
                    repo_id=repo_id,
                    path_or_fileobj=local_file_path,
                    path_in_repo=hub_file_path,
                )
            except (KeyError, NameError) as error:
                # These two exception types were deliberately tolerated before
                # (presumably a client-library quirk -- TODO confirm). Stay
                # best-effort, but make the skipped file visible.
                warnings.warn(
                    f"Upload of {hub_file_path!r} to {repo_id!r} raised {error!r}; skipping file."
                )


In [43]:
class CustomOnnxConfig(OnnxConfig):
    """ONNX export configuration declaring BERT-style inputs.

    Inspired by the BERT ONNX config (``BertOnnxConfig``); can be extended to
    support other QA tasks.
    """

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        """Describe the exported model's inputs and their dynamic axes.

        Returns:
            An ordered mapping from input name (``input_ids``,
            ``attention_mask``, ``token_type_ids``) to an
            ``{axis index: axis name}`` mapping. The ``multiple-choice`` task
            gets an extra ``choice`` axis between batch and sequence.
        """
        if self.task == "multiple-choice":
            dynamic_axis = {0: "batch", 1: "choice", 2: "sequence"}
        else:
            dynamic_axis = {0: "batch", 1: "sequence"}

        input_names = ("input_ids", "attention_mask", "token_type_ids")
        # Give each input its own dict: with one shared object, a consumer
        # mutating the axes of one input would silently change the others.
        return OrderedDict((name, dict(dynamic_axis)) for name in input_names)

def onnx_export(base_model, adapter, quantize_model=True):
    """Export a base model plus adapter to ONNX and upload the result to the Hub.

    Steps performed:
      1. Load tokenizer and model for ``base_model``, then load and activate
         ``adapter`` (fetched with ``source="hf"``).
      2. Export the model to ``onnx/<model_id>/model.onnx``.
      3. Call ``save_pretrained`` into the same directory to obtain the model
         config, then delete the saved PyTorch weights again.
      4. Optionally write a dynamically quantized (QInt8 weights) copy to
         ``model_quant.onnx``.
      5. Upload the whole directory via ``push_to_hub``.

    Args:
        base_model: Identifier passed to the ``from_pretrained`` loaders.
        adapter: Adapter identifier, typically ``"<namespace>/<name>"``; an
            identifier without a namespace is accepted as well.
        quantize_model: When true, also produce ``model_quant.onnx``.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    model = AutoModelWithHeads.from_pretrained(base_model)
    adapter_name = model.load_adapter(adapter, source="hf")
    model.active_adapters = adapter_name

    config = AutoConfig.from_pretrained(base_model)

    onnx_config = CustomOnnxConfig(config, task="question-answering")

    # "<namespace>/<name>" -> "<name>"; a bare "<name>" previously raised
    # IndexError and is now used as-is.
    adapter_parts = adapter.split("/")
    adapter_basename = adapter_parts[1] if len(adapter_parts) > 1 else adapter_parts[0]
    model_id = adapter_basename + "-onnx"

    directory_path = Path("onnx") / model_id
    directory_path.mkdir(parents=True, exist_ok=True)

    onnx_model_path = directory_path / "model.onnx"

    # Export ONNX model
    export(tokenizer, model, onnx_config, onnx_config.default_onnx_opset, onnx_model_path)

    # Create config.json of vanilla model
    model.save_pretrained(directory_path)
    # Only the config is wanted in the ONNX repo. The weights file name depends
    # on how save_pretrained serializes, so remove whichever one was written
    # and do not fail when a given name is absent.
    for weights_name in ("pytorch_model.bin", "model.safetensors"):
        weights_path = directory_path / weights_name
        if weights_path.exists():
            weights_path.unlink()

    if quantize_model:
        quantized_model_path = "{}/model_quant.onnx".format(directory_path)
        quantize_dynamic(onnx_model_path, quantized_model_path, weight_type=QuantType.QInt8)

    print("Uploading models to hub... (may take a few minutes)")
    push_to_hub(
        save_dir=directory_path,
        repository_id=model_id,
    )

In [44]:
# Export the bert-base-uncased DROP adapter to ONNX (quantization left at its
# default) and upload the resulting files to the Hub.
onnx_export(
    base_model="bert-base-uncased",
    adapter="AdapterHub/bert-base-uncased-pf-drop",
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModelWithHeads: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 11581.14it/s]
