In [32]:
from transformers import QuestionAnsweringPipeline, AutoAdapterModel, AutoModelWithHeads, AutoTokenizer, AutoConfig
from transformers.onnx import OnnxConfig, validate_model_outputs, export
from transformers.models.bert import BertOnnxConfig

import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType
from onnxruntime import InferenceSession
import onnxruntime

from onnx_opcounter import calculate_params

import os
import time
import torch
import numpy as np

from datasets import load_metric, load_dataset

from typing import Mapping, OrderedDict
from pathlib import Path
import random
import pandas as pd

In [33]:
# working
# cosmos_qa

# semi working
# multirc, quartz, race und quail  

# repo error
# narrativeqa

# input diff
# commonsense_qa und social_i_qa 


In [77]:
# adapters based on sequence options 
adapter_list = ["cosmos_qa", "multi#rc", "quartz", "race", "quail"]
adapter = adapter_list[0]

In [93]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelWithHeads.from_pretrained("bert-base-uncased")
adapter_name = model.load_adapter(f"AdapterHub/bert-base-uncased-pf-{adapter}", source="hf")

model.active_adapters = adapter_name

def mc_model_inference(question, context, choices):
    outputs = []

    raw_input = [[context, question + " " + choice] for choice in choices]
    inputs = tokenizer(raw_input, padding=True, truncation=True, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)

    answer_idx = torch.argmax(outputs.logits)
    return choices[answer_idx]

question = "What animal has the most hair?"
context = "Fish are typically not hairy. Cats have 10g of hair. Tigers have 12g of hair. Horses have 100g of hair."

choices = ["Cat", "Horse", "Tiger", "Fish"]
answer = mc_model_inference(question, context, choices)
print(answer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModelWithHeads: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 4245.96it/s]

Horse





In [94]:
# "narrativeqa", "commonsense_qa", "social_i_qa"
# adapter_name = model.load_adapter(f"AdapterHub/narrativeqa", source="hf")

In [103]:
# commonsense_qa
adapter = "commonsense_qa"

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelWithHeads.from_pretrained("bert-base-uncased")
adapter_name = model.load_adapter(f"AdapterHub/bert-base-uncased-pf-{adapter}", source="hf")
model.active_adapters = adapter_name

def mc_model_inference(question, question_concept, choices):
    outputs = []

    raw_input = [[question_concept, question + " " + choice] for choice in choices["text"]]
    inputs = tokenizer(raw_input, padding=True, truncation=True, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)

    answer_idx = torch.argmax(outputs.logits)
    return choices["text"][answer_idx]

question = "The sanctions against the school were a punishing blow, and they seemed to what the efforts the school had made to change?"
question_concept = "punishing"
choices = { "label": [ "A", "B", "C", "D", "E" ], "text": [ "ignore", "enforce", "authoritarian", "yell at", "avoid" ] }

# question = "Google Maps and other highway and street GPS services have replaced what?"
# question_concept = "highway"
# choices = { "label": [ "A", "B", "C", "D", "E" ], "text": [ "united states", "mexico", "countryside", "atlas", "oceans" ] }

answer = mc_model_inference(question, question_concept, choices)
print(answer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModelWithHeads: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 3681.37it/s]


ignore


In [104]:
class CustomOnnxConfig(OnnxConfig):
    # Inspired by BertONNXConfig, can be extended to support other QA tasks
    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        if self.task == "multiple-choice":
            dynamic_axis = {0: "batch", 1: "choice", 2: "sequence"}
        elif self.task == "mcq_commonsense_qa":
            dynamic_axis = {0: "batch", 1: "choice", 2: "sequence"}
        else:
            dynamic_axis = {0: "batch", 1: "sequence"}

        return OrderedDict(
                [
                    ("input_ids", dynamic_axis),
                    ("attention_mask", dynamic_axis),
                    ("token_type_ids", dynamic_axis), # Roberta doesn't use this
                ]
            )

In [105]:
config = AutoConfig.from_pretrained("bert-base-uncased")
onnx_config = BertOnnxConfig(config, task="multiple-choice")

onnx_path = Path(f"onnx/{adapter}/model.onnx")

export(tokenizer, model, onnx_config, onnx_config.default_onnx_opset, onnx_path)

RuntimeError: shape '[-1, 5]' is invalid for input of size 8

In [102]:
onnx_model = onnx.load(onnx_path)
onnx.checker.check_model(onnx_model)

In [9]:
def onnx_inference(onnx_path, question, context, choices):
    onnx_model = InferenceSession(
        str(onnx_path), providers=["CPUExecutionProvider"]
    )

    raw_input = [[context, question + " " + choice] for choice in choices]
    inputs = tokenizer(raw_input, padding=True, truncation=True, return_tensors="np")

    inputs['token_type_ids'] = np.expand_dims(inputs['token_type_ids'], axis=0)
    inputs['input_ids'] =  np.expand_dims(inputs['input_ids'], axis=0)
    inputs['attention_mask'] =  np.expand_dims(inputs['attention_mask'], axis=0)

    outputs = onnx_model.run(input_feed=dict(inputs), output_names=None)

    answer_idx = np.argmax(outputs[0])
    return choices[answer_idx]
    
answer = onnx_inference(onnx_path, question, context, choices)
print(answer)

Horse
