In [2]:
from transformers import QuestionAnsweringPipeline, AutoAdapterModel, AutoModelWithHeads, AutoTokenizer, AutoConfig
from transformers.onnx import OnnxConfig, validate_model_outputs, export
from transformers.models.bert import BertOnnxConfig

import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType
from onnxruntime import InferenceSession
import onnxruntime

from onnx_opcounter import calculate_params

import os
import time
import torch
import numpy as np

from datasets import load_metric, load_dataset

from typing import Mapping, OrderedDict
from pathlib import Path
import random
import pandas as pd

In [3]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelWithHeads.from_pretrained("bert-base-uncased")
adapter_name = model.load_adapter("AdapterHub/bert-base-uncased-pf-boolq", source="hf")
model.active_adapters = adapter_name

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModelWithHeads: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 2819.38it/s]


In [4]:
def categorical_model_inference(question, context):
    
    raw_input = [[context, question]]
    inputs = tokenizer(raw_input, padding=True, truncation=True, return_tensors="pt")

    outputs = model(**inputs)
    answer_idx = torch.argmax(outputs.logits)

    return bool(answer_idx)

# Test

In [5]:
data = load_dataset("boolq", split='validation')

Found cached dataset boolq (/Users/michaelhermann/.cache/huggingface/datasets/boolq/default/0.1.0/bf0dd57da941c50de94ae3ce3cef7fea48c08f337a4b7aac484e9dddc5aa24e5)


In [6]:
for i in range(1, 100):
    if data[i]["answer"] == True:
        break
# print(f"using {i}")
test_no = i

question = data[test_no]["question"]
correct_answer = data[test_no]["answer"]
context = data[test_no]["passage"]


answer = categorical_model_inference(question, context)

print(question)
print(f"Correct answer: {correct_answer}")
print(f"Given answer: {answer}")

is house tax and property tax are same
Correct answer: True
Given answer: True


# Run a little test - base model

In [7]:
correct = 0
total = 200
for i in range(total):
    test_no = i

    question = data[test_no]["question"]
    correct_answer = data[test_no]["answer"]
    passage = data[test_no]["passage"]

    answer = categorical_model_inference(question, passage)
    if answer == correct_answer:
        correct += 1

print(f"{correct} out of {total} -> {correct/total}%")

147 out of 200 -> 0.735%


# Export to ONNX

In [8]:
config = AutoConfig.from_pretrained("bert-base-uncased") # bert-base-uncased-pf-boolq",
onnx_config = BertOnnxConfig(config)

onnx_path = Path("onnx/boolq/model.onnx")

onnx_inputs, onnx_outputs = export(tokenizer, model, onnx_config, onnx_config.default_onnx_opset, onnx_path)

onnx_model = onnx.load(onnx_path)
onnx.checker.check_model(onnx_model)

  if context.output_adapter_gating_scores:
  if tensor is not None and hidden_states.shape[0] != tensor.shape[0]:
  if getattr(ctx, "output_" + attr, False):


In [9]:
def onnx_inference(onnx_model, question, context):

    inputs = tokenizer(question, context, padding=True, truncation=True, return_tensors="np")
    inputs = {key: np.array(inputs[key], dtype=np.int64) for key in inputs}

    outputs = onnx_model.run(input_feed=dict(inputs), output_names=None)

    return bool(np.argmax(outputs[0][0]))

In [10]:
onnx_path = "onnx/boolq/model.onnx"
onnx_model  = onnxruntime.InferenceSession(
        str(onnx_path), providers=["CPUExecutionProvider"]
    )

In [11]:
context = "Property tax or 'house tax' is a local tax on buildings, along with appurtenant land. It is and imposed on the Possessor (not the custodian of property as per 1978, 44th amendment of constitution). It resembles the US-type wealth tax and differs from the excise-type UK rate. The tax power is vested in the states and is delegated to local bodies, specifying the valuation method, rate band, and collection procedures. The tax base is the annual rental value (ARV) or area-based rating. Owner-occupied and other properties not producing rent are assessed on cost and then converted into ARV by applying a percentage of cost, usually four percent. Vacant land is generally exempt. Central government properties are exempt. Instead a 'service charge' is permissible under executive order. Properties of foreign missions also enjoy tax exemption without requiring reciprocity. The tax is usually accompanied by service taxes, e.g., water tax, drainage tax, conservancy (sanitation) tax, lighting tax, all using the same tax base. The rate structure is flat on rural (panchayat) properties, but in the urban (municipal) areas it is mildly progressive with about 80% of assessments falling in the first two brackets."
question = "is house tax and property tax are same"

In [12]:
answer = onnx_inference(onnx_model, question, context)
print(answer)

True


# Run a little test - onnx model

In [13]:
correct = 0
total = 200

for i in range(total):
    test_no = i

    question = data[test_no]["question"]
    correct_answer = data[test_no]["answer"]
    context = data[test_no]["passage"]

    answer = onnx_inference(onnx_model, question, context)
    if answer == correct_answer:
        correct += 1

print(f"{correct} out of {total} -> {correct/total}%")

127 out of 200 -> 0.635%
