In [18]:
from transformers import QuestionAnsweringPipeline, AutoAdapterModel, AutoModelWithHeads, AutoTokenizer, AutoConfig
from transformers.onnx import OnnxConfig, validate_model_outputs, export
from transformers.models.bert import BertOnnxConfig

import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType
from onnxruntime import InferenceSession
import onnxruntime

from onnx_opcounter import calculate_params

import os
import time
import torch
import numpy as np

from datasets import load_metric, load_dataset

from typing import Mapping, OrderedDict
from pathlib import Path
import random
import pandas as pd

In [19]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelWithHeads.from_pretrained("bert-base-uncased")
adapter_name = model.load_adapter("AdapterHub/bert-base-uncased-pf-boolq", source="hf")
model.active_adapters = adapter_name

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModelWithHeads: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 4034.28it/s]


In [20]:
def categorical_model_inference(question, context):
    
    raw_input = [[context, question]]
    inputs = tokenizer(raw_input, padding=True, truncation=True, return_tensors="pt")

    outputs = model(**inputs)
    answer_idx = torch.argmax(outputs.logits)

    return bool(answer_idx)

# Test

In [21]:
data = load_dataset("boolq", split='validation')

Found cached dataset boolq (/Users/michaelhermann/.cache/huggingface/datasets/boolq/default/0.1.0/bf0dd57da941c50de94ae3ce3cef7fea48c08f337a4b7aac484e9dddc5aa24e5)


In [22]:
for i in range(1, 100):
    if data[i]["answer"] == True:
        break
# print(f"using {i}")
test_no = i

question = data[test_no]["question"]
correct_answer = data[test_no]["answer"]
context = data[test_no]["passage"]


answer = categorical_model_inference(question, context)

print(question)
print(f"Correct answer: {correct_answer}")
print(f"Given answer: {answer}")

is house tax and property tax are same
Correct answer: True
Given answer: True


# Check diff

In [33]:
for i in range(len(data)):
    i =  4

    question = data[i]["question"]
    correct_answer = data[i]["answer"]
    context = data[i]["passage"]

    raw_input = [[context, question]]
    inputs = tokenizer(raw_input, padding=True, truncation=True, return_tensors="pt")

    outputs = model(**inputs)
    answer_idx = torch.argmax(outputs.logits)
    answer = bool(answer_idx)
    
    if answer != correct_answer:
        print(f"no: {i}")
        print(outputs.logits)
        print(f"Answer: {answer}")
        print(f"Base Answer: {correct_answer}")
        print(f"Question: {question}")
        print(f"Context: {context}")

        break  

no: 4
tensor([[ 1.1532, -1.1786]], grad_fn=<AddmmBackward0>)
Answer: False
Base Answer: True
Question: is there a difference between hydroxyzine hcl and hydroxyzine pam
Context: Hydroxyzine preparations require a doctor's prescription. The drug is available in two formulations, the pamoate and the dihydrochloride or hydrochloride salts. Vistaril, Equipose, Masmoran, and Paxistil are preparations of the pamoate salt, while Atarax, Alamon, Aterax, Durrax, Tran-Q, Orgatrax, Quiess, and Tranquizine are of the hydrochloride salt.


# Run a little test - base model

In [15]:
correct = 0
total = 200

for i in range(total):
    test_no = i

    question = data[test_no]["question"]
    correct_answer = data[test_no]["answer"]
    context = data[test_no]["passage"]

    answer = categorical_model_inference(question, context)
    if answer == correct_answer:
        correct += 1

print(f"{correct} out of {total} -> {correct/total}%")

147 out of 200 -> 0.735%


# Export to ONNX

In [8]:
config = AutoConfig.from_pretrained("bert-base-uncased") # bert-base-uncased-pf-boolq",
onnx_config = BertOnnxConfig(config)

onnx_path = Path("onnx/boolq/model.onnx")

onnx_inputs, onnx_outputs = export(tokenizer, model, onnx_config, onnx_config.default_onnx_opset, onnx_path)

onnx_model = onnx.load(onnx_path)
onnx.checker.check_model(onnx_model)

  if context.output_adapter_gating_scores:
  if tensor is not None and hidden_states.shape[0] != tensor.shape[0]:
  if getattr(ctx, "output_" + attr, False):


In [9]:
def onnx_inference(onnx_model, question, context):

    inputs = tokenizer(question, context, padding=True, truncation=True, return_tensors="np")
    inputs = {key: np.array(inputs[key], dtype=np.int64) for key in inputs}

    outputs = onnx_model.run(input_feed=dict(inputs), output_names=None)

    return bool(np.argmax(outputs[0][0]))

In [10]:
onnx_path = "onnx/boolq/model.onnx"
onnx_model  = onnxruntime.InferenceSession(
        str(onnx_path), providers=["CPUExecutionProvider"]
    )

In [11]:
context = "Property tax or 'house tax' is a local tax on buildings, along with appurtenant land. It is and imposed on the Possessor (not the custodian of property as per 1978, 44th amendment of constitution). It resembles the US-type wealth tax and differs from the excise-type UK rate. The tax power is vested in the states and is delegated to local bodies, specifying the valuation method, rate band, and collection procedures. The tax base is the annual rental value (ARV) or area-based rating. Owner-occupied and other properties not producing rent are assessed on cost and then converted into ARV by applying a percentage of cost, usually four percent. Vacant land is generally exempt. Central government properties are exempt. Instead a 'service charge' is permissible under executive order. Properties of foreign missions also enjoy tax exemption without requiring reciprocity. The tax is usually accompanied by service taxes, e.g., water tax, drainage tax, conservancy (sanitation) tax, lighting tax, all using the same tax base. The rate structure is flat on rural (panchayat) properties, but in the urban (municipal) areas it is mildly progressive with about 80% of assessments falling in the first two brackets."
question = "is house tax and property tax are same"

In [12]:
answer = onnx_inference(onnx_model, question, context)
print(answer)

True


# Check Diff

In [32]:
for i in range(len(data)):

    question = data[i]["question"]
    correct_answer = data[i]["answer"]
    context = data[i]["passage"]

    raw_input = [[context, question]]
    inputs = tokenizer(raw_input, padding=True, truncation=True, return_tensors="pt")


    inputs = tokenizer(question, context, padding=True, truncation=True, return_tensors="np")
    inputs = {key: np.array(inputs[key], dtype=np.int64) for key in inputs}

    outputs = onnx_model.run(input_feed=dict(inputs), output_names=None)
    output_prep = outputs[0][0]
    answer = bool(np.argmax(output_prep))
    
    if answer != correct_answer:
        print(f"no: {i}")
        print(output_prep)
        print(f"Answer: {answer}")
        print(f"Base Answer: {correct_answer}")
        print(f"Question: {question}")
        print(f"Context: {context}")

        break 

no: 4
[ 1.3443897 -1.2073131]
Answer: False
Base Answer: True
Question: is there a difference between hydroxyzine hcl and hydroxyzine pam
Context: Hydroxyzine preparations require a doctor's prescription. The drug is available in two formulations, the pamoate and the dihydrochloride or hydrochloride salts. Vistaril, Equipose, Masmoran, and Paxistil are preparations of the pamoate salt, while Atarax, Alamon, Aterax, Durrax, Tran-Q, Orgatrax, Quiess, and Tranquizine are of the hydrochloride salt.


# Run a little test - onnx model

In [13]:
correct = 0
total = 200

for i in range(total):
    test_no = i

    question = data[test_no]["question"]
    correct_answer = data[test_no]["answer"]
    context = data[test_no]["passage"]

    answer = onnx_inference(onnx_model, question, context)
    if answer == correct_answer:
        correct += 1

print(f"{correct} out of {total} -> {correct/total}%")

127 out of 200 -> 0.635%


# Compare base and onnx

In [37]:
list_base = []
list_onnx = []
list_correct = []

In [38]:
total = 100

In [39]:
for test_no in range(total):

    question = data[test_no]["question"]
    correct_answer = data[test_no]["answer"]
    context = data[test_no]["passage"]

    answer_base = categorical_model_inference(question, context)
    answer_onnx = onnx_inference(onnx_model, question, context)
    
    list_correct.append(correct_answer)
    list_base.append(answer_base)
    list_onnx.append(answer_onnx)

In [45]:
diff_indexes = []
for i in range(len(list_correct)):
    if list_base[i] != list_onnx[i]:
        diff_indexes.append(i)

In [55]:
for ind in range(len(diff_indexes[:10])):

    i = diff_indexes[ind]

    question = data[i]["question"]
    correct_answer = data[i]["answer"]
    context = data[i]["passage"]

    #base 
    raw_input = [[context, question]]
    inputs = tokenizer(raw_input, padding=True, truncation=True, return_tensors="pt")
    outputs_base = model(**inputs)
    answer_idx = torch.argmax(outputs_base.logits)
    answer_base = bool(answer_idx)

    #onnx
    raw_input = [[context, question]]
    inputs = tokenizer(raw_input, padding=True, truncation=True, return_tensors="pt")

    inputs = tokenizer(question, context, padding=True, truncation=True, return_tensors="np")
    inputs = {key: np.array(inputs[key], dtype=np.int64) for key in inputs}

    outputs_onnx = onnx_model.run(input_feed=dict(inputs), output_names=None)
    output_prep = outputs_onnx[0][0]
    answer_onnx = bool(np.argmax(output_prep))

    
    print(f"no: {i}")
    print(f"Base: {outputs_base.logits}")
    print(f"Onnx: {output_prep}")
    print(f"Base answer: {answer_base}")
    print(f"Onnx answer: {answer_onnx}")
    print(f"Correct Answer: {correct_answer}")
    # print(f"Question: {question}")
    # print(f"Context: {context}")



    

no: 0
Base: tensor([[0.0363, 0.1560]], grad_fn=<AddmmBackward0>)
Onnx: [ 1.0141207  -0.70817924]
Base answer: True
Onnx answer: False
Correct Answer: False
no: 5
Base: tensor([[-0.3566,  0.5935]], grad_fn=<AddmmBackward0>)
Onnx: [ 0.48343384 -0.32177168]
Base answer: True
Onnx answer: False
Correct Answer: False
no: 7
Base: tensor([[-1.6825,  1.6893]], grad_fn=<AddmmBackward0>)
Onnx: [ 0.69819057 -0.52735263]
Base answer: True
Onnx answer: False
Correct Answer: True
no: 9
Base: tensor([[-2.6897,  2.7745]], grad_fn=<AddmmBackward0>)
Onnx: [ 0.15282367 -0.0086502 ]
Base answer: True
Onnx answer: False
Correct Answer: True
no: 10
Base: tensor([[-0.7401,  0.9367]], grad_fn=<AddmmBackward0>)
Onnx: [0.17055601 0.15920028]
Base answer: True
Onnx answer: False
Correct Answer: True
no: 11
Base: tensor([[ 2.2703, -2.2002]], grad_fn=<AddmmBackward0>)
Onnx: [-1.0790286  1.2619745]
Base answer: False
Onnx answer: True
Correct Answer: False
no: 15
Base: tensor([[ 0.5078, -0.3660]], grad_fn=<AddmmBac

In [44]:
diff_indexes

[0,
 5,
 7,
 9,
 10,
 11,
 15,
 22,
 24,
 36,
 41,
 42,
 44,
 45,
 46,
 49,
 56,
 71,
 77,
 79,
 85,
 94,
 95,
 98,
 99]