# Imports

In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from openvino.inference_engine import IECore
import warnings
from pathlib import Path
import numpy as np
import onnx
import os
from urllib.parse import urlparse

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  int(TensorProto.STRING): np.dtype(np.object)


# Transformers Serialization

See the [Hugging Face serialization guide](https://huggingface.co/docs/transformers/serialization) to learn how to export Transformers models to the ONNX format.

In [2]:
# Print the command-line help of the transformers.onnx exporter to see the available options.
!python -m transformers.onnx -h

usage: Hugging Face ONNX Exporter tool [-h] -m MODEL
                                       [--feature {causal-lm,causal-lm-with-past,default,default-with-past,masked-lm,question-answering,seq2seq-lm,seq2seq-lm-with-past,sequence-classification,token-classification}]
                                       [--opset OPSET] [--atol ATOL]
                                       output

positional arguments:
  output                Path indicating where to store generated ONNX model.

optional arguments:

2022-11-21 16:12:39.295443: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2022-11-21 16:12:39.295541: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.



  -h, --help            show this help message and exit
  -m MODEL, --model MODEL
                        Model's name of path on disk to load.
  --feature {causal-lm,causal-lm-with-past,default,default-with-past,masked-lm,question-answering,seq2seq-lm,seq2seq-lm-with-past,sequence-classification,token-classification}
                        Export the model with some additional feature.
  --opset OPSET         ONNX opset version to export the model with (default
                        12).
  --atol ATOL           Absolute difference tolerence when validating the
                        model.


In [3]:
# Export the distilbert-base-uncased-finetuned-sst-2-english checkpoint to ONNX using the
# sequence-classification feature; the exporter writes its result into the model/ directory.
# NOTE(review): the checkpoint name is repeated here as a literal and again in the `checkpoint`
# variable defined in a later cell - keep the two in sync.
!python -m transformers.onnx -m distilbert-base-uncased-finetuned-sst-2-english --feature sequence-classification model/

2022-11-21 16:12:49.614851: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2022-11-21 16:12:49.614940: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Using framework PyTorch: 1.9.1+cpu
Validating ONNX model...
	-[✓] ONNX model outputs' name match reference model ({'logits'})
	- Validating ONNX Model output "logits":
		-[✓] (2, 2) matches (2, 2)
		-[✓] all values close (atol: 1e-05)
All good, model saved at: model/model.onnx


In [4]:
# Model identifier used for the tokenizer here and as the IR output name further down.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# Tokenizer that belongs to the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Model Optimizer

In [5]:
onnx_model_path = 'model.onnx'
MODEL_DIR = f"model/"
os.makedirs(MODEL_DIR, exist_ok=True)

onnx_model_path = Path(MODEL_DIR)/onnx_model_path

!mo --input_model $onnx_model_path --output_dir $MODEL_DIR --model_name $checkpoint --input input_ids,attention_mask --input_shape [1,512],[1,512] --data_type FP32

Model Optimizer arguments:
Common parameters:
	- Path to the Input Model: 	C:\Users\arunimac\OneDrive - Intel Corporation\Documents\Projects\OpenVINO Contrib\224-distilbert-Sequence-Classification\model\model.onnx
	- Path for generated IR: 	C:\Users\arunimac\OneDrive - Intel Corporation\Documents\Projects\OpenVINO Contrib\224-distilbert-Sequence-Classification\model/
	- IR output name: 	distilbert-base-uncased-finetuned-sst-2-english
	- Log level: 	ERROR
	- Batch: 	Not specified, inherited from the model
	- Input layers: 	input_ids,attention_mask
	- Output layers: 	Not specified, inherited from the model
	- Input shapes: 	[1,512],[1,512]
	- Source layout: 	Not specified
	- Target layout: 	Not specified
	- Layout: 	Not specified
	- Mean values: 	Not specified
	- Scale values: 	Not specified
	- Scale factor: 	Not specified
	- Precision of IR: 	FP32
	- Enable fusing: 	True
	- User transformations: 	Not specified
	- Reverse input channels: 	False
	- Enable IR generation for fixed input sha

In [6]:
# The IR consists of two files sharing one stem inside MODEL_DIR: <checkpoint>.xml and <checkpoint>.bin.
ir_model_xml = Path(MODEL_DIR, checkpoint).with_suffix(".xml")
ir_model_bin = ir_model_xml.with_suffix(".bin")

Load the IR model onto the CPU device and look up the names of its input and output layers for inference.

In [7]:
# Input geometry constants; presumably these mirror the [1,512] shape given to Model Optimizer - verify if either changes.
BATCH_SIZE = 1
MAX_SEQ_LENGTH = 512

# Suppress every Python warning from this point on.
warnings.filterwarnings("ignore")

# Read the IR (ir_model_xml as model, ir_model_bin as weights) and load it on the CPU device.
ie = IECore()
net_onnx = ie.read_network(model=ir_model_xml, weights=ir_model_bin)
exec_net_onnx = ie.load_network(network=net_onnx, device_name="CPU")

# Names of all input and output nodes of the loaded network.
input_keys = list(exec_net_onnx.input_info)
output_keys = list(exec_net_onnx.outputs.keys())

# The first input and the first output are the ones addressed by name later on.
input_layer_onnx = input_keys[0]
output_layer_onnx = output_keys[0]

# Second dimension of the first input's shape.
input_size = exec_net_onnx.input_info[input_layer_onnx].input_data.shape[1]

Define a softmax function that converts the raw output (logits) of the IR model into class probabilities.

In [8]:
def softmax(x, axis=None):
    """Return the numerically stable softmax of ``x``.

    Parameters
    ----------
    x : array_like
        Raw scores (logits).
    axis : int or None, optional
        Axis along which the result is normalised. The default (``None``)
        normalises over all elements of ``x`` together, which is the
        behaviour this function always had. Pass ``axis=-1`` to normalise
        every row of a batched ``(batch, num_classes)`` array independently.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` whose entries sum to 1 along ``axis``
        (or over the whole array when ``axis`` is ``None``).
    """
    x = np.asarray(x)
    # Subtracting the maximum does not change the result but keeps np.exp from overflowing.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)


# Inference

Create a generic inference function that tokenizes the input text, runs the model, and classifies the result into one of two classes: POSITIVE or NEGATIVE.

In [9]:
def infer(input_text):
    """Classify the sentiment of ``input_text`` with the loaded IR model.

    The text is tokenized, padded/truncated to ``MAX_SEQ_LENGTH`` tokens, run
    through ``exec_net_onnx`` and the highest-scoring class is mapped to a label.
    As a side effect the line ``Label:`` is printed before returning.

    Parameters
    ----------
    input_text : str or list of str
        Text to classify. NOTE(review): the IR was converted with input shape
        [1,512], so a list presumably has to contain exactly one sentence -
        confirm before passing several sentences at once.

    Returns
    -------
    str
        ``'NEGATIVE'`` or ``'POSITIVE'``.
    """
    # Request NumPy arrays: the result is passed to the OpenVINO runtime, not to PyTorch.
    encoded = tokenizer(input_text, padding='max_length', max_length=MAX_SEQ_LENGTH,
                        truncation=True, return_tensors="np")
    inputs = {'input_ids': encoded['input_ids'], 'attention_mask': encoded['attention_mask']}
    result = exec_net_onnx.infer(inputs=inputs)
    logits = result[output_layer_onnx]
    # Index of the most probable class, converted to a plain int for the dictionary lookup.
    predicted_class = int(np.argmax(softmax(logits)))
    print("Label:")
    label = {0: 'NEGATIVE', 1: 'POSITIVE'}
    return label[predicted_class]

Classify a single sentence typed in by the user.

In [10]:
# Ask for one sentence on standard input, classify it with infer() and print the returned label.
print("User Input:")
input_text = input()  # waits for the user to enter a line of text
result = infer(input_text)
print(result)

User Input:
Have a great day
Label:
POSITIVE


Classify text read from a file.

In [18]:
# Read the sample file and keep one sentence per non-empty line, without surrounding whitespace.
with open("data/sample.txt", "r") as f:
    sentences = [line.strip() for line in f if line.strip()]

# Classify each sentence separately, so the sentence that is printed is exactly the one
# that is classified and only one sentence at a time is passed to infer().
for input_text in sentences:
    print("User Input:\n", input_text)
    result = infer(input_text)
    print(result)

User Input:
 The food was horrible.
Label:
NEGATIVE
