# Imports

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import openvino.runtime as ov
import warnings
from pathlib import Path
import numpy as np
import os
import time

# Transformers Serialization

See the [Hugging Face serialization guide](https://huggingface.co/docs/transformers/serialization) to learn how to export Transformers models to the ONNX format.

In [None]:
# Show the command-line options of the transformers.onnx exporter.
!python -m transformers.onnx -h

In [None]:
# Export distilbert-base-uncased-finetuned-sst-2-english with the
# sequence-classification feature, passing model/ as the output location.
# The Model Optimizer cell further down reads model/model.onnx from there.
!python -m transformers.onnx -m distilbert-base-uncased-finetuned-sst-2-english --feature sequence-classification model/

In [None]:
# Model identifier: the same model the ONNX export cell converts. It is also
# reused further down as the file name of the converted IR model.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# Tokenizer belonging to that checkpoint (the name is passed positionally).
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Model Optimizer

In [None]:
# Directory that holds the exported ONNX file and receives the converted IR files.
MODEL_DIR = "model/"
# Path of the ONNX model inside MODEL_DIR.
onnx_model_path = Path(MODEL_DIR) / "model.onnx"

# Convert the ONNX model to OpenVINO IR. The IR files are named after `checkpoint`,
# and both inputs are fixed to a static shape of [1,128] (batch of 1, 128 tokens).
!mo --input_model {onnx_model_path} --output_dir {MODEL_DIR} --model_name {checkpoint} --input input_ids,attention_mask --input_shape [1,128],[1,128]

Load the converted IR model with OpenVINO Runtime, compile it, and create the inference request used for all predictions below.

In [None]:
# NOTE: this installs a catch-all "ignore" filter, so every warning raised
# from this point on is suppressed.
warnings.filterwarnings("ignore")

core = ov.Core()

# Append ".xml" to the model name instead of using Path.with_suffix():
# with_suffix() replaces whatever follows the last dot in the name, so a
# checkpoint such as "model-v1.5" would wrongly resolve to "model-v1.xml".
ir_model_xml = str(Path(MODEL_DIR) / f"{checkpoint}.xml")

compiled_model = core.compile_model(ir_model_xml)
# A single request object is created once and reused by every infer() call.
infer_request = compiled_model.create_infer_request()

Define a softmax function that converts the raw output of the IR model into class probabilities, from which the prediction is taken.

In [None]:
def softmax(x, axis=None):
    """Return the numerically stable softmax of ``x``.

    Parameters
    ----------
    x : array_like
        Raw scores to normalise.
    axis : int or None, optional
        Axis along which the result sums to 1. The default ``None``
        normalises over all elements of ``x`` at once; pass ``axis=-1`` to
        normalise each row of a batch independently.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x``.
    """
    x = np.asarray(x)
    # Subtracting the maximum does not change the result but keeps exp() from overflowing.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)

# Inference

Create a generic inference function that tokenizes the input text and classifies it into one of two classes: POSITIVE or NEGATIVE.

In [None]:
def infer(input_text):
    """Classify ``input_text`` as ``'NEGATIVE'`` or ``'POSITIVE'``.

    The text is tokenized with the notebook-level ``tokenizer`` (padded or
    truncated to 128 tokens) and run through the notebook-level
    ``infer_request``.

    Parameters
    ----------
    input_text : str
        Text to classify.

    Returns
    -------
    str
        ``'NEGATIVE'`` or ``'POSITIVE'``.

    Raises
    ------
    RuntimeError
        If the inference request returns no outputs.
    """
    label = {0: 'NEGATIVE', 1: 'POSITIVE'}

    # 128 is the same sequence length given to `mo` via --input_shape in the
    # Model Optimizer cell, so the tokenized arrays fit the converted model.
    encoded = tokenizer(input_text, padding='max_length', max_length=128, truncation=True, return_tensors="np")
    result = infer_request.infer(inputs=dict(encoded))

    outputs = list(result.values())
    if not outputs:
        raise RuntimeError("Inference returned no outputs")

    # If the model exposes several outputs, the last one is used.
    predicted_class = int(np.argmax(softmax(outputs[-1])))
    return label[predicted_class]

Run inference on a single sentence entered by the user.

In [None]:
# Ask for the sentence before starting the clock, so typing time is not measured.
input_text = input("User Input: ")
start_time = time.perf_counter()
result = infer(input_text)
end_time = time.perf_counter()
# Wall-clock duration of the single infer() call, in seconds.
total_time = end_time - start_time
print("Label: ", result)
print("Total Time: ","%.2f" %total_time," seconds")

Run inference on each sentence read from a text file (one sentence per line).

In [None]:
# Time the whole batch: reading the file, inference and printing are all included.
start_time = time.perf_counter()
with open("data/sample.txt", "r") as f:
    # Strip the trailing newline (and surrounding whitespace) from every line;
    # the file is closed again before any inference runs.
    input_text = [line.strip() for line in f]

for line in input_text:
    if not line:
        # Blank lines carry no text to classify.
        continue
    print("User Input: ", line)
    result = infer(line)
    print("Label: ", result, "\n")

end_time = time.perf_counter()
total_time = end_time - start_time
print("Total Time: ","%.2f" %total_time," seconds")