# Imports

In [1]:
from pathlib import Path
import torch
import openvino.runtime as ov
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import transformers

2023-03-28 18:35:38.484288: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Initializing the Model
We will use the transformer-based [microsoft/deberta-base-mnli](https://huggingface.co/microsoft/deberta-base-mnli/tree/main) model from Hugging Face.

In [2]:
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-base-mnli")

model = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/deberta-base-mnli")

Some weights of the model checkpoint at microsoft/deberta-base-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# Check model(tokenizer and Classification)

In [3]:
inputs = tokenizer(["I don't like it",
                    "mhy does well",
                    "my hometown is ordos, it's a nice place",
                    "he is a great American"],
                   padding=True,
                   return_tensors="pt")

In [4]:
with torch.no_grad():
    logits = model(**inputs).logits

In [5]:
inputs

{'input_ids': tensor([[    1,   100,   218,    75,   101,    24,     2,     0,     0,     0,
             0,     0,     0],
        [    1,   119, 11108,   473,   157,     2,     0,     0,     0,     0,
             0,     0,     0],
        [    1,  4783,  8994,    16, 22474,   366,     6,    24,    18,    10,
          2579,   317,     2],
        [    1,   700,    16,    10,   372,   470,     2,     0,     0,     0,
             0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]])}

In [6]:
logits

tensor([[-1.5745,  2.7616, -1.7987],
        [-1.9788,  1.6223, -0.1850],
        [-1.9979,  4.6572, -2.7053],
        [-0.6713,  4.0547, -3.2321]])

In [7]:
predicted_class_id = logits.argmax().item()
predicted_class_id

7

# Convert to ONNX
## 1. low level
torch.onnx enables you to convert model checkpoints to an ONNX graph by the export method. But you have to provide a lot of values like input_names, dynamic_axes, etc.

In [8]:
# export
torch.onnx.export(
    model,
    tuple(inputs.values()),
    f="torch-model.onnx",
    input_names=['input_ids', 'attention_mask'],
    output_names=['logits'],
    dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'},
                  'attention_mask': {0: 'batch_size', 1: 'sequence'},
                  'logits': {0: 'batch_size', 1: 'sequence'}},
    do_constant_folding=True,
    opset_version=13,
)

  scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)
  scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)
  att_span = min(max(query_layer.size(-2), key_layer.size(-2)), self.max_relative_positions)
  pos_query_layer /= torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)
  pos_query_layer /= torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)
  if query_layer.size(-2) != key_layer.size(-2):
  if query_layer.size(-2) != key_layer.size(-2):
  output = input.masked_fill(rmask, torch.tensor(torch.finfo(input.dtype).min))


verbose: False, log level: Level.ERROR



## 2. mid level
transformers.onnx enables you to convert model checkpoints to an ONNX graph by leveraging configuration objects. That way you don’t have to provide the complex configuration for dynamic_axes etc.

In [9]:
from transformers.onnx import FeaturesManager
feature = "sequence-classification"

# load config
model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(
    model, feature=feature)
onnx_config = model_onnx_config(model.config)

# export
onnx_inputs, onnx_outputs = transformers.onnx.export(
    preprocessor=tokenizer,
    model=model,
    config=onnx_config,
    opset=13,
    output=Path("trfs-model.onnx")
)

  scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)
  scale = torch.sqrt(torch.tensor(query_layer.size(-1), dtype=torch.float) * scale_factor)
  att_span = min(max(query_layer.size(-2), key_layer.size(-2)), self.max_relative_positions)
  pos_query_layer /= torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)
  pos_query_layer /= torch.sqrt(torch.tensor(pos_query_layer.size(-1), dtype=torch.float) * scale_factor)
  if query_layer.size(-2) != key_layer.size(-2):
  if query_layer.size(-2) != key_layer.size(-2):
  output = input.masked_fill(rmask, torch.tensor(torch.finfo(input.dtype).min))


verbose: False, log level: Level.ERROR



## high level
Optimum Inference includes methods to convert vanilla Transformers models to ONNX using the ORTModelForXxx classes. To convert your Transformers model to ONNX you simply have to pass from_transformers=True to the from_pretrained() method and your model will be loaded and converted to ONNX leveraging the transformers.onnx package under the hood.

# Model Optimizer

In [10]:
onnx_model = "torch-model.onnx"
MODEL_DIR = "model/"
MODEL_DIR = f"{MODEL_DIR}"
checkpoint = "microsoft/deberta-base-mnli"
optimizer_command = f'mo \
    --input_model {onnx_model} \
    --output_dir {MODEL_DIR} \
    --model_name {checkpoint} \
    --input input_ids,attention_mask \
    --input_shape "[1,128],[1,128]"'
! $optimizer_command

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.
Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /Users/mahaoyang/GSOC/openvino_notebooks/notebooks/deberta-sequence-classification/model/microsoft/deberta-base-mnli.xml
[ SUCCESS ] BIN file: /Users/mahaoyang/GSOC/openvino_notebooks/notebooks/deberta-sequence-classification/model/

In [11]:

core = ov.Core()
ir_model_xml = str((Path(MODEL_DIR) / checkpoint).with_suffix(".xml"))
compiled_model = core.compile_model(ir_model_xml)
infer_request = compiled_model.create_infer_request()

In [12]:
def softmax(x):
    """
    Creating a softmax function that extracts predictions from the IR format's output.
    Parameters: Logits array
    Returns: Probabilities
    """

    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

# Inference
When using OpenVINO for inference, there are typically two methods: using InferRequest or CompiledModel for inference. Below, we will implement these two inference methods separately. You can also learn how to use them in [OpenVINO™ Runtime API Tutorial](https://docs.openvino.ai/latest/notebooks/002-openvino-api-with-output.html)

In [13]:
input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)

In [14]:
def infer_withInferRequest(input_text):
    """
    Creating a generic inference function
    to read the input and infer the result
    into 3 classes: CONTRADICTION, NEUTRAL or ENTAILMENT.
    Parameters: Text to be processed
    Returns: Label: CONTRADICTION, NEUTRAL or ENTAILMENT.
    """

    input_text = tokenizer(
        input_text,
        padding="max_length",
        max_length=128,
        truncation=True,
        return_tensors="pt",
    )
    inputs = dict(input_text)
    label = {0: "CONTRADICTION",
             1: "NEUTRAL",
             2: "ENTAILMENT"}

    # Because Port for tensor name token_type_ids was not found, we need to delete the attribute 'token_type_ids'
    del inputs['token_type_ids']
    result = infer_request.infer(inputs=inputs)
    for i in result.values():
        probability = np.argmax(softmax(i))
        print("probability: ", softmax(i))
    return label[probability]

In [15]:
def infer_withCompiledModel(input_text):
    """
    Creating a generic inference function
    to read the input and infer the result
    into 3 classes: CONTRADICTION, NEUTRAL or ENTAILMENT.
    Parameters: Text to be processed
    Returns: Label: CONTRADICTION, NEUTRAL or ENTAILMENT.
    """

    input_text = tokenizer(
        input_text,
        padding="max_length",
        max_length=128,
        truncation=True,
        return_tensors="pt",
    )
    inputs = dict(input_text)

    label = {0: "CONTRADICTION",
             1: "NEUTRAL",
             2: "ENTAILMENT"}
    # Because Port for tensor name token_type_ids was not found, we need to delete the attribute 'token_type_ids'
    del inputs['token_type_ids']
    # using a dictionary, where the key is input tensor name or index
    result = compiled_model(inputs)[output_layer]
    for i in result:
        probability = np.argmax(softmax(i))
        print("probability: ", softmax(i))
    return label[probability]

## for single input 

In [16]:
input_text = "I love you. I like you."
result = infer_withCompiledModel(input_text)
print("Label: ", result)

probability:  [4.7756627e-04 7.9052038e-02 9.2047036e-01]
Label:  ENTAILMENT


##  Read from a text file

In [17]:
with open("../data/text/food_reviews.txt", "r") as f:
    input_text = f.readlines()
    for lines in input_text:
        print("User Input: ", lines)
        result = infer_withCompiledModel(lines)
        print("Label: ", result, "\n")

User Input:  The food was horrible.

probability:  [0.01253021 0.97745925 0.0100106 ]
Label:  NEUTRAL 

User Input:  We went because the restaurant had good reviews.
probability:  [1.5560762e-03 9.9773127e-01 7.1273925e-04]
Label:  NEUTRAL 

