## OpenVINO optimizations for Text classification task


## Import the packages needed for successful execution

In [None]:
from transformers import AutoModel, AutoConfig, AutoTokenizer, AutoModel
from optimum.intel.openvino import OVAutoModel

import torch

### Load models

In [None]:
# Hugging Face Hub identifier of the checkpoint used as the PyTorch reference.
model_name = 'sentence-transformers/roberta-base-nli-stsb-mean-tokens'
# Directory the OpenVINO model is loaded from (presumably produced by an
# earlier NNCF quantization step -- confirm against the quantization notebook).
ov_xml_model_path = 'quantization/NNCF_optimized_model'

# Configuration and tokenizer both come from the Hub checkpoint; the same
# config object is handed to the OpenVINO loader below.
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reference PyTorch model.
pt_model = AutoModel.from_pretrained(model_name)

# OpenVINO model loaded from the local directory, reusing the Hub config.
ov_model = OVAutoModel.from_pretrained(
    ov_xml_model_path,
    config=config,
    from_ov=True,
)

### Test Cosine similarity with sentence transformers


In [None]:
#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over dimension 1, ignoring masked positions.

    Args:
        model_output: Indexable model result whose first element holds the
            token embeddings (presumably shaped (batch, seq_len, hidden) --
            confirm against the model in use).
        attention_mask: Tensor with one dimension fewer than the token
            embeddings; non-zero entries mark positions to include.

    Returns:
        Tensor of masked sums along dimension 1 divided by the per-row mask
        totals.
    """
    embeddings = model_output[0]
    # Broadcast the mask across the trailing embedding dimension so it can be
    # multiplied element-wise with the embeddings.
    mask = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
    masked_sum = torch.sum(embeddings * mask, 1)
    # The lower bound keeps the division finite when a row's mask is all zeros.
    mask_total = torch.clamp(mask.sum(1), min=1e-9)
    return masked_sum / mask_total

def get_cosine_similarity(model, inputs):
    """Return the cosine similarity between the first two sentences in `inputs`.

    The model is run on the tokenized batch, token embeddings are mean-pooled
    into one vector per sentence, and the first two sentence vectors are
    compared.

    Args:
        model: Callable accepting `input_ids` and `attention_mask` keyword
            arguments and returning an output whose first element holds the
            token embeddings.
        inputs: Mapping with `'input_ids'` and `'attention_mask'` entries for
            a batch of at least two sentences.

    Returns:
        Zero-dimensional tensor holding the cosine similarity score.
    """
    # This is inference only: disabling autograd avoids building a graph for
    # the PyTorch model and keeps the returned score free of a grad_fn.
    with torch.no_grad():
        model_output = model(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
        )
        sentence_embeddings = mean_pooling(model_output, inputs['attention_mask'])
        # dim=0 because each sentence embedding is compared as a 1-D vector.
        cos_sim = torch.nn.CosineSimilarity(dim=0)
        return cos_sim(sentence_embeddings[0], sentence_embeddings[1])

# Sentence pairs to score: an identical pair followed by pairs that differ
# in wording.
sent_pairs = [
    ['This is a happy day', 'This is a happy day'],
    ['This is a happy day', 'This is an example sentence'],
    ['That is a happy person', 'That is a happy dog'],
    ['This is a happy day', 'This is not a good sentence'],
]

# Score every pair with both models so the two results can be compared
# side by side.
for sent_pair in sent_pairs:
    # Both sentences are tokenized as one batch and padded to the maximum length.
    encoded_inputs = tokenizer(
        sent_pair,
        padding="max_length",
        truncation=True,
        return_tensors='pt',
    )

    print(f' Sentence pair: {sent_pair}')

    pt_cos_sim = get_cosine_similarity(pt_model, encoded_inputs)
    ov_cos_sim = get_cosine_similarity(ov_model, encoded_inputs)
    print(
        f'Cosine similarity score with PyTorch: {pt_cos_sim}; '
        f'Cosine similarity score with OV: {ov_cos_sim} \n'
    )


### Benchmark the converted model using the benchmark app
The OpenVINO toolkit provides a benchmarking application to gauge the platform-specific runtime performance that can be obtained under optimal configuration parameters for a given model. For more details, refer to: https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html

In [None]:
print('Benchmark OpenVINO model using the benchmark app')
# Path of the OpenVINO IR file inside the model directory loaded earlier.
base_model_name = "{}/ov_model.xml".format(ov_xml_model_path)
# IPython shell escape: "$base_model_name" is replaced with the Python
# variable's value before the command runs. Flags as passed: CPU device,
# async API, -t 10 (run duration, in seconds per the benchmark_app CLI) and
# the latency performance hint.
! benchmark_app -m "$base_model_name" -d CPU -api async -t 10 -hint latency