## OpenVINO optimizations for a text classification task


## Import the packages needed for successful execution

In [None]:
from transformers import AutoConfig, AutoTokenizer, default_data_collator
from datasets import load_dataset, load_metric
from optimum.intel.openvino import OVAutoModelForSequenceClassification

from torch.utils.data import DataLoader

from tqdm import tqdm

### Instructions on conversion to OpenVINO
We will use the OpenVINO™ Integration with Optimum module to convert the sentiment classification model to an OpenVINO model object. <br>
We will then use the Hugging Face `datasets` library and its F1 metric to evaluate the converted model.

In [None]:
# Checkpoint identifier passed to every from_pretrained call below.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Tokenizer and config come from the same checkpoint as the model itself.
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)

# Build the OpenVINO model object from the checkpoint.
# NOTE(review): from_pt=True presumably selects the PyTorch weights as the
# conversion source -- confirm against the optimum-intel documentation.
ov_model = OVAutoModelForSequenceClassification.from_pretrained(
    model_name,
    config=config,
    from_pt=True,
)

# Persist the converted model in the "saved_model" directory.
ov_model.save_pretrained("saved_model")

### Preprocess function for the dataset


In [None]:
def preprocess_function(examples):
    """Tokenize the ``sentence`` column of ``examples`` and attach the labels.

    Uses the module-level ``tokenizer``. Sentences are tokenized with
    ``padding="max_length"``, ``max_length=128`` and ``truncation=True``, so
    every encoded example has the same length.

    Args:
        examples: Mapping with a ``"sentence"`` entry (text to tokenize) and a
            ``"label"`` entry (target values).

    Returns:
        The tokenizer output with an additional ``"labels"`` entry holding
        ``examples["label"]`` unchanged.
    """
    encoded = tokenizer(
        examples["sentence"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    # Store the targets under the "labels" key, which the evaluation loop reads.
    encoded["labels"] = examples["label"]
    return encoded

### Evaluating the model performance

In [None]:
# Load the SST-2 dataset and the F1 metric used to score the converted model.
# NOTE: `load_metric` is deprecated in newer `datasets` releases in favour of
# the separate `evaluate` package; it is kept here to avoid a new dependency.
dataset = load_dataset('sst2')
metric = load_metric('f1')

# Tokenize every split and drop the original columns, leaving only the
# tokenizer outputs plus the "labels" entry added by preprocess_function.
dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

# No shuffling: the metric is aggregated over the whole validation split, so
# the iteration order cannot change the score.
val_dataloader = DataLoader(dataset['validation'], collate_fn=default_data_collator)

for batch in tqdm(val_dataloader, desc="Looping over validation data"):
    outputs = ov_model(
        input_ids=batch['input_ids'].numpy(),
        attention_mask=batch['attention_mask'].numpy(),
    )
    # Take the argmax over the last (class) axis so that each example in the
    # batch gets its own prediction; a bare argmax() would flatten the whole
    # batch into a single index and only be correct for batch size 1.
    # assumes outputs[0] holds the logits as (batch, num_labels) -- TODO confirm
    preds = outputs[0].argmax(axis=-1).reshape(-1)
    references = batch['labels'].reshape(-1)
    # Pass flat, equally long lists: one prediction and one reference per example.
    metric.add_batch(predictions=preds.tolist(), references=references.tolist())

ov_score = metric.compute()
print(f'Score for SST2 dataset with OV Optimum: {ov_score}')

### Benchmark the converted model using the benchmark app
The OpenVINO toolkit provides a benchmarking application that measures the platform-specific runtime performance achievable for a given model under optimal configuration parameters. For more details, refer to: https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html

In [None]:
# Path to the OpenVINO IR file to benchmark.
# NOTE(review): presumably written by the earlier save_pretrained('saved_model')
# call -- verify the file name produced by the installed optimum version.
base_model_name = 'saved_model/ov_model.xml'
print('Benchmark OpenVINO model using the benchmark app')
# IPython shell escape: `!` runs the line in the system shell and
# `$base_model_name` is expanded from the Python variable above.
# Flags (see the benchmark_app documentation): -m model path, -d target device,
# -api inference API mode, -t run duration in seconds, -hint performance hint.
! benchmark_app -m "$base_model_name" -d CPU -api async -t 10 -hint latency