## OpenVINO optimizations for Question answering (QA) task


## Import the packages needed for successful execution

In [6]:
from transformers import pipeline
from datasets import load_dataset, load_metric
from optimum.intel.openvino import OVAutoModelForQuestionAnswering

from tqdm import tqdm

from pathlib import Path

# Export location for the converted OpenVINO model: a directory plus the
# base file name (without extension) that the IR files are expected to use.
base_model_name = "ov_model"
output_dir = Path("ov_optimized_model")
output_dir.mkdir(exist_ok=True)  # no error if the directory already exists

# Path of the FP32 OpenVINO IR definition (.xml) handed to benchmark_app later on
fp32_model_path = (output_dir / base_model_name).with_suffix(".xml")

# Sequence length used to build the benchmark_app -data_shape argument
seq_len = 256

### Instructions on conversion to OpenVINO
We will use the Optimum OpenVINO module (`optimum.intel.openvino`) to convert the pre-trained PyTorch question-answering model to an OpenVINO model object. <br>
We will then use the Hugging Face `datasets` library and its SQuAD metric to evaluate the converted model on the SQuAD validation split.

In [2]:
# Hugging Face checkpoint used for both the model weights and the tokenizer.
# Defined once so the two can never drift apart.
model = 'bert-large-uncased-whole-word-masking-finetuned-squad'

# from_pt=True: start from the PyTorch checkpoint and convert it to an OpenVINO model object
ov_model = OVAutoModelForQuestionAnswering.from_pretrained(model, from_pt=True)
qa_pipeline = pipeline("question-answering", model=ov_model, tokenizer=model)

# SQuAD validation split and the matching metric (reports exact_match and f1)
dataset = load_dataset("squad", split="validation")
metric = load_metric('squad')

# The dataset yields one example (question/context/answers/id) per iteration,
# so every metric "batch" below contains exactly one prediction/reference pair.
for example in tqdm(dataset, desc="Looping over validation data"):
    preds = qa_pipeline(question=example['question'], context=example['context'])

    predictions = [{'prediction_text': preds['answer'], 'id': example['id']}]
    references = [{'answers': example['answers'], 'id': example['id']}]
    metric.add_batch(predictions=predictions, references=references)

score = metric.compute()
print(f'Score for squad dataset: {score}')

Exception raised from index_select_out_cpu_ at /pytorch/aten/src/ATen/native/TensorAdvancedIndexing.cpp:758 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x42 (0x7fd9a4c68a22 in /home/dkarkada/miniconda3/envs/optimumtests/lib/python3.8/site-packages/torch/lib/libc10.so)
frame #1: at::native::index_select_out_cpu_(at::Tensor const&, long, at::Tensor const&, at::Tensor&) + 0x2a9 (0x7fd9e88727d9 in /home/dkarkada/miniconda3/envs/optimumtests/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #2: at::native::index_select_cpu_(at::Tensor const&, long, at::Tensor const&) + 0x60 (0x7fd9e8875230 in /home/dkarkada/miniconda3/envs/optimumtests/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #3: <unknown function> + 0x1a00352 (0x7fd9e8fea352 in /home/dkarkada/miniconda3/envs/optimumtests/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #4: at::redispatch::index_select(c10::DispatchKeySet, at::Tensor const&, long, at::T

Score for squad dataset: {'exact_match': 86.5752128666036, 'f1': 92.93190288050819}


In [3]:
# Persist the converted model into output_dir so it can be benchmarked from disk.
# NOTE(review): the benchmark cell reads `fp32_model_path` (output_dir/ov_model.xml);
# presumably save_pretrained writes the IR under that file name — confirm.
ov_model.save_pretrained(output_dir)

### Benchmark the converted model using the benchmark app

In [7]:
print('Benchmark OpenVINO model using the benchmark app')
! benchmark_app -m "$fp32_model_path" -d CPU -api async -t 10 -data_shape [1,"$seq_len"]

Benchmark OpenVINO model using the benchmark app
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[Step 1/11] Parsing and validating input arguments
[Step 2/11] Loading OpenVINO
[ INFO ] OpenVINO:
         API version............. 2022.1.0-7019-cdb9bec7210-releases/2022/1
[ INFO ] Device info
         CPU
         openvino_intel_cpu_plugin version 2022.1
         Build................... 2022.1.0-7019-cdb9bec7210-releases/2022/1

[Step 3/11] Setting device configuration
[Step 4/11] Reading network files
[ INFO ] Read model took 1014.44 ms
[Step 5/11] Resizing network to match image sizes and given batch
[ INFO ] Network batch size: 1
[Step 6/11] Configuring input of the model
[ INFO ] Model input 'input_ids' precision i32, dimensions ([...]): ? ?
[ INFO ] 