## OpenVINO optimizations for the question answering (QA) task


## Import the packages needed for successful execution

In [None]:
from transformers import pipeline
from datasets import load_dataset, load_metric
from optimum.intel.openvino import OVAutoModelForQuestionAnswering

from tqdm import tqdm

from pathlib import Path

# Sequence length substituted into the benchmark command's -data_shape argument.
seq_len = 256

# Output location for the converted model: directory plus file stem.
base_model_name = "ov_model"
output_dir = Path("ov_optimized_model")
output_dir.mkdir(exist_ok=True)

# Path of the OpenVINO IR (.xml) file handed to benchmark_app via -m.
fp32_model_path = (output_dir / base_model_name).with_suffix(".xml")

### Instructions on conversion to OpenVINO
We will use the OpenVINO™ Integration with Optimum module to convert the pre-trained PyTorch question answering model to an OpenVINO model object. <br>
We will then use the Hugging Face `datasets` library and its SQuAD metric to evaluate the converted model.

In [None]:
# Model identifier, defined once and reused for both the model and its
# tokenizer so the two can never drift apart.
model = 'bert-large-uncased-whole-word-masking-finetuned-squad'

# from_pt=True: start from the PyTorch checkpoint and convert it to an
# OpenVINO model object while loading.
ov_model = OVAutoModelForQuestionAnswering.from_pretrained(model, from_pt=True)
qa_pipeline = pipeline("question-answering", model=ov_model, tokenizer=model)

# SQuAD validation split and the matching metric used to score the answers.
dataset = load_dataset("squad", split="validation")
metric = load_metric('squad')

# Iterating the dataset yields one example at a time, so each `batch` is a
# single question/context pair despite the name.
for batch in tqdm(dataset, desc="Looping over validation data"):
    preds = qa_pipeline(question=batch['question'], context=batch['context'])

    # add_batch expects lists, so the single example is wrapped in
    # one-element lists; prediction and reference are matched by 'id'.
    predictions = [{'prediction_text': preds['answer'], 'id': batch['id']}]
    references = [{'answers': batch['answers'], 'id': batch['id']}]
    metric.add_batch(predictions=predictions, references=references)

# Aggregate everything that was added above into the final score.
score = metric.compute()
print(f'Score for squad dataset: {score}')

### Save the converted model's intermediate representation (IR) files for runtime performance evaluation

In [None]:
# Write the converted model into output_dir so it can be loaded from disk later.
# NOTE(review): the benchmark cell reads the IR from fp32_model_path
# (output_dir / "ov_model.xml") — confirm save_pretrained emits that file name.
ov_model.save_pretrained(output_dir)

### Benchmark the converted model using the benchmark app
The OpenVINO toolkit provides a benchmarking application to gauge the platform-specific runtime performance that can be obtained under optimal configuration parameters for a given model. For more details, refer to: https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html

In [None]:
# Run OpenVINO's benchmark_app on the saved IR through a notebook shell escape (`!`).
# $fp32_model_path and $seq_len are filled in from the notebook's Python variables
# before the command is handed to the shell.
# Arguments as passed: -m model file, -d CPU, -api async, -t 10, -data_shape [1,seq_len].
# NOTE(review): -t is presumably the run duration in seconds — confirm against the
# benchmark tool documentation.
print('Benchmark OpenVINO model using the benchmark app')
! benchmark_app -m "$fp32_model_path" -d CPU -api async -t 10 -data_shape [1,"$seq_len"]