## OpenVINO optimizations for language modeling task


In [None]:
# Install openvino-optimum if it is not installed already.
# The leading "!" makes the notebook run the rest of the line as a shell command.
! pip install openvino-optimum

## Import the packages needed for successful execution

In [None]:
from transformers import AutoConfig, AutoTokenizer
from optimum.intel.openvino import OVAutoModelWithLMHead

### Instructions on conversion to OpenVINO
We will use the OpenVINO™ Integration with Optimum module to convert the BERT-Base, Multilingual Uncased model to an OpenVINO model object. <br>

In [None]:
# Hugging Face Hub id of the checkpoint to convert
model_name = 'bert-base-multilingual-uncased'

# The tokenizer does not depend on the conversion; it is reused later by the
# fill-mask pipeline.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model configuration and build the OpenVINO model object from the
# original checkpoint (from_pt=True: presumably "start from the PyTorch
# weights" -- confirm against the openvino-optimum documentation).
config = AutoConfig.from_pretrained(model_name)
ov_model = OVAutoModelWithLMHead.from_pretrained(
    model_name,
    config=config,
    from_pt=True,
)

# Save the converted model to disk; the benchmarking cell reads
# 'ov_model.xml' from this directory.
ov_model.save_pretrained('bert-base-multilingual_OV_IR')

#### Evaluate the model by comparing to the results on the HF model card: https://huggingface.co/bert-base-multilingual-uncased

In [None]:
from transformers import pipeline

# Wrap the converted OpenVINO model and its tokenizer in a fill-mask pipeline
unmasker = pipeline(
    'fill-mask',
    model=ov_model,
    tokenizer=tokenizer,
)

# Sentence with one masked token for the model to fill in
masked_sentence = "Hello I'm a [MASK] model."

# Kept as the last expression so the notebook displays the predictions, which
# can then be compared with the results shown on the HF model card.
unmasker(masked_sentence)

### Benchmark the converted model using the benchmark app
The OpenVINO toolkit provides a benchmarking application to gauge the platform-specific runtime performance that can be obtained under optimal configuration parameters for a given model. For more details, refer to: https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html

In [None]:
# Path to the OpenVINO IR (.xml) inside the directory written by save_pretrained() above
base_model_name = 'bert-base-multilingual_OV_IR/ov_model.xml'

# Set the sequence length for benchmarking
seq_len = 128

print('Benchmark OpenVINO model using the benchmark app')
# The "!" runs benchmark_app in a shell; "$base_model_name" and "$seq_len" are
# replaced with the values of the Python variables defined in this cell.
# The command targets the CPU device (-d CPU), uses the async API (-api async)
# with a latency hint (-hint latency), and sets -t 10 (run duration, in
# seconds per the benchmark_app documentation).
# -shape fixes the input shape to [1, seq_len] -- presumably
# [batch size, sequence length]; confirm against the model's inputs.
! benchmark_app -m "$base_model_name" -d CPU -api async -t 10 -hint latency -shape [1,"$seq_len"]