# [How to run Keras model inference x3 times faster with CPU and Intel OpenVINO](https://www.dlology.com/blog/how-to-run-keras-model-inference-x3-times-faster-with-cpu-and-intel-openvino-1/) | DLology Blog
Run the `setupvars.bat` before calling `jupyter notebook`.
```
C:\Intel\computer_vision_sdk\bin\setupvars.bat
```
Or, on Linux, add the following line to `~/.bashrc`:
```
source ~/intel/computer_vision_sdk/bin/setupvars.sh
```

For some steps, the IPython namespace is **reset** (via `%reset -f`) before proceeding, to mimic a fresh start.


# Part 1: Using integrated GPUs via PlaidML

## Save the Keras model as a single .h5 file

For the tutorial, we will load a pre-trained ImageNet classification InceptionV3 model from Keras, run a test prediction on a sample image, and save the model as a single `.h5` file.

In [1]:
import os

import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3 as Net
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_v3 import preprocess_input, decode_predictions
import numpy as np

# --- Configuration ---------------------------------------------------------
# Sample picture used to sanity-check the model, the output directory, and the
# square input resolution used for the picture.
img_path = './data/elephant.jpg'
model_path = './model'
img_height = 299

# Make sure the output directory exists, then derive the .h5 file path in it.
os.makedirs(model_path, exist_ok=True)
model_fname = os.path.join(model_path, 'model.h5')

# --- Model -----------------------------------------------------------------
# `Net` is the InceptionV3 class imported above; load it with ImageNet weights.
model = Net(weights='imagenet')

# --- Test prediction -------------------------------------------------------
# Load the picture at img_height x img_height, convert it to an array, add a
# leading batch axis and apply the InceptionV3 input preprocessing.
img = image.load_img(img_path, target_size=(img_height,) * 2)
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

preds = model.predict(x)
# decode_predictions yields one list of (class, description, probability)
# tuples per sample in the batch; show the top 3 for the single sample.
top3 = decode_predictions(preds, top=3)[0]
print('Predicted:', top3)

# Write the model to the .h5 path computed above.
model.save(model_fname)

Predicted: [('n02504458', 'African_elephant', 0.9739232), ('n01871265', 'tusker', 0.00807269), ('n02504013', 'Indian_elephant', 0.003881058)]


### Benchmark Keras prediction speed on CPU.

In [2]:
import time

# Time 10 consecutive single-image predictions and report the mean latency
# and the corresponding frames per second.
# time.perf_counter() is used instead of time.time(): it is monotonic and has
# the highest available resolution, so short intervals are measured reliably
# (time.time() can jump with clock adjustments and is coarse on some systems).
times = []
for i in range(10):
    start_time = time.perf_counter()
    preds = model.predict(x)
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.array(times).mean()
fps = 1/mean_delta
print('average(sec):{},fps:{}'.format(mean_delta,fps))

# Clear any previous session.
tf.keras.backend.clear_session()

average(sec):1.1168538093566895,fps:0.8953723321909091


## Benchmark Keras prediction speed on integrated GPU via plaidml.

#### Run `plaidml-setup` from the command line and select `opencl_intel_intel(r)_gen9_hd_graphics_neo.0`

In [3]:
import os

# Select the PlaidML Keras backend (the device itself is chosen beforehand with
# `$ plaidml-setup`). The variable is set before `keras` is imported below.
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

import tensorflow as tf
import keras
import keras.applications as kapp
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_v3 import preprocess_input, decode_predictions

# --- Configuration ---------------------------------------------------------
# Sample picture, output directory and square input resolution.
img_path = './data/elephant.jpg'
model_path = './model'
img_height = 299

# Output directory and .h5 path, kept in step with the first cell.
os.makedirs(model_path, exist_ok=True)
model_fname = os.path.join(model_path, 'model.h5')

# --- Model -----------------------------------------------------------------
# InceptionV3 with ImageNet weights, built through the standalone `keras`
# package; both `Net` and `model` refer to this one instance.
Net = kapp.InceptionV3(weights='imagenet')
model = Net

# --- Test prediction -------------------------------------------------------
# Load the picture, convert it to an array, add a batch axis and preprocess.
img = image.load_img(img_path, target_size=(img_height,) * 2)
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

preds = model.predict(x)
# decode_predictions yields one list of (class, description, probability)
# tuples per sample in the batch; show the top 3 for the single sample.
top3 = decode_predictions(preds, top=3)[0]
print('Predicted:', top3)


Using plaidml.keras.backend backend.
INFO:plaidml:Opening device "opencl_intel_intel(r)_gen9_hd_graphics_neo.0"
INFO:plaidml:Analyzing Ops: 34 of 1145 operations complete
INFO:plaidml:Analyzing Ops: 135 of 1145 operations complete
INFO:plaidml:Analyzing Ops: 215 of 1145 operations complete
INFO:plaidml:Analyzing Ops: 308 of 1145 operations complete
INFO:plaidml:Analyzing Ops: 377 of 1145 operations complete
INFO:plaidml:Analyzing Ops: 657 of 1145 operations complete
INFO:plaidml:Analyzing Ops: 1065 of 1145 operations complete


Predicted: [('n02504458', 'African_elephant', 0.9368174), ('n01871265', 'tusker', 0.02795167), ('n02504013', 'Indian_elephant', 0.007957166)]


In [4]:
import time

# Time 10 consecutive single-image predictions and report the mean latency
# and the corresponding frames per second.
# time.perf_counter() is used instead of time.time(): it is monotonic and has
# the highest available resolution, so short intervals are measured reliably
# (time.time() can jump with clock adjustments and is coarse on some systems).
times = []
for i in range(10):
    start_time = time.perf_counter()
    preds = model.predict(x)
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.array(times).mean()
fps = 1/mean_delta
print('average(sec):{},fps:{}'.format(mean_delta,fps))

# Clear any previous session.
tf.keras.backend.clear_session()

average(sec):1.0308536052703858,fps:0.9700698478303394


# Part 2: Using CPU, integrated GPU and Myriad-X via OpenVINO

## Freeze the graph to a single TensorFlow .pb file for inference

This step removes any layers and operations not necessary for inference.

In [5]:
# force reset ipython namespaces
%reset -f

import tensorflow as tf
from tensorflow.python.framework import graph_io
from tensorflow.keras.models import load_model


# Clear any previous session.
tf.keras.backend.clear_session()

# Directory that receives the frozen .pb file (passed to freeze_graph below).
save_pb_dir = './model'
# Keras .h5 model file that is loaded below and then frozen.
model_fname = './model/model.h5'
def freeze_graph(graph, session, output, save_pb_dir='.', save_pb_name='frozen_model.pb', save_pb_as_text=False):
    """Freeze a TensorFlow graph and write it to a single protobuf file.

    Training-only nodes are stripped from the graph definition, the variables
    are converted to constants using the values held by `session`, and the
    result is written to `save_pb_dir/save_pb_name`.

    Args:
        graph: TensorFlow graph to freeze.
        session: Session used to read the current variable values.
        output: List of output node names to keep in the frozen graph.
        save_pb_dir: Directory the file is written to.
        save_pb_name: File name of the written graph.
        save_pb_as_text: Forwarded as `as_text` to `graph_io.write_graph`.

    Returns:
        The frozen graph definition.
    """
    with graph.as_default():
        inference_def = tf.graph_util.remove_training_nodes(graph.as_graph_def())
        frozen_def = tf.graph_util.convert_variables_to_constants(
            session, inference_def, output
        )
        graph_io.write_graph(
            frozen_def, save_pb_dir, save_pb_name, as_text=save_pb_as_text
        )
    return frozen_def

# The learning phase has to be set before the Keras model is loaded.
tf.keras.backend.set_learning_phase(0)

model = load_model(model_fname)
session = tf.keras.backend.get_session()

# Names of the graph ops behind the model's input and output tensors.
INPUT_NODE = [tensor.op.name for tensor in model.inputs]
OUTPUT_NODE = [tensor.op.name for tensor in model.outputs]
print(INPUT_NODE, OUTPUT_NODE)

# Freeze the session graph down to the output ops and write it to save_pb_dir.
frozen_graph = freeze_graph(
    session.graph, session, OUTPUT_NODE, save_pb_dir=save_pb_dir
)

ERROR:root:Invalid alias: The name clear can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name more can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name less can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name man can't be aliased because it is another magic command.






['input_1'] ['predictions/Softmax']
INFO:tensorflow:Froze 378 variables.


INFO:tensorflow:Froze 378 variables.


INFO:tensorflow:Converted 378 variables to const ops.


INFO:tensorflow:Converted 378 variables to const ops.


In [6]:
# Print the layer-by-layer summary of the model loaded in the previous cell.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, None, None, 3 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, None, None, 3 96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, None, None, 3 0           batch_normalization[0][0]        
__________________________________________________________________________________________________
conv2d_1 (

batch_normalization_86 (BatchNo (None, None, None, 3 1152        conv2d_86[0][0]                  
__________________________________________________________________________________________________
batch_normalization_90 (BatchNo (None, None, None, 3 1152        conv2d_90[0][0]                  
__________________________________________________________________________________________________
activation_86 (Activation)      (None, None, None, 3 0           batch_normalization_86[0][0]     
__________________________________________________________________________________________________
activation_90 (Activation)      (None, None, None, 3 0           batch_normalization_90[0][0]     
__________________________________________________________________________________________________
conv2d_87 (Conv2D)              (None, None, None, 3 442368      activation_86[0][0]              
__________________________________________________________________________________________________
conv2d_88 

## OpenVINO model optimizer (try experimenting with the precision)

In [17]:
# force reset ipython namespaces
%reset -f

import platform

# True when the platform description string mentions Windows.
is_win = 'windows' in platform.platform().lower()

# Location of the Model Optimizer TensorFlow script: Windows vs. Linux install.
mo_tf_path = (
    'C:/Intel/computer_vision_sdk/deployment_tools/model_optimizer/mo_tf.py'
    if is_win else
    '/opt/intel/openvino/computer_vision_sdk/deployment_tools/model_optimizer/mo_tf.py'
)

# Frozen graph to convert and the directory that receives the generated files.
pb_file = './model/frozen_model.pb'
output_dir = './model'

# Input shape [1, height, width, 3], rendered without spaces so that it can be
# passed as a single command-line token.
img_height = 299
input_shape = [1, img_height, img_height, 3]
input_shape_str = '[' + ','.join(str(dim) for dim in input_shape) + ']'
input_shape_str

ERROR:root:Invalid alias: The name clear can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name more can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name less can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name man can't be aliased because it is another magic command.


'[1,299,299,3]'

Use Python 3.5 or newer to run the Model Optimizer.

In [18]:
# Run the Model Optimizer script on the frozen graph; the IR files are written
# to output_dir with FP16 precision (change --data_type to experiment).
!python {mo_tf_path} --input_model {pb_file} --output_dir {output_dir} --input_shape {input_shape_str} --data_type FP16

Model Optimizer arguments:
Common parameters:
	- Path to the Input Model: 	/home/upsquared/Downloads/keras_openvino/./model/frozen_model.pb
	- Path for generated IR: 	/home/upsquared/Downloads/keras_openvino/./model
	- IR output name: 	frozen_model
	- Log level: 	ERROR
	- Batch: 	Not specified, inherited from the model
	- Input layers: 	Not specified, inherited from the model
	- Output layers: 	Not specified, inherited from the model
	- Input shapes: 	[1,299,299,3]
	- Mean values: 	Not specified
	- Scale values: 	Not specified
	- Scale factor: 	Not specified
	- Precision of IR: 	FP16
	- Enable fusing: 	True
	- Enable grouped convolutions fusing: 	True
	- Move mean values to preprocess section: 	False
	- Reverse input channels: 	False
TensorFlow specific parameters:
	- Input model in text protobuf format: 	False
	- Offload unsupported operations: 	False
	- Path to model dump for TensorBoard: 	None
	- List of shared libraries with TensorFlow custom layers implementation: 	None
	- Update 

## Inference test with OpenVINO Inference Engine(IE)

Check that a path like `C:\Intel\computer_vision_sdk\python\python3.5` or `~/intel/computer_vision_sdk/python/python3.5` exists in `PYTHONPATH`.

In [19]:
# force reset ipython namespaces
%reset -f

import os
import platform

# Pick the setup hint that matches the current operating system.
is_win = 'windows' in platform.platform().lower()
if is_win:
    message = 'Please run `C:\\Intel\\computer_vision_sdk\\bin\\setupvars.bat` before launching jupyter notebook.'
else:
    message = 'Add the following line to ~/.bashrc and re-run jupyternotebook.\nsource ~/intel/computer_vision_sdk/bin/setupvars.sh'

# PYTHONPATH is absent altogether when setupvars was never run; fall back to an
# empty string so the assertion fails with the hint above instead of a bare
# KeyError.
assert 'computer_vision_sdk' in os.environ.get('PYTHONPATH', ''), message

ERROR:root:Invalid alias: The name clear can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name more can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name less can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name man can't be aliased because it is another magic command.


In [20]:
# `sys` is needed by the error path below; the namespace was reset earlier, so
# it has to be imported here or the handler itself fails with a NameError.
import sys

from PIL import Image
import numpy as np

# Import the OpenVINO Inference Engine Python API; on failure, report the
# error type and message and stop.
try:
    from openvino import inference_engine as ie
    from openvino.inference_engine import IENetwork, IEPlugin
except Exception as e:
    exception_type = type(e).__name__
    print("The following error happened while importing Python API module:\n[ {} ] {}".format(exception_type, e))
    sys.exit(1)

In [21]:
def pre_process_image(imagePath, img_height=299, img_width=None):
    """Load an image file and convert it to a (1, 3, height, width) array.

    Args:
        imagePath: Path of the image file to open.
        img_height: Target height in pixels.
        img_width: Target width in pixels; defaults to `img_height` (square
            input), which matches the previous behaviour.

    Returns:
        A tuple `(image, processedImg, imagePath)`: the opened PIL image, the
        preprocessed float array scaled to the 0 - 1 range, and the path that
        was passed in.
    """
    if img_width is None:
        img_width = img_height

    # Model input format
    n, c, h, w = 1, 3, img_height, img_width
    image = Image.open(imagePath)

    # Force three channels so grayscale, palette or RGBA files still fit the
    # fixed (n, 3, h, w) layout below.
    rgb_image = image.convert('RGB')

    # PIL expects the target size as (width, height).
    resized = rgb_image.resize((w, h), resample=Image.BILINEAR)

    # Normalize to keep data between 0 - 1
    # NOTE(review): Keras' InceptionV3 `preprocess_input` scales pixels to
    # [-1, 1]; this helper keeps the original [0, 1] scaling - confirm which
    # range the converted model expects.
    processedImg = np.array(resized) / 255.0

    # Change data layout from HWC to CHW, then add the batch dimension.
    processedImg = processedImg.transpose((2, 0, 1))
    processedImg = processedImg.reshape((n, c, h, w))

    return image, processedImg, imagePath

## Try changing the hardware backend

In [26]:
# IR files to load: network description (.xml) and weights (.bin).
model_xml = './model/frozen_model.xml'
model_bin = './model/frozen_model.bin'

# Create the plugin for the chosen device; no extra plugin directory is given.
# Devices: GPU (intel), CPU, MYRIAD
plugin_dir = None
plugin = IEPlugin("MYRIAD", plugin_dirs=plugin_dir)

# Read the IR into a network object.
net = IENetwork(model=model_xml, weights=model_bin)

# The cells below rely on exactly one input and one output.
assert len(net.inputs.keys()) == 1
assert len(net.outputs) == 1

# Names of that single input and single output.
input_blob = next(iter(net.inputs))
out_blob = next(iter(net.outputs))

# Load the network onto the plugin; the network object is not needed afterwards.
exec_net = plugin.load(network=net)
del net

In [27]:
# Preprocess the sample picture and run it through the loaded network.
fileName = './data/elephant.jpg'
image, processedImg, imagePath = pre_process_image(fileName)
res = exec_net.infer(inputs={input_blob: processedImg})

# The result is keyed by output name; keep the entry of the first key.
output_node_name = list(res)[0]
res = res[output_node_name]

# Index of the highest score for the first item: the last entry of the
# ascending argsort.
ranking = np.argsort(res[0])
idx = ranking[-1]
idx

386

In [28]:
from tensorflow.keras.applications.inception_v3 import decode_predictions

# Decode the scores into labelled predictions and show the top 3 for the
# single sample in the batch.
top3 = decode_predictions(res, top=3)[0]
print('Predicted:', top3)

Predicted: [('n02504458', 'African_elephant', 0.91015625), ('n01871265', 'tusker', 0.071899414), ('n02504013', 'Indian_elephant', 0.016296387)]


In [29]:
import time

# Time 10 consecutive single-image inference calls and report the mean latency
# and the corresponding frames per second.
# time.perf_counter() is used instead of time.time(): it is monotonic and has
# the highest available resolution, which matters for sub-100 ms intervals
# (time.time() can jump with clock adjustments and is coarse on some systems).
times = []
for i in range(10):
    start_time = time.perf_counter()
    res = exec_net.infer(inputs={input_blob: processedImg})
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.array(times).mean()
fps = 1/mean_delta
print('average(sec):{},fps:{}'.format(mean_delta,fps))

average(sec):0.08649892807006836,fps:11.560836906440635
