# Benchmark of inference tools on 4CPU

Comparisson of different tools for model inference on 4 CPU.

|                     |                                         |
|---------------------|-------------------------------------------|
|**Hard ware**         | Intel Ice Lake with 4 CPUs and 16 Gb RAM.  |
|**Software Platform**| host |
|**Tools to compare**| Torch Script, Torch Trace, ONNX, Open Vino|

### Common

In [1]:
%load_ext autoreload
%autoreload 2


In [2]:
import timm
import torch as th
import typing as tp
import numpy as np
import cv2
import yaml
import json
import pandas as pd

from src.utils import get_batch, MAX_UINT8, benchmark


DATA_DIR = "./images"
BATCH_SIZES = [1,2,4,8,16]
PLATFORM = "4cpu"
WARMUP = 5
N_RUNS = 10
VERBOSE = False
N_PRINT = 5
DEVICE = "cpu"

In [3]:
# batches = dict()
# for batch_sz in BATCH_SIZES:
#     batch = get_batch(DATA_DIR, batch_sz)
#     batches[batch_sz] = batch
#     del batch

In [4]:
inference_records = []

### Torch Model

In [5]:
from src.model.torch import ModelTorch

In [6]:
records = []
with open("./config/torch.yaml") as fp:
    cfg_torch = yaml.safe_load(fp)

cfg_torch["device"] = DEVICE
model_torch = ModelTorch(cfg_torch)
for batch_sz in BATCH_SIZES:
    avg, stdev = benchmark(
        model=model_torch,
        input_shape=(batch_sz, 3, MAX_UINT8, MAX_UINT8),
        nwarmup=WARMUP,
        nruns=N_RUNS,
        print_step=N_PRINT,
        verbose=VERBOSE
    )
    records.append(
        {
            "time": avg,
            "platform": PLATFORM, 
            "batch_sz": batch_sz,
            "tool": "Torch"
        }
    )
inference_records.extend(records)
pd.DataFrame(records)


Unnamed: 0,time,platform,batch_sz,tool
0,0.040505,4cpu,1,Torch
1,0.067429,4cpu,2,Torch
2,0.140837,4cpu,4,Torch
3,0.273973,4cpu,8,Torch
4,0.575115,4cpu,16,Torch


### TorchScript Model

In [7]:
from src.model.torch_jit import ModelTorchJIT

tmp = th.jit.script(model_torch.model)
th.jit.save(tmp, "weights/model_scripted.th")

with open("./config/torch_scripted.yaml") as fp:
    cfg_scripted = yaml.safe_load(fp)
cfg_scripted["device"] = DEVICE
model_scripted = ModelTorchJIT(cfg_scripted)
records = []
for batch_sz in BATCH_SIZES:
    avg, stdev = benchmark(
        model=model_scripted,
        input_shape=(batch_sz, 3, MAX_UINT8, MAX_UINT8),
        nwarmup=WARMUP,
        nruns=N_RUNS,
        print_step=N_PRINT,
        verbose=VERBOSE
    )
    records.append(
        {
            "time": avg,
            "platform": PLATFORM, 
            "batch_sz": batch_sz,
            "tool": "TorchScript"
        }
    )
inference_records.extend(records)
pd.DataFrame(records)


Unnamed: 0,time,platform,batch_sz,tool
0,0.039361,4cpu,1,TorchScript
1,0.064719,4cpu,2,TorchScript
2,0.139555,4cpu,4,TorchScript
3,0.267235,4cpu,8,TorchScript
4,0.54952,4cpu,16,TorchScript


### TorchTrace Model

In [8]:
from src.model.torch_jit import ModelTorchJIT

In [9]:
tmp = th.jit.trace(
    model_torch.model,
    th.rand(1, 3, MAX_UINT8, MAX_UINT8).to(th.float32)
)
th.jit.save(tmp, "weights/model_traced.th")

In [10]:
with open("./config/torch_traced.yaml") as fp:
    cfg_traced = yaml.safe_load(fp)
cfg_traced["device"] = DEVICE
model_traced = ModelTorchJIT(cfg_traced)
records = []
for batch_sz in BATCH_SIZES:
    avg, _=benchmark(
        model=model_traced,
        input_shape=(batch_sz, 3, MAX_UINT8, MAX_UINT8),
        nwarmup=WARMUP,
        nruns=N_RUNS,
        print_step=N_PRINT,
        verbose=VERBOSE
    )
    records.append(
        {
            "time": avg,
            "platform": PLATFORM, 
            "batch_sz": batch_sz,
            "tool": "TorchTrace"
        }
    )
inference_records.extend(records)
pd.DataFrame(records)


Unnamed: 0,time,platform,batch_sz,tool
0,0.039563,4cpu,1,TorchTrace
1,0.066081,4cpu,2,TorchTrace
2,0.136957,4cpu,4,TorchTrace
3,0.280754,4cpu,8,TorchTrace
4,0.551547,4cpu,16,TorchTrace


### ONNX Model

In [11]:
from src.model.onnx import ModelONNX

In [12]:
th.onnx.export(
    model_torch.model,
    th.rand(1, 3, MAX_UINT8, MAX_UINT8).to(th.float32),
    "weights/model.onnx",
    verbose=True,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes = {
        'input': [0], 
        'output': [0]
    },
)



Exported graph: graph(%input : Float(*, 3, 255, 255, strides=[195075, 65025, 255, 1], requires_grad=0, device=cpu),
      %fc.weight : Float(17, 512, strides=[512, 1], requires_grad=1, device=cpu),
      %fc.bias : Float(17, strides=[1], requires_grad=1, device=cpu),
      %onnx::Conv_193 : Float(64, 3, 7, 7, strides=[147, 49, 7, 1], requires_grad=0, device=cpu),
      %onnx::Conv_194 : Float(64, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_196 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_197 : Float(64, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_199 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_200 : Float(64, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_202 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_203 : Float(64, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_205 : Float(64

In [13]:
with open("./config/onnx_cpu.yaml") as fp:
    cfg_onnx = yaml.safe_load(fp)
cfg_onnx["device"] = DEVICE
model_onnx = ModelONNX(cfg_onnx)
records = []
for batch_sz in BATCH_SIZES:
    avg, _ = benchmark(
        model=model_onnx,
        input_shape=(batch_sz, 3, MAX_UINT8, MAX_UINT8),
        nwarmup=WARMUP,
        nruns=N_RUNS,
        print_step=N_PRINT,
        verbose=VERBOSE
    )
    records.append(
        {
            "time": avg,
            "platform": PLATFORM, 
            "batch_sz": batch_sz,
            "tool": "ONNX"
        }
    )
inference_records.extend(records)
pd.DataFrame(records)


Unnamed: 0,time,platform,batch_sz,tool
0,0.018597,4cpu,1,ONNX
1,0.036123,4cpu,2,ONNX
2,0.071911,4cpu,4,ONNX
3,0.141841,4cpu,8,ONNX
4,0.281526,4cpu,16,ONNX


### OpenVino Model

In [14]:
# bash command to convert ONNX -> OpenVino
!mo --input_model weights/model.onnx --output_dir weights/openvino

Check for a new version of Intel(R) Distribution of OpenVINO(TM) toolkit here https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html?cid=other&source=prod&campid=ww_2023_bu_IOTG_OpenVINO-2022-3&content=upg_all&medium=organic or on https://github.com/openvinotoolkit/openvino
[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.
Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /home/fatuus/deepschool-cvr-conversion/weights/openvino/model.xml
[ SUCCESS ] BIN file: /home/fatuus/deepschool-cvr-conversion/weights/openvino/model.bin


In [15]:
from src.model.openvino import ModelOpenVino

In [16]:
with open("./config/openvino.yaml") as fp:
    cfg_ov = yaml.safe_load(fp)
cfg_ov["device"] = DEVICE

model_ov = ModelOpenVino(cfg_ov)
records = []
for batch_sz in BATCH_SIZES:
    avg, _ = benchmark(
        model=model_ov,
        input_shape=(batch_sz, 3, MAX_UINT8, MAX_UINT8),
        nwarmup=WARMUP,
        nruns=N_RUNS,
        print_step=N_PRINT,
        verbose=VERBOSE
    )
    records.append(
        {
            "time": avg,
            "platform": PLATFORM, 
            "batch_sz": batch_sz,
            "tool": "OpenVino"
        }
    )
inference_records.extend(records)
pd.DataFrame(records)


Unnamed: 0,time,platform,batch_sz,tool
0,0.054006,4cpu,1,OpenVino
1,0.092741,4cpu,2,OpenVino
2,0.18327,4cpu,4,OpenVino
3,0.365141,4cpu,8,OpenVino
4,0.729367,4cpu,16,OpenVino


In [17]:
with open(
    "results/inference_results_4cpu.json", 
    "w",
    encoding="utf8"
) as fp:
    json.dump(fp=fp, obj=inference_records)

In [18]:
pd.DataFrame(inference_records)

Unnamed: 0,time,platform,batch_sz,tool
0,0.040505,4cpu,1,Torch
1,0.067429,4cpu,2,Torch
2,0.140837,4cpu,4,Torch
3,0.273973,4cpu,8,Torch
4,0.575115,4cpu,16,Torch
5,0.039361,4cpu,1,TorchScript
6,0.064719,4cpu,2,TorchScript
7,0.139555,4cpu,4,TorchScript
8,0.267235,4cpu,8,TorchScript
9,0.54952,4cpu,16,TorchScript
