In [None]:
import pathlib

import numpy as np
import onnxruntime
import timm
import torch

from utils.torch2onnx import convert_to_onnx_model

In [None]:
# Geometry of the benchmark input: a 3-channel, square 1024x1024 image.
# These values feed both the ONNX export shape and the random test input.
N_CHANNELS = 3
INPUT_HEIGHT = INPUT_WIDTH = 1024

In [None]:
# Two separately constructed ResNet-18 instances, one per device. No
# `pretrained` argument is passed, so timm's default applies to both.
# Each model is switched to eval mode before any benchmark runs.
model_cpu = timm.create_model(model_name='resnet18').eval()

# The GPU copy is moved to the first CUDA device; the final expression is
# kept as a bare call so the cell still displays the module summary.
model_gpu = timm.create_model(model_name='resnet18').to('cuda:0')
model_gpu.eval()

In [None]:
# Export the CPU model to ONNX through the project helper; its second return
# value is not needed here and is discarded.
# Batch size, height and width of the input are declared dynamic.
# NOTE(review): assumes the helper names the graph input 'input' (later cells
# feed the sessions under that key) — confirm in utils.torch2onnx.
onnxmodel, _ = convert_to_onnx_model(
    model_cpu,
    input_shape=(1, N_CHANNELS, INPUT_HEIGHT, INPUT_WIDTH),
    output_names=['output'],
    dynamic_axes={
        'input': {
            0: 'batch_size',
            2: 'height',
            3: 'width',
        },
    },
)

In [None]:
# One random batch (batch size 1) shared by every backend below.
# randn yields float64, so it is cast down to float32 before use.
input_array = np.random.randn(1, N_CHANNELS, INPUT_HEIGHT, INPUT_WIDTH)
input_array = input_array.astype(np.float32)

# Torch view of the same data for the PyTorch benchmarks.
input_tensor = torch.from_numpy(input_array)

### PyTorch Inference with CPU

In [None]:
%%timeit
# Timed body: a single forward pass of the CPU model on the shared input
# tensor, run under inference_mode so no autograd state is recorded.
with torch.inference_mode():
    result_torch = model_cpu(input_tensor)

### PyTorch Inference with GPU

In [None]:
%%timeit
# Timed body: host-to-device copy of the input followed by one forward pass
# of the GPU model, run under inference_mode (no autograd bookkeeping).
with torch.inference_mode():
    _input_tensor = torch.from_numpy(input_array).to('cuda:0')
    result_torch = model_gpu(_input_tensor)
# CUDA kernels are launched asynchronously: the Python call returns as soon
# as the work is queued. Block until the GPU has finished so that %%timeit
# measures the actual inference time rather than just the launch overhead.
torch.cuda.synchronize()

### ONNX Inference with CPUExecutionProvider

In [None]:
# ONNX Runtime session limited to the CPU execution provider. The in-memory
# ONNX model is passed as serialized bytes rather than as a file path.
session_cpu = onnxruntime.InferenceSession(
    onnxmodel.SerializeToString(),
    providers=['CPUExecutionProvider'],
)

In [None]:
%%timeit
# Timed body: one run of session_cpu, feeding the NumPy array under the
# input name 'input'. None as the first argument requests all model outputs.
result_onnx = session_cpu.run(None, {'input': input_array})

### ONNX Inference with CUDAExecutionProvider

In [None]:
# ONNX Runtime session that requests the CUDA execution provider. The
# in-memory ONNX model is passed as serialized bytes.
# The provider name must be spelled exactly 'CUDAExecutionProvider': names
# are matched case-sensitively, and an unrecognised name is not used, which
# would leave this "GPU" session running on the CPU provider.
session_gpu = onnxruntime.InferenceSession(
    onnxmodel.SerializeToString(),
    providers=['CUDAExecutionProvider'],
)

In [None]:
%%timeit
# Timed body: one run of session_gpu, feeding the host NumPy array under the
# input name 'input'. None as the first argument requests all model outputs.
result_onnx = session_gpu.run(None, {'input': input_array})

### OpenVINO Inference

In [None]:
import openvino.runtime as ov

# Hand the serialized ONNX model to OpenVINO, compile it for the CPU device,
# and create one inference request that the timed cell reuses on every loop.
core = ov.Core()
ovmodel = core.read_model(onnxmodel.SerializeToString())
compiled_model = core.compile_model(ovmodel, 'CPU')
infer_request = compiled_model.create_infer_request()

In [None]:
%%timeit
# Timed body: one synchronous OpenVINO inference on the reusable request.
# The NumPy array is fed directly, matching the ONNX Runtime cells and the
# async OpenVINO section, rather than passing a torch.Tensor to OpenVINO.
result_ov = infer_request.infer(input_array)

### OpenVINO Async Inference

In [None]:
# Pool of four independent inference requests on the same compiled model.
infer_requests = [compiled_model.create_infer_request() for _ in range(4)]

# Start every request before waiting on any of them, so they can be in
# flight at the same time. All requests receive the same input array.
# NOTE(review): share_inputs=True is forwarded to OpenVINO as-is; presumably
# it avoids copying the input buffer — confirm against the OpenVINO docs.
for request in infer_requests:
    request.start_async(input_array, share_inputs=True)

# Block on each request in turn and collect its 'output' result.
preds = []
for request in infer_requests:
    request.wait()
    preds.append(request.results['output'])

# Average the per-request outputs along the new leading (request) axis.
preds = np.stack(preds).mean(axis=0)