In [1]:
import os
import sys
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook

import onnx
from onnxsim import simplify
import onnxruntime
from onnxruntime.quantization import (quantize_dynamic,
                                      QuantType,
                                      QuantFormat,
                                      quantize_static,
                                      CalibrationDataReader)

sys.path.append('../')
from platforms.core.config import cfg
from siamfcpp.pipeline.utils import (cxywh2xywh, get_crop,
                                     get_subwindow_tracking,
                                     xywh2cxywh, xyxy2cxywh)
from siamfcpp.model.common_opr.common_block import xcorr_depthwise

In [2]:
def to_bchw(im_patch):
    im_patch = im_patch.transpose(2, 0, 1)
    im_patch = im_patch[np.newaxis, :, :, :]
    return im_patch.astype(np.float32)

### Dynamic

In [3]:
backbone_init_fp32 = "../models/onnx/backbone_init.onnx"
backbone_init_uint8 = "../models/onnx_dynamic/backbone_init.onnx"

backbone_fp32 = "../models/onnx/backbone.onnx"
backbone_uint8 = "../models/onnx_dynamic/backbone.onnx"

head_fp32 = "../models/onnx/head_5.onnx"
head_uint8 = "../models/onnx_dynamic/head_5.onnx"

In [4]:
# quantized_model = quantize_dynamic(backbone_init_fp32, backbone_init_uint8, weight_type=QuantType.QUInt8)
# quantized_model = quantize_dynamic(backbone_fp32, backbone_uint8, weight_type=QuantType.QUInt8)
quantized_model = quantize_dynamic(head_fp32, head_uint8, weight_type=QuantType.QUInt8)

### Static

*backbone_init*

In [3]:
model_fp32 = "../models/onnx/backbone_init.onnx"
model_quant = "../models/onnx_static/backbone_init.onnx"

In [4]:
calibration_image_folder = "C:\\Users\\isd.illia.maliha\\work\\sorted_datasets\\background"

In [5]:
def _preprocess_images(images_folder: str, size_limit=0):
    image_names = os.listdir(images_folder)
    
    if size_limit > 0 and len(image_names) >= size_limit:
        batch_filenames = [image_names[i] for i in range(size_limit)]
    else:
        batch_filenames = image_names
        
    unconcatenated_batch_data = []


    for image_name in tqdm_notebook(batch_filenames[:1]):
        image_filepath = images_folder + "/" + image_name
        image = cv2.imread(image_filepath).astype(np.float32)
        
        h,w,_ = image.shape
        x = np.random.randint(0,0.7*w)
        y = np.random.randint(0,0.7*h)
        ww = np.random.randint(25,w-x)
        hh = np.random.randint(25,h-y)
        
        box = xywh2cxywh([x,y,ww,hh])
        target_pos, target_sz = box[:2], box[2:]

        avg_chans = np.mean(image, axis=(0, 1))

        im_z_crop, _ = get_crop(
            image,
            target_pos,
            target_sz,
            127,
            avg_chans=avg_chans,
            context_amount=0.5,
            func_get_subwindow=get_subwindow_tracking,
        )

        im_z_crop = to_bchw(im_z_crop)

        unconcatenated_batch_data.append(im_z_crop)
        
    batch_data = np.concatenate(np.expand_dims(unconcatenated_batch_data, axis=0), axis=0)
    
    return batch_data


class DataReader(CalibrationDataReader):
    def __init__(self, calibration_image_folder: str, model_path: str):
        self.enum_data = None

        # Use inference session to get input shape.
        session = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])

        # Convert image to input data
        self.nchw_data_list = _preprocess_images(calibration_image_folder, size_limit=0)
        
        self.input_name = session.get_inputs()[0].name
        self.datasize = len(self.nchw_data_list)
    

    def get_next(self):
        if self.enum_data is None:
            self.enum_data = iter(
                [{self.input_name: self.nchw_data_list[idx]} for idx in range(self.datasize)]
            )
        return next(self.enum_data, None)

    def rewind(self):
        self.enum_data = None

In [6]:
reader = DataReader(calibration_image_folder, model_fp32)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if sys.path[0] == '':


  0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
quantized_model = quantize_static(model_fp32,
                                  model_quant,
                                  reader,
                                  quant_format=QuantFormat.QDQ,
                                  activation_type=QuantType.QInt8,
                                  weight_type=QuantType.QInt8,)

-------

---------------------------

-----------------------------

*backbone*

In [8]:
model_fp32 = "../models/onnx/backbone.onnx"
model_quant = "../models/onnx_static/backbone.onnx"

In [9]:
calibration_image_folder = "C:\\Users\\isd.illia.maliha\\work\\sorted_datasets\\background"

In [10]:
def _preprocess_images(images_folder: str, size_limit=0):
    image_names = os.listdir(images_folder)
    
    if size_limit > 0 and len(image_names) >= size_limit:
        batch_filenames = [image_names[i] for i in range(size_limit)]
    else:
        batch_filenames = image_names
        
    unconcatenated_batch_data = []


    for image_name in tqdm_notebook(batch_filenames[:1]):
        image_filepath = images_folder + "/" + image_name
        image = cv2.imread(image_filepath).astype(np.float32)
        
        h,w,_ = image.shape
        x = np.random.randint(0,0.7*w)
        y = np.random.randint(0,0.7*h)
        ww = np.random.randint(25,w-x)
        hh = np.random.randint(25,h-y)
        
        box = xywh2cxywh([x,y,ww,hh])
        target_pos, target_sz = box[:2], box[2:]

        avg_chans = np.mean(image, axis=(0, 1))

        im_x_crop, scale_x = get_crop(
            image,
            target_pos,
            target_sz,
            127,
            x_size=303,
            avg_chans=avg_chans,
            context_amount=cfg.context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )

        im_x_crop = to_bchw(im_x_crop)

        unconcatenated_batch_data.append(im_x_crop)
        
    batch_data = np.concatenate(np.expand_dims(unconcatenated_batch_data, axis=0), axis=0)
    
    return batch_data


class DataReader(CalibrationDataReader):
    def __init__(self, calibration_image_folder: str, model_path: str):
        self.enum_data = None

        # Use inference session to get input shape.
        session = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])

        # Convert image to input data
        self.nchw_data_list = _preprocess_images(calibration_image_folder, size_limit=0)
        
        self.input_name = session.get_inputs()[0].name
        self.datasize = len(self.nchw_data_list)
    

    def get_next(self):
        if self.enum_data is None:
            self.enum_data = iter(
                [{self.input_name: self.nchw_data_list[idx]} for idx in range(self.datasize)]
            )
        return next(self.enum_data, None)

    def rewind(self):
        self.enum_data = None

In [11]:
reader = DataReader(calibration_image_folder, model_fp32)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if sys.path[0] == '':


  0%|          | 0/1 [00:00<?, ?it/s]

In [12]:
quantized_model = quantize_static(model_fp32,
                                  model_quant,
                                  reader,
                                  quant_format=QuantFormat.QDQ,
                                  activation_type=QuantType.QInt8,
                                  weight_type=QuantType.QInt8,)

-----------------------

--------------------

------------------

*head*

In [8]:
backbone_init_fp32 = "../models/onnx/backbone_init.onnx"
backbone_fp32 = "../models/onnx/backbone.onnx"
model_fp32 = "../models/onnx/head_5.onnx"
model_opt = "../models/onnx/head_opt.onnx"

model_quant = "../models/onnx_static/head_5.onnx"

In [9]:
calibration_image_folder = "C:\\Users\\isd.illia.maliha\\work\\sorted_datasets\\background"

In [10]:
def _preprocess_images(backbone_init_path: str, backbone_path: str, images_folder: str, size_limit=0):
    
    bone_init = onnxruntime.InferenceSession(backbone_init_path, providers=['CPUExecutionProvider'])
    bone = onnxruntime.InferenceSession(backbone_path, providers=['CPUExecutionProvider'])
    
    image_names = os.listdir(images_folder)
    
    if size_limit > 0 and len(image_names) >= size_limit:
        batch_filenames = [image_names[i] for i in range(size_limit)]
    else:
        batch_filenames = image_names
        
    in1,in2 = [],[]
    

    for image_name in tqdm_notebook(batch_filenames[:1]):
        image_filepath = images_folder + "/" + image_name
        image = cv2.imread(image_filepath).astype(np.float32)
        
        h,w,_ = image.shape
        x = np.random.randint(0,0.7*w)
        y = np.random.randint(0,0.7*h)
        ww = np.random.randint(25,w-x)
        hh = np.random.randint(25,h-y)
        
        box = xywh2cxywh([x,y,ww,hh])
        target_pos, target_sz = box[:2], box[2:]

        avg_chans = np.mean(image, axis=(0, 1))

        im_z_crop, _ = get_crop(
            image,
            target_pos,
            target_sz,
            127,
            avg_chans=avg_chans,
            context_amount=0.5,
            func_get_subwindow=get_subwindow_tracking,
        )
        im_z_crop = to_bchw(im_z_crop)
        
        c_z_k, r_z_k = bone_init.run(None, {'input': im_z_crop})
    
    

        im_x_crop, scale_x = get_crop(
            image,
            target_pos,
            target_sz,
            127,
            x_size=303,
            avg_chans=avg_chans,
            context_amount=cfg.context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        im_x_crop = to_bchw(im_x_crop)
    
        c_x, r_x = bone.run(None, {'input': im_x_crop})
        
        c_out = xcorr_depthwise(torch.Tensor(c_x), torch.Tensor(c_z_k))
        r_out = xcorr_depthwise(torch.Tensor(r_x), torch.Tensor(r_z_k))
        
        in1.append(c_out.numpy())
        in2.append(r_out.numpy())

        
    batch_data1 = np.concatenate(np.expand_dims(in1, axis=0), axis=0)
    batch_data2 = np.concatenate(np.expand_dims(in2, axis=0), axis=0)
    
    return [batch_data1, batch_data2]


class DataReader(CalibrationDataReader):
    def __init__(self, calibration_image_folder: str, model_path: str, backbone_init_path: str, backbone_path: str):
        self.enum_data = None

        # Use inference session to get input shape.
        session = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])

        # Convert image to input data
        self.nchw_data_list = _preprocess_images(backbone_init_path, backbone_path, calibration_image_folder, size_limit=0)
        
        self.input_names = [session.get_inputs()[i].name for i in range(2)]
        self.datasize = len(self.nchw_data_list[0])
    

    def get_next(self):
        if self.enum_data is None:
            self.enum_data = iter(
                [{self.input_names[0]: self.nchw_data_list[0][idx],
                  self.input_names[1]: self.nchw_data_list[1][idx]} for idx in range(self.datasize)]
            )
        return next(self.enum_data, None)

    def rewind(self):
        self.enum_data = None

In [11]:
!python -m onnxruntime.quantization.preprocess --input "../models/onnx/head_5.onnx" --output "../models/onnx/head_opt.onnx"

In [12]:
reader = DataReader(calibration_image_folder, model_opt, backbone_init_fp32, backbone_fp32)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for image_name in tqdm_notebook(batch_filenames[:1]):


  0%|          | 0/1 [00:00<?, ?it/s]

In [13]:
quantized_model = quantize_static(model_opt,
                                  model_quant,
                                  reader,
                                  quant_format=QuantFormat.QDQ,
                                  activation_type=QuantType.QInt8,
                                  weight_type=QuantType.QInt8,)

----------------------

#### One input

In [14]:
from siamfcpp.config.config_head import cfg as root_cfg
from siamfcpp.config.config_head import specify_task 
from siamfcpp.engine.builder import build as tester_builder
from siamfcpp.model import builder_head as model_builder
from siamfcpp.pipeline import builder as pipeline_builder
from siamfcpp.utils import complete_path_wt_root_in_cfg

In [15]:
config = '../models/siamfcpp/test/vot/siamfcpp_alexnet.yaml'
model_path = '../models/snapshots/siamfcpp_alexnet-got/epoch-17.pkl'

In [16]:
model_one = "../models/onnx/head_5_one.onnx"
model_fp32 = "../models/onnx/head_5.onnx"

In [17]:
exp_cfg_path = os.path.realpath(config)
root_cfg.merge_from_file(exp_cfg_path)
root_cfg.test.track.model.task_model.SiamTrack.pretrain_model_path = model_path

root_cfg = root_cfg.test
task, task_cfg = specify_task(root_cfg)
task_cfg.freeze()

model = model_builder.build("track", task_cfg.model)
torch_model = model.head
torch_model.eval()

onnx_model = onnxruntime.InferenceSession(model_fp32, providers=['CPUExecutionProvider'])

2023-03-14 10:32:43.200 | INFO     | siamfcpp.model.module_base:update_params:60 - Load pretrained SiamTrack parameters from: ../models/snapshots/siamfcpp_alexnet-got/epoch-17.pkl whose md5sum is 2a050730626f1b083baed91f9a5c4a52


In [18]:
inp1 = torch.Tensor(np.random.uniform(size=(1,256,23,23)).astype(np.float32))
inp2 = torch.Tensor(np.random.uniform(size=(1,256,23,23)).astype(np.float32))

In [19]:
torch_out2 = model.head(inp1, inp2) 
onnx_out2 = onnx_model.run(None, {'input1': inp1.numpy(),
                                  'input2': inp2.numpy()})


for idx in range(4):
    print(torch.sum(torch_out2[idx]-torch.Tensor(onnx_out2[idx])))

tensor(-3.69548798e-06, grad_fn=<SumBackward0>)
tensor(-1.51991844e-05, grad_fn=<SumBackward0>)
tensor(-7.90357590e-05, grad_fn=<SumBackward0>)
tensor(-9.24681081e-05, grad_fn=<SumBackward0>)


In [20]:
class MyModel(nn.Module):
    def __init__(self, main_model):
        super(MyModel, self).__init__()
        self.main_model = main_model
        
    def forward(self, x):
        x1,x2 = torch.split(x, 256, dim=1)
        out = self.main_model(x1,x2)
        return out

In [21]:
inp = torch.cat([inp1.clone(), inp2.clone()], dim=1)

model = MyModel(torch_model)
model.eval()

torch_out1 = model(inp)

for idx in range(4):
    print(torch.sum(torch_out2[idx]-torch_out1[idx]))

tensor(0., grad_fn=<SumBackward0>)
tensor(0., grad_fn=<SumBackward0>)
tensor(0., grad_fn=<SumBackward0>)
tensor(0., grad_fn=<SumBackward0>)


In [22]:
torch.onnx.export(model, 
                  inp,
                  model_one, 
                  input_names=['input'],
                  output_names=['csl_score', 'ctr_score', 'offsets', 'fea'],
                  verbose=True, 
                  export_params=True, 
                  do_constant_folding=True,
                  opset_version=11) 

simplified_model, check = simplify(model_one, skip_fuse_bn=False)
onnx.save_model(simplified_model, model_one)

graph(%input : Float(1, 512, 23, 23, strides=[270848, 529, 23, 1], requires_grad=0, device=cpu),
      %main_model.cls_p5_conv1.conv.weight : Float(256, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=1, device=cpu),
      %main_model.cls_p5_conv1.conv.bias : Float(256, strides=[1], requires_grad=1, device=cpu),
      %main_model.bbox_p5_conv1.conv.weight : Float(256, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=1, device=cpu),
      %main_model.bbox_p5_conv1.conv.bias : Float(256, strides=[1], requires_grad=1, device=cpu),
      %main_model.cls_p5_conv2.conv.weight : Float(256, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=1, device=cpu),
      %main_model.cls_p5_conv2.conv.bias : Float(256, strides=[1], requires_grad=1, device=cpu),
      %main_model.bbox_p5_conv2.conv.weight : Float(256, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=1, device=cpu),
      %main_model.bbox_p5_conv2.conv.bias : Float(256, strides=[1], requires_grad=1, device=cpu),
      %87 : Float(256, 

In [23]:
onnx_model_one = onnxruntime.InferenceSession(model_one, providers=['CPUExecutionProvider'])

In [24]:
onnx_out1 = onnx_model_one.run(None, {'input': inp.numpy()})


for idx in range(4):
    print(torch.sum(torch.Tensor(onnx_out1[idx])-torch.Tensor(onnx_out2[idx])))

tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)


###### Quant

In [25]:
backbone_init_fp32 = "../models/onnx/backbone_init.onnx"
backbone_fp32 = "../models/onnx/backbone.onnx"
model_fp32 = "../models/onnx/head_5_one.onnx"
model_opt = "../models/onnx/head_opt.onnx"

model_quant = "../models/onnx_static/head_5_one.onnx"

In [26]:
calibration_image_folder = "C:\\Users\\isd.illia.maliha\\work\\sorted_datasets\\background"

In [27]:
def _preprocess_images(backbone_init_path: str, backbone_path: str, images_folder: str, size_limit=0):
    
    bone_init = onnxruntime.InferenceSession(backbone_init_path, providers=['CPUExecutionProvider'])
    bone = onnxruntime.InferenceSession(backbone_path, providers=['CPUExecutionProvider'])
    
    image_names = os.listdir(images_folder)
    
    if size_limit > 0 and len(image_names) >= size_limit:
        batch_filenames = [image_names[i] for i in range(size_limit)]
    else:
        batch_filenames = image_names
        
    inp = []
    

    for image_name in tqdm_notebook(batch_filenames[:1]):
        image_filepath = images_folder + "/" + image_name
        image = cv2.imread(image_filepath).astype(np.float32)
        
        h,w,_ = image.shape
        x = np.random.randint(0,0.7*w)
        y = np.random.randint(0,0.7*h)
        ww = np.random.randint(25,w-x)
        hh = np.random.randint(25,h-y)
        
        box = xywh2cxywh([x,y,ww,hh])
        target_pos, target_sz = box[:2], box[2:]

        avg_chans = np.mean(image, axis=(0, 1))

        im_z_crop, _ = get_crop(
            image,
            target_pos,
            target_sz,
            127,
            avg_chans=avg_chans,
            context_amount=0.5,
            func_get_subwindow=get_subwindow_tracking,
        )
        im_z_crop = to_bchw(im_z_crop)
        
        c_z_k, r_z_k = bone_init.run(None, {'input': im_z_crop})
    
    

        im_x_crop, scale_x = get_crop(
            image,
            target_pos,
            target_sz,
            127,
            x_size=303,
            avg_chans=avg_chans,
            context_amount=cfg.context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        im_x_crop = to_bchw(im_x_crop)
    
        c_x, r_x = bone.run(None, {'input': im_x_crop})
        
        c_out = xcorr_depthwise(torch.Tensor(c_x), torch.Tensor(c_z_k))
        r_out = xcorr_depthwise(torch.Tensor(r_x), torch.Tensor(r_z_k))
        
        out = torch.cat([c_out, r_out], dim=1)
        
        inp.append(out.numpy())

        
    batch_data = np.concatenate(np.expand_dims(inp, axis=0), axis=0)
    
    return batch_data


class DataReader(CalibrationDataReader):
    def __init__(self, calibration_image_folder: str, model_path: str, backbone_init_path: str, backbone_path: str):
        self.enum_data = None

        # Use inference session to get input shape.
        session = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])

        # Convert image to input data
        self.nchw_data_list = _preprocess_images(backbone_init_path, backbone_path, calibration_image_folder, size_limit=0)
        
        self.input_name = session.get_inputs()[0].name
        self.datasize = len(self.nchw_data_list)
    

    def get_next(self):
        if self.enum_data is None:
            self.enum_data = iter(
                [{self.input_name: self.nchw_data_list[idx]} for idx in range(self.datasize)]
            )
        return next(self.enum_data, None)

    def rewind(self):
        self.enum_data = None

In [28]:
!python -m onnxruntime.quantization.preprocess --input "../models/onnx/head_5_one.onnx" --output "../models/onnx/head_opt.onnx"

In [29]:
reader = DataReader(calibration_image_folder, model_opt, backbone_init_fp32, backbone_fp32)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for image_name in tqdm_notebook(batch_filenames[:1]):


  0%|          | 0/1 [00:00<?, ?it/s]

In [30]:
quantized_model = quantize_static(model_opt,
                                  model_quant,
                                  reader,
                                  quant_format=QuantFormat.QDQ,
                                  activation_type=QuantType.QInt8,
                                  weight_type=QuantType.QInt8,)

simplified_model, check = simplify(model_quant, skip_fuse_bn=False)
onnx.save_model(simplified_model, model_quant)

In [31]:
session = onnxruntime.InferenceSession(model_quant, providers=['CPUExecutionProvider'])

for x in session.get_outputs():
    print(x.name)

csl_score
ctr_score
offsets
fea
