# Post-Training Quantization by NNCF

This notebook is based on 'ImageNet training in PyTorch' example.

The goal of this notebook is to demonstrate how to use the Neural Network Compression Framework NNCF 8-bit quantization to optimize a PyTorch model for inference with OpenVINO Toolkit. The optimization process contains the following steps:

1. Evaluate the original model
2. Transform the original model to a quantizaed one
3. Export optimized and original models to ONNX
4. Compare perfomance of obtained FP32 and INT8 ONNXs

NOTE: This notebook requires C++

In [1]:
# On Windows, add the directory that contains cl.exe to the PATH to enable PyTorch to find the
# required C++ tools. This code assumes that Visual Studio 2019 is installed in the default
# directory. If you have a different C++ compiler, please add the correct path to os.environ["PATH"]
# directly.

# Adding the path to os.environ["LIB"] is not always required - it depends on the system's configuration

import sys

if sys.platform == "win32":
    import distutils.command.build_ext
    import os
    from pathlib import Path

    VS_INSTALL_DIR = r"C:/Program Files (x86)/Microsoft Visual Studio"
    cl_paths = sorted(list(Path(VS_INSTALL_DIR).glob("**/Hostx86/x64/cl.exe")))
    if len(cl_paths) == 0:
        raise ValueError(
            "Cannot find Visual Studio. This notebook requires C++. If you installed "
            "a C++ compiler, please add the directory that contains cl.exe to "
            "`os.environ['PATH']`"
        )
    else:
        # If multiple versions of MSVC are installed, get the most recent version
        cl_path = cl_paths[-1]
        vs_dir = str(cl_path.parent)
        os.environ["PATH"] += f"{os.pathsep}{vs_dir}"
        # Code for finding the library dirs from
        # https://stackoverflow.com/questions/47423246/get-pythons-lib-path
        d = distutils.core.Distribution()
        b = distutils.command.build_ext.build_ext(d)
        b.finalize_options()
        os.environ["LIB"] = os.pathsep.join(b.library_dirs)
        print(f"Added {vs_dir} to PATH")

In [2]:
import os
import sys
import time
import zipfile
from pathlib import Path
from urllib.request import urlretrieve

import torch
import torch.nn as nn
import torch.utils.data
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms


sys.path.append("../utils")
from notebook_utils import download_file

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using {device} device')

MODEL_DIR = Path('model')
OUTPUT_DIR = Path('output')
BASE_MODEL_NAME = 'resnet50'
IMAGE_SIZE = [64, 64]

OUTPUT_DIR.mkdir(exist_ok=True)
MODEL_DIR.mkdir(exist_ok=True)

# Paths where PyTorch, ONNX and will be stored
fp32_checkpoint_filename = Path(BASE_MODEL_NAME + '_fp32').with_suffix('.pth') 
fp32_onnx_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + '_fp32')).with_suffix(".onnx")
int8_onnx_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + '_int8')).with_suffix('.onnx') 

fp32_pth_url  = "https://storage.openvinotoolkit.org/repositories/nncf/openvino_notebook_ckpts/304_resnet50_fp32.pth"
download_file(fp32_pth_url, directory=MODEL_DIR, filename=fp32_checkpoint_filename)

  return f(*args, **kwds)
  return f(*args, **kwds)


Using cuda device


model/resnet50_fp32.pth:   0%|          | 0.00/91.5M [00:00<?, ?B/s]

PosixPath('/home/aleksei/nncf_work/openvino_notebooks/notebooks/304-pytorch-post-training-quantization/model/resnet50_fp32.pth')

### Download and Prepare Tiny ImageNet dataset

* 100k images of shape 3x64x64
* 200 different classes: snake, spider, cat, truck, grasshopper, gull, etc.

In [23]:
def download_tiny_imagenet_200(output_dir,
                               url='http://cs231n.stanford.edu/tiny-imagenet-200.zip',
                               tarname='tiny-imagenet-200.zip'):
    output_dir.mkdir(exist_ok=True)
    archive_path = output_dir / tarname
    download_file(url, directory=output_dir, filename=tarname)
    zip_ref = zipfile.ZipFile(archive_path, 'r')
    zip_ref.extractall(path=output_dir)
    zip_ref.close()
    print(f'Successfully downloaded and extracted dataset to: {output_dir}')

def create_validation_dir(dataset_dir):
    VALID_DIR = dataset_dir / 'val'
    val_img_dir = VALID_DIR / 'images'
    
    fp = open(VALID_DIR / 'val_annotations.txt', 'r')
    data = fp.readlines()
    
    val_img_dict = {}
    for line in data:
        words = line.split('\t')
        val_img_dict[words[0]] = words[1] 
    fp.close()
    
    for img, folder in val_img_dict.items():
        newpath = val_img_dir/ folder
        if not newpath.exists():
            os.makedirs(newpath)
        if (val_img_dir/ img).exists():
            os.rename(val_img_dir/ img, newpath/ img)
    
DATASET_DIR = OUTPUT_DIR / 'tiny-imagenet-200'
if not DATASET_DIR.exists():
    download_tiny_imagenet_200(OUTPUT_DIR)
    create_validation_dir(DATASET_DIR)

### Helpers classes and functions
These will help us to count accuracy and visualize validation process

In [4]:
class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

# Validation function

In [38]:
def validate(val_loader, model):
    batch_time = AverageMeter('Time', ':3.3f')
    top1 = AverageMeter('Acc@1', ':2.2f')
    top5 = AverageMeter('Acc@5', ':2.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()
    model.to(device)

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.to(device)
            target = target.to(device)
            
            # compute output
            output = model(images)
            
            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            print_frequency = 10
            if i % print_frequency == 0:
                progress.display(i)

        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))
    return top1.avg

### Create and load original uncompressed model

In [29]:
def create_model(model_path):
    model = models.resnet50(pretrained=True)
    # update the last FC layer for Tiny ImageNet number of classes
    NUM_CLASSES = 200
    model.fc = nn.Linear(in_features=2048, out_features=NUM_CLASSES, bias=True)
    model.to(device)
    if model_path.exists():
        checkpoint = torch.load(str(model_path), map_location='cpu')
        model.load_state_dict(checkpoint['state_dict'], strict=True)
    else:
        raise RuntimeEror('There is no checkpoint to load')
    return model

model = create_model(MODEL_DIR / fp32_checkpoint_filename)

### Create train and validation dataloaders 

In [28]:
def create_dataloaders(batch_size=128):
    train_dir = DATASET_DIR / 'train'
    val_dir = DATASET_DIR / 'val' / 'images'
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
    train_dir,
    transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        normalize,
    ]))
    val_dataset = datasets.ImageFolder(
        val_dir,
        transforms.Compose([
            transforms.Resize(IMAGE_SIZE),
            transforms.ToTensor(),
            normalize
        ]))

    train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True,
    num_workers=4, pin_memory=True, sampler=None)
    
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=4, pin_memory=True)
    return train_loader, val_loader

train_loader, val_loader = create_dataloaders()

### I. Evaluate the loaded model

In [45]:
acc1 = validate(val_loader, model)
print(f'Test accuracy of FP32 model: {acc1:.3f}')

Test: [ 0/79]	Time 0.475 (0.475)	Acc@1 83.59 (83.59)	Acc@5 93.75 (93.75)
Test: [10/79]	Time 0.272 (0.286)	Acc@1 56.25 (67.19)	Acc@5 85.16 (88.00)
Test: [20/79]	Time 0.265 (0.278)	Acc@1 68.75 (64.40)	Acc@5 84.38 (87.39)
Test: [30/79]	Time 0.266 (0.278)	Acc@1 53.12 (62.83)	Acc@5 76.56 (85.41)
Test: [40/79]	Time 0.267 (0.275)	Acc@1 66.41 (61.22)	Acc@5 89.84 (84.51)
Test: [50/79]	Time 0.265 (0.275)	Acc@1 63.28 (61.04)	Acc@5 88.28 (84.38)
Test: [60/79]	Time 0.268 (0.273)	Acc@1 66.41 (60.80)	Acc@5 87.50 (83.82)
Test: [70/79]	Time 0.256 (0.273)	Acc@1 50.00 (60.50)	Acc@5 78.91 (83.37)
 * Acc@1 61.040 Acc@5 83.990
Test accuracy of FP32 model: 61.040


### Export the FP32 model to ONNX, which is supported by OpenVINO™ Toolkit, to benchmark it in comparison with the INT8 model.

In [46]:
dummy_input = torch.randn(1, 3, *IMAGE_SIZE).to(device)
torch.onnx.export(model, dummy_input, fp32_onnx_path)
print(f"FP32 ONNX model was exported to {fp32_onnx_path}.")

FP32 ONNX model was exported to output/resnet50_fp32.onnx.


## II. Create and initialize quantization
NNCF enables post-training quantization using trainig dataset for the initialization process of recently added quantization layers. The framework is designed so that modifications to your original training code are minor. Quantization is the simplest scenario and requires only 3 modifications.

### 1. Configure NNCF parameters to specify compression

In [47]:
import nncf
from nncf import NNCFConfig
from nncf.torch import create_compressed_model
from nncf.torch import register_default_init_args

2021-10-12 14:45:17.514927: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
  curr=torch.__version__


In [48]:
#TODO: update NNCF config to config with preset with per-channel quantization
nncf_config_dict = {
    "input_info": {
        "sample_size": [1, 3, *IMAGE_SIZE]
    },
    "log_dir": str(OUTPUT_DIR), # log directory for NNCF-specific logging outputs
    "compression": {
        "algorithm": "quantization",
        "initializer": {
            "range": {
                "num_init_samples": 15000
            },
            "batchnorm_adaptation": {
                "num_bn_adaptation_samples": 4000
            }
        },
        "weights": {
            "per_channel": False
        },
        "activations": {
            "per_channel": False
        },
        "export_to_onnx_standard_ops": True
    }
}
nncf_config = NNCFConfig.from_dict(nncf_config_dict)

### 2. Provide data loader to initialize the values of quantization ranges and determine which activation should be signed or unsigned from the collected statistics using a given number of samples.

In [49]:
nncf_config = register_default_init_args(nncf_config, train_loader)

INFO:nncf:Please, provide execution parameters for optimal model initialization


### 3. Create a wrapped model ready for compression fine-tuning from a pre-trained FP32 model and configuration object.

In [50]:
compression_ctrl, model = create_compressed_model(model, nncf_config)

INFO:nncf:Wrapping module ResNet/Conv2d[conv1] by ResNet/NNCFConv2d[conv1]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/Bottleneck[0]/Conv2d[conv1] by ResNet/Sequential[layer1]/Bottleneck[0]/NNCFConv2d[conv1]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/Bottleneck[0]/Conv2d[conv2] by ResNet/Sequential[layer1]/Bottleneck[0]/NNCFConv2d[conv2]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/Bottleneck[0]/Conv2d[conv3] by ResNet/Sequential[layer1]/Bottleneck[0]/NNCFConv2d[conv3]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/Bottleneck[0]/Sequential[downsample]/Conv2d[0] by ResNet/Sequential[layer1]/Bottleneck[0]/Sequential[downsample]/NNCFConv2d[0]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/Bottleneck[1]/Conv2d[conv1] by ResNet/Sequential[layer1]/Bottleneck[1]/NNCFConv2d[conv1]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/Bottleneck[1]/Conv2d[conv2] by ResNet/Sequential[layer1]/Bottleneck[1]/NNCFConv2d[conv2]
INFO:nncf:Wrapping module ResNet/Sequenti

INFO:nncf:Collecting tensor statistics █                 | 13 / 118
INFO:nncf:Collecting tensor statistics ███               | 26 / 118
INFO:nncf:Collecting tensor statistics █████             | 39 / 118
INFO:nncf:Collecting tensor statistics ███████           | 52 / 118
INFO:nncf:Collecting tensor statistics ████████          | 65 / 118
INFO:nncf:Collecting tensor statistics ██████████        | 78 / 118
INFO:nncf:Collecting tensor statistics ████████████      | 91 / 118
INFO:nncf:Collecting tensor statistics ██████████████    | 104 / 118
INFO:nncf:Collecting tensor statistics ███████████████   | 117 / 118
INFO:nncf:Collecting tensor statistics ████████████████  | 118 / 118
INFO:nncf:Set sign: True and scale: [2.6400, ] for TargetType.OPERATOR_POST_HOOK /nncf_model_input_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK /nncf_model_input_0
INFO:nncf:Set sign: False and scale: [1.3751, ] for TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer1]/B

INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer3]/Bottleneck[0]/ReLU[relu]/relu__1
INFO:nncf:Set sign: True and scale: [2.2700, ] for TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer3]/Bottleneck[0]/BatchNorm2d[bn3]/batch_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer3]/Bottleneck[0]/BatchNorm2d[bn3]/batch_norm_0
INFO:nncf:Set sign: True and scale: [1.7111, ] for TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer3]/Bottleneck[0]/Sequential[downsample]/BatchNorm2d[1]/batch_norm_0
INFO:nncf:Performing signed activation quantization for: TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer3]/Bottleneck[0]/Sequential[downsample]/BatchNorm2d[1]/batch_norm_0
INFO:nncf:Set sign: False and scale: [2.5322, ] for TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer3]/Bottleneck[1]/ReLU[relu]/relu__0
INFO:nncf:Performing unsigned activation quantization

INFO:nncf:Set sign: False and scale: [13.9028, ] for TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer4]/Bottleneck[2]/ReLU[relu]/relu__2
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer4]/Bottleneck[2]/ReLU[relu]/relu__2
INFO:nncf:Set sign: False and scale: [11.9460, ] for TargetType.OPERATOR_POST_HOOK ResNet/AdaptiveAvgPool2d[avgpool]/adaptive_avg_pool2d_0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK ResNet/AdaptiveAvgPool2d[avgpool]/adaptive_avg_pool2d_0
INFO:nncf:Set sign: False and scale: [3.1914, ] for TargetType.OPERATOR_POST_HOOK ResNet/ReLU[relu]/relu__0
INFO:nncf:Performing unsigned activation quantization for: TargetType.OPERATOR_POST_HOOK ResNet/ReLU[relu]/relu__0
INFO:nncf:Set sign: False and scale: [2.4739, ] for TargetType.OPERATOR_POST_HOOK ResNet/Sequential[layer1]/Bottleneck[0]/ReLU[relu]/relu__2
INFO:nncf:Performing unsigned activation quantization for: Tar

INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer1]/Bottleneck[2]/NNCFConv2d[conv2]/conv2d_0
INFO:nncf:Set sign: True and scale: [0.2867, ] for TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer1]/Bottleneck[2]/NNCFConv2d[conv3]/conv2d_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer1]/Bottleneck[2]/NNCFConv2d[conv3]/conv2d_0
INFO:nncf:Set sign: True and scale: [0.3477, ] for TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer2]/Bottleneck[0]/NNCFConv2d[conv1]/conv2d_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer2]/Bottleneck[0]/NNCFConv2d[conv1]/conv2d_0
INFO:nncf:Set sign: True and scale: [0.2922, ] for TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer2]/Bottleneck[0]/NNCFConv2d[conv2]/conv2d_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEI

INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer3]/Bottleneck[3]/NNCFConv2d[conv3]/conv2d_0
INFO:nncf:Set sign: True and scale: [0.2772, ] for TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer3]/Bottleneck[4]/NNCFConv2d[conv1]/conv2d_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer3]/Bottleneck[4]/NNCFConv2d[conv1]/conv2d_0
INFO:nncf:Set sign: True and scale: [0.1970, ] for TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer3]/Bottleneck[4]/NNCFConv2d[conv2]/conv2d_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer3]/Bottleneck[4]/NNCFConv2d[conv2]/conv2d_0
INFO:nncf:Set sign: True and scale: [0.3223, ] for TargetType.OPERATION_WITH_WEIGHTS ResNet/Sequential[layer3]/Bottleneck[4]/NNCFConv2d[conv3]/conv2d_0
INFO:nncf:Performing signed weight quantization for: TargetType.OPERATION_WITH_WEI

### 4. Evaluate the new model on the validation set after initialization of quantization. The accuracy should be close to the accuracy of the floating-point FP32 model for a simple case like the one we are demonstrating now.

In [51]:
acc1 = validate(val_loader, model)
print(f'Accuracy of initialized INT8 model: {acc1:.3f}')

Test: [ 0/79]	Time 0.654 (0.654)	Acc@1 83.59 (83.59)	Acc@5 92.97 (92.97)
Test: [10/79]	Time 0.385 (0.413)	Acc@1 58.59 (66.41)	Acc@5 86.72 (87.14)
Test: [20/79]	Time 0.384 (0.401)	Acc@1 70.31 (64.06)	Acc@5 85.16 (87.13)
Test: [30/79]	Time 0.381 (0.396)	Acc@1 52.34 (62.02)	Acc@5 75.78 (84.95)
Test: [40/79]	Time 0.387 (0.394)	Acc@1 66.41 (60.50)	Acc@5 85.94 (84.05)
Test: [50/79]	Time 0.384 (0.393)	Acc@1 63.28 (60.75)	Acc@5 86.72 (83.92)
Test: [60/79]	Time 0.391 (0.392)	Acc@1 65.62 (60.37)	Acc@5 87.50 (83.29)
Test: [70/79]	Time 0.376 (0.391)	Acc@1 56.25 (60.16)	Acc@5 79.69 (82.90)
 * Acc@1 60.660 Acc@5 83.520
Accuracy of initialized INT8 model: 60.660


## III. Export INT8 model to ONNX

In [52]:
compression_ctrl.export_model(int8_onnx_path)
print(f"INT8 ONNX model exported to {int8_onnx_path}.")

  self.shape = tuple(int(dim) for dim in shape)  # Handle cases when shape is a tuple of Tensors
  if not self.is_enabled_quantization():
  return self._num_bits.item()
  return self.signed_tensor.item() == 1
  self.get_padding_value_ref().data.fill_(padding_value.item())
  if not self.get_padding_value_ref():


INT8 ONNX model exported to output/resnet50_int8.onnx.


## IV. Compare perfomance of ONNX's INT8 model and FP32 model in OpenVINO Execution Provider

In [53]:
import numpy as np
import onnxruntime as rt

In [54]:
def measure_perfomance(session, image_size, runs=10):
    input_name = session.get_inputs()[0].name
    total = 0.0
    input_data = np.zeros((1, 3, *IMAGE_SIZE), np.float32)
    # Warming up
    _ = session.run([], {input_name: input_data})
    for i in range(runs):
        start = time.perf_counter()
        _ = session.run([], {input_name: input_data})
        end = (time.perf_counter() - start) * 1000
        total += end
        
    total /= runs
    print(f"Avg: {total:.2f}ms")
    return total

In [55]:
session = rt.InferenceSession(str(fp32_onnx_path))
session.set_providers(['OpenVINOExecutionProvider'])
fp32_inf_time = measure_perfomance(session, IMAGE_SIZE, runs=100)

ValueError: ['OpenVINOExecutionProvider'] does not contain a subset of available providers ['CPUExecutionProvider']

In [None]:
session = rt.InferenceSession(str(int8_onnx_path))
session.set_providers(['OpenVINOExecutionProvider'])
int8_inf_time = measure_perfomance(session, IMAGE_SIZE, runs=100)

In [None]:
gain_coefficient = fp32_inf_time / int8_inf_time
print (f'Gain coefficient is = {gain_coefficient}')