# Quantization of TIMM models with NNCF

This tutorial demonstrates how to use NNCF to apply INT8 quantization to models from [TIMM](https://github.com/rwightman/pytorch-image-models), a prominent repository of computer vision models.  
It assumes that OpenVINO™ is already installed and, for simplicity, uses the `resnet18` and `mobilenetv2_050` models from TIMM.  
The full list of available models can be obtained with `timm.list_models()`.

This tutorial consists of the following steps:
- Preparation
- Set analysis tool
- Load TIMM models
- Set NNCF config
- Export the model to onnx
- Run quantization & Check performance

## Preparation

In [None]:
# set environment
!pip install -r requirements.txt

In [None]:
import os
import re
import sys
import logging
import warnings

import timm
import torch
from nncf import NNCFConfig
from nncf.torch import create_compressed_model
from nncf.common.utils.logger import set_log_level

from texttable import Texttable

In [None]:
# Silence Python warnings so the notebook output stays readable.
warnings.filterwarnings(action='ignore')
set_log_level(logging.ERROR)  # Disables all NNCF info and warning messages

In [None]:
# Directory used for the model files produced by this notebook; created if missing.
dump_location = './models'
os.makedirs(dump_location, exist_ok=True)

## Set analysis tool
To estimate the share of integer operations in a model, [OpenVINO™ Model Analyzer](https://github.com/openvinotoolkit/model_analyzer) is used; throughput is measured with OpenVINO™ `benchmark_app`.  
If `model_analyzer` is not already present in the working directory, clone it there.

In [None]:
# clone OpenVINO Model Analyzer in the working directory
!git clone https://github.com/openvinotoolkit/model_analyzer.git

In [None]:
def benchmark_with_openvino(model_path):
    """Run ``benchmark_app`` on a model and extract the reported throughput.

    Args:
        model_path: Path of the model file passed to ``benchmark_app -m``.

    Returns:
        A ``(fps, output)`` tuple. ``fps`` is the throughput as a float, or
        ``None`` when the output contains no ``Throughput: ... FPS`` line.
        ``output`` is the captured standard output of the command.
    """
    command_line = 'benchmark_app -m {} -d CPU '.format(model_path)
    # NOTE: the command is executed through the shell, so model_path must
    # come from a trusted source.
    # The context manager closes the pipe and waits for the process to exit.
    with os.popen(command_line) as pipe:
        output = pipe.read()

    # Raw string: the previous "\:" was an invalid escape sequence.
    match = re.search(r"Throughput: (.+?) FPS", output)
    if match is not None:
        return float(match.group(1)), output

    return None, output

def analyze_model(model_path):
    """Run the Model Analyzer script on a model and compute its integer-ops ratio.

    Args:
        model_path: Path of the model file passed to ``model_analyzer.py --model``.

    Returns:
        A ``(ratio, output)`` tuple. ``ratio`` is ``GIOPs / (GFLOPs + GIOPs)``
        parsed from the script output, or ``None`` when either value is
        missing. ``output`` is the captured standard output of the command.
    """
    command_line = 'python model_analyzer/model_analyzer.py --model {} --ignore-unknown-layer'.format(model_path)
    # NOTE: the command is executed through the shell, so model_path must
    # come from a trusted source.
    # The context manager closes the pipe and waits for the process to exit.
    with os.popen(command_line) as pipe:
        output = pipe.read()

    # Raw strings: the previous "\:" was an invalid escape sequence.
    flops_match = re.search(r"GFLOPs: (.+?)\n", output)
    iops_match = re.search(r"GIOPs: (.+?)\n", output)
    if flops_match is None or iops_match is None:
        return None, output

    flops = float(flops_match.group(1))
    iops = float(iops_match.group(1))
    return iops / (flops + iops), output

In [None]:
# Results table; the header defines the six columns reported for each model.
table = Texttable()
table.header(["Model", "Methods", "Ops Ratio", "FP32 FPS", "Opt FPS", "Speedup"])

## Load models
To list the names of all available TIMM models, use `timm.list_models()`.  
To work with specific models only, put their names in the list returned by `get_model_list()`.  
For simplicity, this tutorial uses `resnet18` and `mobilenetv2_050`.

In [None]:
def get_model_list():
    """Return the names of the TIMM models processed by this tutorial."""
    # timm.list_models() would give the name of every available model instead.
    return ['resnet18', 'mobilenetv2_050']

def create_timm_model(name):
    """Create the TIMM model called ``name``.

    The model is requested with ``pretrained=True``, ``num_classes=1000``,
    ``in_chans=3`` and an empty ``checkpoint_path``.
    """
    return timm.create_model(
        name,
        num_classes=1000,
        in_chans=3,
        pretrained=True,
        checkpoint_path='',
    )

def cleanup(files):
    """Delete every dumped file whose path is listed in ``files``."""
    for dumped_path in files:
        # os.unlink is semantically identical to os.remove.
        os.unlink(dumped_path)

## Set NNCF configs
For simplicity, the NNCF config is kept minimal: it enables only the quantization algorithm, and the initializer sample counts are set to 0.

In [None]:
def optimize_with_nncf(model, save_here):
    """Wrap ``model`` with NNCF quantization and export the result to ``save_here``.

    Args:
        model: The model passed to ``create_compressed_model``.
        save_here: Destination path passed to the controller's ``export_model``.
    """
    # Only the quantization algorithm is configured for now.
    quantization_section = {
        "algorithm": "quantization",
        "quantize_inputs": True,
        "initializer": {
            "range": {"num_init_samples": 0},
            "batchnorm_adaptation": {"num_bn_adaptation_samples": 0},
        },
    }
    config_dict = {
        "input_info": {"sample_size": [1, 3, 224, 224]},
        "compression": quantization_section,
    }

    config = NNCFConfig.from_dict(config_dict)
    # The wrapped model is not used directly; the export goes through the controller.
    controller, _ = create_compressed_model(model, config)
    controller.export_model(save_here)

## Export the model to onnx

In [None]:
def export_to_onnx(model, save_here):
    """Export ``model`` to the file ``save_here`` via ``torch.onnx.export``.

    A random tensor of size (1, 3, 224, 224) is passed as the example input.
    """
    dummy_input = torch.randn(1, 3, 224, 224, requires_grad=True)
    export_options = {
        "export_params": True,
        "opset_version": 13,
        "do_constant_folding": False,
    }
    torch.onnx.export(model, dummy_input, save_here, **export_options)

## Run quantization & Check performance

In [None]:
# Names of the models that the following loop optimizes.
model_list = get_model_list()
print("Optimizing models from the list: {}".format(model_list))

In [None]:
# For every model: export it to ONNX, quantize it with NNCF, analyze the
# quantized model, benchmark both variants, and record one row in `table`.
for model_name in model_list:
    orig_model_path = os.path.join(dump_location, '{}_fp32.onnx'.format(model_name))
    opt_model_path = os.path.join(dump_location, '{}_opt.onnx'.format(model_name))

    # Columns that cannot be filled in stay 'N/A' in the final table.
    # The row width is taken from the header stored on `table`.
    result = ['N/A'] * len(table._header)
    result[0] = model_name
    result[1] = 'quantization'

    try:
        # set timm model
        model = create_timm_model(model_name)

        # export the model to onnx
        export_to_onnx(model, orig_model_path)

        # quantize the model with NNCF
        optimize_with_nncf(model, opt_model_path)

        # Analyze quantized model (the raw tool output is not needed here)
        ops_ratio, _ = analyze_model(opt_model_path)
        if ops_ratio is not None:
            result[2] = ops_ratio

        # Benchmark original model
        orig_model_perf, orig_bench_output = benchmark_with_openvino(orig_model_path)
        if orig_model_perf is None:
            print("Cannot measure performance for original model: {}\nDetails: {}\n".format(model_name, orig_bench_output))
            table.add_row(result)
            continue

        result[3] = orig_model_perf

        # Benchmark optimized model
        opt_model_perf, opt_bench_output = benchmark_with_openvino(opt_model_path)
        if opt_model_perf is None:
            print("Cannot measure performance for optimized model: {}\nDetails: {}\n".format(model_name, opt_bench_output))
            table.add_row(result)
            continue
        result[4] = opt_model_perf

        # Organize performance
        speedup = opt_model_perf / orig_model_perf
        print("Performance gain after applying optimizations to {}: {}".format(model_name, speedup))

        result[5] = '{:.2f}x'.format(speedup)

        cleanup([orig_model_path, opt_model_path])  # Comment this to keep the resulted models

    except Exception as error:
        # Catch Exception rather than BaseException so that KeyboardInterrupt
        # and SystemExit still stop the loop instead of being swallowed.
        print("Unexpected error when optimizing model: {}. Details: {}".format(model_name, error))

    table.add_row(result)

print(table.draw())