In [1]:
import os
import json
import numpy as np

### Loading model

In [2]:
from aimet_torch.batch_norm_fold import fold_all_batch_norms
from torchvision.models import resnet18
import torch
# Fix the torch RNG so the random dummy inputs created later are repeatable.
torch.manual_seed(0)

# Pretrained ResNet-18; moved to the GPU only when one is actually available.
model = resnet18(pretrained=True)
use_cuda = torch.cuda.is_available()
if use_cuda:
    model.to(torch.device('cuda'))

# Fold batch-norm layers into the model in place; the return value is not needed here.
_ = fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))

# Display which device path the rest of the notebook will take.
use_cuda

2025-01-07 08:15:07,651 - root - INFO - AIMET




[2025-01-07 08:15:11,726] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cpu (auto detect)


False

In [3]:
# Root directory handed to the ImageNet example loader/evaluator below.
# NOTE(review): hard-coded absolute path on one machine; make this configurable before sharing.
DATASET_DIR = '/home/shayan/Desktop/aimet/Examples/torch/quantization/'
import sys
# Temporarily put a local AIMET checkout on sys.path so the `Examples` package resolves.
# NOTE(review): this checkout (.../Desktop/temp/aimet/) differs from the one DATASET_DIR
# points into (.../Desktop/aimet/) -- confirm that is intentional.
sys.path.append("/home/shayan/Desktop/temp/aimet/")

import os
import torch
from Examples.common import image_net_config
from Examples.torch.utils.image_net_evaluator import ImageNetEvaluator
from Examples.torch.utils.image_net_trainer import ImageNetTrainer
from Examples.torch.utils.image_net_data_loader import ImageNetDataLoader

# Undo the path tweak once the imports have been resolved so it does not leak into later cells.
sys.path.remove("/home/shayan/Desktop/temp/aimet/")

class ImageNetDataPipeline:
    """
    Convenience wrappers around the ImageNet example utilities, configured from
    ``DATASET_DIR`` and ``image_net_config``.
    """

    @staticmethod
    def get_val_dataloader() -> torch.utils.data.DataLoader:
        """
        Instantiates a validation dataloader for ImageNet dataset and returns it
        """
        data_loader = ImageNetDataLoader(DATASET_DIR,
                                         image_size=image_net_config.dataset['image_size'],
                                         batch_size=image_net_config.evaluation['batch_size'],
                                         is_training=False,
                                         num_workers=image_net_config.evaluation['num_workers']).data_loader
        return data_loader

    @staticmethod
    def evaluate(model: torch.nn.Module, use_cuda: bool, iterations: int = None) -> float:
        """
        Given a torch model, evaluates its Top-1 accuracy on the dataset
        :param model: the model to evaluate
        :param use_cuda: whether or not the GPU should be used.
        :param iterations: the number of batches to be used to evaluate the model. A value of 'None' (the default)
                           means the model will be evaluated on the entire dataset once.
        :return: whatever ``ImageNetEvaluator.evaluate`` returns for the model
        """
        evaluator = ImageNetEvaluator(DATASET_DIR, image_size=image_net_config.dataset['image_size'],
                                      batch_size=image_net_config.evaluation['batch_size'],
                                      num_workers=image_net_config.evaluation['num_workers'])

        # `iterations` used to be documented but hard-wired to None; it is now forwarded,
        # with None as the default so existing two-argument calls behave exactly as before.
        return evaluator.evaluate(model, iterations=iterations, use_cuda=use_cuda)

In [4]:
def pass_calibration_data(sim_model, use_cuda):
    """
    Forward-pass callback used for calibration.

    Puts the model in eval mode and runs exactly one batch from the validation
    dataloader through it with gradients disabled, printing that batch's path entry.

    :param sim_model: the model to run the calibration batch through
    :param use_cuda: when truthy the batch is moved to the 'cuda' device, otherwise to 'cpu'
    """
    data_loader = ImageNetDataPipeline.get_val_dataloader()
    device = torch.device('cuda' if use_cuda else 'cpu')

    sim_model.eval()
    with torch.no_grad():
        # The loader yields (path, input, target) triples; the target is not needed here.
        for path, input_data, _target_data in data_loader:
            print(path)
            sim_model(input_data.to(device))
            # Deliberately stop after the first batch.
            break


In [5]:
from aimet_common.defs import QuantScheme
from aimet_torch.v1.quantsim import QuantizationSimModel
from copy import deepcopy

# Random placeholder input; each ImageNet sample is (3 channels) x (224 height) x (224 width).
dummy_input = torch.rand(1, 3, 224, 224)
dummy_input = dummy_input.cuda() if use_cuda else dummy_input

# Quantization sim built on a copy of the model so the original stays untouched.
# NOTE(review): config_file is a hard-coded absolute path on one machine.
sim = QuantizationSimModel(
    model=deepcopy(model),
    quant_scheme=QuantScheme.post_training_tf,
    dummy_input=dummy_input,
    default_output_bw=8,
    default_param_bw=8,
    config_file="/home/shayan/Desktop/aimet/my_config.json",
)

# Calibrate using the callback defined earlier, with use_cuda passed as its argument.
sim.compute_encodings(
    forward_pass_callback=pass_calibration_data,
    forward_pass_callback_args=use_cuda,
)

2025-01-07 08:15:12,496 - Quant - INFO - Unsupported op type Squeeze
2025-01-07 08:15:12,496 - Quant - INFO - Unsupported op type Mean
2025-01-07 08:15:12,498 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default
('/home/shayan/Desktop/aimet/Examples/torch/quantization/val/9/cf135f199d8c7a9d0dce9aa35acfb4c70c14e0aa.jpeg',)
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
op = <aimet_common.AimetTensorQuantizer.AimetTensorQuantizer object at 0x7b861c88cc70> 	 encoding = <aimet_common._libpymo.TfEncoding object at 0x7b861c88c030> 	 is_encoding_valid = True


BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
op = <aimet_common.AimetTensorQuantizer.AimetTensorQuantizer object at 0x7b861c88d9b0> 	 encoding = <aimet_common._libpymo.TfEncoding object at 0x7b8516349fb0> 	 is_encoding_valid = True


BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
op = <aimet_common.AimetTensorQuantizer.AimetTensorQuantizer object at 0x7b861c88d970> 	 encoding = <aimet_common._libpymo.TfEncoding obje

In [6]:
from aimet_common.defs import QuantScheme
from aimet_torch.v1.quantsim import QuantizationSimModel
from copy import deepcopy

# Random placeholder input; each ImageNet sample is (3 channels) x (224 height) x (224 width).
dummy_input = torch.rand(1, 3, 224, 224)
dummy_input = dummy_input.cuda() if use_cuda else dummy_input

# Same setup as the post_training_tf sim, but with the tf_enhanced quant scheme.
# NOTE(review): config_file is a hard-coded absolute path on one machine.
tf_enhanced_sim = QuantizationSimModel(
    model=deepcopy(model),
    quant_scheme=QuantScheme.post_training_tf_enhanced,
    dummy_input=dummy_input,
    default_output_bw=8,
    default_param_bw=8,
    config_file="/home/shayan/Desktop/aimet/my_config.json",
)

# Calibrate using the callback defined earlier, with use_cuda passed as its argument.
tf_enhanced_sim.compute_encodings(
    forward_pass_callback=pass_calibration_data,
    forward_pass_callback_args=use_cuda,
)

# Stats histogram of the first output quantizer on the second-to-last named module.
second_to_last_module = list(dict(tf_enhanced_sim.model.named_modules()).items())[-2][1]
histogram = second_to_last_module.output_quantizers[0].get_stats_histogram()[0]

# Difference between the first field of each pair of consecutive histogram entries.
steps = [histogram[k + 1][0] - histogram[k][0] for k in range(len(histogram) - 1)]

# A single distinct value here means the entries are evenly spaced.
set(steps)

2025-01-07 08:15:12,853 - Quant - INFO - Unsupported op type Squeeze
2025-01-07 08:15:12,854 - Quant - INFO - Unsupported op type Mean
2025-01-07 08:15:12,855 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default
('/home/shayan/Desktop/aimet/Examples/torch/quantization/val/9/cf135f199d8c7a9d0dce9aa35acfb4c70c14e0aa.jpeg',)
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
op = <aimet_common.AimetTensorQuantizer.AimetTensorQuantizer object at 0x7b861c4118b0> 	 encoding = <aimet_common._libpymo.TfEncoding object at 0x7b861c8744f0> 	 is_encoding_valid = True


BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
op = <aimet_common.AimetTensorQuantizer.AimetTensorQuantizer object at 0x7b861c4118f0> 	 encoding = <aimet_common._libpymo.TfEncoding object at 0x7b861c874cb0> 	 is_encoding_valid = True


BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
op = <aimet_common.AimetTensorQuantizer.AimetTensorQuantizer object at 0x7b861c411970> 	 encoding = <aimet_common._libpymo.TfEncoding obje

{0.10889194905757904}

In [7]:
# Keep only the populated histogram entries, ordered by their first field.
# The original called sorted() and threw the result away, so nothing guaranteed the
# ordering that the first/last lookups below rely on.
temp = sorted(
    ((key, value) for key, value in histogram if value > 0),
    key=lambda entry: entry[0],
)
if not temp:
    raise ValueError("histogram has no entries with a value > 0; cannot derive abs_max")

# Largest magnitude among the outermost populated entries.
# NOTE(review): this uses entry[0] of the last populated entry as-is; if that field is a
# bucket's left edge, the bucket's width is not included -- confirm that is intended.
abs_max = max(abs(temp[0][0]), abs(temp[-1][0]))
abs_max

12.294080898165703

In [8]:
# Spacing between consecutive histogram entries (first field of each entry).
steps = [right[0] - left[0] for left, right in zip(histogram, histogram[1:])]

# A single distinct value here means the entries are evenly spaced.
set(steps)

{0.10889194905757904}

In [9]:
# Show the first output quantizer of the second-to-last named module of the TF sim.
tf_named_modules = dict(sim.model.named_modules())
tf_second_to_last_module = list(tf_named_modules.items())[-2][1]
print(tf_second_to_last_module.output_quantizers[0])

StaticGrid TensorQuantizer:
    quant-scheme:QuantScheme.post_training_tf, round_mode=RoundingMode.ROUND_NEAREST, bitwidth=8, enabled=True
    min:-11.122593879699707, max=11.122593879699707, delta=0.08757947936771422, offset=-127.0



In [10]:
def compute_encoding(min, max, bw=8):
    """
    Build an encoding dictionary for the range [min, max] at the given bitwidth.

    :param min: lower end of the range
    :param max: upper end of the range
    :param bw: bitwidth; the range is divided into ``2 ** bw - 1`` steps
    :return: dict with keys "min", "max", "offset" and "delta", where delta is the
             step size and offset is ``min / delta`` truncated toward zero by ``int``
    """
    num_steps = 2 ** bw - 1
    step = (max - min) / num_steps
    return dict(min=min, max=max, offset=int(min / step), delta=step)

N = 10
def make_ranges(max, n=N, max_diff=None):
    """
    Return ``n`` evenly spaced candidate values centred on ``max``.

    The values span ``[max - max_diff, max + max_diff]``, with the lower end clamped
    at 0 so that it never goes negative.

    :param max: centre of the sweep
    :param n: number of values to generate
    :param max_diff: absolute half-width of the sweep. ``None`` (the default) means half of
                     ``max``, which is what the function always used before: the argument
                     was previously overwritten unconditionally and had no effect.
    :return: numpy array of ``n`` values
    """
    if max_diff is None:
        max_diff = max * 0.5
    lower = max - max_diff
    return np.linspace(lower if lower >= 0 else 0, max + max_diff, n)

# Calibrated max of the first output quantizer on the second-to-last named module of the TF sim.
tf_second_to_last = list(dict(sim.model.named_modules()).items())[-2][1]
encoding_max = tf_second_to_last.output_quantizers[0].encoding.max

# Sweep of candidate bounds around that max, each turned into a symmetric [-bound, bound] encoding.
encoding_ranges = make_ranges(encoding_max)
encodings = [compute_encoding(-bound, bound) for bound in encoding_ranges]

print(f"""
      Encoding Max {round(encoding_max, 2)}
      Encoding range {encoding_ranges}
      """)



      Encoding Max 11.12
      Encoding range [ 5.56129694  6.7971407   8.03298447  9.26882823 10.504672   11.74051576
 12.97635953 14.21220329 15.44804706 16.68389082]
      


In [11]:
from satAndQuantCostGpt import PDF, _quant_and_sat_cost

# Split each histogram entry into its first field (passed as xLeft) and second field (passed as pdf).
xleft = [entry[0] for entry in histogram]

# Build the PDF object directly, without first binding `pdf` to a plain list.
pdf = PDF(xLeft=xleft, pdf=[entry[1] for entry in histogram])

In [12]:
# Every candidate shares one offset of -num_steps / 2.
num_steps = 254
test_offset = -num_steps / 2

# Largest delta considered: abs_max spread over half of the steps.
delta_max = abs_max / (num_steps / 2)

# Candidates at 1%, 2%, ..., 100% of delta_max, each paired with the shared offset.
test_candidates = [(0.01 * f * delta_max, test_offset) for f in range(1, 101)]


In [13]:
# Sanity check: stepping i times from the first left edge should land on the i-th left edge.
i = 5
(pdf.xLeft[0] + steps[0] * i, pdf.xLeft[i])

(-24.293613985180855, -24.293613985180855)

In [14]:
# Start from "no candidate yet" instead of a magic 1e9 sentinel, so best_encoding is
# always bound (None when no candidate yields a loss below infinity) rather than
# raising NameError on the last line.
best_loss = float("inf")
best_encoding = None

# Evaluate every (delta, offset) candidate and keep the one with the lowest cost.
# The third argument, 8, is presumably the bitwidth -- confirm against _quant_and_sat_cost.
# NOTE: the loop variables keep the names `delta` / `offset` because a later cell reads
# them after this loop has finished.
for delta, offset in test_candidates:
    loss = _quant_and_sat_cost(pdf, 8, delta, offset)
    if loss < best_loss:
        best_loss = loss
        best_encoding = {"delta": delta, "offset": offset}

best_encoding

min_ind=226, max_ind=229, delta=0.0009680378659972995, offset=-127.0
min_val=-0.12294080898165705, pdf_start=-24.83807373046875 pdf_step=0.10889194905757904
pdf.xLeft[min_ind]=-0.22849324345588684

min_ind=225, max_ind=230, delta=0.001936075731994599, offset=-127.0
min_val=-0.2458816179633141, pdf_start=-24.83807373046875 pdf_step=0.10889194905757904
pdf.xLeft[min_ind]=-0.3373851925134659

min_ind=224, max_ind=231, delta=0.0029041135979918985, offset=-127.0
min_val=-0.3688224269449711, pdf_start=-24.83807373046875 pdf_step=0.10889194905757904
pdf.xLeft[min_ind]=-0.4462771415710449

min_ind=223, max_ind=232, delta=0.003872151463989198, offset=-127.0
min_val=-0.4917632359266282, pdf_start=-24.83807373046875 pdf_step=0.10889194905757904
pdf.xLeft[min_ind]=-0.555169090628624

min_ind=222, max_ind=233, delta=0.004840189329986498, offset=-127.0
min_val=-0.6147040449082852, pdf_start=-24.83807373046875 pdf_step=0.10889194905757904
pdf.xLeft[min_ind]=-0.664061039686203

min_ind=221, max_ind=23

{'delta': 0.09583574873373264, 'offset': -127.0}

In [15]:
import math
# NOTE(review): `delta` and `offset` are not assigned in this cell. They are whatever the
# candidate-search loop left bound on its final iteration (the last test candidate), not
# the values stored in `best_encoding` -- confirm which one was intended.
bw = 8
step_size = math.pow(2, bw) - 1

# Range covered by step_size steps of width delta, starting at delta * offset.
min_val, max_val = delta * offset, delta * (offset + step_size)

max_val

12.390884684765433