In [None]:
import torch
from data_pipeline import DataPipeline

# Filesystem locations configured for this notebook. Neither constant is
# referenced by the cells visible here.
# NOTE(review): DATA_DIR is an absolute, machine-specific path, and MODEL_DIR
# holds a file name rather than a directory - confirm both against their users.
DATA_DIR = '/media/bmw/datasets/imagenet-1k/val'
MODEL_DIR = 'resnet50.onnx'

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

### Load PyTorch Model

In [2]:

import torchvision.models as models

# Build a ResNet-50 and restore its weights from the local checkpoint file.
resnet50 = models.resnet50()
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only host as well; the model is moved to `device` afterwards.
# weights_only=True restricts unpickling to tensors and plain containers.
resnet50.load_state_dict(
    torch.load("resnet50.pth", map_location="cpu", weights_only=True)
)
# Switch to evaluation mode before the model is prepared and evaluated.
resnet50.eval()
# Last expression of the cell: moves the model to `device` and displays its repr.
resnet50.to(device)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

### Prepare the Model according to the Model Guidelines using the Model Preparer API

In [3]:

from aimet_torch.model_preparer import prepare_model    
    
# Run AIMET's model preparer on the loaded ResNet-50. The returned `model` is
# the object every later cell validates, folds, and quantizes. The log output
# of this cell lists the modules the preparer adds (reused ReLUs, functional adds).
model = prepare_model(resnet50)

2025-02-19 16:48:56,214 - root - INFO - AIMET


  param_schemas = callee.param_schemas()


2025-02-19 16:48:57,316 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for node: {layer1.0.module_relu_1} 
2025-02-19 16:48:57,317 - ModelPreparer - INFO - Functional         : Adding new module for node: {layer1.0.module_add} 
2025-02-19 16:48:57,317 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for node: {layer1.0.module_relu_2} 
2025-02-19 16:48:57,317 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for node: {layer1.1.module_relu_1} 
2025-02-19 16:48:57,318 - ModelPreparer - INFO - Functional         : Adding new module for node: {layer1.1.module_add_1} 
2025-02-19 16:48:57,318 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for node: {layer1.1.module_relu_2} 
2025-02-19 16:48:57,318 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for node: {layer1.2.module_relu_1} 
2025-02-19 16:48:57,319 - ModelPreparer - INFO - Functional         : Adding new module for node: {layer1.2.module_add_2} 
2025-02-19 16

In [None]:
# Record whether CUDA is usable and, if so, place the prepared model on the GPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    model.to(torch.device('cuda'))

### Validate the Model using ModelValidator API

In [4]:
from aimet_torch.model_validator.model_validator import ModelValidator

# Random dummy input of shape (1, 3, 224, 224). It is reused further down as
# the dummy input for batch-norm folding and for QuantizationSimModel.
input_tensor=torch.randn(1,3,224,224)
# Last expression of the cell, so the validator's return value is displayed.
ModelValidator.validate_model(model, model_input=input_tensor.to(device))

2025-02-19 16:49:21,029 - Utils - INFO - Running validator check <function validate_for_reused_modules at 0x7f1eae4ca7a0>
2025-02-19 16:49:21,354 - Utils - INFO - Running validator check <function validate_for_missing_modules at 0x7f1eae0ebec0>
2025-02-19 16:49:21,759 - Utils - INFO - All validation checks passed.


True

In [5]:
# Put the model and the dummy input on the same device before they are used
# together in the cells below.
model.to(device)
input_tensor=input_tensor.to(device)

### FP32-Model Accuracy

In [6]:
# Baseline: evaluate the prepared model before any quantization is simulated.
DataPipeline.evaluate(model)

GPU


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.01it/s]

Top-1 Accuracy: 87.00%
Top-5 Accuracy: 97.00%
Total Samples Evaluated: 100





#### Fold batch normalization layers before quantization simulation

Batch-norm folding improves inference performance on quantized runtimes, so it is performed before the quantization simulation model is created.

In [7]:

from aimet_torch.batch_norm_fold import fold_all_batch_norms

# Fold the batch-norm layers of `model`. The return value is not needed here,
# so it is bound to `_`, which also keeps the cell from displaying it.
_ = fold_all_batch_norms(
    model,
    input_shapes=(1, 3, 224, 224),
    dummy_input=input_tensor,
)

### Quantization Simulation on the Batch-Norm-Folded Model (W8A8)

In [8]:
from aimet_torch.quantsim import QuantizationSimModel
from aimet_common.defs import QuantScheme

# Wrap the folded model in a quantization simulator with 8-bit parameters and
# 8-bit activations (W8A8). `input_tensor` serves as the dummy input.
sim = QuantizationSimModel(
    model,
    dummy_input=input_tensor,
    quant_scheme=QuantScheme.training_range_learning_with_tf_init,
    default_output_bw=8,
    default_param_bw=8,
)

2025-02-19 16:50:49,008 - Quant - INFO - No config file provided, defaulting to config file at /home/bmw/anaconda3/envs/shabari/lib/python3.12/site-packages/aimet_common/quantsim_config/default_config.json
2025-02-19 16:50:49,030 - Quant - INFO - Unsupported op type Squeeze
2025-02-19 16:50:49,030 - Quant - INFO - Unsupported op type Mean
2025-02-19 16:50:49,037 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


In [10]:
# Print the wrapped model so the param/input/output quantizers attached to each
# layer can be inspected.
print(sim.model)

GraphModule(
  (conv1): QuantizedConv2d(
    3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)
    (param_quantizers): ModuleDict(
      (weight): QuantizeDequantize(shape=(), qmin=-128, qmax=127, symmetric=True)
      (bias): None
    )
    (input_quantizers): ModuleList(
      (0): QuantizeDequantize(shape=(), qmin=0, qmax=255, symmetric=False)
    )
    (output_quantizers): ModuleList(
      (0): None
    )
  )
  (bn1): Identity()
  (relu): QuantizedReLU(
    inplace=True
    (param_quantizers): ModuleDict()
    (input_quantizers): ModuleList(
      (0): None
    )
    (output_quantizers): ModuleList(
      (0): QuantizeDequantize(shape=(), qmin=0, qmax=255, symmetric=False)
    )
  )
  (maxpool): QuantizedMaxPool2d(
    kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False
    (param_quantizers): ModuleDict()
    (input_quantizers): ModuleList(
      (0): None
    )
    (output_quantizers): ModuleList(
      (0): QuantizeDequantize(shape=(), qmin=0, qmax=255, symme

### Pass Calibration Data (Unlabelled Data)

In [11]:
def pass_calibration_data(sim_model, use_cuda, samples=1000):
    """Forward unlabelled validation batches through ``sim_model`` for calibration.

    Written to be used as the ``forward_pass_callback`` of
    ``sim.compute_encodings``: it is called with the model and one extra
    argument, which is interpreted here as the ``use_cuda`` flag.

    Args:
        sim_model: Callable model exposing ``eval()``; it is invoked once per batch.
        use_cuda: When truthy, batches are moved to ``cuda``; otherwise to ``cpu``.
        samples: Sample budget. Iteration stops as soon as at least this many
            samples have been forwarded (at least one batch is always run).
            Defaults to 1000.

    Returns:
        None. Labels yielded by the data loader are ignored.
    """
    device = torch.device('cuda' if use_cuda else 'cpu')
    data_loader = DataPipeline.get_val_dataloader()

    sim_model.eval()
    samples_seen = 0
    with torch.no_grad():
        for input_data, _ in data_loader:
            sim_model(input_data.to(device))

            # Count the samples actually forwarded rather than
            # batch_count * batch_size: this is exact for a short final batch
            # and does not require the loader to expose `batch_size`.
            samples_seen += len(input_data)
            # `>=` stops exactly when the budget is met; a strict `>` would run
            # one extra batch whenever the batch size divides `samples` evenly.
            if samples_seen >= samples:
                break

In [12]:

# Calibrate the quantizers by running the calibration callback on the sim model.
# `forward_pass_callback_args` is handed to the callback as its `use_cuda`
# argument, so derive it from the runtime instead of hard-coding True:
# on a CPU-only host a hard-coded True makes the callback call `.to('cuda')`.
sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=torch.cuda.is_available())

In [13]:
# Evaluate the quantization-simulated model with the same pipeline used for the
# FP32 baseline, so the two accuracy reports can be compared directly.
DataPipeline.evaluate(sim.model)


GPU


  if not _is_grid_representable(dtype, qmin, qmax):
  if not _is_grid_representable(internal_dtype, qmin, qmax):
  assert new_param_or_buffer.shape == orig_param_or_buffer.shape
  dtype_repr = torch.tensor(value, dtype=dtype)
  return dtype_repr.isfinite() and dtype_repr.long() == value
  return _is_value_representable(dtype, qmax) and \
Evaluating: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it]

Top-1 Accuracy: 79.00%
Top-5 Accuracy: 94.00%
Total Samples Evaluated: 100



