In [1]:
import cv2
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import json
from mmcv.transforms import Compose
import numpy as np
from mmdet.utils import get_test_pipeline_cfg

def read_json(json_path):
    with open(json_path) as f:
        data = json.load(f)
    return data

def preprocess(test_pipeline, image):
    if isinstance(image, np.ndarray):
        # Calling this method across libraries will result
        # in module unregistered error if not prefixed with mmdet.
        test_pipeline[0].type = 'mmdet.LoadImageFromNDArray'
    test_pipeline = Compose(test_pipeline)
    return test_pipeline(dict(img=image))

class CustomImageDataset(torch.utils.data.Dataset):
    def __init__(self, images_dir, annotations_json_path, transform=None):
        self.transform = transform
        self.images_dir = images_dir
        self.annotations_json = read_json(annotations_json_path)


    def __len__(self):
        return len(self.annotations_json['images'])

    def __getitem__(self, idx):
        image_dict = self.annotations_json['images'][idx]
        image_path = os.path.join(self.images_dir, image_dict['file_name'])
        image_id = image_dict['id']

        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            transformed_images = self.transform(image)
        else:
            transformed_images = image

        return image_id, image_path, transformed_images


# calibrationDataloader = DataLoader(calibrationDataset, batch_size=32, shuffle=True)

In [4]:
import torch
from mmdet.apis import DetInferencer

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize([640, 640]),  # Resize
])

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
CONFIG_PATH = '/teamspace/studios/this_studio/mmdetection/rtmdet_tiny_8xb32-300e_coco.py'
WEIGHTS_PATH = '/teamspace/studios/this_studio/mmdetection/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth'
EVAL_DATASET_SIZE = 5000
CALIBRATION_DATASET_SIZE = 1000
BATCH_SIZE = 80

ROOT_DATASET_DIR = '/teamspace/studios/this_studio/COCO'
IMAGES_DIR = os.path.join(ROOT_DATASET_DIR, 'images')
ANNOTATIONS_JSON_PATH = os.path.join(ROOT_DATASET_DIR, 'annotations/instances_val2017.json')
# ANNOTATIONS_JSON_PATH = "/home/shayaan/Desktop/aimet/my_mmdet/temp.json"

model = DetInferencer(model=CONFIG_PATH, weights=WEIGHTS_PATH, device=DEVICE)
evalDataset = CustomImageDataset(images_dir=IMAGES_DIR, annotations_json_path=ANNOTATIONS_JSON_PATH, transform=transform)
eval_data_loader = DataLoader(evalDataset, batch_size=BATCH_SIZE)

DEVICE

[2024-08-12 07:51:57,307] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


df: /root/.triton/autotune: No such file or directory
/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status


Loads checkpoint by local backend from path: /teamspace/studios/this_studio/mmdetection/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth
The model and loaded state dict do not match exactly

unexpected key in source state_dict: data_preprocessor.mean, data_preprocessor.std





device(type='cuda', index=0)

In [3]:
total_params = sum(p.numel() for p in model.model.parameters())
total_params / 10 ** 6, len(list(model.model.modules())) - 1


(4.896168, 383)

In [4]:
from mmcv.transforms import Compose
test_evaluator = model.cfg.test_evaluator
test_evaluator.type = 'mmdet.evaluation.CocoMetric' 
test_evaluator.dataset_meta = model.model.dataset_meta
test_evaluator.ann_file = ANNOTATIONS_JSON_PATH
test_evaluator = Compose(test_evaluator)

loading annotations into memory...


Done (t=0.67s)
creating index...
index created!


In [5]:
import random
from typing import Optional
from tqdm import tqdm
import torch
from glob import glob
from torch.utils.data import Dataset, DataLoader, Subset

from mmengine.structures import InstanceData
from mmdet.models.utils import samplelist_boxtype2tensor
from mmengine.registry import MODELS

collate_preprocessor = model.preprocess
predict_by_feat = model.model.bbox_head.predict_by_feat
rescale = True

preprocessor = MODELS.build(model.cfg.model.data_preprocessor)
def add_pred_to_datasample(data_samples, results_list):
    for data_sample, pred_instances in zip(data_samples, results_list):
        data_sample.pred_instances = pred_instances
    samplelist_boxtype2tensor(data_samples)
    return data_samples

In [6]:
def eval_callback(model, use_cuda):
    data_loader = eval_data_loader
    new_preds = []
    for image_id, image_path, _ in tqdm(data_loader):
        pre_processed = collate_preprocessor(inputs=image_path, batch_size=BATCH_SIZE)
        _, data = list(pre_processed)[0]
        data = preprocessor(data, False)
        preds = model(data['inputs'].cuda())
        batch_img_metas = [
        data_samples.metainfo for data_samples in data['data_samples']
        ]
        preds = predict_by_feat(*preds, batch_img_metas=batch_img_metas, rescale=True)
        preds = add_pred_to_datasample(data['data_samples'], preds)
        
        for img_id, pred in zip(image_id, preds):
            pred = pred.pred_instances
            new_pred = InstanceData(metainfo={"img_id": int(img_id)})
            new_pred.bboxes = [np.array(p) for p in pred['bboxes'].cpu()]
            new_pred.labels = pred['labels'].cpu()
            new_pred.scores = pred['scores'].cpu()
            new_preds.append(new_pred)

    eval_results = test_evaluator(new_preds)
    num_file = len(glob("/teamspace/studios/this_studio/aimet/Examples/torch/quantization/eval_stats/eval_acc_*"))
    with open(f"/teamspace/studios/this_studio/aimet/Examples/torch/quantization/eval_stats/eval_acc_{num_file}.json", "w") as f:
        json.dump(eval_results, f, indent=4)
    bbox_map = eval_results['bbox_mAP']
    return bbox_map

In [7]:
def pass_calibration_data(model: torch.nn.Module, use_cuda):
    data_loader = eval_data_loader
    batch_size = data_loader.batch_size
    model.eval()
    samples = CALIBRATION_DATASET_SIZE
    batch_ctr = 0
    with torch.no_grad():
        for image_id, image_path, _ in tqdm(data_loader):
            pre_processed = collate_preprocessor(inputs=image_path, batch_size=BATCH_SIZE)
            _, data = list(pre_processed)[0]
            data = preprocessor(data, False)
            
            preds = model(data['inputs'].cuda())

            batch_ctr += 1
            if (batch_ctr * batch_size) > samples:
                break  

AIMET quantization simulation requires the user's model definition to follow certain guidelines.
For example, functionals defined in forward pass should be changed to equivalent torch.nn.Module.
AIMET user guide lists all these guidelines.

The following **ModelPreparer** API uses new graph transformation feature available in PyTorch 1.9+ version and automates model definition changes required to comply with the above guidelines.

In [8]:
from aimet_torch.model_preparer import prepare_model

model = prepare_model(model.model)

2024-08-09 09:30:21,147 - root - INFO - AIMET
2024-08-09 09:30:21,424 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stem.0.bn.module_batch_norm} 
2024-08-09 09:30:21,425 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stem.1.bn.module_batch_norm_1} 
2024-08-09 09:30:21,426 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stem.2.bn.module_batch_norm_2} 
2024-08-09 09:30:21,427 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stage1.0.bn.module_batch_norm_3} 
2024-08-09 09:30:21,428 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stage1.1.short_conv.bn.module_batch_norm_4} 
2024-08-09 09:30:21,428 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stage1.1.main_conv.bn.module_batch_norm_5} 
2024-08-09 09:30:21,429 - ModelPreparer - INFO - Functional         : Adding new module f

---
We should decide whether to place the model on a CPU or CUDA device.
This example code will use CUDA if available in your current execution environment.
You can change this logic and force a device placement if needed.

In [9]:
# print("------------FP 32 MODEL MODULES ------------")
# print(dict(model.named_modules()))

In [10]:
use_cuda = False
if torch.cuda.is_available():
    use_cuda = True
    model.to(torch.device('cuda'))
use_cuda

True

---

## 3. Apply QuantAnalyzer to the model

QuantAnalyzer requires two functions to be defined by the user for passing data through the model:

**Forward pass callback**

One function will be used to pass representative data through a quantized version of the model to calibrate quantization parameters.
This function should be fairly simple - use the existing train or validation data loader to extract some samples and pass them to the model.
We don't need to compute any loss metrics, so we can just ignore the model output.

The function **must** take two arguments, the first of which will be the model to run the forward pass on.
The second argument can be anything additional which the function requires to run, and can be in the form of a single item or a tuple of items.

If no additional argument is needed, the user can specify a dummy "_" parameter for the function.

A few pointers regarding the forward pass data samples:

- In practice, we need a very small percentage of the overall data samples for computing encodings.
  For example, the training dataset for ImageNet has 1M samples. For computing encodings we only need 500 to 1000 samples.
- It may be beneficial if the samples used for computing encoding are well distributed.
  It's not necessary that all classes need to be covered since we are only looking at the range of values at every layer activation.
  However, we definitely want to avoid an extreme scenario like all 'dark' or 'light' samples are used - e.g. only using pictures captured at night might not give ideal results.

The following shows an example of a routine that passes unlabeled samples through the model for computing encodings.
This routine can be written in many ways; this is just an example.
This function only requires unlabeled data as no loss or other evaluation metric is needed.

In order to pass this function to QuantAnalyzer, we need to wrap it in a CallbackFunc object, as shown below.
The CallbackFunc takes two arguments: the callback function itself, and the inputs to pass into the callback function.

In [11]:
from aimet_torch.quant_analyzer import CallbackFunc

forward_pass_callback = CallbackFunc(pass_calibration_data, use_cuda)

---

**Evaluation callback**

The second function will be used to evaluate the model, and needs to return an accuracy metric.
In here, the user should pass any amount of data through the model which they would like when evaluating their model for accuracy.

Like the forward pass callback, this function also must take exactly two arguments: the model to evaluate, and any additional argument needed for the function to work.
The second argument can be a tuple of items in case multiple items are needed.

We will be using the ImageNetDataPipeline's evaluate defined above for this purpose.
Like the forward pass callback, we need to wrap the evaluation callback in a CallbackFunc object as well.

In [12]:
eval_callback = CallbackFunc(eval_callback, use_cuda)

---

**Enabling MSE loss per layer analysis**

An optional analysis step in QuantAnalyzer calculates the MSE loss per layer in the model, comparing the layer outputs from the original FP32 model vs. a quantized model.
To perform this step, the user needs to also provide an unlabeled DataLoader to QuantAnalyzer.

We will demonstrate this step by using the ImageNetDataLoader imported above.

In [13]:
data_loader = eval_data_loader

---

QuantAnalyzer also requires a dummy input to the model.
This dummy input does not need to be representative of the dataset.
All that matters is that the input shape is correct for the model to run on.

In [14]:
dummy_input = torch.rand(1, 3, 640, 640).cuda()    # Shape for each ImageNet sample is (3 channels) x (224 height) x (224 width)
if use_cuda:
    dummy_input = dummy_input.cuda()

---
We are now ready to apply QuantAnalyzer.

In [15]:
module_names = dict(model.named_modules())
modules_to_ignore = ['backbone.stage2.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_14', 'backbone.stage1.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_7', 'backbone.stage2.1.blocks.0.conv2.pointwise_conv.conv', 'backbone.stage3.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_21', 'backbone.stage4.2.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_30', 'neck.top_down_blocks.0.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_37', 'neck.top_down_blocks.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_44', 'neck.bottom_up_blocks.0.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_51', 'neck.bottom_up_blocks.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_58']
modules_to_ignore = [module_names[m] for m in modules_to_ignore]

modules_to_ignore

[BatchNorm(),
 BatchNorm(),
 Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False),
 BatchNorm(),
 BatchNorm(),
 BatchNorm(),
 BatchNorm(),
 BatchNorm(),
 BatchNorm()]

In [16]:
from aimet_torch.v2.quant_analyzer import QuantAnalyzer

quant_analyzer = QuantAnalyzer(model, dummy_input, forward_pass_callback, eval_callback, modules_to_ignore)

In [17]:
from aimet_common.defs import QuantScheme

quant_analyzer.analyze(quant_scheme=QuantScheme.post_training_tf_enhanced,
                       default_param_bw=8,
                       default_output_bw=8,
                       config_file=None,
                       results_dir="./tmp/")

2024-08-09 09:30:33,909 - BatchNormFolding - INFO - 0 BatchNorms' weights got converted
2024-08-09 09:30:39,861 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.10/dist-packages/aimet_common/quantsim_config/default_config.json
2024-08-09 09:30:39,893 - Quant - INFO - Unsupported op type Squeeze
2024-08-09 09:30:39,893 - Quant - INFO - Unsupported op type Mean
2024-08-09 09:30:39,907 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


 19%|█▉        | 12/63 [07:55<33:41, 39.65s/it]
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
100%|██████████| 63/63 [06:33<00:00,  6.24s/it]


08/09 09:45:45 - mmengine - [4m[97mINFO[0m - Evaluating bbox...
Loading and preparing results...
DONE (t=4.27s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=80.21s).
Accumulating evaluation results...
DONE (t=26.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.411
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.579
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.447
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.210
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.455
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.583
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.334
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.554
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.605

100%|██████████| 63/63 [06:38<00:00,  6.33s/it]


08/09 09:55:01 - mmengine - [4m[97mINFO[0m - Evaluating bbox...
Loading and preparing results...
DONE (t=5.77s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=77.50s).
Accumulating evaluation results...
DONE (t=28.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.385
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.551
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.419
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.202
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.426
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.549
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.322
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.535
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.588

100%|██████████| 63/63 [09:15<00:00,  8.81s/it]


08/09 10:06:51 - mmengine - [4m[97mINFO[0m - Evaluating bbox...
Loading and preparing results...
DONE (t=4.32s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=80.84s).
Accumulating evaluation results...
DONE (t=27.77s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.282
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.423
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.303
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.141
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.324
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.402
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.269
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.458
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.509

100%|██████████| 63/63 [09:16<00:00,  8.84s/it]


08/09 10:18:47 - mmengine - [4m[97mINFO[0m - Evaluating bbox...
Loading and preparing results...
DONE (t=5.41s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=80.47s).
Accumulating evaluation results...
DONE (t=27.76s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.272
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.410
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.293
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.133
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.308
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.401
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.265
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.452
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.505

In [None]:
from aimet_common.defs import QuantScheme

sim = quant_analyzer._create_quantsim_and_encodings(quant_scheme=QuantScheme.post_training_tf_enhanced,
                       default_param_bw=8,
                       default_output_bw=8,
                       config_file=None)


2024-08-09 07:06:29,906 - BatchNormFolding - INFO - 0 BatchNorms' weights got converted
2024-08-09 07:06:36,145 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.10/dist-packages/aimet_common/quantsim_config/default_config.json
2024-08-09 07:06:36,177 - Quant - INFO - Unsupported op type Squeeze
2024-08-09 07:06:36,178 - Quant - INFO - Unsupported op type Mean
2024-08-09 07:06:36,191 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


 19%|█▉        | 12/63 [08:04<34:18, 40.36s/it]


In [28]:
dict(sim.model.named_modules())

{'': GraphModule(
   (backbone): Module(
     (stem): Module(
       (0): Module(
         (conv): QuantizedConv2d(
           3, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
           (param_quantizers): ModuleDict(
             (weight): QuantizeDequantize(shape=[1], bitwidth=8, symmetric=True)
           )
           (input_quantizers): ModuleList(
             (0): QuantizeDequantize(shape=[1], bitwidth=8, symmetric=False)
           )
           (output_quantizers): ModuleList(
             (0): QuantizeDequantize(shape=[1], bitwidth=8, symmetric=False)
           )
         )
         (bn): Module(
           (module_batch_norm): FakeQuantizedBatchNorm(
             (param_quantizers): ModuleDict()
             (input_quantizers): ModuleList(
               (0-4): 5 x QuantizeDequantize(shape=[1], bitwidth=8, symmetric=False)
             )
             (output_quantizers): ModuleList(
               (0): QuantizeDequantize(shape=[1], bitwidth=8, symmetric=F

In [None]:
sim.export(path="/teamspace/studios/this_studio/aimet/Examples/torch/quantization/sim_model_excluded_modules_embedded_encodings",
           filename_prefix="rtm_det",
           dummy_input=dummy_input.cpu(),
           use_embedded_encodings=True)

In [21]:
import torch
from mmdet.apis import DetInferencer

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize([640, 640]),  # Resize
])

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
CONFIG_PATH = '/teamspace/studios/this_studio/mmdetection/rtmdet_tiny_8xb32-300e_coco.py'
WEIGHTS_PATH = '/teamspace/studios/this_studio/mmdetection/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth'
EVAL_DATASET_SIZE = 5000
CALIBRATION_DATASET_SIZE = 1000
BATCH_SIZE = 80

ROOT_DATASET_DIR = '/teamspace/studios/this_studio/COCO'
IMAGES_DIR = os.path.join(ROOT_DATASET_DIR, 'images')
ANNOTATIONS_JSON_PATH = os.path.join(ROOT_DATASET_DIR, 'annotations/instances_val2017.json')
# ANNOTATIONS_JSON_PATH = "/home/shayaan/Desktop/aimet/my_mmdet/temp.json"

model = DetInferencer(model=CONFIG_PATH, weights=WEIGHTS_PATH, device=DEVICE)
evalDataset = CustomImageDataset(images_dir=IMAGES_DIR, annotations_json_path=ANNOTATIONS_JSON_PATH, transform=transform)
eval_data_loader = DataLoader(evalDataset, batch_size=BATCH_SIZE)

DEVICE

Loads checkpoint by local backend from path: /teamspace/studios/this_studio/mmdetection/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth
The model and loaded state dict do not match exactly

unexpected key in source state_dict: data_preprocessor.mean, data_preprocessor.std





device(type='cpu')

In [22]:
def pass_calibration_data(model: torch.nn.Module, samples: int):
    data_loader = eval_data_loader
    batch_size = data_loader.batch_size
    model.eval()
    batch_ctr = 0
    with torch.no_grad():
        for image_id, image_path, _ in tqdm(data_loader):
            pre_processed = collate_preprocessor(inputs=image_path, batch_size=BATCH_SIZE)
            _, data = list(pre_processed)[0]
            data = preprocessor(data, False)
            
            preds = model(data['inputs'].to(DEVICE))

            batch_ctr += 1
            if (batch_ctr * batch_size) > samples:
                break  

In [23]:
from aimet_torch.quantsim import QuantizationSimModel
from aimet_common.defs import QuantScheme
from aimet_torch.model_preparer import prepare_model

dummy_input = torch.rand(1, 3, 640, 640).to(DEVICE)  # Shape for each ImageNet sample is (3 channels) x (224 height) x (224 width)
model = prepare_model(model.model)
quant_sim = QuantizationSimModel(model=model,
                                quant_scheme=QuantScheme.post_training_tf_enhanced,
                                default_param_bw=8,
                                default_output_bw=8,
                                config_file=None,
                                dummy_input=dummy_input)

### if load encodings
quant_sim.load_encodings(encodings="/teamspace/studios/this_studio/aimet/Examples/torch/quantization/sim_model_excluded_modules/rtm_det_torch.encodings")

### else compute encodings
quant_sim.compute_encodings(pass_calibration_data, CALIBRATION_DATASET_SIZE)

2024-08-21 09:32:05,078 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stem.0.bn.module_batch_norm} 
2024-08-21 09:32:05,079 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stem.1.bn.module_batch_norm_1} 
2024-08-21 09:32:05,080 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stem.2.bn.module_batch_norm_2} 
2024-08-21 09:32:05,081 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stage1.0.bn.module_batch_norm_3} 
2024-08-21 09:32:05,082 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stage1.1.short_conv.bn.module_batch_norm_4} 
2024-08-21 09:32:05,082 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stage1.1.main_conv.bn.module_batch_norm_5} 
2024-08-21 09:32:05,083 - ModelPreparer - INFO - Functional         : Adding new module for node: {backbone.stage1.1.blocks.0.conv1.bn.

2024-08-21 09:32:05,152 - ModelPreparer - INFO - Functional         : Adding new module for node: {neck.out_convs.2.bn.module_batch_norm_63} 
2024-08-21 09:32:05,155 - ModelPreparer - INFO - Functional         : Adding new module for node: {bbox_head.cls_convs.0.0.bn.module_batch_norm_64} 
2024-08-21 09:32:05,156 - ModelPreparer - INFO - Functional         : Adding new module for node: {bbox_head.cls_convs.0.1.bn.module_batch_norm_65} 
2024-08-21 09:32:05,157 - ModelPreparer - INFO - Functional         : Adding new module for node: {bbox_head.reg_convs.0.0.bn.module_batch_norm_66} 
2024-08-21 09:32:05,158 - ModelPreparer - INFO - Functional         : Adding new module for node: {bbox_head.reg_convs.0.1.bn.module_batch_norm_67} 
2024-08-21 09:32:05,158 - ModelPreparer - INFO - Functional         : Adding new module for node: {bbox_head.module_mul_4} 
2024-08-21 09:32:05,159 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for node: {bbox_head.cls_convs.0.0.module_conv_1} 

  0%|          | 0/63 [00:00<?, ?it/s]


NameError: name 'collate_preprocessor' is not defined

In [8]:
module_names = dict(model.named_modules())
modules_to_ignore = ['backbone.stage2.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_14', 'backbone.stage1.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_7', 'backbone.stage2.1.blocks.0.conv2.pointwise_conv.conv', 'backbone.stage3.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_21', 'backbone.stage4.2.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_30', 'neck.top_down_blocks.0.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_37', 'neck.top_down_blocks.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_44', 'neck.bottom_up_blocks.0.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_51', 'neck.bottom_up_blocks.1.blocks.0.conv2.depthwise_conv.bn.module_batch_norm_58']
modules_to_ignore = [module_names[m] for m in modules_to_ignore]

modules_to_ignore

[BatchNorm(),
 BatchNorm(),
 Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False),
 BatchNorm(),
 BatchNorm(),
 BatchNorm(),
 BatchNorm(),
 BatchNorm(),
 BatchNorm()]

In [9]:
def exclude_modules_from_quant(model, sim, modules_to_ignore):
    name_to_quant_wrapper_dict = {}
    for name, module in sim.model.named_modules():
        name_to_quant_wrapper_dict[name] = module

    module_to_name_dict = {}
    for name, module in model.named_modules():
        module_to_name_dict[module] = name
    print(list(module_to_name_dict.keys())[49])
    print(modules_to_ignore)
    quant_wrappers_to_ignore = []
    for module in modules_to_ignore:
        
        name = module_to_name_dict[module]
        quant_wrapper = name_to_quant_wrapper_dict[name]
        quant_wrappers_to_ignore.append(quant_wrapper)

    sim.exclude_layers_from_quantization(quant_wrappers_to_ignore)

exclude_modules_from_quant(model, quant_sim, modules_to_ignore)

Module(
  (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn): Module(
    (module_batch_norm_6): BatchNorm()
  )
  (activate): CustomSiLU(
    (sigmoid): Sigmoid()
    (mul): Multiply()
  )
)
[BatchNorm(), BatchNorm(), Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False), BatchNorm(), BatchNorm(), BatchNorm(), BatchNorm(), BatchNorm(), BatchNorm()]


In [10]:
quant_sim.model

GraphModule(
  (backbone): Module(
    (stem): Module(
      (0): Module(
        (conv): StaticGridQuantWrapper(
          (_module_to_wrap): Conv2d(3, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        )
        (bn): Module(
          (module_batch_norm): StaticGridQuantWrapper(
            (_module_to_wrap): BatchNorm()
          )
        )
        (activate): CustomSiLU(
          (sigmoid): StaticGridQuantWrapper(
            (_module_to_wrap): Sigmoid()
          )
          (mul): StaticGridQuantWrapper(
            (_module_to_wrap): Multiply()
          )
        )
      )
      (1): Module(
        (conv): StaticGridQuantWrapper(
          (_module_to_wrap): Conv2d(12, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (bn): Module(
          (module_batch_norm_1): StaticGridQuantWrapper(
            (_module_to_wrap): BatchNorm()
          )
        )
        (activate): CustomSiLU(
          (sigmoid): StaticGridQu