In [1]:
import torch
import torch.cuda
from tqdm import tqdm

In [2]:
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights

model = mobilenet_v2(weights = MobileNet_V2_Weights.DEFAULT).cuda()


Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to /home/centar15-desktop1/.cache/torch/hub/checkpoints/mobilenet_v2-7ebf99e0.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 86.3MB/s]


In [3]:
from aimet_common.defs import QuantScheme
from aimet_common.quantsim_config.utils import get_path_for_per_channel_config
from aimet_torch.quantsim import QuantizationSimModel

input_shape = (1, 3, 224, 224)
dummy_input = torch.randn(input_shape).cuda()

sim = QuantizationSimModel(model,
                           dummy_input,
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           default_param_bw=8,
                           default_output_bw=16,
                           #config_file=get_path_for_per_channel_config())
)


2025-02-19 17:35:24,913 - root - INFO - AIMET


  param_schemas = callee.param_schemas()


2025-02-19 17:35:26,258 - Quant - INFO - No config file provided, defaulting to config file at /home/centar15-desktop1/LPCV_2025_T1/.venv/lib/python3.12/site-packages/aimet_common/quantsim_config/default_config.json
2025-02-19 17:35:26,267 - Quant - INFO - Unsupported op type Squeeze
2025-02-19 17:35:26,267 - Quant - INFO - Unsupported op type Mean
2025-02-19 17:35:26,270 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


In [4]:
print(sim.model)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): QuantizedConv2d(
        3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
        (param_quantizers): ModuleDict(
          (weight): QuantizeDequantize(shape=(), qmin=-128, qmax=127, symmetric=True)
        )
        (input_quantizers): ModuleList(
          (0): QuantizeDequantize(shape=(), qmin=0, qmax=65535, symmetric=False)
        )
        (output_quantizers): ModuleList(
          (0): None
        )
      )
      (1): QuantizedBatchNorm2d(
        32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
        (param_quantizers): ModuleDict(
          (weight): None
          (bias): None
        )
        (input_quantizers): ModuleList(
          (0): None
        )
        (output_quantizers): ModuleList(
          (0): None
        )
      )
      (2): QuantizedReLU6(
        inplace=True
        (param_quantizers): ModuleDict()
        (input_quantizers): ModuleL

In [20]:
# sim.model(dummy_input)
sim.model.features[0][0]

QuantizedConv2d(
  3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
  (param_quantizers): ModuleDict(
    (weight): QuantizeDequantize(shape=(), qmin=-128, qmax=127, symmetric=True)
  )
  (input_quantizers): ModuleList(
    (0): QuantizeDequantize(shape=(), qmin=0, qmax=65535, symmetric=False)
  )
  (output_quantizers): ModuleList(
    (0): None
  )
)

In [21]:
model.features[0][0]

Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

In [6]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import os
import sys

ROOT_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(ROOT_DIR)

from dataset.DatasetReader import COCODataset
import dataset.utils as dsutils

def get_calibration_data_loader():
    transform = transforms.Compose([transforms.Resize(256),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    datasetCOCO = COCODataset(
    annotation_file = r"/home/centar15-desktop1/LPCV_2025_T1/datasets/coco/annotations/instances_val2017.json", 
    image_dir= r'/home/centar15-desktop1/LPCV_2025_T1/datasets/coco/val2017',
    target_classes=[s.lower() for s in dsutils.GLOBAL_CLASSES],
    transform = transform)

    dataloader = DataLoader(datasetCOCO, batch_size=16, shuffle=True)
    return dataloader

dataloader = get_calibration_data_loader()

In [7]:
def pass_calibration_data(model: torch.nn.Module, dataloader):
    num_batches = 32

    model.eval()

    with torch.no_grad():
        for batch, (input_data, _) in enumerate(dataloader):
            if batch >= num_batches:
                break
            inputs_batch = input_data.to("cuda")
            asdf = model(inputs_batch)
            print(asdf)

In [8]:
sim.compute_encodings(pass_calibration_data, forward_pass_callback_args=dataloader)

tensor([[-0.2864,  0.4086, -0.1682,  ..., -1.0524, -0.4447,  1.1467],
        [ 0.1579, -0.5318, -0.4925,  ..., -0.3671, -0.0064, -1.6205],
        [-0.2782,  0.1649, -0.2332,  ...,  0.5585,  0.2680, -0.8378],
        ...,
        [-0.5398,  0.6756, -0.5106,  ..., -0.8966, -0.3162,  0.6858],
        [-0.1569,  0.0467, -0.2110,  ..., -0.3236,  0.4402, -0.7001],
        [ 0.1008,  0.6660, -0.6845,  ..., -0.6017,  0.5578,  0.3483]],
       device='cuda:0')
tensor([[-0.4694,  0.0800, -0.0060,  ..., -0.7170, -1.2197, -1.2221],
        [-0.0178, -1.0593, -0.6007,  ..., -0.5795,  0.5358, -0.0333],
        [-0.4527, -0.7011, -0.3270,  ..., -0.4895,  1.6511, -0.4237],
        ...,
        [-0.7852, -0.3666, -0.0877,  ..., -0.1127, -0.2861, -0.0596],
        [-0.5309, -0.1522, -0.5360,  ..., -0.8239,  0.9017, -0.5682],
        [ 0.1828,  0.4876, -1.1630,  ..., -0.0969,  1.7014, -1.0652]],
       device='cuda:0')
tensor([[ 0.2538,  0.2936, -0.9496,  ..., -0.2193,  2.4241, -0.6486],
        [ 0.08

In [9]:
print(sim.model(dummy_input))

DequantizedTensor([[ 3.8063e-01,  9.4928e-02, -1.2205e-01, -6.0017e-01,
                    -7.8252e-02, -1.8381e-01, -8.2650e-02,  2.8937e-01,
                     4.7098e-02, -2.1845e-01, -2.2303e-01, -2.5308e-01,
                    -2.6957e-01,  2.5950e-01, -2.9230e-01, -1.6127e-01,
                    -3.0769e-01, -9.6578e-02,  5.3145e-03,  1.6530e-01,
                    -2.5290e-02, -3.7055e-01, -1.7886e-01, -5.8038e-01,
                    -4.9700e-01, -4.0170e-01, -3.4526e-01, -1.8583e-01,
                    -3.9144e-01, -1.7758e-01, -3.5955e-01, -2.8314e-01,
                    -5.3988e-01, -2.8075e-01, -6.5973e-03,  4.3616e-02,
                     9.9143e-02,  1.0812e-02, -4.6273e-01, -5.2595e-02,
                     2.1991e-03, -4.0335e-01, -2.9853e-01,  3.6652e-04,
                    -1.8033e-01, -4.6218e-01, -2.4832e-01, -4.3377e-01,
                    -3.3683e-01, -4.3158e-01, -2.1625e-01,  1.0189e-01,
                    -7.3670e-02, -1.6493e-03, -2.9798e-01, -5.20

In [10]:
print(sim.model)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): QuantizedConv2d(
        3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
        (param_quantizers): ModuleDict(
          (weight): QuantizeDequantize(shape=(), qmin=-128, qmax=127, symmetric=True)
        )
        (input_quantizers): ModuleList(
          (0): QuantizeDequantize(shape=(), qmin=0, qmax=65535, symmetric=False)
        )
        (output_quantizers): ModuleList(
          (0): None
        )
      )
      (1): QuantizedBatchNorm2d(
        32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
        (param_quantizers): ModuleDict(
          (weight): None
          (bias): None
        )
        (input_quantizers): ModuleList(
          (0): None
        )
        (output_quantizers): ModuleList(
          (0): None
        )
      )
      (2): QuantizedReLU6(
        inplace=True
        (param_quantizers): ModuleDict()
        (input_quantizers): ModuleL

In [12]:
sim.export(path='tmp', filename_prefix='quantized_mobilenet_v2', dummy_input=dummy_input.cpu())

2025-02-19 17:37:29,436 - Utils - INFO - successfully created onnx model with 140/222 node names updated
['classifier.0']
This can be due to several reasons:
	- The layer is set to quantize with float datatype, but was not exercised in compute encodings. Not an issue if the layer is not meant to be run.
	- The layer has valid encodings but was not seen while exporting to onnx using the dummy input provided in sim.export(). Ensure that the dummy input covers all layers.
2025-02-19 17:37:29,483 - Quant - INFO - Layers excluded from quantization: []
To export encodings for saving and loading, use QuantizationSimModel's save_encodings_to_json() utility instead.[0m


In [18]:

for name, param in sim.model.named_parameters():
    print(f"Layer: {name}")
    print(f"Weights: {param.data}")  # Use .data to access the raw tensor
    print(f"Shape: {param.shape}\n")


Layer: features.0.0.weight
Weights: tensor([[[[-6.3108e-02, -1.8766e-01, -1.5188e-01],
          [-4.9379e-01, -6.4248e-01, -5.8935e-01],
          [-6.8005e-01, -9.7448e-01, -7.6317e-01]],

         [[-1.6350e-02, -1.8482e-02,  6.2783e-02],
          [ 3.5436e-02,  5.8980e-02,  1.0693e-01],
          [ 1.6995e-01,  1.4699e-01,  1.8521e-01]],

         [[ 1.1395e-01,  1.6316e-01,  1.0483e-01],
          [ 4.0824e-01,  5.7489e-01,  4.7270e-01],
          [ 5.7547e-01,  7.1503e-01,  5.3702e-01]]],


        [[[ 2.9983e-03,  1.4297e-02,  5.9918e-02],
          [ 5.5779e-03,  3.0330e-02, -4.5050e-02],
          [ 1.2600e-01,  5.5075e-02, -9.2239e-01]],

         [[ 4.2316e-03, -6.6995e-02, -7.4151e-02],
          [ 1.4910e-02,  1.5604e-02, -6.8112e-02],
          [ 2.4632e-02, -4.4099e-02, -7.3383e-01]],

         [[-9.1925e-03, -3.3236e-02,  1.0019e-01],
          [ 3.9586e-03,  5.4139e-02,  1.0663e-01],
          [ 1.0968e-01,  3.5723e-02, -3.9510e-01]]],


        [[[ 1.2649e-01, -1.502

In [19]:
for name, param in model.named_parameters():
    print(f"Layer: {name}")
    print(f"Weights: {param.data}")  # Use .data to access the raw tensor
    print(f"Shape: {param.shape}\n")

Layer: features.0.0.weight
Weights: tensor([[[[-6.3108e-02, -1.8766e-01, -1.5188e-01],
          [-4.9379e-01, -6.4248e-01, -5.8935e-01],
          [-6.8005e-01, -9.7448e-01, -7.6317e-01]],

         [[-1.6350e-02, -1.8482e-02,  6.2783e-02],
          [ 3.5436e-02,  5.8980e-02,  1.0693e-01],
          [ 1.6995e-01,  1.4699e-01,  1.8521e-01]],

         [[ 1.1395e-01,  1.6316e-01,  1.0483e-01],
          [ 4.0824e-01,  5.7489e-01,  4.7270e-01],
          [ 5.7547e-01,  7.1503e-01,  5.3702e-01]]],


        [[[ 2.9983e-03,  1.4297e-02,  5.9918e-02],
          [ 5.5779e-03,  3.0330e-02, -4.5050e-02],
          [ 1.2600e-01,  5.5075e-02, -9.2239e-01]],

         [[ 4.2316e-03, -6.6995e-02, -7.4151e-02],
          [ 1.4910e-02,  1.5604e-02, -6.8112e-02],
          [ 2.4632e-02, -4.4099e-02, -7.3383e-01]],

         [[-9.1925e-03, -3.3236e-02,  1.0019e-01],
          [ 3.9586e-03,  5.4139e-02,  1.0663e-01],
          [ 1.0968e-01,  3.5723e-02, -3.9510e-01]]],


        [[[ 1.2649e-01, -1.502

In [24]:
print('pls')
sim.get_activation_param_encodings()
print('wk')

pls
wk
