In [1]:
import json
def read_text_file(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    return [l.strip() for l in lines]

def read_annot_file(file_path):
    annots = read_text_file(file_path)
    annots = {l.split(" ")[0]: int(l.split(" ")[1]) for l in annots}
    return annots

In [2]:
from torchvision import transforms
ANNOT_FILE = "/datasets/imagenet1k/tags.txt"
DATASET_DIR = '/datasets/imagenet1k/new_images'
image_size = 224
images_mean = [0.485, 0.456, 0.406]
images_std  = [0.229, 0.224, 0.225]

normalize = transforms.Normalize(mean=images_mean,
                                std=images_std)
transforms = transforms.Compose([
            transforms.Resize(image_size + 24),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize])
annots = read_annot_file(ANNOT_FILE)

In [3]:
import os
from glob import glob
import torch
from PIL import Image

class ImagenetDataset(torch.utils.data.Dataset):
    def __init__(self, dataset_dir, annots_dict, transform=None):
        self.dataset_dir = dataset_dir
        self.image_paths = glob(os.path.join(dataset_dir, '*.JPEG'))
        self.transform = transform
        self.annots_dict = annots_dict
        

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        # print(f"{image_path=} \t {idx=}")
        image = Image.open(image_path)
        image = image.convert("RGB")
        image = self.transform(image) if self.transform else image
        label = self.annots_dict[os.path.basename(image_path)]
        return image, label

In [4]:
dataset = ImagenetDataset(DATASET_DIR, annots, transform=transforms)
dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=1, shuffle=False)

In [5]:
import torch.nn as nn
from tqdm import tqdm

import torch.utils

def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""

    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))

    return res

def evaluate(model: nn.Module, data_loader: torch.utils.data.DataLoader, iterations: int = None, use_cuda: bool = False) -> float:
    """
    Evaluate the specified model using the specified number of samples batches from the
    validation set.
    :param model: The model to be evaluated.
    :param iterations: The number of batches to use from the validation set.
    :param use_cuda: If True then use a GPU for inference.
    :return: The accuracy for the sample with the maximum accuracy.
    """

    device = torch.device('cpu')
    if use_cuda:
        if torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            print('use_cuda is selected but no cuda device found.')
            raise RuntimeError("Found no CUDA Device while use_cuda is selected")

    if iterations is None:
        print('No value of iteration is provided, running evaluation on complete dataset.')
        iterations = len(data_loader)
    if iterations <= 0:
        print('Cannot evaluate on %d iterations', iterations)

    acc_top1 = 0
    acc_top5 = 0

    print("Evaluating nn.Module for %d iterations with batch_size %d",
                iterations, data_loader.batch_size)

    model = model.to(device)
    model = model.eval()

    with torch.no_grad():
        for i, (input_data, target_data) in tqdm(enumerate(data_loader), total=iterations):
            if i == iterations:
                break
            inputs_batch = input_data.to(device)
            target_batch = target_data.to(device)

            predicted_batch = model(inputs_batch)

            batch_avg_top_1_5 = accuracy(output=predicted_batch, target=target_batch,
                                            topk=(1, 5))

            acc_top1 += batch_avg_top_1_5[0].item()
            acc_top5 += batch_avg_top_1_5[1].item()

    acc_top1 /= iterations
    acc_top5 /= iterations

    print(f"Avg accuracy Top 1: {acc_top1}%\nAvg accuracy Top 5: {acc_top5}%\non validation Dataset")

    return acc_top1

In [6]:
# evaluate(model, dataloader, iterations=5000, use_cuda=False)

---

## 2. Load the model and evaluate to get a baseline FP32 accuracy score

**2.1 Load a pretrained resnet18 model from torchvision.** 

You can load any pretrained PyTorch model instead.

In [7]:
from torchvision.models import resnet18

model = resnet18(pretrained=True)



AIMET quantization simulation requires the model definition to follow certain guidelines. For example, functionals defined in the forward pass should be changed to the equivalent **torch.nn.Module**.
The [AIMET user guide](https://quic.github.io/aimet-pages/releases/latest/user_guide/index.html) lists all these guidelines.

**2.2 Use the following ModelPreparer API call to automate the model definition changes required to comply with the AIMET guidelines.** 

The call uses the graph transformation feature available in PyTorch 1.9+.

In [8]:
from aimet_torch.model_preparer import prepare_model

model = prepare_model(model)

2025-01-02 09:20:44,273 - root - INFO - AIMET
2025-01-02 09:20:44,323 - ModelPreparer - INFO - Functional         : Adding new module for node: {layer1.0.module_add} 
2025-01-02 09:20:44,323 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for node: {layer1.0.module_relu_1} 
2025-01-02 09:20:44,324 - ModelPreparer - INFO - Functional         : Adding new module for node: {layer1.1.module_add_1} 
2025-01-02 09:20:44,324 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for node: {layer1.1.module_relu_1} 
2025-01-02 09:20:44,325 - ModelPreparer - INFO - Functional         : Adding new module for node: {layer2.0.module_add_2} 
2025-01-02 09:20:44,325 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for node: {layer2.0.module_relu_1} 
2025-01-02 09:20:44,325 - ModelPreparer - INFO - Functional         : Adding new module for node: {layer2.1.module_add_3} 
2025-01-02 09:20:44,326 - ModelPreparer - INFO - Reused/Duplicate   : Adding new module for 

---

**2.3 Decide whether to place the model on a CPU or CUDA device.** 

This example uses CUDA if it is available. You can change this logic and force a device placement if needed.

In [9]:
use_cuda = False
if torch.cuda.is_available():
    use_cuda = True
    model.to(torch.device('cuda'))
use_cuda

False

---

**2.4 Compute the floating point 32-bit (FP32) accuracy of this model using the evaluate() routine.**

In [10]:
evaluate(model, dataloader, iterations=5000, use_cuda=False)

Evaluating nn.Module for %d iterations with batch_size %d 5000 1


100%|██████████| 5000/5000 [01:08<00:00, 72.78it/s]

Avg accuracy Top 1: 69.56%
Avg accuracy Top 5: 88.82%
on validation Dataset





69.56

---

## 3. Create a quantization simulation model and determine quantized accuracy

### Fold Batch Norm layers

Before calculating the simulated quantized accuracy using QuantizationSimModel, fold the BatchNorm (BN) layers into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

BN folding improves inference performance on quantized runtimes but can degrade accuracy on these platforms. This step simulates this on-target drop in accuracy. 

**3.1 Use the following code to call AIMET to fold the BN layers in-place on the given model.**

In [11]:
from aimet_torch.batch_norm_fold import fold_all_batch_norms

_ = fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))

[2025-01-02 09:21:53,470] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cpu (auto detect)


In [12]:
len(_)

20

### Create the Quantization Sim Model

**3.2 Use AIMET to create a QuantizationSimModel.**

 In this step, AIMET inserts fake quantization ops in the model graph and configures them.

Key parameters:

- Setting **default_output_bw** to 8 performs all activation quantizations in the model using integer 8-bit precision
- Setting **default_param_bw** to 8 performs all parameter quantizations in the model using integer 8-bit precision
- **num_batches** is the number of batches to use to compute encodings. Only five batches are used here for the sake of speed

See [QuantizationSimModel in the AIMET API documentation](https://quic.github.io/aimet-pages/AimetDocs/api_docs/torch_quantsim.html#aimet_torch.quantsim.QuantizationSimModel.compute_encodings) for a full explanation of the parameters.

In [13]:
from aimet_common.defs import QuantScheme
from aimet_torch.v1.quantsim import QuantizationSimModel

dummy_input = torch.rand(1, 3, 224, 224)    # Shape for each ImageNet sample is (3 channels) x (224 height) x (224 width)
if use_cuda:
    dummy_input = dummy_input.cuda()

sim = QuantizationSimModel(model=model,
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           dummy_input=dummy_input,
                           default_output_bw=8,
                           default_param_bw=8,
                           config_file="/home/shayan/Desktop/aimet/my_config.json")

2025-01-02 09:21:54,515 - Quant - INFO - Unsupported op type Squeeze
2025-01-02 09:21:54,516 - Quant - INFO - Unsupported op type Mean
2025-01-02 09:21:54,517 - Quant - INFO - Selecting DefaultOpInstanceConfigGenerator to compute the specialized config. hw_version:default


In [14]:
from tqdm import tqdm

def pass_calibration_data(sim_model, use_cuda):
    dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=1, shuffle=False)
    batch_size = dataloader.batch_size

    if use_cuda:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    sim_model.eval()
    samples = 1000

    batch_cntr = 0
    idx = 0
    with torch.no_grad():
        for input_data, target_data in tqdm(dataloader):
            # if "cf135f199d8c7a9d0dce9aa35acfb4c70c14e0aa" not in path[0]:
            #     continue
            # if "cf" in path[0]:
            inputs_batch = input_data.to(device)
            sim_model(inputs_batch)
            batch_cntr += 1
            if batch_cntr * batch_size >= samples:
                break


In [15]:
sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=use_cuda)

  2%|▏         | 999/50000 [01:12<59:36, 13.70it/s]  


### Int 8 quantization accuracy

In [16]:
evaluate(sim.model, dataloader, iterations=5000, use_cuda=False)

Evaluating nn.Module for %d iterations with batch_size %d 5000 1


100%|██████████| 5000/5000 [06:55<00:00, 12.05it/s]

Avg accuracy Top 1: 69.22%
Avg accuracy Top 5: 88.5%
on validation Dataset





69.22

### 4 BIT Quantization accuracy

In [16]:
evaluate(sim.model, dataloader, iterations=5000, use_cuda=False)

Evaluating nn.Module for %d iterations with batch_size %d 5000 1


100%|██████████| 5000/5000 [07:02<00:00, 11.83it/s]

Avg accuracy Top 1: 1.38%
Avg accuracy Top 5: 6.62%
on validation Dataset





1.38