# Post-Training Quantization of a PyTorch Object Detection Model - A Quick-Start Guide

[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/example_pytorch_ssdlite_mobilenetv3.ipynb)

## Overview

This tutorial shows how to quantize a pre-trained object detection model from the torchvision package using the Model-Compression-Toolkit (MCT). We will do so by giving an example of MCT's post-training quantization. As we will see, post-training quantization is a low complexity yet effective quantization method. In this example, we will quantize the model and evaluate the accuracy before and after quantization.

As the pretrained object detection model contains preprocessing and postprocessing layers whose quantization with MCT is out of this notebook's scope, we'll separate these layers from the model to be quantized. These layers will still be included in the evaluation code.

## Summary

In this tutorial we will cover:

1. Post-Training Quantization using MCT.
2. Loading and preprocessing COCO's validation dataset.
3. Loading and preprocessing an unlabeled representative dataset from the COCO trainset.
4. Accuracy evaluation of the floating-point and the quantized models.

## Setup

Install and import the relevant packages:

In [None]:
!pip install -q torch torchvision torchaudio
!pip install -q pycocotools
!pip install -q model-compression-toolkit

In [None]:
import torch
import torchvision
from torchvision.models.detection.ssdlite import SSDLite320_MobileNet_V3_Large_Weights
from torchvision.models.detection.anchor_utils import ImageList
import model_compression_toolkit as mct
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

## Float Model

### Load float model

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Per-image size assigned to the preprocessed batches when the quantized model is evaluated
image_size = (320, 320)

# Pre-trained SSDLite320 + MobileNetV3-Large detector from torchvision.
# Reference results for this tutorial: mAP=0.2131 (float), mAP=0.2007 (quantized)
model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(
    weights=SSDLite320_MobileNet_V3_Large_Weights.DEFAULT,
)

# Switch to evaluation mode and move the model to the selected device
model.eval()
model = model.to(device)
print('model loaded')

### Evaluate float model

#### Create the COCO evaluation metric

In [None]:
def format_results(outputs, img_ids):
    """Convert per-image model outputs into a flat list of COCO-style detections.

    Args:
        outputs: Sequence of per-image dictionaries holding the tensors
            'scores', 'labels' and 'boxes'. Each box is read as four corner
            coordinates [x_min, y_min, x_max, y_max].
        img_ids: Sequence of image ids, where img_ids[i] is the id of the
            image that produced outputs[i].

    Returns:
        A list with one dictionary per detected box, with the keys
        'image_id', 'category_id', 'bbox' ([x, y, width, height]) and 'score'.
        Category, box and score values are native Python int/float, so the
        list can also be dumped to a JSON results file.
    """
    detections = []

    for idx, output in enumerate(outputs):
        image_id = img_ids[idx]
        # detach() keeps the conversion working even if the tensors track gradients
        scores = output['scores'].detach().cpu().numpy()
        labels = output['labels'].detach().cpu().numpy()
        boxes = output['boxes'].detach().cpu().numpy()

        for score, label, box in zip(scores, labels, boxes):
            detections.append({
                "image_id": image_id,
                # numpy scalars are not JSON-serializable, so cast to built-in types
                "category_id": int(label),
                # corner format -> [x, y, width, height]
                "bbox": [float(box[0]), float(box[1]),
                         float(box[2] - box[0]), float(box[3] - box[1])],
                "score": float(score),
            })

    return detections


class CocoEval:
    """Collects detections batch by batch and scores them against COCO ground truth."""

    def __init__(self, path2json):
        """Load the ground-truth annotations from the json file at `path2json`."""
        # Detections gathered so far, already reformatted by format_results
        self.all_detections = []

        # Ground-truth annotations
        self.coco_gt = COCO(path2json)

    def add_batch_detections(self, outputs, targets):
        """Reformat the outputs of one batch and append them to the collected detections.

        Args:
            outputs: Per-image model outputs, aligned with `targets`.
            targets: Per-image annotation lists; the image id is read from
                the first annotation of each list.
        """
        # An image whose target list is empty has no annotation to read the id from,
        # so its output is left out of the collected detections.
        kept = [(t[0]['image_id'], o) for t, o in zip(targets, outputs) if len(t) > 0]
        img_ids = [image_id for image_id, _ in kept]
        _outs = [output for _, output in kept]

        self.all_detections.extend(format_results(_outs, img_ids))

    def result(self):
        """Run the COCO bbox evaluation on the collected detections and return its stats."""
        # Wrap the collected detections as a COCO results object
        self.coco_dt = self.coco_gt.loadRes(self.all_detections)
        coco_eval = COCOeval(self.coco_gt, self.coco_dt, 'bbox')

        # evaluate -> accumulate -> summarize, in that order
        for step in (coco_eval.evaluate, coco_eval.accumulate, coco_eval.summarize):
            step()

        stats = coco_eval.stats
        # The first entry of the stats is reported as the mAP
        print("mAP: {:.4f}".format(stats[0]))

        return stats

    def reset(self):
        """Forget all detections collected so far."""
        self.all_detections = list()

#### Evaluate float model

In [None]:
# Placeholder paths: replace with the local COCO val2017 images folder and its
# instances annotation json before running the evaluation
EVAL_DATASET_FOLDER = '/path/to/coco/evaluation/images/val2017'
EVAL_DATASET_ANNOTATION_FILE = '/path/to/coco/annotations/instances_val2017.json'


# The float model takes a list of images in their original shapes and preprocesses them
# internally, so the batch is kept as plain Python lists instead of a stacked tensor
def collate_fn(batch_input):
    """Split a batch of (image, target) samples into a list of images and a list of targets."""
    images, targets = [], []
    for sample in batch_input:
        images.append(sample[0])
        targets.append(sample[1])
    return images, targets


# COCO evaluation dataset: images are only converted to tensors here, since the float
# model preprocesses them internally
coco_eval = torchvision.datasets.CocoDetection(
    root=EVAL_DATASET_FOLDER,
    annFile=EVAL_DATASET_ANNOTATION_FILE,
    transform=torchvision.transforms.ToTensor(),
)
batch_size = 50
data_loader = torch.utils.data.DataLoader(
    coco_eval,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_fn,
)

# Metric object that accumulates the detections of the whole evaluation set
coco_metric = CocoEval(EVAL_DATASET_ANNOTATION_FILE)

# Go over the evaluation set batch by batch
for batch_idx, (images, targets) in enumerate(data_loader):
    # Inference on the batch, without tracking gradients
    images = [image.to(device) for image in images]
    with torch.no_grad():
        outputs = model(images)

    # The float model outputs are already post-processed (boxes, scores & classes),
    # so they go straight into the metric object
    coco_metric.add_batch_detections(outputs, targets)

    # Progress report once every 10 batches
    if (batch_idx+1) % 10 != 0:
        continue
    print(f'processed {(batch_idx+1)*data_loader.batch_size} images')

# Run the COCO evaluation and report the float model mAP
print("Float model mAP: {:.4f}".format(coco_metric.result()[0]))


## Quantize Model

### Extract model to be quantized

Extract the float model's backbone and head, and construct a torch model that contains only them.

In [None]:
class SDD4Quant(torch.nn.Module):
    """Wrapper that exposes only the backbone and head of a detection model.

    The forward pass mirrors the float model's forward function with the
    preprocess and postprocess code removed.
    """

    def __init__(self, in_sdd, *args, **kwargs):
        """Register the float model `in_sdd` as the sub-module `base`."""
        super().__init__(*args, **kwargs)
        # The whole float model is kept, but forward() only runs its "backbone" & "head"
        self.add_module("base", in_sdd)

    def forward(self, x):
        """Run the backbone on `x` and return the head outputs computed from its features."""
        # The backbone returns a mapping of feature maps; the head takes them as a list
        feature_maps = self.base.backbone(x)
        return self.base.head(list(feature_maps.values()))


# Wrap the loaded float model; this backbone+head module is the one handed to the quantizer
model4quant = SDD4Quant(model)

### Extract preprocess and postprocess

Extract the preprocess and postprocess functions from the float model object, and construct separate preprocess and postprocess functions for the representative dataset and evaluation code


Note: the MCT output model flattens the float model output data structure to a list, so the PostProcess manually rebuilds it as the original data structure (a dictionary).

In [None]:
def preprocess(image, targets):
    """Apply the float model's own preprocessing to a single image.

    Args:
        image: Input image. Assumes an object whose `.size` is (width, height),
            e.g. a PIL image — TODO confirm against the dataset in use.
        targets: Ground-truth annotations of the image, passed through untouched.

    Returns:
        A tuple of the preprocessed image tensor and a dictionary with the
        annotations under 'gt' and the reversed original `image.size` under 'img_size'.
    """
    # The original size is needed later by the postprocess, so record it before the transform
    original_size = list(image.size[::-1])

    # Reuse the float model's transform on a single-image batch and take that image back out
    tensor_image = torchvision.transforms.ToTensor()(image)
    transformed = model.transform([tensor_image])[0]
    return transformed.tensors[0, ...], {'gt': targets, 'img_size': original_size}


# The postprocess is the code copied from the float model forward function.
# These layers are not quantized.
class PostProcess:
    """Turns raw head outputs into final detections using the float model's postprocess code."""

    def __init__(self):
        # Zero placeholders handed to the anchor generator as the feature maps.
        # NOTE(review): presumably only their spatial sizes matter and they match the
        # backbone's feature resolutions for the model's input size — confirm.
        grid_sizes = (20, 10, 5, 3, 2, 1)
        self.features = [torch.zeros(1, 1, size, size) for size in grid_sizes]

    def __call__(self, head_outputs, image_list, original_image_sizes):
        """Generate anchors, decode the head outputs and map the detections to the original sizes.

        Args:
            head_outputs: Either the head's dictionary ('bbox_regression', 'cls_logits')
                or the flattened two-item sequence returned by the MCT model.
            image_list: ImageList of the preprocessed batch.
            original_image_sizes: Sizes of the images before preprocessing.

        Returns:
            The detections returned by the float model's `transform.postprocess`.
        """
        anchors = [anchor.to(device)
                   for anchor in model.anchor_generator(image_list, self.features)]

        # MCT flattens the head outputs to a list, while the postprocess functions expect a
        # dictionary. The item whose last dimension is 4 is taken as the box regression.
        if not isinstance(head_outputs, dict):
            first, second = head_outputs[0], head_outputs[1]
            if first.shape[-1] == 4:
                box_regression, class_logits = first, second
            else:
                box_regression, class_logits = second, first
            head_outputs = {"bbox_regression": box_regression,
                            "cls_logits": class_logits}

        # Float model postprocess functions that handle box regression and NMS
        resized_sizes = image_list.image_sizes
        detections = model.postprocess_detections(head_outputs, anchors, resized_sizes)
        return model.transform.postprocess(detections, resized_sizes, original_image_sizes)


# Single postprocess instance, reused for every batch when evaluating the quantized model
postprocess = PostProcess()

### Dataset preparation

Assuming we've downloaded the COCO dataset to a folder, let's set the folder path:

In [None]:
# Placeholder paths: replace with the local COCO train2017 images folder and its
# instances annotation json (used to build the representative dataset)
TRAIN_DATASET_FOLDER = '/path/to/coco/training/images/train2017'
TRAIN_DATASET_ANNOTATION_FILE = '/path/to/coco/annotations/instances_train2017.json'

Now, let's create two dataset loader objects:
* Train DataLoader that we'll use to create the representative dataset for the quantization calibration.
* Evaluation DataLoader that we'll use to evaluate the quantized model.

Note that both objects include the "preprocess" function defined above.

In [None]:
def train_collate_fn(batch_input):
    """Collate (image, target) samples into one stacked image tensor and a list of targets.

    The quantized model takes a single tensor [B, C, H, W], so the images
    (all of the same shape after preprocessing) are stacked along a new first dimension.
    """
    image_tensors, targets = [], []
    for sample in batch_input:
        image_tensors.append(sample[0])
        targets.append(sample[1])
    return torch.stack(image_tensors), targets


# Training images: used only as an unlabeled source for the representative dataset.
# `transforms=preprocess` applies the preprocess function to each (image, target) pair.
coco_train = torchvision.datasets.CocoDetection(
    root=TRAIN_DATASET_FOLDER,
    annFile=TRAIN_DATASET_ANNOTATION_FILE,
    transforms=preprocess,
)
train_loader = torch.utils.data.DataLoader(
    coco_train,
    batch_size=16,
    shuffle=False,
    num_workers=0,
    collate_fn=train_collate_fn,
)

# Evaluation images: same preprocessing and collation, used to evaluate the quantized model
coco_eval = torchvision.datasets.CocoDetection(
    root=EVAL_DATASET_FOLDER,
    annFile=EVAL_DATASET_ANNOTATION_FILE,
    transforms=preprocess,
)
eval_loader = torch.utils.data.DataLoader(
    coco_eval,
    batch_size=50,
    shuffle=False,
    num_workers=0,
    collate_fn=train_collate_fn,
)

### Quantize the model

In [None]:
def get_representative_dataset(n_iter):
    """Build the representative-dataset generator function used for calibration.

    Args:
        n_iter: Maximum number of batches to draw from `train_loader` on each pass.

    Returns:
        A zero-argument function. Each call starts a fresh pass over
        `train_loader` and yields up to `n_iter` items, each a one-element
        list holding the images of one batch (the targets are dropped).
    """
    from itertools import islice

    def representative_dataset():
        # islice ends cleanly when the loader holds fewer than n_iter batches; a bare
        # next() inside a generator would surface that case as a RuntimeError.
        for batch in islice(train_loader, n_iter):
            yield [batch[0]]

    return representative_dataset


# Calibration data: up to 20 batches drawn from the training loader
calibration_data_gen = get_representative_dataset(20)

# Post-training quantization of the backbone+head module. Only the first returned value
# (the quantized model) is used.
# NOTE(review): newer MCT releases may expose this API under a different name
# (without the `_experimental` suffix) — confirm against the installed version.
quant_model, _ = mct.ptq.pytorch_post_training_quantization_experimental(
    model4quant,
    calibration_data_gen,
)

### Evaluate quantized model

In [None]:
# Fresh metric object for the quantized model run
coco_metric = CocoEval(EVAL_DATASET_ANNOTATION_FILE)
for batch_idx, (images, targets) in enumerate(eval_loader):
    # Run inference on the (already preprocessed) batch
    with torch.no_grad():
        outputs = quant_model(images.to(device))

    # Original image sizes recorded by the preprocess function; the postprocess needs them
    image_hw = [t['img_size'] for t in targets]
    # Every preprocessed image in the batch is assigned the same size, image_size
    image_list = ImageList(images, [image_size] * images.shape[0])
    detections = postprocess(outputs, image_list, image_hw)

    coco_metric.add_batch_detections(detections, [t['gt'] for t in targets])
    if (batch_idx+1) % 10 == 0:
        # Use the batch size of the loader being iterated here (eval_loader), not the
        # loader from the float-model evaluation cell
        print(f'processed {(batch_idx+1)*eval_loader.batch_size} images')

# Print mAP results
print("Quantized model mAP: {:.4f}".format(coco_metric.result()[0]))
