# Post-trainin-Quantization for CIFAR10
We will do the following steps in order
1. Load and Normalize CIFAR10
2. Define Quant Model
3. Load Pretrained Model and Test
4. Calibration
5. Test Quant Model

In [13]:
# 导入相关的库函数
import argparse
import os
import random
import shutil
import time
import warnings
warnings.filterwarnings("ignore")
from enum import Enum
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
from torch.optim.lr_scheduler import StepLR
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from model import resnet20

from sparsebit.quantization import QuantModel, parse_qconfig

## 1.Load and Normalize CIFAR10

In [14]:
transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)
trainset = datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
testset = datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=128,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

Files already downloaded and verified
Files already downloaded and verified


## 2.Load Pretrained Model and Test

In [15]:
# 模型采用resnet20
model = resnet20(num_classes=10)

PATH = "./pretrain_model.pth"
model.load_state_dict(torch.load(PATH))
if torch.cuda.is_available():
    model.cuda()
# 在float模型上进行精度的测试
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        image,labels = data
        if torch.cuda.is_available():
            image,labels = image.cuda(),labels.cuda()
        outputs = model(image)
        _,predicted = torch.max(outputs.data,1)
        total+=labels.size(0)
        correct += (predicted == labels).sum().item()
pretrain_acc1 = 100 * correct / total 
print(f'Accuracy of the Float Model on the 10000 test images: {pretrain_acc1} %')

Accuracy of the Float Model on the 10000 test images: 91.42 %


## 3. Define QuantModel
- use API from sparsebit

In [16]:
# 采用qconfig_file中定义的量化参数
qconfig_file = "qconfig.yaml"
qconfig = parse_qconfig(qconfig_file)
# 定义量化模型
qmodel = QuantModel(model, config=qconfig)

opcode         name                   target                   args                                     kwargs
-------------  ---------------------  -----------------------  ---------------------------------------  --------
placeholder    x                      x                        ()                                       {}
call_module    conv1                  conv1                    (x,)                                     {}
call_module    relu                   relu                     (conv1,)                                 {}
call_module    layer1_0_conv1         layer1.0.conv1           (relu,)                                  {}
call_module    layer1_0_relu          layer1.0.relu            (layer1_0_conv1,)                        {}
call_module    layer1_0_conv2         layer1.0.conv2           (layer1_0_relu,)                         {}
call_function  add                    <built-in function add>  (layer1_0_conv2, relu)                   {}
call_module    layer1_0_rel

## 4.Calibration
- Prepare Dataset for calibration,here we adopt 256 images
- Calibrate Model using calibration dataset

In [17]:
# Set calibration
qmodel.prepare_calibration()
# Forward Calibrate
calibration_size = 256
cur_size = 0
if torch.cuda.is_available():
    qmodel.cuda()
for data,target in trainloader:
    if torch.cuda.is_available():
        data,target = data.cuda(),target.cuda()
    res = qmodel(data)
    cur_size += data.shape[0]
    if cur_size >= calibration_size:
        break
qmodel.calc_qparams()

## 5.Test Quant Model
- Test PTQ Model in Testset, compare with float model
- export onnx

In [18]:
# Set Quantization
qmodel.set_quant(w_quant=True,a_quant=True)
correct = 0
total = 0
qmodel.eval()
with torch.no_grad():
    for data in testloader:
        image,labels = data
        if torch.cuda.is_available():
            image,labels= image.cuda(),labels.cuda()
        outputs = qmodel(image)
        _,predicted = torch.max(outputs.data,1)
        total+=labels.size(0)
        correct += (predicted == labels).sum().item()
acc1 = 100 * correct / total
print(f'Accuracy of the Quant Model on the 10000 test images: {acc1} %')
print(f'Accuracy of the Float   Model on the 10000 test images: {pretrain_acc1} %')

# 导出onnx
qmodel.export_onnx(torch.randn(1, 3, 224, 224), name="qresnet20.onnx")

Accuracy of the Quant Model on the 10000 test images: 91.36 %
Accuracy of the Float   Model on the 10000 test images: 91.42 %
