In [26]:
# Imports
import os
import sys
import tempfile
import time

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets

# for importing quantization specific libs
import torch.quantization
import torchvision.models.quantization as qmodels

In [27]:
# Tutorial outline, built around a quantization-ready ResNet-18:
#   1. train on CIFAR-10
#   2. evaluate with fp32 weights
#   3. compress the network
#   4. evaluate again with int8 weights
model = qmodels.resnet18(pretrained=True)

# Swap the final fully-connected layer for one with 10 outputs,
# keeping the input width of the layer it replaces.
feats = model.fc.in_features
model.fc = nn.Linear(in_features=feats, out_features=10)

# Training and the fp32 evaluation both feed CUDA tensors to the model.
model = model.cuda()

In [28]:
def print_size_of_model(model):
    """Print the on-disk size, in megabytes, of ``model``'s saved state_dict.

    The state_dict is serialized with ``torch.save`` into a private temporary
    directory, measured with ``os.path.getsize`` and then discarded. Using a
    scratch directory (instead of a fixed ``temp.p`` in the working directory)
    means an unrelated file with that name is never overwritten or deleted,
    and the scratch file is cleaned up even if saving or measuring raises.

    Args:
        model: any object exposing ``state_dict()`` (e.g. an ``nn.Module``).

    Returns:
        None. The size is reported on stdout as ``Size (MB): <value>``.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        # Keep the original base name so the serialized archive is unchanged.
        path = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), path)
        print('Size (MB):', os.path.getsize(path)/1e6)

# Size of the saved state_dict before any quantization has been applied.
print_size_of_model(model)

Size (MB): 44.808761


In [29]:
# CIFAR-10 input pipeline: convert each image to a tensor, then normalize
# every channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split: shuffled, 4 images per mini-batch.
trainset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# Test split: same preprocessing, read in a fixed order.
testset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

# Human-readable names for the 10 classes.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


In [30]:
import torch.optim as optim

# Loss: cross-entropy between the model's class scores and the labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every parameter of the model, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [31]:
model.train()

# Only a few epochs are run to keep the demo fast;
# train for more epochs to get better accuracy.
num_epochs = 5
log_every = 2000  # report the mean loss (and checkpoint) every N mini-batches

for epoch in range(num_epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.cuda(), labels.cuda()
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % log_every == log_every - 1:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / log_every))
            running_loss = 0.0

            # periodic checkpoint, written together with each progress line
            torch.save(model.state_dict(), './model.pth')

# Save once more after the very last optimizer step. The periodic checkpoint
# only fires on multiples of `log_every`, so without this the updates from any
# batches after the last such multiple would be missing from './model.pth',
# which is the file the evaluation and quantization cells reload.
torch.save(model.state_dict(), './model.pth')

print('Finished Training')

[1,  2000] loss: 2.485
[1,  4000] loss: 2.407
[1,  6000] loss: 2.368
[1,  8000] loss: 2.308
[1, 10000] loss: 2.274
[1, 12000] loss: 2.173
[2,  2000] loss: 2.086
[2,  4000] loss: 2.062
[2,  6000] loss: 2.007
[2,  8000] loss: 2.006
[2, 10000] loss: 1.924
[2, 12000] loss: 1.940
[3,  2000] loss: 1.864
[3,  4000] loss: 1.828
[3,  6000] loss: 1.770
[3,  8000] loss: 1.739
[3, 10000] loss: 1.733
[3, 12000] loss: 1.682
[4,  2000] loss: 1.637
[4,  4000] loss: 1.596
[4,  6000] loss: 1.565
[4,  8000] loss: 1.598
[4, 10000] loss: 1.572
[4, 12000] loss: 1.584
[5,  2000] loss: 1.534
[5,  4000] loss: 1.515
[5,  6000] loss: 1.507
[5,  8000] loss: 1.500
[5, 10000] loss: 1.506
[5, 12000] loss: 1.472
Finished Training


In [32]:
# Restore the checkpoint written during training and switch to eval mode.
model.load_state_dict(torch.load('./model.pth'))
model.eval()

# Top-1 accuracy of the fp32 model over the whole test loader, on the GPU.
correct, total = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        images = images.cuda()
        labels = labels.cuda()
        outputs = model(images)
        # index of the highest score along dim 1 is the predicted class
        predicted = outputs.data.max(1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%'
      % (100 * correct / total))

Accuracy of the network on the 10000 test images: 50 %


In [33]:
# quantization part
# Start again from the saved fp32 checkpoint, in eval mode.
model.load_state_dict(torch.load('./model.pth'))
model.eval()
# Fuse layers in place. In the module printout produced by this cell the
# convolutions appear as ConvReLU2d and the bn/relu slots as Identity.
model.fuse_model()
# Move the model to the CPU; the calibration and int8 evaluation cells feed
# it CPU tensors. NOTE(review): presumably the quantized kernels used here
# are CPU-only -- confirm against the PyTorch quantization docs.
model.cpu()

# Specify quantization configuration
# Start with simple min/max range estimation and per-tensor quantization of weights
model.qconfig = torch.quantization.default_qconfig
print(model.qconfig)
# Attach observers to the model in place (inplace=True). As the last
# expression of the cell, the returned module is also displayed; the
# observers still read min_val=inf / max_val=-inf at this point because no
# data has been passed through them yet.
torch.quantization.prepare(model, inplace=True)

QConfig(activation=functools.partial(<class 'torch.quantization.observer.MinMaxObserver'>, reduce_range=True), weight=functools.partial(<class 'torch.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric))


  reduce_range will be deprecated in a future release of PyTorch."


QuantizableResNet(
  (conv1): ConvReLU2d(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU()
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (bn1): Identity()
  (relu): Identity()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): QuantizableBasicBlock(
      (conv1): ConvReLU2d(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
      )
      (bn1): Identity()
      (relu): Identity()
      (conv2): Conv2d(
        64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
        (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
      )
      (bn2): Identity()
      (add_relu): FloatFunctional(
        (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
      )
    )
    (1): QuantizableBa

In [34]:
def calibrate(model, data_loader, neval_batches):
    """Run up to ``neval_batches`` batches through ``model`` without gradients.

    Only the forward pass matters here: the outputs are discarded, so the
    call is useful purely for whatever the model records while it runs.

    Args:
        model: callable applied to the first element of every batch.
        data_loader: iterable yielding ``(image, target)`` pairs; the target
            is ignored.
        neval_batches: maximum number of batches to feed. Zero or a negative
            value feeds nothing (previously one batch was still processed,
            because the limit was only checked after the forward pass).

    Returns:
        None.
    """
    if neval_batches <= 0:
        return None

    with torch.no_grad():
        for batch_idx, (image, _target) in enumerate(data_loader, start=1):
            model(image)
            if batch_idx >= neval_batches:
                break
    return None

In [35]:
# Calibrate with the training set using only 32 mini-batches
# (trainloader uses batch_size=4, so that is 128 images, not 32)
calibrate(model, trainloader, neval_batches=32)
print('Post Training Quantization: Calibration done')

Post Training Quantization: Calibration done


In [36]:
# Convert to quantized model
# inplace=True: the calibrated `model` object itself is converted; the
# return value of convert() is not kept.
torch.quantization.convert(model, inplace=True)
print('Post Training Quantization: Convert done')

Post Training Quantization: Convert done


In [37]:
# Measure the converted model with the same helper used for the fp32 model,
# so the two printed sizes can be compared directly.
print("Size of model after quantization")
print_size_of_model(model)

Size of model after quantization
Size (MB): 11.226885


In [38]:
# testing the int8 model
# Batches are used exactly as the loader yields them: unlike the fp32
# evaluation, nothing is moved to the GPU before the forward pass.
correct, total = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        # index of the highest score along dim 1 is the predicted class
        predicted = outputs.data.max(1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the quantized network on the 10000 test images: %d %%'
      % (100 * correct / total))

Accuracy of the quantized network on the 10000 test images: 45 %
