# Intro

This notebook is based on 'ImageNet training in PyTorch' [example](https://github.com/pytorch/examples/blob/master/imagenet/main.py).

The goal of this notebook is not to reach the best possible quantization baselines, but to demonstrate simple use cases of [NNCF](https://github.com/openvinotoolkit/nncf) with PyTorch. For more advanced usage, refer to these [examples](https://github.com/openvinotoolkit/nncf/tree/develop/examples).

To make downloading and training fast, we suggest using the ResNet-18 model with the Tiny ImageNet dataset, but both can be changed.

# Install prerequisites

Create a separate Python* virtual environment and install the following prerequisites into it:

In [36]:
!pip install nncf[torch]
!pip install openvino openvino-dev



Import NNCF from your Python* code

In [37]:
import os
import time
from urllib.request import urlretrieve

import torch
import nncf  # Important - should be imported directly after torch
from nncf import NNCFConfig
from nncf.torch import create_compressed_model
from nncf.torch import register_default_init_args

import torch.nn as nn
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms

Download Tiny ImageNet dataset
* 100k images of shape 3x64x64
* 200 different classes: snakes, spiders, cats, trucks, grasshoppers, gulls, etc.

In [38]:
def download_tiny_imagenet_200(path,
                        url='http://cs231n.stanford.edu/tiny-imagenet-200.zip',
                        tarname='tiny-imagenet-200.zip'):
    """Download a zip archive and unpack it inside ``path``.

    Args:
        path: Directory that receives both the downloaded archive and the
            extracted files. It is created (including parents) if missing.
        url: Location of the zip archive to fetch.
        tarname: File name under which the archive is stored inside ``path``.
    """
    import zipfile

    os.makedirs(path, exist_ok=True)
    archive_path = os.path.join(path, tarname)
    urlretrieve(url, archive_path)
    print(archive_path)
    # Extract next to the archive rather than into the current working directory,
    # and let the context manager close the file even if extraction fails.
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(path)
  
# Dataset root used by the later cells; expected to appear once the archive has
# been unpacked into the current directory.
DATASET_DIR = 'tiny-imagenet-200'
# Skip the download when the dataset folder is already present.
if not os.path.exists(DATASET_DIR):
    download_tiny_imagenet_200('.')

Connect to Google Drive to save and access the model pretrained on the Tiny ImageNet dataset.

In [39]:
# Colab-specific step: the google.colab package is provided by the Colab runtime.
# Mounts the user's Google Drive under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [40]:
# Directory intended for the saved model checkpoint (here a folder on the mounted
# Google Drive); any other location can be used instead.
# NOTE(review): no cell visible in this notebook reads PATH — confirm it is still needed.
PATH = '/content/drive/MyDrive/Colab Notebooks/nncf/'

## Train function

In [41]:
def train(train_loader, model, criterion, optimizer, epoch, print_frequency=50):
    """Run one training epoch and print running statistics.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches; must support ``len()``.
        model: Network to optimize; it is switched to training mode.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` run once per batch.
        epoch: Epoch index, used only in the progress prefix.
        print_frequency: Progress is printed every ``print_frequency`` batches.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':6.3f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    # Query CUDA availability once instead of on every batch.
    use_cuda = torch.cuda.is_available()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            images = images.cuda()
            target = target.cuda()

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do opt step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_frequency == 0:
            progress.display(i)


## Validate function

In [42]:
def validate(val_loader, model, criterion, print_frequency=10):
    """Evaluate the model on a data loader and print running statistics.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches; must support ``len()``.
        model: Network to evaluate; it is switched to evaluation mode.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        print_frequency: Progress is printed every ``print_frequency`` batches.

    Returns:
        The running average of the top-1 accuracy meter (``top1.avg``), in percent.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':6.3f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    # Query CUDA availability once instead of on every batch.
    use_cuda = torch.cuda.is_available()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if use_cuda:
                images = images.cuda()
                target = target.cuda()

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_frequency == 0:
                progress.display(i)

        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))
    return top1.avg

## Helpers

In [43]:
class AverageMeter(object):
    """Tracks the latest value of a metric together with its weighted running mean.

    ``name`` labels the metric and ``fmt`` is the format spec (for example
    ``':6.3f'``) applied to both the latest value and the mean when printing.
    """

    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Zero the latest value, the mean, the weighted sum and the sample count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed over ``n`` samples and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

    def __str__(self):
        # Produces "<name> <val> (<avg>)", with self.fmt applied to both numbers.
        template = ''.join(['{name} {val', self.fmt, '} ({avg', self.fmt, '})'])
        return template.format(**vars(self))


class ProgressMeter(object):
    """Prints one tab-separated progress line: a batch counter followed by each meter."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the prefix, the ``[batch/total]`` counter and ``str()`` of every meter."""
        header = self.prefix + self.batch_fmtstr.format(batch)
        print('\t'.join([header, *map(str, self.meters)]))

    def _get_batch_fmtstr(self, num_batches):
        # Pad the running index to the width of the total so the columns line up.
        width = len(str(num_batches // 1))
        slot = '{:%dd}' % width
        return '[%s/%s]' % (slot, slot.format(num_batches))


def accuracy(output, target, topk=(1,)):
    """Top-k accuracy, in percent, for every k listed in ``topk``.

    ``output`` holds per-class scores with classes along dimension 1 and
    ``target`` holds the true class index of each sample. The result is a list
    with one single-element tensor per requested k.
    """
    with torch.no_grad():
        largest_k = max(topk)
        n_samples = target.size(0)

        # Indices of the best-scoring classes, transposed so that rank is the first dim.
        ranked = output.topk(largest_k, dim=1, largest=True, sorted=True)[1].t()
        hits = ranked.eq(target.view(1, -1).expand_as(ranked))

        percent_per_hit = 100.0 / n_samples
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(percent_per_hit)
            for k in topk
        ]

In [44]:
def adjust_learning_rate(optimizer, epoch, init_lr):
    """Apply a step schedule: ``init_lr`` shrinks tenfold after each block of 30 epochs.

    The resulting rate is written to the ``'lr'`` entry of every parameter group
    of ``optimizer``.
    """
    completed_steps = epoch // 30
    new_lr = init_lr * (0.1 ** completed_steps)
    for group in optimizer.param_groups:
        group['lr'] = new_lr


# NOTE(review): second, identical definition of accuracy(). The Helpers cell already
# defines it, and running this cell silently rebinds the name. Consider keeping one copy.
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k

    Returns a list with one single-element tensor per entry of ``topk``; each holds
    the percentage of samples whose target is among the k highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # pred: indices of the maxk best classes per sample, transposed to (maxk, batch)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # count the hits within the first k ranks and convert to a percentage
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

# Pre-train floating-point model

In [45]:
num_classes = 200  # 200 is for tiny-imagenet, default is 1000 for imagenet
init_lr = 1e-4
batch_size = 256
image_size = 64
epochs = 4

# Use the GPU only when one is present, matching the availability check that
# train() and validate() apply before moving each batch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# create model
model = models.resnet18(pretrained=True)
# Replace the last FC layer so it outputs num_classes logits. The input width is
# read from the existing layer instead of being hard-coded.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes, bias=True)
model.to(device)

# Data loading code
train_dir = os.path.join(DATASET_DIR, 'train')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])

dataset = datasets.ImageFolder(
    train_dir,
    transforms.Compose([
        transforms.Resize(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))

# Hold out 20% of the images for validation. The sizes are derived from the
# dataset length (80000/20000 for the 100k training images) so another dataset
# does not break the split, and the generator is seeded so re-runs use the same split.
# NOTE(review): both subsets share the transform above, so the random flip is also
# applied to validation images.
val_size = len(dataset) // 5
train_size = len(dataset) - val_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size],
    generator=torch.Generator().manual_seed(0))

# Pinned host memory only helps host-to-GPU copies, so enable it only with CUDA.
pin_memory = device.type == 'cuda'

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True,
    num_workers=4, pin_memory=pin_memory)

val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False,
    num_workers=4, pin_memory=pin_memory)

# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=init_lr)

acc1 = 0
# Training loop
for epoch in range(0, epochs):
    # run a single training epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion)
print(f'Accuracy of FP32 model: {acc1:.3f}')

  cpuset_checked))


Epoch: [0][  0/313]	Time  0.945 ( 0.945)	Data  0.844 ( 0.844)	Loss  5.665 ( 5.665)	Acc@1   0.00 (  0.00)	Acc@5   3.12 (  3.12)
Epoch: [0][ 50/313]	Time  0.133 ( 0.157)	Data  0.000 ( 0.048)	Loss  4.283 ( 5.002)	Acc@1  17.97 (  6.40)	Acc@5  36.33 ( 15.91)
Epoch: [0][100/313]	Time  0.200 ( 0.153)	Data  0.140 ( 0.044)	Loss  3.594 ( 4.453)	Acc@1  21.88 ( 14.05)	Acc@5  52.34 ( 30.28)
Epoch: [0][150/313]	Time  0.137 ( 0.153)	Data  0.004 ( 0.043)	Loss  2.797 ( 4.047)	Acc@1  37.89 ( 20.00)	Acc@5  68.36 ( 39.62)
Epoch: [0][200/313]	Time  0.237 ( 0.152)	Data  0.182 ( 0.042)	Loss  2.670 ( 3.747)	Acc@1  39.06 ( 24.30)	Acc@5  69.92 ( 46.04)
Epoch: [0][250/313]	Time  0.127 ( 0.150)	Data  0.006 ( 0.040)	Loss  2.664 ( 3.526)	Acc@1  38.28 ( 27.51)	Acc@5  67.19 ( 50.38)
Epoch: [0][300/313]	Time  0.129 ( 0.148)	Data  0.010 ( 0.039)	Loss  2.309 ( 3.348)	Acc@1  48.05 ( 30.19)	Acc@5  72.27 ( 53.77)
Test: [ 0/79]	Time  0.862 ( 0.862)	Loss  2.188 ( 2.188)	Acc@1  55.47 ( 55.47)	Acc@5  73.83 ( 73.83)
Test: [10/7

# Create and initialize quantization of the model

In [46]:
# Minimal NNCF configuration given inline as a dict: "input_info" carries the
# sample input size and "compression" selects the algorithm to apply.
nncf_config_dict = {
    "input_info": {
        "sample_size": [1, 3, image_size, image_size]
    },
    "compression": {
        "algorithm": "quantization",  # specify the algorithm here
    }
}
# Build the NNCFConfig object from the dict above
nncf_config = NNCFConfig.from_dict(nncf_config_dict)
# Provide data loaders for compression algorithm initialization, if necessary
nncf_config = register_default_init_args(nncf_config, train_loader)

# Returns a compression controller and the wrapped model; `model` is rebound to the latter
compression_ctrl, model = create_compressed_model(model, nncf_config)

# evaluate on validation set after initialization of quantization
acc1 = validate(val_loader, model, criterion)
print(f'Accuracy of initialized INT8 model: {acc1:.3f}')


  cpuset_checked))


INFO:nncf:Please, provide execution parameters for optimal model initialization
INFO:nncf:Wrapping module ResNet/Conv2d[conv1] by ResNet/NNCFConv2d[conv1]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/BasicBlock[0]/Conv2d[conv1] by ResNet/Sequential[layer1]/BasicBlock[0]/NNCFConv2d[conv1]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/BasicBlock[0]/Conv2d[conv2] by ResNet/Sequential[layer1]/BasicBlock[0]/NNCFConv2d[conv2]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/BasicBlock[1]/Conv2d[conv1] by ResNet/Sequential[layer1]/BasicBlock[1]/NNCFConv2d[conv1]
INFO:nncf:Wrapping module ResNet/Sequential[layer1]/BasicBlock[1]/Conv2d[conv2] by ResNet/Sequential[layer1]/BasicBlock[1]/NNCFConv2d[conv2]
INFO:nncf:Wrapping module ResNet/Sequential[layer2]/BasicBlock[0]/Conv2d[conv1] by ResNet/Sequential[layer2]/BasicBlock[0]/NNCFConv2d[conv1]
INFO:nncf:Wrapping module ResNet/Sequential[layer2]/BasicBlock[0]/Conv2d[conv2] by ResNet/Sequential[layer2]/BasicBlock[0]/NNCFConv2d[conv2]

# Fine-tune the model - Quantization-Aware Training

In [47]:
# The optimizer from the pre-training cell was built before the model was wrapped
# by create_compressed_model, so it does not track parameters added by the wrapping
# (e.g. quantizer scales). Rebuild it over the compressed model so they are tuned too.
optimizer = torch.optim.Adam(model.parameters(), lr=init_lr)

# train for one epoch with NNCF
train(train_loader, model, criterion, optimizer, epoch=epochs)

# evaluate on validation set after Quantization-Aware Training (QAT case)
acc1 = validate(val_loader, model, criterion)

print(f'Accuracy of tuned INT8 model: {acc1:.3f}')

  cpuset_checked))


Epoch: [4][  0/313]	Time  1.240 ( 1.240)	Data  0.901 ( 0.901)	Loss  0.849 ( 0.849)	Acc@1  80.08 ( 80.08)	Acc@5  94.14 ( 94.14)
Epoch: [4][ 50/313]	Time  0.234 ( 0.246)	Data  0.001 ( 0.023)	Loss  0.879 ( 0.905)	Acc@1  78.12 ( 78.23)	Acc@5  92.97 ( 93.60)
Epoch: [4][100/313]	Time  0.202 ( 0.230)	Data  0.008 ( 0.015)	Loss  0.922 ( 0.914)	Acc@1  77.34 ( 78.01)	Acc@5  93.75 ( 93.24)
Epoch: [4][150/313]	Time  0.208 ( 0.224)	Data  0.000 ( 0.012)	Loss  1.070 ( 0.926)	Acc@1  71.48 ( 77.72)	Acc@5  90.62 ( 93.00)
Epoch: [4][200/313]	Time  0.207 ( 0.220)	Data  0.000 ( 0.010)	Loss  0.870 ( 0.926)	Acc@1  78.52 ( 77.56)	Acc@5  93.75 ( 92.96)
Epoch: [4][250/313]	Time  0.205 ( 0.218)	Data  0.007 ( 0.009)	Loss  0.990 ( 0.930)	Acc@1  76.95 ( 77.24)	Acc@5  92.19 ( 92.89)
Epoch: [4][300/313]	Time  0.226 ( 0.218)	Data  0.001 ( 0.009)	Loss  1.003 ( 0.935)	Acc@1  76.17 ( 76.99)	Acc@5  92.19 ( 92.84)
Test: [ 0/79]	Time  1.148 ( 1.148)	Loss  1.815 ( 1.815)	Acc@1  58.98 ( 58.98)	Acc@5  80.47 ( 80.47)
Test: [10/7

# Export model to ONNX

In [48]:
# Export to ONNX that is supported by the OpenVINO™ toolkit.
# The relative file name means the model is written to the current working directory.
compression_ctrl.export_model("model_compressed.onnx")

  self.shape = tuple(int(dim) for dim in shape)  # Handle cases when shape is a tuple of Tensors
  if not self.is_enabled_quantization():
  return self._num_bits.item()
  return self.signed_tensor.item() == 1
  self.get_padding_value_ref().data.fill_(padding_value.item())
  if not self.get_padding_value_ref():


# Export to OpenVINO™ Intermediate Representation (IR)
To export a model to the OpenVINO IR and run it using the Intel® Deep Learning Deployment Toolkit, refer to this [tutorial](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html).