# Initialization

## Import Libraries

In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms

# Fast AI (PyTorch wrapper)
from fastai import *
from fastai.vision.all import *
import fastai
fastai.__version__

'2.5.3'

In [2]:
from collections import namedtuple
import tqdm

In [3]:
# Make sure a GPU is being used: the evaluation cells below call `.cuda()`
# unconditionally, so fail early with a clear message if CUDA is missing.
# (The previous bare `torch.cuda.device(0)` only constructed a context manager
# without entering it, so it neither selected nor verified a device.)
assert torch.cuda.is_available(), "A CUDA-capable GPU is required to run this notebook"
# Last expression of the cell: display the name of the active device.
torch.cuda.get_device_name(torch.cuda.current_device())

'Tesla V100-SXM2-16GB'

In [4]:
# Notebook auto reloads code. (Ref: http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython)
%load_ext autoreload
%autoreload 2

## Import Created Modules

In [5]:
from quantization_functions import quant_aware_resnet_model
from quantization_functions import post_training_quant_model
from quantization_functions import train_loop

## Load Dataset

In [6]:
# Notebook-wide configuration for the data loaders and the classifier head.
N_CLASS = 10          # number of output classes of the replaced `fc` layer
BATCH_SIZE = 128      # mini-batch size of the training loader
TEST_BATCH_SIZE = 32  # mini-batch size of the validation loader

In [7]:
# Download Imagenette 320 pixel
# `path` is the dataset root; the dataset cell below reads its "train" and
# "val" sub-folders.
# NOTE(review): `untar_data` and `URLs` are not imported by name in this
# notebook; presumably they come from the fastai star imports - confirm.

path = untar_data(URLs.IMAGENETTE_320)

In [8]:
# Per-channel (mean, std) pair used to normalize the input tensors.
imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training pipeline: random 112-pixel crop and horizontal flip for
# augmentation, then tensor conversion and in-place normalization.
train_tfms = transforms.Compose([
    transforms.RandomResizedCrop(112),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_stats[0], std=imagenet_stats[1], inplace=True),
])

# Validation pipeline: deterministic resize to 128 followed by a 112-pixel
# center crop, then the same tensor conversion and normalization.
test_tfms = transforms.Compose([
    transforms.Resize(128),
    transforms.CenterCrop(112),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_stats[0], std=imagenet_stats[1]),
])
    


In [9]:
# PyTorch datasets: one ImageFolder per split, each with its own transform
# pipeline (augmenting for "train", deterministic for "val").
train_root = path/"train"
val_root = path/"val"

trainset = datasets.ImageFolder(root=train_root, transform=train_tfms)
testset = datasets.ImageFolder(root=val_root, transform=test_tfms)

In [10]:
# PyTorch data loaders: the training split is reshuffled every epoch, the
# validation split is iterated in a fixed order.
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

# Models

In [11]:
# Number of epochs passed to `train_loop.train_model` for the base model and
# for both quantization-aware training runs (the post-training-quantization
# cells use a single epoch instead).
N_EPOCH = 10

## Base model

In [12]:
# Start from torchvision's pretrained ResNet-50 and swap its final
# fully-connected layer for a fresh N_CLASS-way classifier head.
base_model = torchvision.models.resnet50(pretrained=True)
head_in_features = base_model.fc.in_features
base_model.fc = nn.Linear(head_in_features, N_CLASS)

In [13]:
# Train loop: fine-tune the full-precision baseline with SGD + cross-entropy.
# Checkpoints go under 'checkpoint_imagenette/base_model'.
# NOTE(review): `clip_value` is presumably a gradient-clipping threshold -
# confirm in `train_loop.train_model`.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save='checkpoint_imagenette/base_model',
)

train - epoch:  0: : 37it [00:32,  1.15it/s, loss=1.77]                      
val - epoch:  0: : 62it [00:13,  4.59it/s, val_loss=0.815, train_loss=1.77, acc=0.927]                      
train - epoch:  1: : 37it [00:31,  1.18it/s, loss=0.653]                      
val - epoch:  1: : 62it [00:13,  4.56it/s, val_loss=0.305, train_loss=0.653, acc=0.952]                      
train - epoch:  2: : 37it [00:30,  1.20it/s, loss=0.356]                      
val - epoch:  2: : 62it [00:13,  4.61it/s, val_loss=0.19, train_loss=0.356, acc=0.958]                       
train - epoch:  3: : 37it [00:30,  1.20it/s, loss=0.266]                      
val - epoch:  3: : 62it [00:13,  4.52it/s, val_loss=0.145, train_loss=0.266, acc=0.965]                       
train - epoch:  4: : 37it [00:29,  1.24it/s, loss=0.225]                      
val - epoch:  4: : 62it [00:13,  4.46it/s, val_loss=0.126, train_loss=0.225, acc=0.967]                       
train - epoch:  5: : 37it [00:30,  1.22it/s, loss=0.203

In [14]:
# Validation accuracy of the full-precision baseline.
# NOTE(review): the model is never moved to the GPU in this notebook;
# presumably `train_loop.train_model` does it - confirm.
base_model.eval()

acc = []  # one bool per validation sample: was the top-1 prediction correct?
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or kept per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc':sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 62/62 [00:13<00:00,  4.48it/s, acc=0.978]

acc: 0.9778343949044586





## Post Training Quantization

### 8-bit quantization

In [12]:
# Convert base model to a custom quantization layer with the trained weights.
# The class count comes from N_CLASS (previously a hard-coded 10) so it stays
# in sync with the head of the baseline whose weights are loaded here.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=8,
    pretrained='checkpoint_imagenette/base_model/model_weights.pt',
)
# Switch the custom layers into quantized mode.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [13]:
# Forward pass to have quantized weights (a single epoch over the train set).
# NOTE(review): this passes an optimizer to `train_model`; if that call updates
# the weights, this step is a one-epoch fine-tune rather than a pure
# calibration pass - confirm in `train_loop`.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=1,
    clip_value=1e-2,
    save='checkpoint_imagenette/ptq8bit',
)

train - epoch:  0: : 74it [00:36,  2.03it/s, loss=0.337]                      
val - epoch:  0: : 123it [00:18,  6.75it/s, val_loss=0.238, train_loss=0.337, acc=0.926]                       


In [14]:
# Convert to quantized model: build an empty quantized ResNet-50 and populate
# it from the quantization-aware model. The class count comes from N_CLASS
# (previously a hard-coded 10) so both models share the same head size.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
q_base_model.eval()

acc = []  # one bool per validation sample: was the top-1 prediction correct?
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or kept per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc':sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 123/123 [00:15<00:00,  7.87it/s, acc=0.925]

acc: 0.9248407643312102





In [16]:
# Persist the quantized model's parameters next to the PTQ 8-bit checkpoint.
quantized_state = q_base_model.state_dict()
with open('checkpoint_imagenette/ptq8bit/model_weights_quantized.pt', 'wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 4-bit quantization

In [12]:
# Convert base model to a custom quantization layer with the trained weights.
# The class count comes from N_CLASS (previously a hard-coded 10) so it stays
# in sync with the head of the baseline whose weights are loaded here.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=4,
    pretrained='checkpoint_imagenette/base_model/model_weights.pt',
)
# Switch the custom layers into quantized mode.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [13]:
# Forward pass to have quantized weights (a single epoch over the train set).
# NOTE(review): this passes an optimizer to `train_model`; if that call updates
# the weights, this step is a one-epoch fine-tune rather than a pure
# calibration pass - confirm in `train_loop`.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=1,
    clip_value=1e-2,
    save='checkpoint_imagenette/ptq4bit',
)

train - epoch:  0: : 74it [00:36,  2.03it/s, loss=2.52]                      
val - epoch:  0: : 123it [00:17,  6.93it/s, val_loss=2.31, train_loss=2.52, acc=0.154]                       


In [14]:
# Convert to quantized model: build an empty quantized ResNet-50 and populate
# it from the quantization-aware model. The class count comes from N_CLASS
# (previously a hard-coded 10) so both models share the same head size.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
q_base_model.eval()

acc = []  # one bool per validation sample: was the top-1 prediction correct?
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or kept per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc':sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 123/123 [00:15<00:00,  7.96it/s, acc=0.149] 

acc: 0.14929936305732483





In [16]:
# Persist the quantized model's parameters next to the PTQ 4-bit checkpoint.
quantized_state = q_base_model.state_dict()
with open('checkpoint_imagenette/ptq4bit/model_weights_quantized.pt', 'wb') as weights_file:
    torch.save(quantized_state, weights_file)

## Quantization Aware Training

### 8-bit quantization

In [12]:
# Create model with custom quantization layer from the start.
# The class count comes from N_CLASS (previously a hard-coded 10) so it stays
# in sync with the notebook configuration.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=8,
    pretrained=True,
)
# Switch the custom layers into quantized mode before training.
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [13]:
# Training loop: quantization-aware training of the 8-bit model for N_EPOCH
# epochs, with the same SGD / cross-entropy setup as the baseline.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save='checkpoint_imagenette/qat8bit',
)

train - epoch:  0: : 74it [00:35,  2.09it/s, loss=0.686]                      
val - epoch:  0: : 123it [00:18,  6.82it/s, val_loss=0.241, train_loss=0.686, acc=0.929]                       
train - epoch:  1: : 74it [00:34,  2.15it/s, loss=0.295]                      
val - epoch:  1: : 123it [00:18,  6.75it/s, val_loss=0.21, train_loss=0.295, acc=0.941]                        
train - epoch:  2: : 74it [00:35,  2.10it/s, loss=0.275]                      
val - epoch:  2: : 123it [00:20,  6.09it/s, val_loss=0.173, train_loss=0.275, acc=0.947]                       
train - epoch:  3: : 74it [00:34,  2.12it/s, loss=0.254]                      
val - epoch:  3: : 123it [00:17,  6.84it/s, val_loss=0.196, train_loss=0.254, acc=0.939]                       
train - epoch:  4: : 74it [00:34,  2.14it/s, loss=0.232]                      
val - epoch:  4: : 123it [00:17,  6.87it/s, val_loss=0.169, train_loss=0.232, acc=0.952]                       
train - epoch:  5: : 74it [00:34,  2.18it/s, 

In [14]:
# Convert to quantized model: build an empty quantized ResNet-50 and populate
# it from the quantization-aware model. The class count comes from N_CLASS
# (previously a hard-coded 10) so both models share the same head size.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
q_base_model.eval()

acc = []  # one bool per validation sample: was the top-1 prediction correct?
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or kept per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc':sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 123/123 [00:15<00:00,  7.93it/s, acc=0.956]

acc: 0.9556687898089172





In [16]:
# Persist the quantized model's parameters next to the QAT 8-bit checkpoint.
quantized_state = q_base_model.state_dict()
with open('checkpoint_imagenette/qat8bit/model_weights_quantized.pt', 'wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 4-bit quantization

In [12]:
# Create model with custom quantization layer from the start.
# The class count comes from N_CLASS (previously a hard-coded 10) so it stays
# in sync with the notebook configuration.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=4,
    pretrained=True,
)
# Switch the custom layers into quantized mode before training.
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [13]:
# Training loop: quantization-aware training of the 4-bit model for N_EPOCH
# epochs, with the same SGD / cross-entropy setup as the baseline.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save='checkpoint_imagenette/qat4bit',
)

train - epoch:  0: : 74it [00:36,  2.03it/s, loss=4.77]                      
val - epoch:  0: : 123it [00:17,  6.85it/s, val_loss=3.44, train_loss=4.77, acc=0.122]                       
train - epoch:  1: : 74it [00:34,  2.11it/s, loss=3.33]                      
val - epoch:  1: : 123it [00:17,  7.05it/s, val_loss=3.26, train_loss=3.33, acc=0.127]                       
train - epoch:  2: : 74it [00:34,  2.15it/s, loss=3.11]                      
val - epoch:  2: : 123it [00:18,  6.79it/s, val_loss=2.86, train_loss=3.11, acc=0.122]                       
train - epoch:  3: : 74it [00:33,  2.19it/s, loss=2.72]                      
val - epoch:  3: : 123it [00:17,  6.93it/s, val_loss=2.62, train_loss=2.72, acc=0.121]                       
train - epoch:  4: : 74it [00:34,  2.14it/s, loss=2.43]                      
val - epoch:  4: : 123it [00:17,  6.97it/s, val_loss=2.41, train_loss=2.43, acc=0.13]                        
train - epoch:  5: : 74it [00:34,  2.17it/s, loss=2.48]     

In [14]:
# Convert to quantized model: build an empty quantized ResNet-50 and populate
# it from the quantization-aware model. The class count comes from N_CLASS
# (previously a hard-coded 10) so both models share the same head size.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
q_base_model.eval()

acc = []  # one bool per validation sample: was the top-1 prediction correct?
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or kept per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc':sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 123/123 [00:16<00:00,  7.65it/s, acc=0.0968]

acc: 0.09681528662420383





In [16]:
# Persist the quantized model's parameters next to the QAT 4-bit checkpoint.
quantized_state = q_base_model.state_dict()
with open('checkpoint_imagenette/qat4bit/model_weights_quantized.pt', 'wb') as weights_file:
    torch.save(quantized_state, weights_file)