# Initialization

## Import Libraries

In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms

# Fast AI (PyTorch wrapper)
from fastai import *
from fastai.vision.all import *
import fastai
fastai.__version__

'2.5.3'

In [2]:
from collections import namedtuple
import tqdm

In [3]:
# Make sure a GPU is being used and show which one.
# Fail fast with a readable message instead of a CUDA initialisation error.
assert torch.cuda.is_available(), "CUDA GPU not available - this notebook expects to run on a GPU"
# The former bare `torch.cuda.device(0)` call only built a context manager
# without entering it, so it selected nothing; it has been dropped.
torch.cuda.get_device_name(torch.cuda.current_device())

'Tesla V100-SXM2-16GB'

In [4]:
# Notebook auto reloads code. (Ref: http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython)
%load_ext autoreload
%autoreload 2

## Import Created Modules

In [5]:
from quantization_functions import quant_aware_resnet_model
from quantization_functions import post_training_quant_model
from quantization_functions import train_loop

## Load Dataset

In [6]:
# Mini-batch size of the training DataLoader
BATCH_SIZE = 128
# Mini-batch size of the validation DataLoader
# NOTE(review): the baseline run recorded in this notebook shows 123 validation
# iterations while the later runs show 246, so this value appears to have been
# edited between runs — confirm before comparing timings
TEST_BATCH_SIZE = 16
# Number of target classes; sizes the replacement `fc` head of the ResNet-50
N_CLASS = 10

In [7]:
# Download Imagenette 320 pixel
# `path` is later joined with "train" and "val" to build the ImageFolder datasets.
# presumably untar_data (fastai) returns the extracted dataset folder — TODO confirm

path = untar_data(URLs.IMAGENETTE_320)

In [8]:
# Per-channel (mean, std) pair used to normalise the input tensors
imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training pipeline: random-resized crop to 112 plus horizontal flip as augmentation
train_tfms = transforms.Compose([
    transforms.RandomResizedCrop(size=112),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_stats[0], std=imagenet_stats[1], inplace=True),
])

# Evaluation pipeline: deterministic resize to 128 followed by a centre crop to 112
test_tfms = transforms.Compose([
    transforms.Resize(size=128),
    transforms.CenterCrop(size=112),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_stats[0], std=imagenet_stats[1]),
])

In [9]:
# PyTorch datasets: one sub-folder of `path` per split, each with its own transforms
trainset = datasets.ImageFolder(root=path/"train", transform=train_tfms)
testset = datasets.ImageFolder(root=path/"val", transform=test_tfms)

# PyTorch data loaders: only the training split is shuffled
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

# Resnet 50 Models

In [10]:
# Number of epochs passed to every train_loop.train_model call in this notebook
N_EPOCH = 10

In [11]:
# Root directory for checkpoints (relative to the working directory):
# the float baseline is saved under base_model/, PTQ weights are written as
# ptq<k>bit_model_weights.pt and QAT runs go under qat<k>bit/
SAVE_DIR = 'checkpoint/imagenette_resnet50'

## Base model

In [12]:
# Start from the pretrained ResNet-50 ...
base_model = torchvision.models.resnet50(pretrained=True)
# ... and change the top layer to a fresh N_CLASS-way linear classifier
base_model.fc = nn.Linear(in_features=base_model.fc.in_features, out_features=N_CLASS)

In [13]:
### Train Loop
# Fine-tune the float ResNet-50 baseline with SGD + momentum
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,  # presumably a gradient-clipping threshold — TODO confirm in train_loop
    save=f"{SAVE_DIR}/base_model",
)

train - epoch:  0: : 74it [01:08,  1.08it/s, loss=0.48]                       
val - epoch:  0: : 123it [00:16,  7.67it/s, val_loss=0.148, train_loss=0.48, acc=0.95]                        
train - epoch:  1: : 74it [00:29,  2.49it/s, loss=0.21]                       
val - epoch:  1: : 123it [00:13,  9.30it/s, val_loss=0.126, train_loss=0.21, acc=0.96]                        
train - epoch:  2: : 74it [00:29,  2.50it/s, loss=0.203]                      
val - epoch:  2: : 123it [00:13,  8.93it/s, val_loss=0.13, train_loss=0.203, acc=0.959]                        
train - epoch:  3: : 74it [00:29,  2.55it/s, loss=0.171]                      
val - epoch:  3: : 123it [00:13,  9.32it/s, val_loss=0.14, train_loss=0.171, acc=0.957]                        
train - epoch:  4: : 74it [00:29,  2.54it/s, loss=0.173]                      
val - epoch:  4: : 123it [00:13,  9.25it/s, val_loss=0.133, train_loss=0.173, acc=0.961]                       
train - epoch:  5: : 74it [00:30,  2.46it/s, lo

In [14]:
# Rebuild the architecture without pretrained weights; the fine-tuned
# checkpoint loaded below supplies the parameters.
base_model = torchvision.models.resnet50(pretrained=False)
base_model.fc = nn.Linear(base_model.fc.in_features, N_CLASS) # Change top layer

# map_location='cpu': the freshly built model lives on the CPU, so deserialize
# the checkpoint there too instead of onto the device it was saved from
# (avoids a transient GPU copy and lets the load work without that device).
base_model.load_state_dict(
    torch.load(f'{SAVE_DIR}/base_model/model_weights.pt', map_location='cpu')
)

# Validation accuracy
train_loop.test_model(test_loader, base_model)

100%|██████████| 123/123 [00:13<00:00,  8.90it/s, acc=0.96] 

acc: 0.9602547770700637





## Post Training Quantization

### 8-bit quantization

In [12]:
# Convert base model to a custom quantization layer with the trained weights
# (qat=False: post-training quantization, the weights are taken from the float checkpoint)
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant, same as the float model's head (was a hard-coded 10)
    q_num_bit=8,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys([])


In [13]:
# Forward pass to have quantized weights
# NOTE(review): this pass runs over test_loader (the "val" split), the same data the
# converted model is scored on afterwards — consider a held-out calibration subset.
# NOTE(review): the recorded accuracy of this pass (0.8214) is identical for every
# q_num_bit in this notebook and below the float baseline (0.9603) — confirm expected.
train_loop.test_model(test_loader, c_base_model) # use test_loader to avoid out of memory

100%|██████████| 246/246 [00:20<00:00, 11.73it/s, acc=0.821]

acc: 0.8214012738853503





In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the converted 8-bit PTQ model on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:17<00:00, 13.87it/s, acc=0.809]

acc: 0.8089171974522293





In [16]:
# Persist the converted 8-bit PTQ model's state dict under SAVE_DIR
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/ptq8bit_model_weights.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 7-bit quantization

In [12]:
# Convert base model to a custom quantization layer with the trained weights
# (qat=False: post-training quantization, the weights are taken from the float checkpoint)
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant, same as the float model's head (was a hard-coded 10)
    q_num_bit=7,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys([])


In [13]:
# Forward pass to have quantized weights
# NOTE(review): runs over test_loader (the "val" split), the same data used for the
# reported accuracy — consider a held-out calibration subset.
# NOTE(review): recorded accuracy here (0.8214) equals the value recorded for the other
# bit widths — confirm this pass is meant to be independent of q_num_bit.
train_loop.test_model(test_loader, c_base_model) # use test_loader to avoid out of memory

100%|██████████| 246/246 [00:20<00:00, 11.87it/s, acc=0.821]

acc: 0.8214012738853503





In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the converted 7-bit PTQ model on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:17<00:00, 14.08it/s, acc=0.791]

acc: 0.7910828025477707





In [16]:
# Persist the converted 7-bit PTQ model's state dict under SAVE_DIR
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/ptq7bit_model_weights.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 6-bit quantization

In [12]:
# Convert base model to a custom quantization layer with the trained weights
# (qat=False: post-training quantization, the weights are taken from the float checkpoint)
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant, same as the float model's head (was a hard-coded 10)
    q_num_bit=6,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys([])


In [13]:
# Forward pass to have quantized weights
# NOTE(review): runs over test_loader (the "val" split), the same data used for the
# reported accuracy — consider a held-out calibration subset.
# NOTE(review): recorded accuracy here (0.8214) equals the value recorded for the other
# bit widths — confirm this pass is meant to be independent of q_num_bit.
train_loop.test_model(test_loader, c_base_model) # use test_loader to avoid out of memory

100%|██████████| 246/246 [00:21<00:00, 11.67it/s, acc=0.821]

acc: 0.8214012738853503





In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the converted 6-bit PTQ model on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:17<00:00, 14.17it/s, acc=0.755]

acc: 0.7554140127388536





In [16]:
# Persist the converted 6-bit PTQ model's state dict under SAVE_DIR
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/ptq6bit_model_weights.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 5-bit quantization

In [12]:
# Convert base model to a custom quantization layer with the trained weights
# (qat=False: post-training quantization, the weights are taken from the float checkpoint)
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant, same as the float model's head (was a hard-coded 10)
    q_num_bit=5,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys([])


In [13]:
# Forward pass to have quantized weights
# NOTE(review): runs over test_loader (the "val" split), the same data used for the
# reported accuracy — consider a held-out calibration subset.
# NOTE(review): recorded accuracy here (0.8214) equals the value recorded for the other
# bit widths — confirm this pass is meant to be independent of q_num_bit.
train_loop.test_model(test_loader, c_base_model) # use test_loader to avoid out of memory

100%|██████████| 246/246 [00:21<00:00, 11.58it/s, acc=0.821]

acc: 0.8214012738853503





In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the converted 5-bit PTQ model on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:16<00:00, 14.52it/s, acc=0.365]

acc: 0.3653503184713376





In [16]:
# Persist the converted 5-bit PTQ model's state dict under SAVE_DIR
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/ptq5bit_model_weights.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 4-bit quantization

In [12]:
# Convert base model to a custom quantization layer with the trained weights
# (qat=False: post-training quantization, the weights are taken from the float checkpoint)
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant, same as the float model's head (was a hard-coded 10)
    q_num_bit=4,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys([])


In [13]:
# Forward pass to have quantized weights
# NOTE(review): runs over test_loader (the "val" split), the same data used for the
# reported accuracy — consider a held-out calibration subset.
# NOTE(review): recorded accuracy here (0.8214) equals the value recorded for the other
# bit widths — confirm this pass is meant to be independent of q_num_bit.
train_loop.test_model(test_loader, c_base_model) # use test_loader to avoid out of memory

100%|██████████| 246/246 [00:20<00:00, 12.00it/s, acc=0.821]

acc: 0.8214012738853503





In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the converted 4-bit PTQ model on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:17<00:00, 14.23it/s, acc=0.0721] 

acc: 0.07210191082802547





In [16]:
# Persist the converted 4-bit PTQ model's state dict under SAVE_DIR
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/ptq4bit_model_weights.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)

## Quantization Aware Training

### 8-bit quantization

In [12]:
# Create model with custom quantization layer from the start
# pretrained=True: the recorded output lists fc.weight/fc.bias as left over, so
# presumably only the backbone is initialised from pretrained weights — TODO confirm
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant instead of a hard-coded 10
    q_num_bit=8,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [13]:
# Training Loop
# Quantization-aware training of the 8-bit model with SGD + momentum
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,  # presumably a gradient-clipping threshold — TODO confirm in train_loop
    save=f"{SAVE_DIR}/qat8bit",
)

train - epoch:  0: : 74it [00:34,  2.15it/s, loss=0.688]                      
val - epoch:  0: : 246it [00:23, 10.45it/s, val_loss=0.275, train_loss=0.688, acc=0.916]                       
train - epoch:  1: : 74it [00:33,  2.21it/s, loss=0.304]                      
val - epoch:  1: : 246it [00:22, 11.07it/s, val_loss=0.236, train_loss=0.304, acc=0.926]                       
train - epoch:  2: : 74it [00:33,  2.24it/s, loss=0.267]                      
val - epoch:  2: : 246it [00:23, 10.57it/s, val_loss=0.19, train_loss=0.267, acc=0.942]                        
train - epoch:  3: : 74it [00:33,  2.18it/s, loss=0.252]                      
val - epoch:  3: : 246it [00:21, 11.23it/s, val_loss=0.202, train_loss=0.252, acc=0.94]                        
train - epoch:  4: : 74it [00:33,  2.20it/s, loss=0.239]                      
val - epoch:  4: : 246it [00:22, 10.87it/s, val_loss=0.193, train_loss=0.239, acc=0.943]                       
train - epoch:  5: : 74it [00:32,  2.24it/s, 

In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# trained model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the model converted from the 8-bit QAT run on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:17<00:00, 14.04it/s, acc=0.951]

acc: 0.9513375796178344





In [16]:
# Persist the converted 8-bit QAT model's state dict inside the qat8bit run directory
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/qat8bit/model_weights_quantized.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 7-bit quantization

In [12]:
# Create model with custom quantization layer from the start
# pretrained=True: the recorded output lists fc.weight/fc.bias as left over, so
# presumably only the backbone is initialised from pretrained weights — TODO confirm
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant instead of a hard-coded 10
    q_num_bit=7,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [13]:
# Training Loop
# Quantization-aware training of the 7-bit model with SGD + momentum
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,  # presumably a gradient-clipping threshold — TODO confirm in train_loop
    save=f"{SAVE_DIR}/qat7bit",
)

train - epoch:  0: : 74it [00:34,  2.17it/s, loss=0.875]                      
val - epoch:  0: : 246it [00:22, 10.72it/s, val_loss=0.326, train_loss=0.875, acc=0.897]                       
train - epoch:  1: : 74it [00:33,  2.21it/s, loss=0.364]                      
val - epoch:  1: : 246it [00:22, 11.07it/s, val_loss=0.249, train_loss=0.364, acc=0.925]                       
train - epoch:  2: : 74it [00:33,  2.19it/s, loss=0.311]                      
val - epoch:  2: : 246it [00:21, 11.29it/s, val_loss=0.248, train_loss=0.311, acc=0.92]                        
train - epoch:  3: : 74it [00:32,  2.27it/s, loss=0.289]                      
val - epoch:  3: : 246it [00:22, 11.07it/s, val_loss=0.203, train_loss=0.289, acc=0.941]                       
train - epoch:  4: : 74it [00:33,  2.23it/s, loss=0.262]                      
val - epoch:  4: : 246it [00:21, 11.50it/s, val_loss=0.193, train_loss=0.262, acc=0.94]                        
train - epoch:  5: : 74it [00:33,  2.22it/s, 

In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# trained model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the model converted from the 7-bit QAT run on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:18<00:00, 13.57it/s, acc=0.944]

acc: 0.944203821656051





In [16]:
# Persist the converted 7-bit QAT model's state dict inside the qat7bit run directory
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/qat7bit/model_weights_quantized.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 6-bit quantization

In [12]:
# Create model with custom quantization layer from the start
# pretrained=True: the recorded output lists fc.weight/fc.bias as left over, so
# presumably only the backbone is initialised from pretrained weights — TODO confirm
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant instead of a hard-coded 10
    q_num_bit=6,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [13]:
# Training Loop
# Quantization-aware training of the 6-bit model with SGD + momentum
criterion = nn.CrossEntropyLoss()
# NOTE(review): lr is 1e-2 here while the 8/7/5/4-bit QAT runs use 1e-3 — confirm this
# is intentional, otherwise the 6-bit result is not directly comparable with the others
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,  # presumably a gradient-clipping threshold — TODO confirm in train_loop
    save=f"{SAVE_DIR}/qat6bit",
)

train - epoch:  0: : 74it [00:33,  2.19it/s, loss=1.19]                      
val - epoch:  0: : 246it [00:22, 10.95it/s, val_loss=0.532, train_loss=1.19, acc=0.831]                       
train - epoch:  1: : 74it [00:32,  2.25it/s, loss=0.705]                      
val - epoch:  1: : 246it [00:21, 11.24it/s, val_loss=0.46, train_loss=0.705, acc=0.86]                         
train - epoch:  2: : 74it [00:33,  2.22it/s, loss=0.638]                      
val - epoch:  2: : 246it [00:22, 10.95it/s, val_loss=0.43, train_loss=0.638, acc=0.863]                        
train - epoch:  3: : 74it [00:32,  2.28it/s, loss=0.571]                      
val - epoch:  3: : 246it [00:21, 11.59it/s, val_loss=0.37, train_loss=0.571, acc=0.879]                        
train - epoch:  4: : 74it [00:32,  2.25it/s, loss=0.526]                      
val - epoch:  4: : 246it [00:22, 11.04it/s, val_loss=0.426, train_loss=0.526, acc=0.849]                       
train - epoch:  5: : 74it [00:32,  2.30it/s, lo

In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# trained model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the model converted from the 6-bit QAT run on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:17<00:00, 14.16it/s, acc=0.843]

acc: 0.8433121019108281





In [16]:
# Persist the converted 6-bit QAT model's state dict inside the qat6bit run directory
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/qat6bit/model_weights_quantized.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 5-bit quantization

In [12]:
# Create model with custom quantization layer from the start
# pretrained=True: the recorded output lists fc.weight/fc.bias as left over, so
# presumably only the backbone is initialised from pretrained weights — TODO confirm
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant instead of a hard-coded 10
    q_num_bit=5,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [13]:
# Training Loop
# Quantization-aware training of the 5-bit model with SGD + momentum
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,  # presumably a gradient-clipping threshold — TODO confirm in train_loop
    save=f"{SAVE_DIR}/qat5bit",
)

train - epoch:  0: : 74it [00:32,  2.24it/s, loss=2.14]                      
val - epoch:  0: : 246it [00:21, 11.29it/s, val_loss=1.78, train_loss=2.14, acc=0.377]                       
train - epoch:  1: : 74it [00:32,  2.26it/s, loss=1.72]                      
val - epoch:  1: : 246it [00:21, 11.65it/s, val_loss=1.5, train_loss=1.72, acc=0.472]                        
train - epoch:  2: : 74it [00:32,  2.28it/s, loss=1.58]                      
val - epoch:  2: : 246it [00:21, 11.30it/s, val_loss=1.46, train_loss=1.58, acc=0.489]                       
train - epoch:  3: : 74it [00:32,  2.27it/s, loss=1.48]                      
val - epoch:  3: : 246it [00:21, 11.37it/s, val_loss=1.3, train_loss=1.48, acc=0.554]                        
train - epoch:  4: : 74it [00:32,  2.29it/s, loss=1.39]                      
val - epoch:  4: : 246it [00:22, 10.96it/s, val_loss=1.24, train_loss=1.39, acc=0.576]                       
train - epoch:  5: : 74it [00:32,  2.25it/s, loss=1.34]     

In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# trained model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the model converted from the 5-bit QAT run on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:17<00:00, 14.43it/s, acc=0.444]

acc: 0.4438216560509554





In [16]:
# Persist the converted 5-bit QAT model's state dict inside the qat5bit run directory
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/qat5bit/model_weights_quantized.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)

### 4-bit quantization

In [12]:
# Create model with custom quantization layer from the start
# pretrained=True: the recorded output lists fc.weight/fc.bias as left over, so
# presumably only the backbone is initialised from pretrained weights — TODO confirm
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # shared constant instead of a hard-coded 10
    q_num_bit=4,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)  # presumably switches the quantization layers on — TODO confirm

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [13]:
# Training Loop
# Quantization-aware training of the 4-bit model with SGD + momentum
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,  # presumably a gradient-clipping threshold — TODO confirm in train_loop
    save=f"{SAVE_DIR}/qat4bit",
)

train - epoch:  0: : 74it [00:34,  2.16it/s, loss=4.13]                      
val - epoch:  0: : 246it [00:21, 11.32it/s, val_loss=3.16, train_loss=4.13, acc=0.124]                       
train - epoch:  1: : 74it [00:32,  2.27it/s, loss=3]                         
val - epoch:  1: : 246it [00:21, 11.38it/s, val_loss=2.85, train_loss=3, acc=0.135]                       
train - epoch:  2: : 74it [00:32,  2.30it/s, loss=2.78]                      
val - epoch:  2: : 246it [00:21, 11.38it/s, val_loss=2.73, train_loss=2.78, acc=0.134]                       
train - epoch:  3: : 74it [00:32,  2.26it/s, loss=2.69]                      
val - epoch:  3: : 246it [00:21, 11.33it/s, val_loss=2.62, train_loss=2.69, acc=0.141]                       
val - epoch:  5: : 246it [00:21, 11.38it/s, val_loss=2.31, train_loss=2.34, acc=0.0991]                       
train - epoch:  6: : 74it [00:34,  2.16it/s, loss=2.51]                      
val - epoch:  6: : 246it [00:22, 11.11it/s, val_loss=2.94, tra

In [14]:
# Convert to quantized model
# The head is sized by N_CLASS (was a hard-coded 10) so it always matches the
# trained model it is converted from.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [15]:
# Validation accuracy
# Scores the model converted from the 4-bit QAT run on test_loader (the "val" split)
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 246/246 [00:16<00:00, 14.63it/s, acc=0.113]

acc: 0.11286624203821656





In [16]:
# Persist the converted 4-bit QAT model's state dict inside the qat4bit run directory
quantized_state = q_base_model.state_dict()
with open(f'{SAVE_DIR}/qat4bit/model_weights_quantized.pt', mode='wb') as weights_file:
    torch.save(quantized_state, weights_file)