# Initialization

## Import Libraries

In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms


In [2]:
from collections import namedtuple
import tqdm

In [3]:
# Make sure a GPU is available before anything else runs; fail fast with a
# clear message instead of an opaque CUDA initialisation error.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available - this notebook expects a GPU runtime.")

# Last expression of the cell: display the name of GPU 0 as a sanity check.
# (A bare `torch.cuda.device(0)` only builds a context manager and discards it,
# so it never selected a device and has been dropped.)
torch.cuda.get_device_name(0)

'Tesla V100-SXM2-16GB'

In [4]:
# Automatically reload imported modules before running each cell, so edits to the
# project modules imported below are picked up without restarting the kernel.
# (Ref: http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython)
%load_ext autoreload
%autoreload 2

## Import Created Modules

In [5]:
from quantization_functions import quant_aware_resnet_model
from quantization_functions import post_training_quant_model
from quantization_functions import train_loop

## Load Dataset

In [6]:
# Mini-batch size for the training DataLoader.
BATCH_SIZE = 256
# Mini-batch size for the test DataLoader.
TEST_BATCH_SIZE = 256
# Number of target classes; used as the output size of the replaced ResNet head.
N_CLASS = 10

In [7]:
# Per-channel normalisation statistics, written on the 0-255 scale and rescaled
# to the 0-1 range of the tensors being normalised.
cifar_mean = [value / 255.0 for value in (125.3, 123.0, 113.9)]
cifar_std = [value / 255.0 for value in (63.0, 62.1, 66.7)]

# Identical preprocessing (no augmentation) is applied to both splits.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=cifar_mean, std=cifar_std),
])

# Training split, reshuffled on every pass.
trainset = datasets.CIFAR10(
    root='./dataCifar', train=True, download=True, transform=transform
)
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

# Test split, iterated in a fixed order.
testset = datasets.CIFAR10(
    root='./dataCifar', train=False, download=True, transform=transform
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

Files already downloaded and verified
Files already downloaded and verified


# ResNet-50 Models

In [8]:
# Number of epochs passed as `epochs=` to the train_loop.train_model calls in this notebook.
N_EPOCH = 10

In [9]:
# Root directory for every checkpoint in this notebook: the float base model,
# the post-training-quantized weights and the QAT runs are all written beneath it.
SAVE_DIR = 'checkpoint/cifar10_resnet50'

## Base model

In [10]:
# Start from torchvision's ResNet-50 with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` - confirm against the installed version before upgrading.
base_model = torchvision.models.resnet50(pretrained=True)
# Replace the final fully connected layer so the network emits N_CLASS outputs.
base_model.fc = nn.Linear(base_model.fc.in_features, N_CLASS) # Change top layer

In [11]:
# Fine-tune the float baseline: cross-entropy loss, SGD with momentum and weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)

# Train for N_EPOCH epochs; the test loader doubles as the validation set.
# `save` is the output location (model_weights.pt is later loaded from this
# directory); `clip_value` is presumably a gradient-clipping threshold - see train_loop.
train_loop.train_model(
    model=base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save=f"{SAVE_DIR}/base_model",
)

train - epoch:  0: : 196it [00:30,  6.42it/s, loss=0.859]                       
val - epoch:  0: : 40it [00:03, 12.77it/s, val_loss=0.562, train_loss=0.859, acc=0.809]                      
train - epoch:  1: : 196it [00:29,  6.67it/s, loss=0.378]                       
val - epoch:  1: : 40it [00:03, 12.82it/s, val_loss=0.527, train_loss=0.378, acc=0.827]                      
train - epoch:  2: : 196it [00:29,  6.76it/s, loss=0.218]                       
val - epoch:  2: : 40it [00:03, 12.91it/s, val_loss=0.55, train_loss=0.218, acc=0.836]                       
train - epoch:  3: : 196it [00:29,  6.76it/s, loss=0.138]                       
val - epoch:  3: : 40it [00:03, 13.32it/s, val_loss=0.598, train_loss=0.138, acc=0.837]                      
train - epoch:  4: : 196it [00:30,  6.53it/s, loss=0.104]                        
val - epoch:  4: : 40it [00:02, 13.48it/s, val_loss=0.697, train_loss=0.104, acc=0.822]                      
train - epoch:  5: : 196it [00:29,  6.69it/s

In [12]:
# Accuracy of the fine-tuned float baseline on the CIFAR-10 test split
# (the same loader that was passed as val_dl during training).
train_loop.test_model(test_loader, base_model)

100%|██████████| 40/40 [00:03<00:00, 13.23it/s, acc=0.846]

acc: 0.846





## Post Training Quantization

### 8-bit quantization

In [10]:
# Rebuild the network from the custom quantization layers (8-bit, QAT disabled)
# and initialise it from the fine-tuned float checkpoint of the base-model run.
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=8,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Run the quantization-enabled model once over the training set before conversion.
# Presumably this pass lets the custom layers record the quantized weights/statistics
# that convert_from() reads - confirm in quantization_functions.
# NOTE(review): the recorded accuracy of this pass (0.259) is identical in every PTQ
# section regardless of q_num_bit and far below the float baseline (0.846) - verify
# that the checkpoint and the bit width are really applied here.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:21<00:00,  9.13it/s, acc=0.259]

acc: 0.25926





In [12]:
# Build the quantized ResNet-50 and populate it from the calibrated model above.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 8-bit PTQ model on the CIFAR-10 test split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:05<00:00,  7.75it/s, acc=0.242]

acc: 0.2418





In [14]:
# Persist the 8-bit PTQ weights directly under SAVE_DIR.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/ptq8bit_model_weights.pt')

### 7-bit quantization

In [10]:
# Rebuild the network from the custom quantization layers (7-bit, QAT disabled)
# and initialise it from the fine-tuned float checkpoint of the base-model run.
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=7,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Run the quantization-enabled model once over the training set before conversion.
# Presumably this pass lets the custom layers record the quantized weights/statistics
# that convert_from() reads - confirm in quantization_functions.
# NOTE(review): the recorded accuracy (0.259) equals the value recorded for every
# other bit width and is far below the float baseline (0.846) - verify.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:20<00:00,  9.51it/s, acc=0.259]

acc: 0.25926





In [12]:
# Build the quantized ResNet-50 and populate it from the calibrated 7-bit model.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 7-bit PTQ model on the CIFAR-10 test split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.17it/s, acc=0.238]

acc: 0.2377





In [14]:
# Persist the 7-bit PTQ weights directly under SAVE_DIR.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/ptq7bit_model_weights.pt')

### 6-bit quantization

In [10]:
# Rebuild the network from the custom quantization layers (6-bit, QAT disabled)
# and initialise it from the fine-tuned float checkpoint of the base-model run.
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=6,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Run the quantization-enabled model once over the training set before conversion.
# Presumably this pass lets the custom layers record the quantized weights/statistics
# that convert_from() reads - confirm in quantization_functions.
# NOTE(review): the recorded accuracy (0.259) equals the value recorded for every
# other bit width and is far below the float baseline (0.846) - verify.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:21<00:00,  9.14it/s, acc=0.259]

acc: 0.25926





In [12]:
# Build the quantized ResNet-50 and populate it from the calibrated 6-bit model.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 6-bit PTQ model on the CIFAR-10 test split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:05<00:00,  7.89it/s, acc=0.181]

acc: 0.1806





In [14]:
# Persist the 6-bit PTQ weights directly under SAVE_DIR.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/ptq6bit_model_weights.pt')

### 5-bit quantization

In [10]:
# Rebuild the network from the custom quantization layers (5-bit, QAT disabled)
# and initialise it from the fine-tuned float checkpoint of the base-model run.
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=5,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Run the quantization-enabled model once over the training set before conversion.
# Presumably this pass lets the custom layers record the quantized weights/statistics
# that convert_from() reads - confirm in quantization_functions.
# NOTE(review): the recorded accuracy (0.259) equals the value recorded for every
# other bit width and is far below the float baseline (0.846) - verify.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:20<00:00,  9.45it/s, acc=0.259]

acc: 0.25926





In [12]:
# Build the quantized ResNet-50 and populate it from the calibrated 5-bit model.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 5-bit PTQ model on the CIFAR-10 test split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.64it/s, acc=0.138]

acc: 0.1378





In [14]:
# Persist the 5-bit PTQ weights directly under SAVE_DIR.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/ptq5bit_model_weights.pt')

### 4-bit quantization

In [10]:
# Rebuild the network from the custom quantization layers (4-bit, QAT disabled)
# and initialise it from the fine-tuned float checkpoint of the base-model run.
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=4,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Run the quantization-enabled model once over the training set before conversion.
# Presumably this pass lets the custom layers record the quantized weights/statistics
# that convert_from() reads - confirm in quantization_functions.
# NOTE(review): the recorded accuracy (0.259) equals the value recorded for every
# other bit width and is far below the float baseline (0.846) - verify.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:21<00:00,  8.99it/s, acc=0.259]

acc: 0.25926





In [12]:
# Build the quantized ResNet-50 and populate it from the calibrated 4-bit model.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 4-bit PTQ model on the CIFAR-10 test split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:04<00:00,  9.72it/s, acc=0.106]

acc: 0.1061





In [14]:
# Persist the 4-bit PTQ weights directly under SAVE_DIR.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/ptq4bit_model_weights.pt')

## Quantization Aware Training

### 8-bit quantization

In [10]:
# Build the custom quantization-layer ResNet-50 for 8-bit quantization-aware training.
# With pretrained=True the recorded output lists fc.weight/fc.bias as the keys left
# over - presumably the classifier head is not taken from the pretrained weights
# (confirm in quant_aware_resnet_model).
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=8,
    qat=True,
    pretrained=True,
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# 8-bit QAT: cross-entropy loss and SGD at a 10x smaller learning rate than the float baseline.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

# Train for N_EPOCH epochs with the test loader as validation set; outputs go to SAVE_DIR/qat8bit.
train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save=f"{SAVE_DIR}/qat8bit",
)

train - epoch:  0: : 196it [00:40,  4.82it/s, loss=1.19]                       
val - epoch:  0: : 40it [00:04,  8.72it/s, val_loss=0.916, train_loss=1.19, acc=0.675]                      
train - epoch:  1: : 196it [00:41,  4.70it/s, loss=0.761]                       
val - epoch:  1: : 40it [00:04,  8.56it/s, val_loss=0.742, train_loss=0.761, acc=0.738]                      
train - epoch:  2: : 196it [00:41,  4.78it/s, loss=0.637]                       
val - epoch:  2: : 40it [00:04,  8.79it/s, val_loss=0.707, train_loss=0.637, acc=0.749]                      
train - epoch:  3: : 196it [00:41,  4.75it/s, loss=0.556]                       
val - epoch:  3: : 40it [00:04,  9.01it/s, val_loss=0.685, train_loss=0.556, acc=0.766]                      
train - epoch:  4: : 196it [00:40,  4.85it/s, loss=0.477]                       
val - epoch:  4: : 40it [00:04,  8.13it/s, val_loss=0.743, train_loss=0.477, acc=0.754]                      
train - epoch:  5: : 196it [00:40,  4.79it/s, l

In [12]:
# Build the quantized ResNet-50 and populate it from the 8-bit QAT-trained model.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 8-bit QAT model on the CIFAR-10 test split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:04<00:00,  8.08it/s, acc=0.793]

acc: 0.7934





In [14]:
# Persist the quantized 8-bit QAT weights inside that run's directory.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/qat8bit/model_weights_quantized.pt')

### 7-bit quantization

In [10]:
# Build the custom quantization-layer ResNet-50 for 7-bit quantization-aware training.
# With pretrained=True the recorded output lists fc.weight/fc.bias as the keys left
# over - presumably the classifier head is not taken from the pretrained weights
# (confirm in quant_aware_resnet_model).
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=7,
    qat=True,
    pretrained=True,
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# 7-bit QAT: cross-entropy loss and SGD at a 10x smaller learning rate than the float baseline.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

# Train for N_EPOCH epochs with the test loader as validation set; outputs go to SAVE_DIR/qat7bit.
train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save=f"{SAVE_DIR}/qat7bit",
)

train - epoch:  0: : 196it [00:41,  4.68it/s, loss=1.26]                       
val - epoch:  0: : 40it [00:04,  8.72it/s, val_loss=0.958, train_loss=1.26, acc=0.658]                      
train - epoch:  1: : 196it [00:40,  4.80it/s, loss=0.832]                       
val - epoch:  1: : 40it [00:04,  8.56it/s, val_loss=0.839, train_loss=0.832, acc=0.699]                      
train - epoch:  2: : 196it [00:40,  4.86it/s, loss=0.706]                       
val - epoch:  2: : 40it [00:04,  8.62it/s, val_loss=0.738, train_loss=0.706, acc=0.737]                      
train - epoch:  3: : 196it [00:41,  4.69it/s, loss=0.634]                       
val - epoch:  3: : 40it [00:04,  8.31it/s, val_loss=0.729, train_loss=0.634, acc=0.749]                      
train - epoch:  4: : 196it [00:40,  4.85it/s, loss=0.567]                       
val - epoch:  4: : 40it [00:04,  8.50it/s, val_loss=0.682, train_loss=0.567, acc=0.767]                      
train - epoch:  5: : 196it [00:40,  4.78it/s, l

In [12]:
# Build the quantized ResNet-50 and populate it from the 7-bit QAT-trained model.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 7-bit QAT model on the CIFAR-10 test split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:05<00:00,  7.47it/s, acc=0.791]

acc: 0.7908





In [14]:
# Persist the quantized 7-bit QAT weights inside that run's directory.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/qat7bit/model_weights_quantized.pt')

### 6-bit quantization

In [10]:
# Build the custom quantization-layer ResNet-50 for 6-bit quantization-aware training.
# With pretrained=True the recorded output lists fc.weight/fc.bias as the keys left
# over - presumably the classifier head is not taken from the pretrained weights
# (confirm in quant_aware_resnet_model).
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=6,
    qat=True,
    pretrained=True,
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# 6-bit QAT: cross-entropy loss and SGD at a 10x smaller learning rate than the float baseline.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

# Train for N_EPOCH epochs with the test loader as validation set; outputs go to SAVE_DIR/qat6bit.
train_loop.train_model(
    model=c_base_model,
    train_dl=train_loader,
    val_dl=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save=f"{SAVE_DIR}/qat6bit",
)

train - epoch:  0: : 196it [00:41,  4.75it/s, loss=1.63]                       
val - epoch:  0: : 40it [00:04,  8.91it/s, val_loss=1.2, train_loss=1.63, acc=0.569]                       
train - epoch:  1: : 196it [00:41,  4.75it/s, loss=1.07]                       
val - epoch:  1: : 40it [00:04,  8.33it/s, val_loss=0.953, train_loss=1.07, acc=0.661]                      
train - epoch:  2: : 196it [00:40,  4.85it/s, loss=0.92]                        
val - epoch:  2: : 40it [00:04,  8.27it/s, val_loss=0.897, train_loss=0.92, acc=0.681]                      
train - epoch:  3: : 196it [00:41,  4.77it/s, loss=0.841]                       
val - epoch:  3: : 40it [00:04,  8.67it/s, val_loss=0.872, train_loss=0.841, acc=0.697]                      
train - epoch:  4: : 196it [00:40,  4.87it/s, loss=0.792]                       
val - epoch:  4: : 40it [00:04,  8.51it/s, val_loss=0.842, train_loss=0.792, acc=0.709]                      
train - epoch:  5: : 196it [00:41,  4.76it/s, loss=

In [12]:
# Build the quantized ResNet-50 and populate it from the 6-bit QAT-trained model.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 6-bit QAT model on the CIFAR-10 test split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:04<00:00,  8.30it/s, acc=0.733]

acc: 0.7334





In [14]:
# Persist the quantized 6-bit QAT weights inside that run's directory.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/qat6bit/model_weights_quantized.pt')

### 5-bit quantization

In [10]:
# Build the custom quantization-layer ResNet-50 for 5-bit quantization-aware training.
# With pretrained=True the recorded output lists fc.weight/fc.bias as the keys left
# over - presumably the classifier head is not taken from the pretrained weights
# (confirm in quant_aware_resnet_model).
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=5,
    qat=True,
    pretrained=True,
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# The 5-bit run uses a shorter 5-epoch schedule. It is kept in its own name instead
# of overwriting the notebook-wide N_EPOCH, so re-running any earlier training cell
# afterwards still trains for its original number of epochs.
QAT_5BIT_EPOCHS = 5

# 5-bit QAT: cross-entropy loss and SGD with momentum and weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

# The test loader doubles as the validation set; outputs go to SAVE_DIR/qat5bit.
train_loop.train_model(
    train_dl=train_loader,
    val_dl=test_loader,
    model=c_base_model,
    optimizer=optimizer,
    criterion=criterion,
    clip_value=1e-2,
    epochs=QAT_5BIT_EPOCHS,
    save=f"{SAVE_DIR}/qat5bit",
)

train - epoch:  0: : 196it [00:40,  4.84it/s, loss=2.04]                       
val - epoch:  0: : 40it [00:04,  8.15it/s, val_loss=1.8, train_loss=2.04, acc=0.319]                       
train - epoch:  1: : 196it [00:40,  4.78it/s, loss=1.71]                       
val - epoch:  1: : 40it [00:04,  9.05it/s, val_loss=1.68, train_loss=1.71, acc=0.359]                      
train - epoch:  2: : 196it [00:40,  4.83it/s, loss=1.61]                       
val - epoch:  2: : 40it [00:04,  8.18it/s, val_loss=1.57, train_loss=1.61, acc=0.411]                      
train - epoch:  3: : 196it [00:40,  4.88it/s, loss=1.58]                       
val - epoch:  3: : 40it [00:04,  8.48it/s, val_loss=1.59, train_loss=1.58, acc=0.388]                      
train - epoch:  4: : 196it [00:40,  4.85it/s, loss=1.55]                       
val - epoch:  4: : 40it [00:04,  8.42it/s, val_loss=1.52, train_loss=1.55, acc=0.426]                      


In [12]:
# Build the quantized ResNet-50 and populate it from the 5-bit QAT-trained model.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 5-bit QAT model on the CIFAR-10 test split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:05<00:00,  7.68it/s, acc=0.409]

acc: 0.4095





In [14]:
# Persist the quantized 5-bit QAT weights inside that run's directory.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/qat5bit/model_weights_quantized.pt')

### 4-bit quantization

In [10]:
# Build the custom quantization-layer ResNet-50 for 4-bit quantization-aware training.
# With pretrained=True the recorded output lists fc.weight/fc.bias as the keys left
# over - presumably the classifier head is not taken from the pretrained weights
# (confirm in quant_aware_resnet_model).
# N_CLASS replaces the hard-coded 10 so the head stays in sync with the config cell.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=4,
    qat=True,
    pretrained=True,
)
# Switch quantization on (exact semantics are defined in quant_aware_resnet_model).
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# The 4-bit run uses a shorter 5-epoch schedule. It is kept in its own name instead
# of overwriting the notebook-wide N_EPOCH, so re-running any earlier training cell
# afterwards still trains for its original number of epochs.
QAT_4BIT_EPOCHS = 5

# 4-bit QAT: cross-entropy loss and SGD with momentum and weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

# The test loader doubles as the validation set; outputs go to SAVE_DIR/qat4bit.
train_loop.train_model(
    train_dl=train_loader,
    val_dl=test_loader,
    model=c_base_model,
    optimizer=optimizer,
    criterion=criterion,
    clip_value=1e-2,
    epochs=QAT_4BIT_EPOCHS,
    save=f"{SAVE_DIR}/qat4bit",
)

train - epoch:  0: : 196it [00:40,  4.84it/s, loss=3.15]                       
val - epoch:  0: : 40it [00:04,  8.40it/s, val_loss=2.61, train_loss=3.15, acc=0.105]                      
train - epoch:  1: : 196it [00:40,  4.85it/s, loss=2.66]                       
val - epoch:  1: : 40it [00:04,  8.56it/s, val_loss=2.58, train_loss=2.66, acc=0.11]                       
train - epoch:  2: : 196it [00:40,  4.87it/s, loss=2.69]                       
val - epoch:  2: : 40it [00:04,  8.19it/s, val_loss=2.48, train_loss=2.69, acc=0.104]                      
train - epoch:  3: : 196it [00:40,  4.90it/s, loss=2.82]                       
val - epoch:  3: : 40it [00:04,  8.45it/s, val_loss=2.73, train_loss=2.82, acc=0.109]                      
train - epoch:  4: : 196it [00:40,  4.88it/s, loss=2.6]                        
val - epoch:  4: : 40it [00:05,  7.95it/s, val_loss=2.39, train_loss=2.6, acc=0.0992]                      


In [12]:
# Build the quantized ResNet-50 and populate it from the 4-bit QAT-trained model.
# N_CLASS replaces the hard-coded 10 so it matches the config cell.
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Accuracy of the converted 4-bit QAT model on the CIFAR-10 test split (test_loader).
# NOTE(review): the recorded result (0.099) is at chance level for 10 classes.
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:05<00:00,  7.83it/s, acc=0.099] 

acc: 0.099





In [14]:
# Persist the quantized 4-bit QAT weights inside that run's directory.
torch.save(q_base_model.state_dict(), f'{SAVE_DIR}/qat4bit/model_weights_quantized.pt')