# Initialization

## Import Libraries

In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms


In [2]:
from collections import namedtuple
import tqdm

In [3]:
# Make sure a GPU is available before going further: the evaluation cells below
# call .cuda() unconditionally. The previous bare `torch.cuda.device(0)` only
# constructed a context manager that was never entered, so it checked nothing.
assert torch.cuda.is_available(), 'CUDA GPU not available'
# Bare last expression: the notebook displays the name of device 0.
torch.cuda.get_device_name(0)

'Tesla V100-SXM2-16GB'

In [4]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to the local project modules are
# picked up without restarting the kernel.
# (Ref: http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython)
%load_ext autoreload
%autoreload 2

## Import Created Modules

In [5]:
from quantization_functions import quant_aware_resnet_model
from quantization_functions import post_training_quant_model
from quantization_functions import train_loop

## Load Dataset

In [6]:
# Mini-batch size shared by the training and the evaluation data loaders.
BATCH_SIZE = TEST_BATCH_SIZE = 256
# Number of target classes; used to size the classifier head.
N_CLASS = 10

In [7]:
# Shared preprocessing for both splits: convert to tensor, then normalize each
# channel. The statistics are written on the 0-255 scale and divided by 255
# here (presumably the usual CIFAR-10 per-channel mean/std -- confirm).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[125.3 / 255.0, 123.0 / 255.0, 113.9 / 255.0],
        std=[63.0 / 255.0, 62.1 / 255.0, 66.7 / 255.0],
    ),
])

# Datasets: downloaded into ./dataCifar on first use.
trainset = datasets.CIFAR10(
    root='./dataCifar',
    train=True,
    download=True,
    transform=transform,
)
testset = datasets.CIFAR10(
    root='./dataCifar',
    train=False,
    download=True,
    transform=transform,
)

# Loaders: only the training split is shuffled.
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

Files already downloaded and verified
Files already downloaded and verified


# Models

In [8]:
# Number of training epochs passed to train_loop.train_model for the full
# training runs (the base model and both quantization-aware runs).
N_EPOCH = 10

## Base model

In [9]:
# Start from torchvision's pretrained ResNet-50.
base_model = torchvision.models.resnet50(pretrained=True)
# Change top layer: replace the final fully connected layer with one that
# keeps the same input width but emits N_CLASS outputs.
base_model.fc = nn.Linear(
    in_features=base_model.fc.in_features,
    out_features=N_CLASS,
)

In [10]:
### Train Loop
# SGD with momentum and a small weight decay, paired with cross-entropy loss.
optimizer = torch.optim.SGD(
    base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)
criterion = nn.CrossEntropyLoss()

# Train for N_EPOCH epochs, validating on the test loader. `save` is presumably
# an output directory: a later cell loads checkpoint/base_model2/model_weights.pt.
train_loop.train_model(
    model=base_model,
    optimizer=optimizer,
    criterion=criterion,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save='checkpoint/base_model2',
)

train - epoch:  0: : 196it [00:29,  6.73it/s, loss=0.857]                       
val - epoch:  0: : 40it [00:03, 12.85it/s, val_loss=0.6, train_loss=0.857, acc=0.799]                        
train - epoch:  1: : 196it [00:29,  6.60it/s, loss=0.38]                        
val - epoch:  1: : 40it [00:03, 13.20it/s, val_loss=0.519, train_loss=0.38, acc=0.825]                      
train - epoch:  2: : 196it [00:29,  6.71it/s, loss=0.218]                       
val - epoch:  2: : 40it [00:03, 12.13it/s, val_loss=0.572, train_loss=0.218, acc=0.828]                      
train - epoch:  3: : 196it [00:28,  6.76it/s, loss=0.146]                       
val - epoch:  3: : 40it [00:02, 13.53it/s, val_loss=0.607, train_loss=0.146, acc=0.837]                      
train - epoch:  4: : 196it [00:29,  6.61it/s, loss=0.103]                        
val - epoch:  4: : 40it [00:02, 13.65it/s, val_loss=0.666, train_loss=0.103, acc=0.834]                      
train - epoch:  5: : 196it [00:29,  6.70it/s,

In [11]:
# Validation accuracy of the float base model.
base_model.eval()

acc = []  # one boolean per test sample: prediction == label
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or retained per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc': sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 40/40 [00:03<00:00, 12.85it/s, acc=0.842]

acc: 0.842





## Post Training Quantization

### 8-bit quantization

In [9]:
# Convert base model to a custom quantization layer with the trained weights:
# build the custom ResNet-50 at 8 bits from the base-model checkpoint.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # reuse the notebook-level class count, not a literal 10
    q_num_bit=8,
    pretrained='checkpoint/base_model2/model_weights.pt',
)
# Presumably switches quantization on -- confirm in quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [10]:
# Forward pass to have quantized weights: one epoch through the
# quantization-wrapped model.
# NOTE(review): an SGD optimizer is handed to train_model, so the weights are
# presumably also updated during this pass -- confirm that is intended for a
# post-training quantization flow.
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)
criterion = nn.CrossEntropyLoss()

train_loop.train_model(
    model=c_base_model,
    optimizer=optimizer,
    criterion=criterion,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=1,
    clip_value=1e-2,
    save='checkpoint/ptq8bit',
)

train - epoch:  0: : 196it [00:41,  4.76it/s, loss=0.818]                       
val - epoch:  0: : 40it [00:04,  8.24it/s, val_loss=0.712, train_loss=0.818, acc=0.752]                      


In [11]:
# Convert to quantized model: create a QResnet50 and fill it from the current
# c_base_model (what convert_from transfers is defined in
# post_training_quant_model -- confirm there).
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)  # N_CLASS instead of a literal 10
q_base_model.convert_from(c_base_model)

In [12]:
# Validation accuracy
q_base_model.eval()

acc = []  # one boolean per test sample: prediction == label
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or retained per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc': sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 40/40 [00:04<00:00,  8.26it/s, acc=0.754]

acc: 0.7538





In [13]:
# Serialize the converted model's state dict to disk in binary mode.
with open('checkpoint/ptq8bit/model_weights_quantized.pt', mode='wb') as weights_file:
    torch.save(q_base_model.state_dict(), weights_file)

### 4-bit quantization

In [14]:
# Convert base model to a custom quantization layer with the trained weights:
# build the custom ResNet-50 at 4 bits from the base-model checkpoint.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # reuse the notebook-level class count, not a literal 10
    q_num_bit=4,
    pretrained='checkpoint/base_model2/model_weights.pt',
)
# Presumably switches quantization on -- confirm in quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [15]:
# Forward pass to have quantized weights: one epoch through the
# quantization-wrapped model, with a smaller learning rate (1e-4) than the
# 8-bit run.
# NOTE(review): an SGD optimizer is handed to train_model, so the weights are
# presumably also updated during this pass -- confirm that is intended for a
# post-training quantization flow.
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-4,
    momentum=0.9,
    weight_decay=1e-5,
)
criterion = nn.CrossEntropyLoss()

train_loop.train_model(
    model=c_base_model,
    optimizer=optimizer,
    criterion=criterion,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=1,
    clip_value=1e-2,
    save='checkpoint/ptq4bit',
)

train - epoch:  0: : 196it [00:41,  4.76it/s, loss=2.89]                       
val - epoch:  0: : 40it [00:05,  7.71it/s, val_loss=2.99, train_loss=2.89, acc=0.114]                      


In [16]:
# Convert to quantized model: create a QResnet50 and fill it from the current
# c_base_model (what convert_from transfers is defined in
# post_training_quant_model -- confirm there).
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)  # N_CLASS instead of a literal 10
q_base_model.convert_from(c_base_model)

In [17]:
# Validation accuracy
q_base_model.eval()

acc = []  # one boolean per test sample: prediction == label
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or retained per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc': sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 40/40 [00:05<00:00,  7.99it/s, acc=0.127]

acc: 0.1266





In [18]:
# Serialize the converted model's state dict to disk in binary mode.
with open('checkpoint/ptq4bit/model_weights_quantized.pt', mode='wb') as weights_file:
    torch.save(q_base_model.state_dict(), weights_file)

## Quantization Aware Training

### 8-bit quantization

In [22]:
# Create model with custom quantization layer from the start (8-bit).
# pretrained=True is used here instead of a checkpoint path; what it loads is
# decided inside CResnet50 -- confirm in quant_aware_resnet_model.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # reuse the notebook-level class count, not a literal 10
    q_num_bit=8,
    pretrained=True,
)
# Presumably switches quantization on -- confirm in quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [23]:
# Training Loop: train the 8-bit quantization-wrapped model for N_EPOCH epochs
# with the same optimizer settings as the base model.
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)
criterion = nn.CrossEntropyLoss()

train_loop.train_model(
    model=c_base_model,
    optimizer=optimizer,
    criterion=criterion,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save='checkpoint/qat8bit',
)

train - epoch:  0: : 196it [00:41,  4.74it/s, loss=1.22]                       
val - epoch:  0: : 40it [00:04,  8.23it/s, val_loss=0.864, train_loss=1.22, acc=0.707]                      
train - epoch:  1: : 196it [00:41,  4.77it/s, loss=0.767]                       
val - epoch:  1: : 40it [00:04,  8.82it/s, val_loss=0.761, train_loss=0.767, acc=0.734]                      
train - epoch:  2: : 196it [00:41,  4.73it/s, loss=0.645]                       
val - epoch:  2: : 40it [00:04,  8.29it/s, val_loss=0.824, train_loss=0.645, acc=0.733]                      
train - epoch:  3: : 196it [00:40,  4.86it/s, loss=0.547]                       
val - epoch:  3: : 40it [00:04,  8.02it/s, val_loss=0.675, train_loss=0.547, acc=0.778]                      
train - epoch:  4: : 196it [00:39,  4.97it/s, loss=0.465]                       
val - epoch:  4: : 40it [00:04,  8.90it/s, val_loss=0.696, train_loss=0.465, acc=0.779]                      
train - epoch:  5: : 196it [00:41,  4.78it/s, l

In [24]:
# Convert to quantized model: create a QResnet50 and fill it from the current
# c_base_model (what convert_from transfers is defined in
# post_training_quant_model -- confirm there).
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)  # N_CLASS instead of a literal 10
q_base_model.convert_from(c_base_model)

In [25]:
# Validation accuracy
q_base_model.eval()

acc = []  # one boolean per test sample: prediction == label
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or retained per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc': sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 40/40 [00:04<00:00,  8.23it/s, acc=0.79] 

acc: 0.7905





In [26]:
# Serialize the converted model's state dict to disk in binary mode.
with open('checkpoint/qat8bit/model_weights_quantized.pt', mode='wb') as weights_file:
    torch.save(q_base_model.state_dict(), weights_file)

### 4-bit quantization

In [27]:
# Create model with custom quantization layer from the start (4-bit).
# pretrained=True is used here instead of a checkpoint path; what it loads is
# decided inside CResnet50 -- confirm in quant_aware_resnet_model.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,  # reuse the notebook-level class count, not a literal 10
    q_num_bit=4,
    pretrained=True,
)
# Presumably switches quantization on -- confirm in quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [28]:
# Training Loop: train the 4-bit quantization-wrapped model for N_EPOCH epochs,
# using a smaller learning rate (1e-4) than the 8-bit run.
optimizer = torch.optim.SGD(
    c_base_model.parameters(),
    lr=1e-4,
    momentum=0.9,
    weight_decay=1e-5,
)
criterion = nn.CrossEntropyLoss()

train_loop.train_model(
    model=c_base_model,
    optimizer=optimizer,
    criterion=criterion,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    clip_value=1e-2,
    save='checkpoint/qat4bit',
)

train - epoch:  0: : 196it [00:40,  4.80it/s, loss=6.51]                       
val - epoch:  0: : 40it [00:04,  8.26it/s, val_loss=5.39, train_loss=6.51, acc=0.0989]                      
train - epoch:  1: : 196it [00:41,  4.76it/s, loss=4.87]                       
val - epoch:  1: : 40it [00:04,  8.47it/s, val_loss=4.46, train_loss=4.87, acc=0.102]                       
train - epoch:  2: : 196it [00:40,  4.82it/s, loss=4.23]                       
val - epoch:  2: : 40it [00:04,  8.42it/s, val_loss=4.07, train_loss=4.23, acc=0.1]                         
train - epoch:  3: : 196it [00:40,  4.86it/s, loss=3.94]                       
val - epoch:  3: : 40it [00:04,  8.68it/s, val_loss=4.12, train_loss=3.94, acc=0.0991]                      
train - epoch:  4: : 196it [00:39,  4.91it/s, loss=3.21]                       
val - epoch:  4: : 40it [00:04,  8.86it/s, val_loss=2.98, train_loss=3.21, acc=0.102]                       
train - epoch:  5: : 196it [00:39,  4.93it/s, loss=2.94

In [29]:
# Convert to quantized model: create a QResnet50 and fill it from the current
# c_base_model (what convert_from transfers is defined in
# post_training_quant_model -- confirm there).
q_base_model = post_training_quant_model.QResnet50(num_class=N_CLASS)  # N_CLASS instead of a literal 10
q_base_model.convert_from(c_base_model)

In [30]:
# Validation accuracy
q_base_model.eval()

acc = []  # one boolean per test sample: prediction == label
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no graph is built or retained per batch.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown in the progress bar.
        bar.set_postfix({'acc': sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 40/40 [00:04<00:00,  8.44it/s, acc=0.101] 

acc: 0.1013





In [31]:
# Serialize the converted model's state dict to disk in binary mode.
with open('checkpoint/qat4bit/model_weights_quantized.pt', mode='wb') as weights_file:
    torch.save(q_base_model.state_dict(), weights_file)