# Initialization

## Import Libraries

In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms


In [2]:
from collections import namedtuple
import tqdm

In [3]:
# Make sure a CUDA GPU is available and show which device will be used.
# `torch.cuda.device(0)` on its own only constructs a context manager and selects
# nothing outside a `with` block, so it is replaced by an explicit availability check.
if not torch.cuda.is_available():
    raise RuntimeError("This notebook requires a CUDA-capable GPU, but none is available.")
# Last expression of the cell: rendered as the cell output (name of the active device).
torch.cuda.get_device_name(torch.cuda.current_device())

'Tesla V100-SXM2-16GB'

## Import Created Modules

In [4]:
from quantization_functions import quant_aware_layers, quant_aware_resnet_model
from quantization_functions import post_training_quant_layers, post_training_quant_model
from quantization_functions import train_loop

## Load Dataset

In [5]:
# Mini-batch sizes handed to the training and test DataLoaders respectively.
BATCH_SIZE = 64
TEST_BATCH_SIZE = 64
# Number of output classes; used to size the replacement `fc` head of the ResNet.
N_CLASS = 10

In [6]:
# Shared preprocessing for both splits: convert images to tensors, then
# normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Training split (downloaded into ./dataCifar if missing) and its shuffled loader.
trainset = datasets.CIFAR10(
    root='./dataCifar',
    train=True,
    download=True,
    transform=transform,
)
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

# Test split and its loader; order is kept fixed (no shuffling) for evaluation.
testset = datasets.CIFAR10(
    root='./dataCifar',
    train=False,
    download=True,
    transform=transform,
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./dataCifar/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./dataCifar/cifar-10-python.tar.gz to ./dataCifar
Files already downloaded and verified


# Models

In [7]:
# Number of epochs passed to train_loop.train_model for the full training runs.
N_EPOCH = 3

## Base model

In [8]:
# Baseline: torchvision ResNet-50 initialised from its pretrained weights.
base_model = torchvision.models.resnet50(pretrained=True)
# Change top layer: swap the classifier for a fresh linear head with N_CLASS outputs.
fc_in_features = base_model.fc.in_features
base_model.fc = nn.Linear(fc_in_features, N_CLASS)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/ecbm4040/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [9]:
### Train Loop
# Fine-tune all parameters of the baseline with cross-entropy loss and
# SGD (momentum 0.9, small weight decay); `save` is forwarded to train_model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=base_model,
    criterion=criterion,
    optimizer=optimizer,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    save='checkpoint/base_model',
)

train - epoch:  0: : 782it [00:50, 15.59it/s, loss=0.959]                       
val - epoch:  0: : 157it [00:04, 34.02it/s, val_loss=0.61, train_loss=0.959, acc=0.791]                        
train - epoch:  1: : 782it [00:48, 16.08it/s, loss=0.495]                       
val - epoch:  1: : 157it [00:04, 37.57it/s, val_loss=0.533, train_loss=0.495, acc=0.822]                       
train - epoch:  2: : 782it [00:48, 16.00it/s, loss=0.326]                       
val - epoch:  2: : 157it [00:04, 37.16it/s, val_loss=0.533, train_loss=0.326, acc=0.826]                       


In [10]:
# Measure top-1 accuracy of the trained baseline on the test loader.
base_model.eval()

acc = []  # one boolean per test sample: True when the prediction matches the label
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no computation graph is built per batch
# (the original loop tracked gradients needlessly, wasting GPU memory and time).
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = base_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown next to the progress bar.
        bar.set_postfix({'acc':sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 157/157 [00:04<00:00, 37.57it/s, acc=0.826]

acc: 0.8258





## Post-Training Quantization

In [11]:
# Start the post-training-quantization experiment from a fresh pretrained ResNet-50
# (this rebinds `base_model`, discarding the baseline trained earlier).
base_model = torchvision.models.resnet50(pretrained=True)
# Change top layer: new linear head with N_CLASS outputs.
fc_in_features = base_model.fc.in_features
base_model.fc = nn.Linear(fc_in_features, N_CLASS)

In [12]:
### Train Loop
# Float training of the model that will later be quantized; runs one epoch fewer
# than the baseline (N_EPOCH-1) and forwards `save='checkpoint/ptq'` to train_model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=base_model,
    criterion=criterion,
    optimizer=optimizer,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH-1,
    save='checkpoint/ptq',
)

train - epoch:  0: : 782it [00:48, 16.25it/s, loss=0.955]                       
val - epoch:  0: : 157it [00:04, 35.75it/s, val_loss=0.644, train_loss=0.955, acc=0.777]                       
train - epoch:  1: : 782it [00:49, 15.67it/s, loss=0.491]                       
val - epoch:  1: : 157it [00:04, 37.29it/s, val_loss=0.515, train_loss=0.491, acc=0.826]                       


#### 8-bit quantization

In [13]:
# Convert base model to a custom quantization layer with the trained weights.
# The class count comes from N_CLASS instead of a hard-coded 10, so the head always
# matches the model whose weights are loaded from the checkpoint path below.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=8,
    pretrained='checkpoint/ptq/model_weights.pt',
)
# NOTE(review): presumably switches the custom layers into quantized mode —
# confirm against quantization_functions.quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [14]:
# Forward pass to have quantized weights
# NOTE(review): the recorded output of this cell is a RuntimeError (tensor size
# mismatch, 2 vs 4 at dimension 3) raised before any batch completed. The cause is
# not visible in this notebook — presumably inside quantization_functions — TODO fix
# before trusting the post-training-quantization results.
# NOTE(review): this runs a full training epoch with optimizer updates (lr=1e-4),
# not only a forward pass — confirm that updating the weights here is intended.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    c_base_model.parameters(),
    lr=1e-4,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    criterion=criterion,
    optimizer=optimizer,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=1,
    save=False,
)

train - epoch:  0:   0%|          | 0/781 [00:00<?, ?it/s]


RuntimeError: The size of tensor a (2) must match the size of tensor b (4) at non-singleton dimension 3

In [None]:
# Build the quantized ResNet-50; the class count comes from N_CLASS (rather than a
# hard-coded 10) so it stays consistent with the model being converted.
q_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
# Convert to quantized model
q_model.convert_from(c_base_model)

In [None]:
# Measure top-1 accuracy of the post-training-quantized model on the test loader.
q_model.eval()

acc = []  # one boolean per test sample: True when the prediction matches the label
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no computation graph is built per batch.
# NOTE(review): assumes QResnet50 inference does not rely on autograd — confirm.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown next to the progress bar.
        bar.set_postfix({'acc':sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 157/157 [00:06<00:00, 22.80it/s, acc=0.794]

acc: 0.7939





#### 4-bit quantization

*TODO: the 4-bit post-training quantization experiment has not been added yet.*

## Quantization-Aware Model

#### 8-bit quantization

In [None]:
# Create model with custom quantization layer from the start.
# The class count comes from N_CLASS instead of a hard-coded 10, keeping this model
# consistent with the other models in the notebook.
c_base_model = quant_aware_resnet_model.CResnet50(
    num_class=N_CLASS,
    q_num_bit=8,
    pretrained=True,
)
# NOTE(review): presumably switches the custom layers into quantized mode —
# confirm against quantization_functions.quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [None]:
# Training Loop
# Train the quantization-aware model for N_EPOCH epochs with the same loss and SGD
# settings as the baseline; `save='checkpoint/qat'` is forwarded to train_model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

train_loop.train_model(
    model=c_base_model,
    criterion=criterion,
    optimizer=optimizer,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    save='checkpoint/qat',
)

train - epoch:  0: : 782it [01:38,  7.94it/s, loss=0.925]                       
val - epoch:  0: : 157it [00:11, 13.74it/s, val_loss=0.768, train_loss=0.925, acc=0.73]
train - epoch:  1: : 782it [01:38,  7.95it/s, loss=0.633]                       
val - epoch:  1: : 157it [00:11, 13.99it/s, val_loss=0.661, train_loss=0.633, acc=0.768]
train - epoch:  2: : 782it [01:40,  7.82it/s, loss=0.525]                       
val - epoch:  2: : 157it [00:10, 14.30it/s, val_loss=0.59, train_loss=0.525, acc=0.797]


In [None]:
# Build the quantized ResNet-50; the class count comes from N_CLASS (rather than a
# hard-coded 10) so it stays consistent with the model being converted.
q_model = post_training_quant_model.QResnet50(num_class=N_CLASS)
# Convert to quantized model
q_model.convert_from(c_base_model)

In [None]:
# Measure top-1 accuracy of the quantized model converted from the
# quantization-aware-trained network.
q_model.eval()

acc = []  # one boolean per test sample: True when the prediction matches the label
bar = tqdm.tqdm(test_loader)
# Inference only: disable autograd so no computation graph is built per batch.
# NOTE(review): assumes QResnet50 inference does not rely on autograd — confirm.
with torch.no_grad():
    for x, label in bar:
        x, label = x.cuda(), label.cuda()
        y = q_model(x)
        acc.extend((y.argmax(dim=1) == label).tolist())
        # Running accuracy shown next to the progress bar.
        bar.set_postfix({'acc':sum(acc) / len(acc)})
print('acc:', sum(acc) / len(acc))

100%|██████████| 157/157 [00:06<00:00, 22.96it/s, acc=0.797]

acc: 0.7971



