# Initialization

## Import Libraries

In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms


In [2]:
from collections import namedtuple
import tqdm

In [3]:
# Make sure a GPU is visible before any training starts.
# A bare `torch.cuda.device(0)` call only constructs a context manager and selects
# nothing unless used in a `with` block, so it was dropped; availability is checked
# explicitly so a CPU-only kernel fails here with a clear message.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available; this notebook expects to run on a GPU.")
# Last expression of the cell: displays the name of device 0.
torch.cuda.get_device_name(0)

'Tesla V100-SXM2-16GB'

In [4]:
# Notebook auto reloads code, so edits to the imported project modules are picked up
# without restarting the kernel.
# (Ref: http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython)
%load_ext autoreload
%autoreload 2

## Import Created Modules

In [5]:
from quantization_functions import quant_aware_resnet_model
from quantization_functions import post_training_quant_model
from quantization_functions import train_loop

## Load Dataset

In [6]:
# Number of target categories; used to size the classifier head of the base model.
N_CLASS = 10

# Mini-batch sizes for the training loader and the evaluation loader respectively.
BATCH_SIZE = 256
TEST_BATCH_SIZE = 256

In [7]:
# Preprocessing shared by both splits: convert each image to a tensor, then
# standardise every channel. The statistics are written on the 0-255 scale and
# divided by 255 here.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[125.3 / 255.0, 123.0 / 255.0, 113.9 / 255.0],
        std=[63.0 / 255.0, 62.1 / 255.0, 66.7 / 255.0],
    ),
])

# CIFAR-10 train and test splits, downloaded into ./dataCifar when missing.
trainset = datasets.CIFAR10(root='./dataCifar', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./dataCifar', train=False, download=True, transform=transform)

# Only the training loader shuffles; the evaluation loader keeps a fixed order.
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

Files already downloaded and verified
Files already downloaded and verified


# ResNet-18 Models

In [8]:
# Default number of epochs passed as `epochs=` to train_loop.train_model.
N_EPOCH = 10

In [9]:
# Root directory for this experiment's checkpoints and exported weight files;
# every save path in the notebook is built from it.
SAVE_DIR = 'checkpoint/cifar10_resnet18'

## Base model

In [10]:
# Start from torchvision's pretrained ResNet-18.
base_model = torchvision.models.resnet18(pretrained=True)
# Change top layer: swap the final fully connected layer for a new linear layer
# with the same input width and N_CLASS outputs.
base_model.fc = nn.Linear(
    in_features=base_model.fc.in_features,
    out_features=N_CLASS,
)

In [11]:
### Train Loop
# Cross-entropy objective, optimised with momentum SGD over every parameter of the
# float baseline.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)

# Train on the training loader and validate on the test loader; outputs are saved
# under {SAVE_DIR}/base_model.
train_loop.train_model(
    model=base_model,
    criterion=criterion,
    optimizer=optimizer,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    clip_value=1e-2,  # NOTE(review): presumably a gradient-clipping threshold — confirm in train_loop
    save=f"{SAVE_DIR}/base_model",
)

train - epoch:  0: : 196it [00:20,  9.60it/s, loss=0.93]                        
val - epoch:  0: : 40it [00:02, 17.43it/s, val_loss=0.672, train_loss=0.93, acc=0.775]                      
train - epoch:  1: : 196it [00:20,  9.46it/s, loss=0.49]                        
val - epoch:  1: : 40it [00:02, 17.40it/s, val_loss=0.607, train_loss=0.49, acc=0.801]                      
train - epoch:  2: : 196it [00:20,  9.80it/s, loss=0.336]                       
val - epoch:  2: : 40it [00:02, 18.33it/s, val_loss=0.618, train_loss=0.336, acc=0.798]                      
train - epoch:  3: : 196it [00:20,  9.73it/s, loss=0.225]                       
val - epoch:  3: : 40it [00:02, 19.54it/s, val_loss=0.733, train_loss=0.225, acc=0.797]                      
train - epoch:  4: : 196it [00:20,  9.73it/s, loss=0.172]                       
val - epoch:  4: : 40it [00:02, 18.16it/s, val_loss=0.78, train_loss=0.172, acc=0.803]                       
train - epoch:  5: : 196it [00:20,  9.47it/s, l

In [12]:
# Validation accuracy: evaluate the trained float baseline on the CIFAR-10 test
# split (test_loader is built from train=False).
train_loop.test_model(test_loader, base_model)

100%|██████████| 40/40 [00:02<00:00, 16.83it/s, acc=0.804]

acc: 0.8039





## Post Training Quantization

### 8-bit quantization

In [10]:
# Convert base model to a custom quantization layer with the trained weights
# (8-bit, QAT disabled). Weights are read from the base-model checkpoint path.
# The class count comes from N_CLASS so it cannot drift from the baseline's head.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=8,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# NOTE(review): presumably switches quantization on — confirm in quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Forward pass to have quantized weights: run the full training loader through the
# wrapped model before it is converted. The accuracy reported by this call is
# measured on training data, not on the test split.
# NOTE(review): presumably this pass populates the quantization statistics that
# convert_from reads — confirm in quantization_functions.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:14<00:00, 13.92it/s, acc=0.895]

acc: 0.8953





In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the wrapped model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 8-bit model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.81it/s, acc=0.797]

acc: 0.7968





In [15]:
# Export the converted 8-bit model's state dict alongside the other checkpoints.
with open(f'{SAVE_DIR}/ptq8bit_model_weights.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)

### 7-bit quantization

In [10]:
# Convert base model to a custom quantization layer with the trained weights
# (7-bit, QAT disabled). Weights are read from the base-model checkpoint path.
# The class count comes from N_CLASS so it cannot drift from the baseline's head.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=7,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# NOTE(review): presumably switches quantization on — confirm in quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Forward pass to have quantized weights: run the full training loader through the
# wrapped model before it is converted. The accuracy reported by this call is
# measured on training data, not on the test split.
# NOTE(review): presumably this pass populates the quantization statistics that
# convert_from reads — confirm in quantization_functions.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:13<00:00, 14.65it/s, acc=0.895]

acc: 0.8953





In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the wrapped model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 7-bit model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.56it/s, acc=0.793]

acc: 0.7926





In [14]:
# Export the converted 7-bit model's state dict alongside the other checkpoints.
with open(f'{SAVE_DIR}/ptq7bit_model_weights.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)

### 6-bit quantization

In [10]:
# Convert base model to a custom quantization layer with the trained weights
# (6-bit, QAT disabled). Weights are read from the base-model checkpoint path.
# The class count comes from N_CLASS so it cannot drift from the baseline's head.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=6,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# NOTE(review): presumably switches quantization on — confirm in quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Forward pass to have quantized weights: run the full training loader through the
# wrapped model before it is converted. The accuracy reported by this call is
# measured on training data, not on the test split.
# NOTE(review): presumably this pass populates the quantization statistics that
# convert_from reads — confirm in quantization_functions.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:14<00:00, 13.95it/s, acc=0.895]

acc: 0.8953





In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the wrapped model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 6-bit model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.13it/s, acc=0.754]

acc: 0.7539





In [14]:
# Export the converted 6-bit model's state dict alongside the other checkpoints.
with open(f'{SAVE_DIR}/ptq6bit_model_weights.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)

### 5-bit quantization

In [10]:
# Convert base model to a custom quantization layer with the trained weights
# (5-bit, QAT disabled). Weights are read from the base-model checkpoint path.
# The class count comes from N_CLASS so it cannot drift from the baseline's head.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=5,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# NOTE(review): presumably switches quantization on — confirm in quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Forward pass to have quantized weights: run the full training loader through the
# wrapped model before it is converted. The accuracy reported by this call is
# measured on training data, not on the test split.
# NOTE(review): presumably this pass populates the quantization statistics that
# convert_from reads — confirm in quantization_functions.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:14<00:00, 13.62it/s, acc=0.895]

acc: 0.8953





In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the wrapped model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 5-bit model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.94it/s, acc=0.515]

acc: 0.5153





In [14]:
# Export the converted 5-bit model's state dict alongside the other checkpoints.
with open(f'{SAVE_DIR}/ptq5bit_model_weights.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)

### 4-bit quantization

In [10]:
# Convert base model to a custom quantization layer with the trained weights
# (4-bit, QAT disabled). Weights are read from the base-model checkpoint path.
# The class count comes from N_CLASS so it cannot drift from the baseline's head.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=4,
    qat=False,
    pretrained=f'{SAVE_DIR}/base_model/model_weights.pt',
)
# NOTE(review): presumably switches quantization on — confirm in quant_aware_resnet_model.
c_base_model.quantize(True)

remained state dict odict_keys([])


In [11]:
# Forward pass to have quantized weights: run the full training loader through the
# wrapped model before it is converted. The accuracy reported by this call is
# measured on training data, not on the test split.
# NOTE(review): presumably this pass populates the quantization statistics that
# convert_from reads — confirm in quantization_functions.
train_loop.test_model(train_loader, c_base_model)

100%|██████████| 196/196 [00:14<00:00, 13.64it/s, acc=0.895]

acc: 0.8953





In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the wrapped model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 4-bit model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.76it/s, acc=0.134]

acc: 0.1336





In [14]:
# Export the converted 4-bit model's state dict alongside the other checkpoints.
with open(f'{SAVE_DIR}/ptq4bit_model_weights.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)

## Quantization Aware Training

### 8-bit quantization

In [10]:
# Create model with custom quantization layer from the start (8-bit, QAT enabled).
# The class count comes from N_CLASS rather than a repeated literal.
# NOTE(review): pretrained=True presumably loads pretrained backbone weights; the
# recorded output lists fc.weight/fc.bias as the remaining keys — confirm in
# quant_aware_resnet_model.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=8,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# Training Loop
# Cross-entropy objective, optimised with momentum SGD over every parameter of the
# quantization-aware model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    c_base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)

# Train on the training loader and validate on the test loader; outputs are saved
# under {SAVE_DIR}/qat8bit.
train_loop.train_model(
    model=c_base_model,
    criterion=criterion,
    optimizer=optimizer,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    clip_value=1e-2,  # NOTE(review): presumably a gradient-clipping threshold — confirm in train_loop
    save=f"{SAVE_DIR}/qat8bit",
)

train - epoch:  0: : 196it [00:27,  7.17it/s, loss=0.992]                       
val - epoch:  0: : 40it [00:03, 11.33it/s, val_loss=0.766, train_loss=0.992, acc=0.739]                      
train - epoch:  1: : 196it [00:26,  7.37it/s, loss=0.681]                       
val - epoch:  1: : 40it [00:03, 13.13it/s, val_loss=0.669, train_loss=0.681, acc=0.773]                      
train - epoch:  2: : 196it [00:26,  7.52it/s, loss=0.551]                       
val - epoch:  2: : 40it [00:02, 13.34it/s, val_loss=0.63, train_loss=0.551, acc=0.783]                       
train - epoch:  3: : 196it [00:26,  7.52it/s, loss=0.462]                       
val - epoch:  3: : 40it [00:02, 13.56it/s, val_loss=0.644, train_loss=0.462, acc=0.794]                      
train - epoch:  4: : 196it [00:25,  7.59it/s, loss=0.398]                       
val - epoch:  4: : 40it [00:03, 13.23it/s, val_loss=0.62, train_loss=0.398, acc=0.806]                       
train - epoch:  5: : 196it [00:25,  7.67it/s,

In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the QAT-trained model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 8-bit QAT model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.70it/s, acc=0.806]

acc: 0.8057





In [14]:
# Export the converted model's state dict into the 8-bit QAT run directory.
with open(f'{SAVE_DIR}/qat8bit/model_weights_quantized.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)

### 7-bit quantization

In [10]:
# Create model with custom quantization layer from the start (7-bit, QAT enabled).
# The class count comes from N_CLASS rather than a repeated literal.
# NOTE(review): pretrained=True presumably loads pretrained backbone weights; the
# recorded output lists fc.weight/fc.bias as the remaining keys — confirm in
# quant_aware_resnet_model.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=7,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# Training Loop
# Cross-entropy objective, optimised with momentum SGD over every parameter of the
# quantization-aware model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    c_base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)

# Train on the training loader and validate on the test loader; outputs are saved
# under {SAVE_DIR}/qat7bit.
train_loop.train_model(
    model=c_base_model,
    criterion=criterion,
    optimizer=optimizer,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    clip_value=1e-2,  # NOTE(review): presumably a gradient-clipping threshold — confirm in train_loop
    save=f"{SAVE_DIR}/qat7bit",
)

train - epoch:  0: : 196it [00:25,  7.73it/s, loss=0.966]                       
val - epoch:  0: : 40it [00:02, 13.44it/s, val_loss=0.806, train_loss=0.966, acc=0.719]                      
train - epoch:  1: : 196it [00:25,  7.59it/s, loss=0.682]                       
val - epoch:  1: : 40it [00:02, 13.80it/s, val_loss=0.698, train_loss=0.682, acc=0.767]                      
train - epoch:  2: : 196it [00:25,  7.63it/s, loss=0.56]                        
val - epoch:  2: : 40it [00:03, 12.74it/s, val_loss=0.701, train_loss=0.56, acc=0.765]                      
train - epoch:  3: : 196it [00:26,  7.49it/s, loss=0.489]                       
val - epoch:  3: : 40it [00:03, 13.14it/s, val_loss=0.607, train_loss=0.489, acc=0.805]                      
train - epoch:  4: : 196it [00:26,  7.47it/s, loss=0.403]                       
val - epoch:  4: : 40it [00:02, 13.97it/s, val_loss=0.668, train_loss=0.403, acc=0.796]                      
train - epoch:  5: : 196it [00:25,  7.62it/s, 

In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the QAT-trained model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 7-bit QAT model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:04<00:00,  9.99it/s, acc=0.812]

acc: 0.8118





In [14]:
# Export the converted model's state dict into the 7-bit QAT run directory.
with open(f'{SAVE_DIR}/qat7bit/model_weights_quantized.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)

### 6-bit quantization

In [10]:
# Create model with custom quantization layer from the start (6-bit, QAT enabled).
# The class count comes from N_CLASS rather than a repeated literal.
# NOTE(review): pretrained=True presumably loads pretrained backbone weights; the
# recorded output lists fc.weight/fc.bias as the remaining keys — confirm in
# quant_aware_resnet_model.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=6,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# Training Loop
# Cross-entropy objective, optimised with momentum SGD over every parameter of the
# quantization-aware model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    c_base_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)

# Train on the training loader and validate on the test loader; outputs are saved
# under {SAVE_DIR}/qat6bit.
train_loop.train_model(
    model=c_base_model,
    criterion=criterion,
    optimizer=optimizer,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    clip_value=1e-2,  # NOTE(review): presumably a gradient-clipping threshold — confirm in train_loop
    save=f"{SAVE_DIR}/qat6bit",
)

train - epoch:  0: : 196it [00:25,  7.60it/s, loss=1.06]                       
val - epoch:  0: : 40it [00:03, 12.22it/s, val_loss=0.847, train_loss=1.06, acc=0.701]                      
train - epoch:  1: : 196it [00:26,  7.48it/s, loss=0.728]                       
val - epoch:  1: : 40it [00:03, 13.22it/s, val_loss=0.805, train_loss=0.728, acc=0.733]                      
train - epoch:  2: : 196it [00:25,  7.59it/s, loss=0.626]                       
val - epoch:  2: : 40it [00:02, 13.47it/s, val_loss=0.698, train_loss=0.626, acc=0.77]                       
train - epoch:  3: : 196it [00:25,  7.63it/s, loss=0.563]                       
val - epoch:  3: : 40it [00:02, 13.60it/s, val_loss=0.707, train_loss=0.563, acc=0.763]                      
train - epoch:  4: : 196it [00:25,  7.67it/s, loss=0.515]                       
val - epoch:  4: : 40it [00:02, 13.64it/s, val_loss=0.677, train_loss=0.515, acc=0.781]                      
train - epoch:  5: : 196it [00:25,  7.70it/s, l

In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the QAT-trained model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 6-bit QAT model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.79it/s, acc=0.769]

acc: 0.7689





In [14]:
# Export the converted model's state dict into the 6-bit QAT run directory.
with open(f'{SAVE_DIR}/qat6bit/model_weights_quantized.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)

### 5-bit quantization

In [10]:
# Create model with custom quantization layer from the start (5-bit, QAT enabled).
# The class count comes from N_CLASS rather than a repeated literal.
# NOTE(review): pretrained=True presumably loads pretrained backbone weights; the
# recorded output lists fc.weight/fc.bias as the remaining keys — confirm in
# quant_aware_resnet_model.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=5,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# Training Loop
# Cross-entropy objective, optimised with momentum SGD over every parameter of the
# quantization-aware model. This run uses a learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    c_base_model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-5,
)

# Train on the training loader and validate on the test loader; outputs are saved
# under {SAVE_DIR}/qat5bit.
train_loop.train_model(
    model=c_base_model,
    criterion=criterion,
    optimizer=optimizer,
    train_dl=train_loader,
    val_dl=test_loader,
    epochs=N_EPOCH,
    clip_value=1e-2,  # NOTE(review): presumably a gradient-clipping threshold — confirm in train_loop
    save=f"{SAVE_DIR}/qat5bit",
)

train - epoch:  0: : 196it [00:26,  7.29it/s, loss=1.53]                       
val - epoch:  0: : 40it [00:03, 12.85it/s, val_loss=1.3, train_loss=1.53, acc=0.545]                       
train - epoch:  1: : 196it [00:26,  7.40it/s, loss=1.17]                       
val - epoch:  1: : 40it [00:03, 13.01it/s, val_loss=1.15, train_loss=1.17, acc=0.588]                      
train - epoch:  2: : 196it [00:25,  7.67it/s, loss=1.07]                       
val - epoch:  2: : 40it [00:02, 13.95it/s, val_loss=1.09, train_loss=1.07, acc=0.615]                      
train - epoch:  3: : 196it [00:26,  7.52it/s, loss=1.03]                       
val - epoch:  3: : 40it [00:02, 13.98it/s, val_loss=1.06, train_loss=1.03, acc=0.624]                      
train - epoch:  4: : 196it [00:25,  7.58it/s, loss=0.989]                       
val - epoch:  4: : 40it [00:02, 13.61it/s, val_loss=1.02, train_loss=0.989, acc=0.638]                       
train - epoch:  5: : 196it [00:25,  7.75it/s, loss=0.96] 

In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the QAT-trained model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 5-bit QAT model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 10.60it/s, acc=0.634]

acc: 0.6344





In [14]:
# Export the converted model's state dict into the 5-bit QAT run directory.
with open(f'{SAVE_DIR}/qat5bit/model_weights_quantized.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)

### 4-bit quantization

In [10]:
# Create model with custom quantization layer from the start (4-bit, QAT enabled).
# The class count comes from N_CLASS rather than a repeated literal.
# NOTE(review): pretrained=True presumably loads pretrained backbone weights; the
# recorded output lists fc.weight/fc.bias as the remaining keys — confirm in
# quant_aware_resnet_model.
c_base_model = quant_aware_resnet_model.CResnet18(
    num_class=N_CLASS,
    q_num_bit=4,
    qat=True,
    pretrained=True,
)
c_base_model.quantize(True)

remained state dict odict_keys(['fc.weight', 'fc.bias'])


In [11]:
# The 4-bit run trains for fewer epochs than the other runs. The shorter schedule
# lives in its own name so the shared N_EPOCH constant is not overwritten for any
# cell that is re-run after this one.
QAT4_N_EPOCH = 5

# Training Loop
# Cross-entropy objective, optimised with momentum SGD (learning rate 5e-4) over
# every parameter of the quantization-aware model.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(c_base_model.parameters(), 5e-4, momentum=0.9, weight_decay=1e-5)

# Train on the training loader and validate on the test loader; outputs are saved
# under {SAVE_DIR}/qat4bit.
train_loop.train_model(
    train_dl=train_loader,
    val_dl=test_loader,
    model=c_base_model,
    optimizer=optimizer,
    criterion=criterion,
    clip_value=1e-2,  # NOTE(review): presumably a gradient-clipping threshold — confirm in train_loop
    epochs=QAT4_N_EPOCH, save=f"{SAVE_DIR}/qat4bit"
)

train - epoch:  0: : 196it [00:26,  7.45it/s, loss=2.35]                       
val - epoch:  0: : 40it [00:03, 12.12it/s, val_loss=2.21, train_loss=2.35, acc=0.175]                      
train - epoch:  1: : 196it [00:27,  7.01it/s, loss=2.13]                       
val - epoch:  1: : 40it [00:03, 12.65it/s, val_loss=2.1, train_loss=2.13, acc=0.231]                       
train - epoch:  2: : 196it [00:26,  7.49it/s, loss=2.07]                       
val - epoch:  2: : 40it [00:03, 12.87it/s, val_loss=2.03, train_loss=2.07, acc=0.226]                      
train - epoch:  3: : 196it [00:25,  7.76it/s, loss=2.12]                       
val - epoch:  3: : 40it [00:02, 13.77it/s, val_loss=2.1, train_loss=2.12, acc=0.213]                       
train - epoch:  4: : 196it [00:25,  7.70it/s, loss=2.07]                       
val - epoch:  4: : 40it [00:02, 14.06it/s, val_loss=2.03, train_loss=2.07, acc=0.228]                      


In [12]:
# Convert to quantized model: build an empty quantized ResNet-18 and fill it from
# the QAT-trained model. N_CLASS keeps the head size in sync with the source model.
q_base_model = post_training_quant_model.QResnet18(num_class=N_CLASS)
q_base_model.convert_from(c_base_model)

In [13]:
# Validation accuracy: evaluate the converted 4-bit QAT model on the CIFAR-10 test
# split (test_loader).
train_loop.test_model(test_loader, q_base_model)

100%|██████████| 40/40 [00:03<00:00, 11.03it/s, acc=0.226]

acc: 0.2257





In [14]:
# Export the converted model's state dict into the 4-bit QAT run directory.
with open(f'{SAVE_DIR}/qat4bit/model_weights_quantized.pt', 'wb') as weights_file:
    torch.save(obj=q_base_model.state_dict(), f=weights_file)