# Preprocessing

In [50]:
import copy
import math
import random
from collections import OrderedDict, defaultdict

from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
from tqdm.auto import tqdm

import torch
from torch import nn
from torch.optim import *
from torch.optim.lr_scheduler import *
import torchvision.models as models
import torchvision
from torch.utils.data import DataLoader

from torchvision.datasets import *
from torchvision.transforms import *


no_cuda = False
use_gpu = not no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")

In [51]:
class h_sigmoid(nn.Module):
    def __init__(self, inplace=True):
        super(h_sigmoid, self).__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        return self.relu(x + 3) / 6


class h_swish(nn.Module):
    def __init__(self, inplace=True):
        super(h_swish, self).__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        return x * self.sigmoid(x)

In [52]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

batch_size = 32

#Dataset
train_dataset = torchvision.datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)

#Dataloader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Create NN model

In [53]:
class ToyModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.Conv = nn.Sequential(
      nn.Conv2d(in_channels=1, out_channels=5, kernel_size=1, stride=1,padding= 0, bias=False),
      h_swish(inplace=True),
      nn.Conv2d(in_channels=5, out_channels=1, kernel_size=1, stride=1,padding= 0, bias=False),
      h_swish(inplace=True)
    )  
    self.backbone = nn.Sequential(
      nn.Linear(28*28, 120, bias=False),
      nn.ReLU(),
      nn.Linear(120, 84, bias=False),
      nn.ReLU(),
      nn.Linear(84, 10, bias=False)
    )

  def forward(self, x):
    x=self.Conv(x)
    x = x.view(-1, 28 * 28) #transform 28*28 figure to 784 vector
    x = self.backbone(x)
    return x

FP32_model = ToyModel()
print(FP32_model)

ToyModel(
  (Conv): Sequential(
    (0): Conv2d(1, 5, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): h_swish(
      (sigmoid): h_sigmoid(
        (relu): ReLU6(inplace=True)
      )
    )
    (2): Conv2d(5, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (3): h_swish(
      (sigmoid): h_sigmoid(
        (relu): ReLU6(inplace=True)
      )
    )
  )
  (backbone): Sequential(
    (0): Linear(in_features=784, out_features=120, bias=False)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=False)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=10, bias=False)
  )
)


In [54]:
#train model
def train_loop(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  #Set the model to train mode
  model.train()
  for batch, (x, y) in enumerate(dataloader):
    if use_gpu:
      x, y = x.cuda(), y.cuda()
    optimizer.zero_grad()
    #forward
    pred = model(x)

    #loss
    loss = loss_fn(pred, y)

    #backward
    loss.backward()

    #optimize
    optimizer.step()

    if batch % 100 == 0:
      loss, current = loss.item(), (batch + 1) * len(x)
      print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
  #set model to evaluate mode
  model.eval()
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  test_loss, correct = 0, 0
  with torch.no_grad():
    for x, y in dataloader:
      if use_gpu:
        x, y = x.cuda(), y.cuda()
      pred = model(x)
      test_loss = loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item() #calculate accuracy
  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [55]:
learning_rate = 1e-3
epochs = 3
loss_fn = nn.CrossEntropyLoss() #define loss function
optimizer = torch.optim.SGD(FP32_model.parameters(), lr=learning_rate, momentum=0.9)  #define optimizer

FP32_model.to(device) #let model on GPU

ToyModel(
  (Conv): Sequential(
    (0): Conv2d(1, 5, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): h_swish(
      (sigmoid): h_sigmoid(
        (relu): ReLU6(inplace=True)
      )
    )
    (2): Conv2d(5, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (3): h_swish(
      (sigmoid): h_sigmoid(
        (relu): ReLU6(inplace=True)
      )
    )
  )
  (backbone): Sequential(
    (0): Linear(in_features=784, out_features=120, bias=False)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=False)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=10, bias=False)
  )
)

In [56]:
#Training
for i in range(epochs):
  print(f"Epoch {i+1}\n-------------------------------")
  train_loop(train_loader, FP32_model, loss_fn, optimizer)
  test_loop(test_loader, FP32_model, loss_fn)

Epoch 1
-------------------------------
loss: 2.302464  [   32/60000]
loss: 2.301297  [ 3232/60000]
loss: 2.299700  [ 6432/60000]
loss: 2.294851  [ 9632/60000]
loss: 2.294723  [12832/60000]
loss: 2.282074  [16032/60000]
loss: 2.275557  [19232/60000]
loss: 2.238265  [22432/60000]
loss: 2.170888  [25632/60000]
loss: 1.727330  [28832/60000]
loss: 0.981952  [32032/60000]
loss: 0.651869  [35232/60000]
loss: 0.797447  [38432/60000]
loss: 0.621260  [41632/60000]
loss: 0.383997  [44832/60000]
loss: 0.526528  [48032/60000]
loss: 0.594913  [51232/60000]
loss: 0.434933  [54432/60000]
loss: 0.332496  [57632/60000]
Test Error: 
 Accuracy: 80.8%, Avg loss: 0.000947 

Epoch 2
-------------------------------
loss: 0.439545  [   32/60000]
loss: 0.378272  [ 3232/60000]
loss: 0.454065  [ 6432/60000]
loss: 0.348154  [ 9632/60000]
loss: 0.262269  [12832/60000]
loss: 0.659274  [16032/60000]
loss: 0.231311  [19232/60000]
loss: 0.417038  [22432/60000]
loss: 0.399859  [25632/60000]
loss: 0.295577  [28832/60000

# Quantization definition

####Question 1.####

Use
>$S=(r_{\mathrm{max}} - r_{\mathrm{min}}) / (q_{\mathrm{max}} - q_{\mathrm{min}})$

>$Z = q_{\mathrm{min}} - r_{\mathrm{min}} / S$

to calculate scale factor and zero point of a tensor


In [57]:
def get_scale_and_zero_point(fp32_tensor, bitwidth=8):
  q_min, q_max = -2**(bitwidth-1), 2**(bitwidth-1) - 1
  fp_min = fp32_tensor.min().item()
  fp_max = fp32_tensor.max().item()

  #####################################################

  scale = (fp_max-fp_min) / (q_max-q_min)
  zero_point = q_min-fp_min /scale

  #####################################################


  zero_point = round(zero_point)          #round
  zero_point = max(q_min, min(zero_point, q_max)) #clip

  return scale, int(zero_point)

####Question 2.####

Use $q=r/S + Z$ to quantize a tensor

In [58]:
def linear_quantize(fp32_tensor, bitwidth=8):
  q_min, q_max = -2**(bitwidth-1), 2**(bitwidth-1) - 1

  scale, zero_point = get_scale_and_zero_point(fp32_tensor)

  #####################################################

  q_tensor = torch.round( fp32_tensor/scale ) +zero_point

  #####################################################

  #clamp
  q_tensor = torch.clamp(q_tensor, q_min, q_max)
  return q_tensor, scale, zero_point

####Question 3.####

Use
> $q_{\mathrm{output}} = M * \mathrm{Linear}[q_{\mathrm{input}}, q_{\mathrm{weight}}] + Z_{\mathrm{output}}$

> $M = S_{\mathrm{input}} * S_{\mathrm{weight}} / S_{\mathrm{output}}$

to compute quantized linear operation

In [59]:
def quantized_linear(input, weights, input_scale, weight_scale, output_scale, input_zero_point, weight_zero_point, output_zero_point, device, bitwidth=8, activation_bitwidth=8):
  input, weights = input.to(device), weights.to(device)

  #####################################################

  M = input_scale * weight_scale / output_scale
  output = torch.nn.functional.linear((input - input_zero_point ), (weights - weight_zero_point))
  output *= M
  output += output_zero_point

  #####################################################

  #clamp and round
  output = output.round().clamp(-2**(activation_bitwidth-1), 2**(activation_bitwidth-1)-1)

  return output

def quantized_conv(input, weights,stride, padding,groups,input_scale, weight_scale, output_scale, input_zero_point, weight_zero_point, output_zero_point, device, bitwidth=8, activation_bitwidth=8):
  input, weights = input.to(device), weights.to(device)

  #####################################################

  M = input_scale * weight_scale / output_scale
  output = torch.nn.functional.conv2d((input - input_zero_point ), (weights - weight_zero_point),stride=stride,padding=padding,groups=groups)
  output *= M
  output += output_zero_point

  #####################################################

  #clamp and round
  output = output.round().clamp(-2**(activation_bitwidth-1), 2**(activation_bitwidth-1)-1)

  return output

# Design quantized linear layer and preprocess

In [60]:
class QuantizedConv(nn.Module):
  def __init__(self, weights,stride,padding,groups ,input_scale, weight_scale, output_scale, input_zero_point, weight_zero_point, output_zero_point, bitwidth=8, activation_bitwidth=8):
    super().__init__()
    self.stride, self.padding, self.groups = stride, padding,groups
    self.weights = weights
    self.input_scale, self.input_zero_point = input_scale, input_zero_point
    self.weight_scale, self.weight_zero_point = weight_scale, weight_zero_point
    self.output_scale, self.output_zero_point = output_scale, output_zero_point

    self.bitwidth = bitwidth
    self.activation_bitwidth = activation_bitwidth

  def forward(self, x):
    return quantized_conv(x, self.weights, self.stride, self.padding, self.groups, self.input_scale, self.weight_scale, self.output_scale, self.input_zero_point, self.weight_zero_point, self.output_zero_point, device)
  def __repr__(self):
    return f"QuantizedConv(in_channels={self.weights.size(1)}, out_channels={self.weights.size(0)})"

class QuantizedLinear(nn.Module):
  def __init__(self, weights, input_scale, weight_scale, output_scale, input_zero_point, weight_zero_point, output_zero_point, bitwidth=8, activation_bitwidth=8):
    super().__init__()
    self.weights = weights
    self.input_scale, self.input_zero_point = input_scale, input_zero_point
    self.weight_scale, self.weight_zero_point = weight_scale, weight_zero_point
    self.output_scale, self.output_zero_point = output_scale, output_zero_point

    self.bitwidth = bitwidth
    self.activation_bitwidth = activation_bitwidth

  def forward(self, x):
    return quantized_linear(x, self.weights, self.input_scale, self.weight_scale, self.output_scale, self.input_zero_point, self.weight_zero_point, self.output_zero_point, device)
  def __repr__(self):
    return f"QuantizedLinear(in_channels={self.weights.size(1)}, out_channels={self.weights.size(0)})"

#Transform input data to correct integer range
class Preprocess(nn.Module):
  def __init__(self, input_scale, input_zero_point, activation_bitwidth=8):
    super().__init__()
    self.input_scale, self.input_zero_point = input_scale, input_zero_point
    self.activation_bitwidth = activation_bitwidth
  def forward(self, x):
    x = x / self.input_scale + self.input_zero_point
    return x
  
class Quantizer(nn.Module):
  def __init__(self,scale,bitwidth=8):
    super().__init__()
    self.scale = scale

    def do_requant(self, input, scale,zero_point,bitwidth):
        output = input / scale
        output = output.round()
        output += zero_point
        output = output.clamp(-2**(bitwidth-1), 2**(bitwidth-1)-1)
        return output
    
    def forward(self,x):
      return do_requant(x,self.scale)
    
    

# Calibration

In [61]:
# add hook to record the min max value of the activation
input_activation = {}
output_activation = {}

#Define a hook to record the feature map of each layer
def add_range_recoder_hook(model):
    import functools
    def _record_range(self, x, y, module_name):
        x = x[0]
        input_activation[module_name] = x.detach()
        output_activation[module_name] = y.detach()

    all_hooks = []
    for name, m in model.named_modules():
        if isinstance(m, (nn.Linear, nn.ReLU,nn.Conv2d,h_swish)):
            all_hooks.append(m.register_forward_hook(
                functools.partial(_record_range, module_name=name)))


    return all_hooks

hooks = add_range_recoder_hook(FP32_model)
sample_data = iter(train_loader).__next__()[0].to(device) #Use a batch of training data to calibrate
FP32_model(sample_data) #Forward to use hook
print(FP32_model.Conv[0].weight)
print(output_activation["Conv.0"])
# remove hooks
for h in hooks:
    h.remove()


Parameter containing:
tensor([[[[ 0.9064]]],


        [[[-0.3772]]],


        [[[ 0.6363]]],


        [[[-2.0393]]],


        [[[-0.6909]]]], device='cuda:0', requires_grad=True)
tensor([[[[-0.9064, -0.9064, -0.9064,  ..., -0.9064, -0.9064, -0.9064],
          [-0.9064, -0.9064, -0.9064,  ..., -0.9064, -0.9064, -0.9064],
          [-0.9064, -0.9064, -0.9064,  ..., -0.9064, -0.9064, -0.9064],
          ...,
          [-0.9064, -0.9064, -0.9064,  ...,  0.1813, -0.9064, -0.9064],
          [-0.9064, -0.9064, -0.9064,  ..., -0.9064, -0.9064, -0.9064],
          [-0.9064, -0.9064, -0.9064,  ..., -0.9064, -0.9064, -0.9064]],

         [[ 0.3772,  0.3772,  0.3772,  ...,  0.3772,  0.3772,  0.3772],
          [ 0.3772,  0.3772,  0.3772,  ...,  0.3772,  0.3772,  0.3772],
          [ 0.3772,  0.3772,  0.3772,  ...,  0.3772,  0.3772,  0.3772],
          ...,
          [ 0.3772,  0.3772,  0.3772,  ..., -0.0754,  0.3772,  0.3772],
          [ 0.3772,  0.3772,  0.3772,  ...,  0.3772,  0.3772,  0.

# Quantize model

In [82]:
#copy original model
quantized_model = copy.deepcopy(FP32_model)

#Record each layer in original model
quantized_backbone = []
quantized_Conv = []
i = 0

#Record input scale and zero point
input_scale, input_zero_point = get_scale_and_zero_point(input_activation["Conv.0"])
preprocess = Preprocess(input_scale, input_zero_point)
quantized_Conv.append(preprocess)

input_scale, input_zero_point = get_scale_and_zero_point(input_activation['Conv.0'])
output_scale, output_zero_point = get_scale_and_zero_point(output_activation['Conv.1'])
quantized_weights, weight_scale, weight_zero_point = linear_quantize(FP32_model.Conv[0].weight.data)
quantizedConv1 = QuantizedConv(quantized_weights, 1,0,1,input_scale, weight_scale, output_scale, input_zero_point, weight_zero_point, output_zero_point)
#quantized_model.Conv[0] = quantizedConv1 

quantized_Conv.append(quantizedConv1)


input_scale, input_zero_point = get_scale_and_zero_point(input_activation['Conv.2'])
output_scale, output_zero_point = get_scale_and_zero_point(output_activation['Conv.3'])
quantized_weights, weight_scale, weight_zero_point = linear_quantize(FP32_model.Conv[2].weight.data)
quantizedConv2 = QuantizedConv(quantized_weights, 1,0,1,input_scale, weight_scale, output_scale, input_zero_point, weight_zero_point, output_zero_point)
#quantized_model.Conv[2] = quantizedConv2 
quantized_Conv.append(quantizedConv2)

#Record Linear + ReLU of the model (except the last Linear)
while i < len(quantized_model.backbone) - 1:
  if isinstance(quantized_model.backbone[i], nn.Linear) and isinstance(quantized_model.backbone[i+1], nn.ReLU):
    linear = quantized_model.backbone[i]
    linear_name = f"backbone.{i}"
    relu = quantized_model.backbone[i + 1]
    relu_name = f"backbone.{i + 1}"

    #Use the calibration data to calculate scale and zero point of each layer
    input_scale, input_zero_point = get_scale_and_zero_point(input_activation[linear_name])
    output_scale, output_zero_point = get_scale_and_zero_point(output_activation[relu_name])
    quantized_weights, weight_scale, weight_zero_point = linear_quantize(linear.weight.data)

    quantizedLinear = QuantizedLinear(quantized_weights, input_scale, weight_scale, output_scale, input_zero_point, weight_zero_point, output_zero_point)

    quantized_backbone.append(quantizedLinear)
    i += 2

#Record the last Linear layer
linear = quantized_model.backbone[4]
linear_name = f"backbone.4"
input_scale, input_zero_point = get_scale_and_zero_point(input_activation[linear_name])
output_scale, output_zero_point = get_scale_and_zero_point(output_activation[linear_name])
quantized_weights, weight_scale, weight_zero_point = linear_quantize(linear.weight.data)
quantizedLinear = QuantizedLinear(quantized_weights, input_scale, weight_scale, output_scale, input_zero_point, weight_zero_point, output_zero_point)
quantized_backbone.append(quantizedLinear)

quantized_model.Conv = nn.Sequential(*quantized_Conv)
quantized_model.backbone = nn.Sequential(*quantized_backbone)

In [83]:
print(quantized_model)

ToyModel(
  (Conv): Sequential(
    (0): Preprocess()
    (1): QuantizedConv(in_channels=1, out_channels=5)
    (2): QuantizedConv(in_channels=5, out_channels=1)
  )
  (backbone): Sequential(
    (0): QuantizedLinear(in_channels=784, out_channels=120)
    (1): QuantizedLinear(in_channels=120, out_channels=84)
    (2): QuantizedLinear(in_channels=84, out_channels=10)
  )
)


In [84]:
# add hook to record the min max value of the activation
q_input_activation = {}
q_output_activation = {}

#Define a hook to record the feature map of each layer
def add_range_recoder_hook(model):
    import functools
    def _record_range(self, x, y, module_name):
        x = x[0]
        q_input_activation[module_name] = x.detach()
        q_output_activation[module_name] = y.detach()

    all_hooks = []
    for name, m in model.named_modules():
        if isinstance(m, (QuantizedConv,  QuantizedLinear,h_swish)):
            all_hooks.append(m.register_forward_hook(
                functools.partial(_record_range, module_name=name)))


    return all_hooks

hooks = add_range_recoder_hook(quantized_model)
sample_data = iter(train_loader).__next__()[0].to(device) #Use a batch of training data to calibrate
quantized_model(sample_data) #Forward to use hook
#print(quantized_model.Conv[1].weights)
print(q_output_activation.keys())
#print(q_output_activation["Conv.1"])
# remove hooks
for h in hooks:
    h.remove()

dict_keys(['Conv.1', 'Conv.2', 'backbone.0', 'backbone.1', 'backbone.2'])


# Evaluate

In [85]:
test_loop(test_loader, FP32_model, loss_fn)

Test Error: 
 Accuracy: 85.5%, Avg loss: 0.000520 



In [86]:
test_loop(test_loader, quantized_model, loss_fn)

Test Error: 
 Accuracy: 83.7%, Avg loss: 0.004596 

