# **1. Import PyTorch**

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.models.feature_extraction as feature_extraction
from torchsummary import summary

import os
import copy

# Set no_cuda to True to force CPU execution even when a CUDA device is present.
no_cuda = False
use_gpu = torch.cuda.is_available() and not no_cuda
device = torch.device("cuda") if use_gpu else torch.device("cpu")

# **2. Load Fashion MNIST Dataset**

In [None]:
# Preprocessing pipeline: convert each image to a tensor, then normalize its
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.5,), (0.5,)),
])

batch_size = 32

# Datasets (stored under ./data; download=True fetches them when missing)
train_dataset = torchvision.datasets.FashionMNIST(
  root='./data',
  train=True,
  transform=transform,
  download=True,
)
test_dataset = torchvision.datasets.FashionMNIST(
  root='./data',
  train=False,
  transform=transform,
  download=True,
)

# Data loaders: only the training split is shuffled.
train_loader = torch.utils.data.DataLoader(
  dataset=train_dataset,
  batch_size=batch_size,
  shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
  dataset=test_dataset,
  batch_size=batch_size,
  shuffle=False,
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 20891683.39it/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 339669.38it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 6258375.26it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 10872244.21it/s]


Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw



# **3. Create and train a NN model**

In [None]:
class ToyModel(nn.Module):
  """Three-layer fully connected classifier: 784 -> 120 -> 84 -> 10.

  The first two linear layers are followed by ReLU; the last layer returns
  raw (un-normalized) class scores.
  """

  def __init__(self):
    super().__init__()
    # Attribute names are part of the state_dict keys and of the traced graph
    # node names, so they must stay nn1 / nn2 / nn3.
    self.nn1 = nn.Linear(28*28, 120)
    self.nn2 = nn.Linear(120, 84)
    self.nn3 = nn.Linear(84, 10)

  def forward(self, x):
    """Flatten each input to a 784-vector and return 10 scores per sample."""
    flat = x.view(-1, 28 * 28)
    hidden1 = F.relu(self.nn1(flat))
    hidden2 = F.relu(self.nn2(hidden1))
    return self.nn3(hidden2)

In [None]:
# Build the full-precision reference model on the selected device.
FP32_model = ToyModel().to(device)
# torchsummary creates its dummy input on CUDA by default; pass the device the
# model actually lives on so the summary also works when no_cuda forces the
# model onto the CPU of a CUDA-capable machine.
summary(FP32_model, (1, 28, 28), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 120]          94,200
            Linear-2                   [-1, 84]          10,164
            Linear-3                   [-1, 10]             850
Total params: 105,214
Trainable params: 105,214
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.40
Estimated Total Size (MB): 0.41
----------------------------------------------------------------


In [None]:
#train model
def train_loop(dataloader, model, loss_fn, optimizer):
  """Run one training epoch and print the loss every 100 batches.

  Args:
    dataloader: iterable of (inputs, targets) batches exposing `.dataset`.
    model: the nn.Module to optimize; it is switched to train mode.
    loss_fn: callable mapping (predictions, targets) to a scalar loss tensor.
    optimizer: optimizer holding the parameters of `model`.
  """
  size = len(dataloader.dataset)
  # Follow the device the model actually lives on instead of a notebook-level
  # flag, so the loop also works for a model that was moved elsewhere.
  first_param = next(model.parameters(), None)
  target_device = None if first_param is None else first_param.device

  #Set the model to train mode
  model.train()
  for batch, (x, y) in enumerate(dataloader):
    if target_device is not None:
      x, y = x.to(target_device), y.to(target_device)
    optimizer.zero_grad()

    #forward
    pred = model(x)

    #loss
    loss = loss_fn(pred, y)

    #backward
    loss.backward()

    #optimize
    optimizer.step()

    if batch % 100 == 0:
      # Separate name: do not rebind the loss tensor to a Python float.
      loss_value, current = loss.item(), (batch + 1) * len(x)
      print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
  #set model to evaluate mode
  model.eval()
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  test_loss, correct = 0, 0
  with torch.no_grad():
    for x, y in dataloader:
      if use_gpu:
        x, y = x.cuda(), y.cuda()
      pred = model(x)
      test_loss = loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item() #calculate accuracy
  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Training hyper-parameters
epochs = 3
learning_rate = 1e-3

# Loss function and optimizer (SGD with momentum) for the FP32 model
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
  FP32_model.parameters(),
  lr=learning_rate,
  momentum=0.9,
)

# Place the model on the selected device; as the last expression of the cell
# this also displays the module structure.
FP32_model.to(device)

ToyModel(
  (nn1): Linear(in_features=784, out_features=120, bias=True)
  (nn2): Linear(in_features=120, out_features=84, bias=True)
  (nn3): Linear(in_features=84, out_features=10, bias=True)
)

In [None]:
# Alternate one training pass over train_loader with one evaluation on
# test_loader, for `epochs` epochs, printing a banner before each epoch.
for i in range(epochs):
  print(f"Epoch {i+1}\n-------------------------------")
  train_loop(train_loader, FP32_model, loss_fn, optimizer)
  test_loop(test_loader, FP32_model, loss_fn)

Epoch 1
-------------------------------
loss: 2.272040  [   32/60000]
loss: 2.154126  [ 3232/60000]
loss: 1.485687  [ 6432/60000]
loss: 1.180371  [ 9632/60000]
loss: 0.725408  [12832/60000]
loss: 0.711334  [16032/60000]
loss: 0.684661  [19232/60000]
loss: 0.740626  [22432/60000]
loss: 0.809315  [25632/60000]
loss: 0.246356  [28832/60000]
loss: 0.649688  [32032/60000]
loss: 0.488470  [35232/60000]
loss: 0.579858  [38432/60000]
loss: 0.660828  [41632/60000]
loss: 0.699628  [44832/60000]
loss: 0.413648  [48032/60000]
loss: 0.490594  [51232/60000]
loss: 0.600312  [54432/60000]
loss: 0.290745  [57632/60000]
Test Error: 
 Accuracy: 80.2%, Avg loss: 0.001068 

Epoch 2
-------------------------------
loss: 0.448714  [   32/60000]
loss: 0.499115  [ 3232/60000]
loss: 0.544766  [ 6432/60000]
loss: 0.660541  [ 9632/60000]
loss: 0.322607  [12832/60000]
loss: 0.325584  [16032/60000]
loss: 0.368600  [19232/60000]
loss: 0.439019  [22432/60000]
loss: 0.455746  [25632/60000]
loss: 0.575191  [28832/60000

# **4. Quantize**

下面兩格為需要實作的程式碼，請完成normal、clip的scale及zero point算法並且根據計算出來的s, z去進行tensor的quantize

算法可以參考EAI Lab6 Page5、6的部分或是example的程式碼

normal: 15%, clip: 15%, 結報請截圖實作部分的程式碼以及最後的兩種model的Accuracy

In [None]:
def Calculate_scale_zero_point(x, mode="normal"):
  """Compute int8 affine quantization parameters from a tensor's min/max.

  Uses the same formulas as the calibration cells of this notebook:
    scale      = (max - min) / (q_max - q_min)
    zero_point = round(q_min - min / scale)
  with q_min, q_max = -128, 127.

  Args:
    x: tensor (or array-like) whose value range is to be covered.
    mode: "normal" returns the zero point as computed; "clip" additionally
      clamps the zero point into [q_min, q_max].
      NOTE(review): the course handout defining "clip" was not available;
      clamping the zero point is an assumption - confirm against the handout.

  Returns:
    (scale, zero_point) as a Python float and a Python int.

  Raises:
    ValueError: if `mode` is unknown or `x` has no elements.
  """
  if mode not in ("normal", "clip"):
    raise ValueError(f"unknown quantization mode: {mode!r}")

  q_min, q_max = -128, 127
  # detach(): the parameters are plain statistics, gradients are irrelevant here.
  values = torch.as_tensor(x).detach()
  if values.numel() == 0:
    raise ValueError("cannot compute quantization parameters of an empty tensor")

  min_val = values.min().item()
  max_val = values.max().item()
  value_range = max_val - min_val
  # A constant tensor has zero range; fall back to a unit step so the
  # zero-point formula below never divides by zero.
  scale = value_range / (q_max - q_min) if value_range > 0 else 1.0
  zero_point = round(q_min - min_val / scale)

  if mode == "clip":
    zero_point = max(q_min, min(q_max, zero_point))

  return scale, zero_point

In [None]:
class Quantize_per_tensor(nn.Module):
  """Simulated per-tensor int8 affine quantization.

  On construction the input is mapped to integer levels
      q = round(x / scale + zero_point)
  and mapped back to real values
      x_hat = (q - zero_point) * scale.
  In "clip" mode the levels are clamped to [-128, 127] before mapping back;
  in "normal" mode they are left unclamped.

  Args:
    x: tensor to quantize.
    scale: quantization step (must be non-zero).
    zero_point: integer level that represents the real value 0.
    mode: "normal" or "clip".

  Raises:
    ValueError: if `mode` is unknown or `scale` is zero.
  """

  Q_MIN, Q_MAX = -128, 127

  def __init__(self, x, scale, zero_point, mode="normal"):
    super().__init__()
    self.tensor = x
    self.scale = scale
    self.zero_point = zero_point
    self._quantize(mode)

  def repr(self):
    """Return the de-quantized floating-point tensor (x_hat)."""
    return self.qtensor

  def int_repr(self):
    """Return the integer quantization levels as an int32 tensor."""
    return self.qtensor_int

  def _get_scale_zero(self):
    """Return the (scale, zero_point) pair exactly as passed to the constructor."""
    return self.scale, self.zero_point

  def _quantize(self, mode):
    """Populate `qtensor_int` and `qtensor` from the stored tensor."""
    if mode not in ("normal", "clip"):
      raise ValueError(f"unknown quantization mode: {mode!r}")

    # Plain Python numbers: scale/zero_point may arrive as NumPy scalars.
    scale = float(self.scale)
    zero_point = float(self.zero_point)
    if scale == 0:
      raise ValueError("scale must be non-zero")

    # Rounding is not differentiable, so work on a detached view.
    levels = torch.round(self.tensor.detach() / scale + zero_point)
    if mode == "clip":
      levels = torch.clamp(levels, self.Q_MIN, self.Q_MAX)

    # int32 rather than int8: "normal" mode may produce levels outside int8.
    self.qtensor_int = levels.to(torch.int32)
    self.qtensor = (levels - zero_point) * scale

In [None]:
class QuantizedLinear(nn.Module):
  """Linear layer that uses quantized weights and re-quantizes its output.

  The weight is quantized once at construction. In `forward` the product
  x @ W^T is passed through Quantize_per_tensor with this layer's
  (scale, zero_point) and the bias is added afterwards.
  """

  def __init__(self, in_features, out_features, weight, bias, scale, zero_point, mode):
    super().__init__()
    self.in_features, self.out_features = in_features, out_features
    self.mode = mode
    self.scale = scale
    self.zero_point = zero_point
    # mode must already be set: _weight_quantize reads self.mode.
    self.weight = self._weight_quantize(weight)
    self.bias = bias

  def forward(self, x):
    """Return the re-quantized matrix product plus the bias."""
    product = x @ self.weight.t()
    requantized = Quantize_per_tensor(product, self.scale, self.zero_point, mode=self.mode)
    return requantized.repr() + self.bias

  def _weight_quantize(self, weight):
    """Quantize `weight` with parameters derived from the weight itself."""
    # NOTE(review): Calculate_scale_zero_point is called with its default mode;
    # self.mode is only forwarded to Quantize_per_tensor - confirm this is intended.
    weight_scale, weight_zero = Calculate_scale_zero_point(weight)
    return Quantize_per_tensor(weight, weight_scale, weight_zero, mode=self.mode).repr()

  def __repr__(self):
    return f'QuantizedLinear(in_features={self.in_features}, out_features={self.out_features}, scale={self.scale}, zero_point={self.zero_point})'

class QuantizedLinearReLU(nn.Module):
  """Quantized linear layer followed by ReLU.

  The weight is quantized once at construction. In `forward` the product
  x @ W^T is passed through Quantize_per_tensor with this layer's
  (scale, zero_point), the bias is added, and ReLU is applied last.
  """

  def __init__(self, in_features, out_features, weight, bias, scale, zero_point, mode):
    super().__init__()
    self.in_features, self.out_features = in_features, out_features
    self.mode = mode
    self.scale = scale
    self.zero_point = zero_point
    # mode must already be set: _weight_quantize reads self.mode.
    self.weight = self._weight_quantize(weight)
    self.bias = bias

  def forward(self, x):
    """Return relu(re-quantized matrix product + bias)."""
    product = x @ self.weight.t()
    requantized = Quantize_per_tensor(product, self.scale, self.zero_point, mode=self.mode)
    pre_activation = requantized.repr() + self.bias
    return F.relu(pre_activation)

  def _weight_quantize(self, weight):
    """Quantize `weight` with parameters derived from the weight itself."""
    # NOTE(review): Calculate_scale_zero_point is called with its default mode;
    # self.mode is only forwarded to Quantize_per_tensor - confirm this is intended.
    weight_scale, weight_zero = Calculate_scale_zero_point(weight)
    return Quantize_per_tensor(weight, weight_scale, weight_zero, mode=self.mode).repr()

  def __repr__(self):
    return f'QuantizedLinearReLU(in_features={self.in_features}, out_features={self.out_features}, scale={self.scale}, zero_point={self.zero_point})'

In [None]:
class QuantizedModel(nn.Module):
  """Quantized counterpart of the three-layer network, built from a trained model.

  Args:
    model: trained module whose parameters named "*weight*" / "*bias*" are
      collected in iteration order and used for layers nn1, nn2, nn3.
    scale: sequence of four scales - index 0 for the network input and
      indices 1..3 for the outputs of nn1, nn2, nn3.
    zero_point: sequence of four zero points, indexed like `scale`.
    mode: quantization mode forwarded to every quantization step.
  """

  def __init__(self, model, scale, zero_point, mode="normal"):
    super().__init__()
    self.weight_dic = []
    self.bias_dic = []
    self.scale, self.zero_point = scale, zero_point #Scale and zero point of all layer
    self.mode = mode

    # Read the parameters from the model passed in, not from whatever
    # notebook-level variable happens to be called `model`.
    self._get_weight(model)
    self.nn1 = QuantizedLinearReLU(in_features=28*28, out_features=120, weight=self.weight_dic[0], bias=self.bias_dic[0], scale=self.scale[1], zero_point=self.zero_point[1], mode=self.mode)
    self.nn2 = QuantizedLinearReLU(in_features=120, out_features=84, weight=self.weight_dic[1], bias=self.bias_dic[1], scale=self.scale[2], zero_point=self.zero_point[2], mode=self.mode)
    self.nn3 = QuantizedLinear(in_features=84, out_features=10, weight=self.weight_dic[2], bias=self.bias_dic[2], scale=self.scale[3], zero_point=self.zero_point[3], mode=self.mode)

  def forward(self, x):
    """Flatten and quantize the input, then run the three quantized layers."""
    x = x.view(-1, 28 * 28)
    x = Quantize_per_tensor(x, self.scale[0], self.zero_point[0], mode=self.mode).repr()
    x = self.nn1(x)
    x = self.nn2(x)
    x = self.nn3(x)
    # NOTE(review): nn3 returns a regular tensor here; dequantize() is
    # presumably just a float conversion in that case - confirm.
    x = x.dequantize()
    return x

  def _get_weight(self, model):
    """Collect the weight and bias parameters of `model` in iteration order."""
    for name, paras in model.named_parameters():
      if "weight" in name:
        self.weight_dic.append(paras)
      elif "bias" in name:
        self.bias_dic.append(paras)

# Normal quantization

In [None]:
# Work on a deep copy so the trained FP32_model object itself is left as is.
model = copy.deepcopy(FP32_model)

In [None]:
scale_dic = []
zero_dic = []

# Calibrate on a single training batch: record the network input ('x'), the
# two ReLU outputs and the final layer output, and derive one int8
# (scale, zero point) pair per recorded tensor from its min/max.
calib_nodes = ['x', 'relu', 'relu_1', 'nn3']
q_min, q_max = -128, 127

# One extractor returning every node at once (instead of rebuilding it per
# node), kept on the CPU like the calibration batch.
extractor = feature_extraction.create_feature_extractor(model, calib_nodes).cpu()

# `calib_images` rather than `input`, so the builtin input() is not shadowed
# for the rest of the notebook.
for calib_images, _ in train_loader:
  with torch.no_grad():
    activations = extractor(calib_images)
  for node in calib_nodes:
    values = activations[node].numpy()
    min_val, max_val = np.min(values), np.max(values)
    scale = (max_val - min_val) / (q_max - q_min)
    zero = round(q_min - min_val / scale)
    scale_dic.append(scale)
    zero_dic.append(zero)
  break  # a single batch is used for calibration

print(scale_dic)
print(zero_dic)


In [None]:
# Build the quantized network in "normal" mode from the calibrated parameters
# and show its layer summary.
Quantized_normal_model = QuantizedModel(
  model,
  scale=scale_dic,
  zero_point=zero_dic,
  mode="normal",
)
print(Quantized_normal_model)

# Clip quantization

In [None]:
# Start again from a fresh deep copy of the trained FP32_model.
model = copy.deepcopy(FP32_model)

In [None]:
scale_dic = []
zero_dic = []

# Calibrate on a single training batch: record the network input ('x'), the
# two ReLU outputs and the final layer output, and derive one int8
# (scale, zero point) pair per recorded tensor from its min/max.
# The statistics themselves do not depend on the quantization mode; "clip"
# takes effect when the quantized model is built and run.
calib_nodes = ['x', 'relu', 'relu_1', 'nn3']
q_min, q_max = -128, 127

# One extractor returning every node at once (instead of rebuilding it per
# node), kept on the CPU like the calibration batch.
extractor = feature_extraction.create_feature_extractor(model, calib_nodes).cpu()

# `calib_images` rather than `input`, so the builtin input() is not shadowed
# for the rest of the notebook.
for calib_images, _ in train_loader:
  with torch.no_grad():
    activations = extractor(calib_images)
  for node in calib_nodes:
    values = activations[node].numpy()
    min_val, max_val = np.min(values), np.max(values)
    scale = (max_val - min_val) / (q_max - q_min)
    zero = round(q_min - min_val / scale)
    scale_dic.append(scale)
    zero_dic.append(zero)
  break  # a single batch is used for calibration

print(scale_dic)
print(zero_dic)


In [None]:
# Build the quantized network in "clip" mode from the calibrated parameters
# and show its layer summary.
Quantized_clip_model = QuantizedModel(
  model,
  scale=scale_dic,
  zero_point=zero_dic,
  mode="clip",
)
print(Quantized_clip_model)

In [None]:
#define evaluate function
def Evaluate(model, loader):
  """Print the classification accuracy of `model` over `loader`.

  Args:
    model: callable mapping a batch of images to per-class scores; when it is
      an nn.Module it is switched to eval mode first.
    loader: iterable of (images, labels) batches.
  """
  if isinstance(model, torch.nn.Module):
    model.eval()

  total = 0
  correct = 0
  with torch.no_grad():
    for images, labels in loader:
      outputs = model(images)
      # the class with the highest score is the prediction
      predicted = outputs.argmax(dim=1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

  # An empty loader reports 0% instead of dividing by zero.
  accuracy_pct = 100. * correct / total if total else 0.0

  print("========================================= PERFORMANCE =============================================")
  print('\nAccuracy: {}/{} ({:.0f}%)\n'.format( correct, total, accuracy_pct))

In [None]:
# Test-set accuracy of the model quantized in "normal" mode
Evaluate(model=Quantized_normal_model, loader=test_loader)

In [None]:
# Test-set accuracy of the model quantized in "clip" mode
Evaluate(model=Quantized_clip_model, loader=test_loader)