# Homework 7 - Network Compression (Weight Quantization)

> Author: Arvin Liu (b05902127@ntu.edu.tw)

## **goal**

 ----- strong baseline -----   0.84100

----- simple baseline -----   0.83682

# Readme


HW7的任務是模型壓縮 - Neural Network Compression。

Compression有很多種門派，在這裡我們會介紹上課出現過的其中四種，分別是:

* 知識蒸餾 Knowledge Distillation
* 網路剪枝 Network Pruning
* 用少量參數來做CNN Architecture Design
* 參數量化 Weight Quantization

在這個notebook中我們會介紹非常簡單的Weight Quantization，
而我們有提供已經做完Knowledge Distillation的小model來做Quantization。

* Model架構 / Architecture Design在同目錄中的hw7_Architecture_Design.ipynb。
* 下載已經train好的小model(0.99M): https://drive.google.com/open?id=12wtIa0WVRcpboQzhgRUJOpcXe23tgWUL
  * 參數為 base=16, width_mult=1 (default)


## Weight Quantization
<img src="https://i.imgur.com/SMsaiAo.png" width="500px">

我們這邊會示範如何實作第一條: Using less bits to represent a value。

## 好的Quantization很重要。
這邊提供一些TA的數據供各位參考。

|bit|state_dict size|accuracy|
|-|-|-|
|32|1047430 Bytes|0.81315|
|16|522958 Bytes|0.81347|
|8|268472 Bytes|0.80791|
|7|268472 Bytes|0.80791|


## Byte Cost
根據[torch的官方手冊](https://pytorch.org/docs/stable/tensors.html)，我們知道torch.FloatTensor預設是32-bit，也就是佔了4byte的空間，而FloatTensor系列最低可以容忍的是16-bit。

為了方便操作，我們之後會將state_dict轉成numpy array做事。
因此我們可以先看看numpy有甚麼樣的type可以使用。(ps.)mantissa = 有效位數
![](https://i.imgur.com/3N7tiEc.png)      
而我們發現numpy最低有float16可以使用，因此我們可以直接靠轉型將32-bit的tensor轉換成16-bit的ndarray存起來。

In [None]:
# Base directory that is prefixed to the quantized model filename when decoding.
folder_path = './'

In [None]:
import random
import numpy as np
import torch

# 固定隨機種子
def same_seeds(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU, plus all CUDA devices
    when CUDA is available), turns cuDNN benchmark mode off and requests
    deterministic cuDNN algorithms.

    Args:
      seed: integer seed applied to every generator.
    """
    # cuDNN switches: no benchmark mode, deterministic algorithms only.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Host-side generators.
    random.seed(seed)  # Python random module.
    np.random.seed(seed)  # Numpy module.
    torch.manual_seed(seed)

    # Nothing more to seed on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # covers the multi-GPU case.

# Seed every generator with 0 before any other work is done.
same_seeds(0)

# Read state_dict

下載我們已經train好的小model的state_dict進行測試。

In [None]:
# Alternative checkpoint (disabled):
# !gdown --id '10usrlxc7KhTbwRTzG7IAmaFbsVdWqlQ3' --output student_custom_small.bin
# Download the pre-trained small student model.
!gdown --id '1-BVZoTUkX0faW4sYk7L2qiZbo-uYdn0P' --output student_custom_small.bin

import os
import torch

# Report the on-disk size of the downloaded file as the baseline to compare against.
print(f"\noriginal cost: {os.stat('student_custom_small.bin').st_size} bytes.")
# Load the state_dict that the encode functions below will quantize.
params = torch.load('student_custom_small.bin')

Downloading...
From: https://drive.google.com/uc?id=1-BVZoTUkX0faW4sYk7L2qiZbo-uYdn0P
To: /content/student_custom_small.bin
  0% 0.00/1.05M [00:00<?, ?B/s]100% 1.05M/1.05M [00:00<00:00, 67.3MB/s]

original cost: 1047706 bytes.


# 32-bit Tensor -> 16-bit 

In [None]:
import numpy as np
import pickle

def encode16(params, fname):
    """Quantize a state_dict to 16-bit floats and pickle it to ``fname``.

    Every entry is moved to the CPU and converted to NumPy. Entries that are
    arrays are stored as ``np.float16``; entries that collapse to a single
    NumPy scalar (0-dim tensors) are stored unchanged as that scalar.

    Args:
      params: the model's state_dict (mapping of name -> torch tensor).
      fname: path of the pickle file to write.
    """
    custom_dict = {}
    for name, param in params.items():
        # np.float64() turns a 0-dim array into a NumPy scalar, which is how
        # single-number entries are told apart from real weight arrays.
        value = np.float64(param.cpu().numpy())
        if isinstance(value, np.ndarray):
            custom_dict[name] = np.float16(value)
        else:
            # A lone number is not an ndarray; keep it uncompressed.
            custom_dict[name] = value

    # Context manager guarantees the file is flushed and closed, so the size
    # read back with os.stat() right afterwards is the final size.
    with open(fname, 'wb') as f:
        pickle.dump(custom_dict, f)


def decode16(fname):
    """Read a pickle written by ``encode16`` and rebuild a state_dict.

    Each stored value is wrapped with ``torch.tensor``; no cast is applied
    here, so the tensors keep whatever dtype was stored in the file.

    Args:
      fname: path of the compressed pickle file.

    Returns:
      dict mapping parameter name -> torch.Tensor.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that you produced yourself.
    with open(fname, 'rb') as f:
        params = pickle.load(f)

    return {name: torch.tensor(param) for name, param in params.items()}


# Write the 16-bit version of the loaded state_dict and report its file size.
encode16(params, '16_bit_model.pkl')
print(f"16-bit cost: {os.stat('16_bit_model.pkl').st_size} bytes.")

16-bit cost: 522958 bytes.


# 32-bit Tensor -> 8-bit (OPTIONAL)

這邊提供轉成8-bit的方法，僅供大家參考。
因為沒有8-bit的float，所以我們先對每個weight記錄最小值和最大值，進行min-max正規化後乘上$2^8-1$再四捨五入，就可以用np.uint8存取了。

$W' = \mathrm{round}\left(\frac{W - \min(W)}{\max(W) - \min(W)} \times (2^8 - 1)\right)$



> 至於能不能轉成更低的形式，例如4-bit呢? 當然可以，待你實作。

In [None]:
import numpy as np
import pickle
def encode8(params, fname):
    """Quantize a state_dict to 8-bit integers and pickle it to ``fname``.

    Each array entry is min-max normalized, scaled to 0..255, rounded and
    stored as ``(min_val, max_val, uint8_array)``. Entries that collapse to a
    single NumPy scalar (0-dim tensors) are stored unchanged.

    A constant array (max == min) has a zero range; it is encoded as all
    zeros so that dequantizing with ``q / 255 * (max - min) + min`` returns
    the original constant instead of the NaN a 0/0 division would produce.

    Args:
      params: the model's state_dict (mapping of name -> torch tensor).
      fname: path of the pickle file to write.
    """
    custom_dict = {}
    for name, param in params.items():
        # np.float64() turns a 0-dim array into a NumPy scalar, which is how
        # single-number entries are told apart from real weight arrays.
        value = np.float64(param.cpu().numpy())
        if isinstance(value, np.ndarray):
            min_val = np.min(value)
            max_val = np.max(value)
            span = max_val - min_val
            # Guard against division by zero for constant tensors.
            scale = span if span > 0 else 1.0
            quantized = np.round((value - min_val) / scale * 255)
            custom_dict[name] = (min_val, max_val, np.uint8(quantized))
        else:
            custom_dict[name] = value

    # Context manager guarantees the file is flushed and closed, so the size
    # read back with os.stat() right afterwards is the final size.
    with open(fname, 'wb') as f:
        pickle.dump(custom_dict, f)


def decode8(fname):
    """Read a pickle written by ``encode8`` and rebuild a state_dict.

    Tuple entries ``(min_val, max_val, uint8_array)`` are dequantized with
    ``q / 255 * (max_val - min_val) + min_val``; every other entry is wrapped
    with ``torch.tensor`` as-is.

    Args:
      fname: path of the compressed pickle file.

    Returns:
      dict mapping parameter name -> torch.Tensor.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that you produced yourself.
    with open(fname, 'rb') as f:
        params = pickle.load(f)

    custom_dict = {}
    for name, param in params.items():
        if isinstance(param, tuple):
            min_val, max_val, quantized = param
            # Map the 0..255 codes back onto the original [min, max] range.
            restored = (np.float64(quantized) / 255 * (max_val - min_val)) + min_val
            custom_dict[name] = torch.tensor(restored)
        else:
            custom_dict[name] = torch.tensor(param)

    return custom_dict

# Write the 8-bit version of the loaded state_dict and report its file size.
encode8(params, '8_bit_model.pkl')
print(f"8-bit cost: {os.stat('8_bit_model.pkl').st_size} bytes.")

8-bit cost: 268471 bytes.


# testing

## load model

In [None]:
# Load the already-trained checkpoint.
# !gdown --id '10usrlxc7KhTbwRTzG7IAmaFbsVdWqlQ3' --output student_custom_small.bin #predict_0.8402332361516035_student_model.csv
!gdown --id '1-BVZoTUkX0faW4sYk7L2qiZbo-uYdn0P' --output student_custom_small.bin #predict_0.8131195335276968student_model.csv

# NOTE(review): StudentNet is not defined in this cell; presumably it comes from
# the %run of hw7_Architecture_Design.ipynb further down, which must run first.
student_net = StudentNet(base=16).cuda()
student_net.load_state_dict(torch.load('student_custom_small.bin'))

In [None]:
# Decode the 8-bit pickle back into a state_dict of tensors.
state_dicts = decode8(folder_path + '8_bit_model.pkl')
# state_dicts = decode8('8_bit_model.pkl')

In [None]:
import torch
import os
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models

# Load our model architecture, TA_Student_Net (defined in hw7_Architecture_Design.ipynb).
!gdown --id '1lJS0ApIyi7eZ2b3GMyGxjPShI8jXM2UC' --output "hw7_Architecture_Design.ipynb"
%run "hw7_Architecture_Design.ipynb"

In [None]:
# Rebuild the student network and load the decoded weights into it.
student_net_final = StudentNet(base=16).cuda()
student_net_final.load_state_dict(state_dicts)
# Switch to evaluation mode before running inference.
student_net_final.eval()


# check parameters
from torchsummary import summary
summary(student_net_final, input_size=(3, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 128, 128]             448
       BatchNorm2d-2         [-1, 16, 128, 128]              32
             ReLU6-3         [-1, 16, 128, 128]               0
         MaxPool2d-4           [-1, 16, 64, 64]               0
            Conv2d-5           [-1, 16, 64, 64]             160
       BatchNorm2d-6           [-1, 16, 64, 64]              32
             ReLU6-7           [-1, 16, 64, 64]               0
            Conv2d-8           [-1, 32, 64, 64]             544
         MaxPool2d-9           [-1, 32, 32, 32]               0
           Conv2d-10           [-1, 32, 32, 32]             320
      BatchNorm2d-11           [-1, 32, 32, 32]              64
            ReLU6-12           [-1, 32, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]           2,112
        MaxPool2d-14           [-1, 64,

## load testing data

In [None]:
import re
import torch
from glob import glob
from PIL import Image
import torchvision.transforms as transforms

class MyDataset(torch.utils.data.Dataset):
    """In-memory image dataset built from every ``*.jpg`` file in one folder.

    All images are opened and fully loaded in ``__init__`` (in sorted path
    order) and kept in ``self.data``; ``self.label`` holds the matching class
    index for each image.
    """

    # Compiled once for the whole class instead of once per image.
    _DIGITS = re.compile(r'\d+')

    def __init__(self, folderName, transform=None):
        """Load every ``*.jpg`` directly inside ``folderName``.

        Args:
          folderName: directory that contains the images.
          transform: optional callable applied to an image in ``__getitem__``.
        """
        self.transform = transform
        self.data = []
        self.label = []

        for img_path in sorted(glob(folderName + '/*.jpg')):
            try:
                # The class index is the SECOND run of digits in the whole path.
                # NOTE(review): presumably the first run is the "11" of a
                # "food-11" parent folder and the second the class prefix of
                # the file name — verify if the folder layout changes.
                class_idx = int(self._DIGITS.findall(img_path)[1])
            except (IndexError, ValueError):
                # Inference mode (no answer in the path): class_idx defaults to 0.
                class_idx = 0

            image = Image.open(img_path)
            # Keep a reference to the file object before loading the pixels.
            image_fp = image.fp
            image.load()
            # Close the file explicitly, otherwise the process can hit OPEN_MAX.
            image_fp.close()

            self.data.append(image)
            self.label.append(class_idx)

    def __len__(self):
        """Return the number of loaded images."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``; a tensor index is accepted.

        The transform, when set, is applied on every access.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        image = self.data[idx]
        if self.transform:
            image = self.transform(image)
        return image, self.label[idx]


# Training-time pipeline: random 256 crop (padding symmetrically when the image
# is smaller), random horizontal flip, random rotation (degrees=15), then
# conversion to a tensor.
trainTransform = transforms.Compose([
    transforms.RandomCrop(256, pad_if_needed=True, padding_mode='symmetric'),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
])
# Evaluation pipeline: center crop to 256 and conversion to a tensor, with no
# random operations.
testTransform = transforms.Compose([
    transforms.CenterCrop(256),
    transforms.ToTensor(),
])

def get_dataloader(mode='training', batch_size=32, root='./food-11'):
    """Build a DataLoader over the images in ``<root>/<mode>``.

    The training split uses ``trainTransform`` and is shuffled; the other
    splits use ``testTransform`` and keep their order.

    Args:
      mode: one of 'training', 'testing', 'validation'.
      batch_size: number of samples per batch.
      root: dataset root directory. Defaults to the original hard-coded
        './food-11'; pass '.' when the archive was unpacked without a
        top-level folder.

    Returns:
      torch.utils.data.DataLoader over a MyDataset for the requested split.
    """

    assert mode in ['training', 'testing', 'validation']

    is_training = mode == 'training'
    folder = f'{root}/{mode}'

    dataset = MyDataset(
        folder,
        transform=trainTransform if is_training else testTransform)
    # Echo the folder that was just read.
    print(folder)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=is_training)

    return dataloader


In [None]:
# Build the loader for the 'testing' split (only the 'training' split is shuffled).
test_dataloader = get_dataloader('testing', batch_size=32)

./food-11/testing


In [None]:
import time
# Remember the current working directory.
SaveDirectory = os.getcwd()
print (SaveDirectory)  # show the resulting path

/content


In [None]:
# Download dataset
!gdown --id '1GzukFVznTp_RG7b2ury7hr9TwA-MyMYj' --output food-11.zip
# Unzip the files
!unzip food-11.zip

# Build the loader for the 'testing' split after the archive has been unpacked.
test_dataloader = get_dataloader('testing', batch_size=32)
print('finish test_dataloader')

In [None]:
import numpy as np
# Inference only: make sure the network is in evaluation mode.
student_net_final.eval()
# One predicted label per test sample, appended in loader order.
prediction = []

# optimizer = optim.AdamW(student_net_final.parameters(), lr=1e-3)
for now_step, batch_data in enumerate(test_dataloader):
    # (disabled) reset the optimizer
    # optimizer.zero_grad()
    # Unpack the batch; hard_labels is not used below.
    inputs, hard_labels = batch_data
    inputs = inputs.cuda()

    # No gradients are needed for prediction.
    with torch.no_grad():
        logits = student_net_final(inputs)
        # argmax along axis 1 yields one label per row of logits.
        test_label = np.argmax(logits.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y) 

In [None]:
# Submission for hw7
from google.colab import files

# Write the predictions to a CSV file, one "index,label" row per sample.
with open("predict_report1.csv", 'w') as f:
    f.write('Id,label\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(i, y))
# Save to the local machine (disabled)
# files.download('predict_report1.csv')

In [None]:
# Submission for hw3
from google.colab import files

# Write the predictions to a CSV file, one "index,label" row per sample.
with open("predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(i, y))
# Save to the local machine
files.download('predict.csv')

In [None]:
import pandas as pd
# Read the hw7 submission file back so the notebook displays it.
pd.read_csv("predict_report1.csv")

In [None]:
# Kaggle Score Record

# 1. predict_TA.csv
#   acc = 0.82964

# 2. predict_0.8440233236151603_student_model.csv
#   acc = 0.04064
#   https://drive.google.com/open?id=10uOiw6Hsn0dYQe9TnNpxGaJVp4V6QVbt

# 3. predict_0.8402332361516035_student_model.csv
#   acc = 0.86072
#   https://drive.google.com/open?id=10usrlxc7KhTbwRTzG7IAmaFbsVdWqlQ3

# 4. predict_0.8402332361516035_student_model_8bytes.csv
#   acc = 0.85475
#   https://drive.google.com/open?id=10usrlxc7KhTbwRTzG7IAmaFbsVdWqlQ3

# 5. predict_0.8131195335276968student_model.csv
#   acc = 0.83024

# 6. predicti_8_bit_model_0.8259475218658893student_model.pkl.csv
#   acc = 0.84578 str_best

# Q&A

有任何Network Compression的問題可以寄信到b05902127@ntu.edu.tw。

時間允許的話我會更新在這裡。