<a href="https://colab.research.google.com/github/m-majchrzak/Optimization_Adaquant/blob/main/main_mobilenet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Libraries & Setup

In [1]:
%cd /content/
!rm -r ./Optimization_Adaquant/
!git clone https://github.com/m-majchrzak/Optimization_Adaquant.git
%cd Optimization_Adaquant/
!pip install pyunpack
from pyunpack import Archive
Archive('calibration_datasets.zip').extractall("")

/content
rm: cannot remove './Optimization_Adaquant/': No such file or directory
Cloning into 'Optimization_Adaquant'...
remote: Enumerating objects: 128, done.[K
remote: Counting objects: 100% (128/128), done.[K
remote: Compressing objects: 100% (97/97), done.[K
remote: Total 128 (delta 62), reused 70 (delta 25), pack-reused 0[K
Receiving objects: 100% (128/128), 6.31 MiB | 20.72 MiB/s, done.
Resolving deltas: 100% (62/62), done.
/content/Optimization_Adaquant
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyunpack
  Downloading pyunpack-0.3-py2.py3-none-any.whl (4.1 kB)
Collecting easyprocess (from pyunpack)
  Downloading EasyProcess-1.1-py3-none-any.whl (8.7 kB)
Collecting entrypoint2 (from pyunpack)
  Downloading entrypoint2-1.1-py2.py3-none-any.whl (9.9 kB)
Installing collected packages: entrypoint2, easyprocess, pyunpack
Successfully installed easyprocess-1.1 entrypoint2-1.1 pyunpack-0.3


In [2]:
import torch.nn.parallel
import torch.backends.cudnn as cudnn
from torch.nn import CrossEntropyLoss
from torch.optim import Adam, SGD

from utils.trainer import Trainer
from utils.adaquant import optimize_layer_adaquant
import numpy as np
import pandas as pd
from functools import partial
import random

from utils.quantize import QConv2d, QLinear
from utils.load_dataset import load_dataset
from utils.resnet import ResNet_imagenet
from utils.mobilenet_v2 import mobilenet_v2
from utils.misc import set_global_seeds

from torch.optim import SGD

In [3]:
# Notebook-wide defaults.
acc, loss = -1, -1
best_prec1 = 0
dtype = torch.float32

# Fix the random seed so repeated runs are comparable.
seed = 123
set_global_seeds(seed)

# Use the first visible GPU when CUDA is present; otherwise leave device_ids as None.
if torch.cuda.is_available():
    device_ids = list(range(torch.cuda.device_count()))
    torch.cuda.set_device(device_ids[0])
    cudnn.benchmark = True
else:
    device_ids = None

## Data

In [4]:
# Calib data loading code
# The directory below lives inside the tree unpacked from calibration_datasets.zip
# by the setup cell.
train_directory='./calibration_datasets/tiny_imagenet/train'
# Batch size handed to load_dataset.
train_batch_size = 400
# NOTE(review): `train_data` is later passed straight to trainer.validate(), so
# load_dataset presumably returns an iterable loader — confirm in utils.load_dataset.
train_data = load_dataset(train_directory, train_batch_size)

## Model, Optimizer, Trainer

In [5]:
### CREATE MODEL
# Only one architecture is active at a time; the ResNet variant is kept as a
# commented-out alternative.
#model = ResNet_imagenet()
model = mobilenet_v2()

# define loss function (criterion)
criterion = CrossEntropyLoss()

# optimizer configuration
# Plain SGD over all model parameters, with no weight decay.
optimizer = SGD(model.parameters(),lr=1e-2, momentum=0.5, weight_decay=0)

# TRAINER
# NOTE(review): the device is hard-coded to CPU even though an earlier cell selects
# a CUDA device when one is available; swap the commented line in to run on GPU.
#trainer = Trainer(model, criterion, optimizer, device=torch.device('cuda'))
trainer = Trainer(model, criterion, optimizer, device=torch.device('cpu'))


## Cache, Hook

In [6]:
# Maps each hooked module to the list of inputs it received, one entry per forward call.
cached_qinput = {}
def Qhook(name, module, input, output):
    """Forward hook that records the input a module receives on every call.

    Args:
        name: Layer name bound via functools.partial at registration time; unused here.
        module: The module the hook fired on; used as the cache key.
        input: Tuple of positional inputs to the module; only input[0] is stored.
        output: The module's output; ignored.
    """
    if module not in cached_qinput:
        cached_qinput[module] = []
    # Append on every call, not only the first one, so the cache holds one entry
    # per forward pass (matching what `hook` does). The tensor is detached and
    # moved to CPU so it is kept in host RAM.
    cached_qinput[module].append(input[0].detach().cpu())


In [7]:
# Maps each hooked module to a list of (input, output) pairs, one per forward call.
cached_input_output = {}
def hook(name,module, input, output):
    """Forward hook that stores a module's first input and its output.

    Args:
        name: Layer name bound via functools.partial at registration time; unused here.
        module: The module the hook fired on; used as the cache key.
        input: Tuple of positional inputs to the module; only input[0] is stored.
        output: The module's output.
    """
    # Create the per-module list on first use, then keep detached CPU copies in RAM.
    records = cached_input_output.setdefault(module, [])
    records.append((input[0].detach().cpu(), output.detach().cpu()))

In [8]:
# Tag every quantizable layer with its qualified module name so later cells can
# refer to it through `layer.name`.
for name, m in model.named_modules():
    if not isinstance(m, (QConv2d, QLinear)):
        continue
    print(name)
    m.name = name

features.0.0
features.1.conv.0.0
features.1.conv.1
features.2.conv.0.0
features.2.conv.1.0
features.2.conv.2
features.3.conv.0.0
features.3.conv.1.0
features.3.conv.2
features.4.conv.0.0
features.4.conv.1.0
features.4.conv.2
features.5.conv.0.0
features.5.conv.1.0
features.5.conv.2
features.6.conv.0.0
features.6.conv.1.0
features.6.conv.2
features.7.conv.0.0
features.7.conv.1.0
features.7.conv.2
features.8.conv.0.0
features.8.conv.1.0
features.8.conv.2
features.9.conv.0.0
features.9.conv.1.0
features.9.conv.2
features.10.conv.0.0
features.10.conv.1.0
features.10.conv.2
features.11.conv.0.0
features.11.conv.1.0
features.11.conv.2
features.12.conv.0.0
features.12.conv.1.0
features.12.conv.2
features.13.conv.0.0
features.13.conv.1.0
features.13.conv.2
features.14.conv.0.0
features.14.conv.1.0
features.14.conv.2
features.15.conv.0.0
features.15.conv.1.0
features.15.conv.2
features.16.conv.0.0
features.16.conv.1.0
features.16.conv.2
features.17.conv.0.0
features.17.conv.1.0
features.17.conv

In [9]:

# Switch quantization off on every quantizable layer and attach the caching hook,
# keeping the returned handles so the hooks can be removed afterwards.
handlers = []
count = 0
for name, m in model.named_modules():
    if not isinstance(m, (QConv2d, QLinear)):
        continue
    m.quantize = False
    handle = m.register_forward_hook(partial(hook, name))
    handlers.append(handle)
    count += 1

In [10]:
# Store input/output for all quantizable layers
trainer.validate(train_data)
print("Input/outputs cached")

Input/outputs cached


In [11]:
# Detach the caching hooks so later forward passes stop appending to the cache.
for handler in handlers:
    handler.remove()

In [12]:
# Turn quantization back on for every quantizable layer now that the caching
# pass (run with quantize = False) is done.
for m in model.modules():
    if not isinstance(m, (QConv2d, QLinear)):
        continue
    m.quantize = True

In [13]:
# List the cache keys, i.e. the module objects the forward hook fired on.
print(cached_input_output.keys())

dict_keys([QConv2d(
  3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
), QConv2d(
  32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
), QConv2d(
  32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
), QConv2d(
  16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
), QConv2d(
  96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
), QConv2d(
  96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
), QConv2d(
  24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False
  (quant

## Loop Through Layers

In [14]:
# One pre-allocated row per cached layer; the optimization loop fills the rows by index.
mse_df = pd.DataFrame(index=np.arange(len(cached_input_output)), columns=['name', 'bit', 'shape', 'mse_before', 'mse_after'])
# NOTE(review): print_freq is not referenced anywhere else in the visible notebook.
print_freq = 100
# Prefix for the output files written after the loop ('.mse.csv' and '.adaquant').
evaluate = "evaluate"

In [None]:
# Optimize the cached layers one at a time, in the order they were cached, and
# record the MSE that optimize_layer_adaquant reports before and after each one.
for i, layer in enumerate(cached_input_output):
    if i>0: # and seq_adaquant = True
        # Re-capture this layer's input with a fresh forward pass and use it in
        # place of the input cached earlier (which was recorded with quantize = False).
        cached_qinput = {}
        # `layer` is the module itself, so hook it directly instead of scanning
        # named_modules() for a matching name.
        handler = layer.register_forward_hook(partial(Qhook, layer.name))
        try:
            trainer.validate(train_data)
        finally:
            # Always detach the hook, even if the forward pass fails.
            handler.remove()
        print("cashed quant Input%s"%layer.name)
        # Only the first cached batch gets the re-captured input; its cached output is kept.
        cached_input_output[layer][0] = (cached_qinput[layer][0],cached_input_output[layer][0][1])
    print("\nOptimize {}:{} for {} bit of shape {}".format(i, layer.name, layer.num_bits, layer.weight.shape))

    mse_before, mse_after = optimize_layer_adaquant(layer, cached_input_output[layer])

    print("\nMSE before optimization: {}".format(mse_before))
    print("MSE after optimization:  {}".format(mse_after))
    mse_df.loc[i, 'name'] = layer.name
    mse_df.loc[i, 'bit'] = layer.num_bits
    mse_df.loc[i, 'shape'] = str(layer.weight.shape)
    mse_df.loc[i, 'mse_before'] = mse_before
    mse_df.loc[i, 'mse_after'] = mse_after


# Persist the per-layer MSE table and the optimized weights.
mse_csv = evaluate + '.mse.csv'
mse_df.to_csv(mse_csv)

filename = evaluate + '.adaquant'
torch.save(model.state_dict(), filename)

# Drop the activation cache before the final evaluation to free memory.
cached_input_output = None
# No cell in this notebook defines `val_data`, so calling it unconditionally raises
# NameError. Use it when it exists; otherwise fall back to the calibration data so
# the cell still runs to completion.
try:
    eval_loader = val_data.get_loader()
except NameError:
    print("val_data is not defined; evaluating on the calibration data instead")
    eval_loader = train_data
train_data = None
val_results = trainer.validate(eval_loader)
#logging.info(val_results)


Optimize 0:features.0.0 for 8 bit of shape torch.Size([32, 3, 3, 3])


100%|██████████| 100/100 [00:39<00:00,  2.53it/s]



MSE before optimization: 0.36453860998153687
MSE after optimization:  0.2530604898929596


In [None]:
# NOTE(review): the quantizable modules listed earlier in this notebook all sit under
# `features.*`; confirm the active model exposes a `conv1` attribute, otherwise this
# assignment raises AttributeError.
model.conv1.name = "QConv2d"

In [None]:
# Display the name stored on model.conv1 (relies on `conv1` existing on the model).
model.conv1.name

'QConv2d'

In [None]:
# Print every cached module (the dict keys); the cached values are not used here.
# NOTE(review): the optimization loop cell sets cached_input_output to None when it
# finishes, so this cell fails if run after it. The recorded output below also
# shows different layer shapes than the earlier key listing — it may be left over
# from a run with another model; confirm before relying on it.
for k, v in cached_input_output.items():
    print(k)

QConv2d(
  3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
)
QConv2d(
  64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
)
QConv2d(
  64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
)
QConv2d(
  64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
)
QConv2d(
  64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
)
QConv2d(
  256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quantize_weight): QuantThUpdate()
)
QConv2d(
  64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
  (quantize_input): QuantThUpdate()
  (quanti