In [1]:
# references:
# https://pytorch.org/blog/quantization-in-practice/

In [2]:
import copy

import pandas as pd
import torch
from torch.quantization import get_default_qconfig, quantize_fx

from lib.mobilenetv2 import mobilenet_v2
from lib.utils import calibrate, configure_cudnn, prepare_calib_dataloader, prepare_dataloaders, replace_relu, set_seed, test

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Reproducibility: seed through the project helper and request deterministic,
# non-benchmarking cuDNN settings.
seed = 1000
set_seed(seed)
configure_cudnn(deterministic=True, benchmark=False)

# Quantized kernels: select the engine that torch's quantized ops will use.
# The same name is reused later to look up the matching default qconfig.
backend = "fbgemm"
torch.backends.quantized.engine = backend

In [4]:
# prepare float model
model = mobilenet_v2()
model.eval()  # inference mode; quantization below is post-training

weight_path = "../models/exp_4000/best_model.pth"
# Map every tensor onto the CPU while loading: the rest of this notebook runs
# on the CPU, and without map_location a checkpoint that was saved from CUDA
# tensors cannot be loaded on a machine without a GPU.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
state_dict = torch.load(weight_path, map_location="cpu")
model.load_state_dict(state_dict)

# Project helper from lib.utils; presumably swaps activation modules so that
# FX fusion/quantization can handle them -- TODO confirm against lib.utils.
replace_relu(model)

In [5]:
# Data loaders: the first value returned by prepare_dataloaders is discarded,
# only the test loader is kept. Calibration uses its own loader.
_, test_dataloader = prepare_dataloaders(batch_size=32)
calib_dataloader = prepare_calib_dataloader(batch_size=32)

# Baseline: apply the backend's default qconfig to the whole model ("" is the
# global entry). The float `model` is left untouched by working on a deep copy.
example_inputs = (torch.randn(1, 3, 32, 32),)
qconfig = {"": get_default_qconfig(backend)}
model_quantized = quantize_fx.prepare_fx(copy.deepcopy(model).eval(), qconfig, example_inputs)  # fuse

# Run calibration batches through the prepared model, then convert it.
calibrate(model_quantized, calib_dataloader, n_calib_batch=32)
model_quantized = quantize_fx.convert_fx(model_quantized.eval())  # quantize

# Last expression of the cell: the value returned by test() is displayed.
test(model_quantized, "cpu", test_dataloader)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.79it/s]
test: 100%|██████████| 313/313 [00:13<00:00, 23.73it/s]


0.9622

In [6]:
# Target blocks for the sensitivity analysis: features.0 ... features.18,
# followed by the classifier.
block_names = [*(f"features.{idx}" for idx in range(19)), "classifier"]

block_names

['features.0',
 'features.1',
 'features.2',
 'features.3',
 'features.4',
 'features.5',
 'features.6',
 'features.7',
 'features.8',
 'features.9',
 'features.10',
 'features.11',
 'features.12',
 'features.13',
 'features.14',
 'features.15',
 'features.16',
 'features.17',
 'features.18',
 'classifier']

In [7]:
# Sensitivity analysis: quantize exactly one block at a time and record the
# test accuracy obtained with only that block quantized.
results = []

for block_quantized in block_names:
    # Global entry "" is None (no qconfig by default); only the block under
    # test is assigned the backend's default qconfig.
    qconfig = {
        "": None,
        "module_name": [(block_quantized, get_default_qconfig(backend))],
    }
    # Always start from a fresh copy of the float model.
    model_quantized = quantize_fx.prepare_fx(copy.deepcopy(model).eval(), qconfig, example_inputs)  # fuse

    # A new calibration loader is built on every iteration, as in the original flow.
    calib_dataloader = prepare_calib_dataloader(batch_size=32)
    calibrate(model_quantized, calib_dataloader, n_calib_batch=32)
    model_quantized = quantize_fx.convert_fx(model_quantized.eval())  # quantize

    test_accuracy = test(model_quantized, "cpu", test_dataloader)
    print(block_quantized, test_accuracy)

    # One record per block; turned into a DataFrame afterwards.
    results.append(dict(block_quantized=block_quantized, test_accuracy=test_accuracy))

Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:03<00:00,  9.12it/s]
test: 100%|██████████| 313/313 [00:46<00:00,  6.67it/s]


features.0 0.9637
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:07<00:00,  4.29it/s]
test: 100%|██████████| 313/313 [00:45<00:00,  6.82it/s]


features.1 0.9644
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.97it/s]
test: 100%|██████████| 313/313 [00:46<00:00,  6.77it/s]


features.2 0.9634
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:09<00:00,  3.33it/s]
test: 100%|██████████| 313/313 [01:20<00:00,  3.87it/s]


features.3 0.9633
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.97it/s]
test: 100%|██████████| 313/313 [00:50<00:00,  6.17it/s]


features.4 0.964
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.82it/s]
test: 100%|██████████| 313/313 [01:26<00:00,  3.63it/s]


features.5 0.9642
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.66it/s]
test: 100%|██████████| 313/313 [00:32<00:00,  9.50it/s]


features.6 0.9639
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.79it/s]
test: 100%|██████████| 313/313 [00:55<00:00,  5.66it/s]


features.7 0.9637
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.64it/s]
test: 100%|██████████| 313/313 [01:24<00:00,  3.69it/s]


features.8 0.9639
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.76it/s]
test: 100%|██████████| 313/313 [01:22<00:00,  3.78it/s]


features.9 0.9641
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.74it/s]
test: 100%|██████████| 313/313 [01:23<00:00,  3.77it/s]


features.10 0.9637
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.76it/s]
test: 100%|██████████| 313/313 [01:15<00:00,  4.13it/s]


features.11 0.9638
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:09<00:00,  3.46it/s]
test: 100%|██████████| 313/313 [01:21<00:00,  3.85it/s]


features.12 0.964
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:09<00:00,  3.41it/s]
test: 100%|██████████| 313/313 [01:20<00:00,  3.91it/s]


features.13 0.964
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.88it/s]
test: 100%|██████████| 313/313 [01:19<00:00,  3.95it/s]


features.14 0.9638
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  4.00it/s]
test: 100%|██████████| 313/313 [01:26<00:00,  3.61it/s]


features.15 0.9642
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:07<00:00,  4.04it/s]
test: 100%|██████████| 313/313 [01:24<00:00,  3.71it/s]


features.16 0.9641
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:08<00:00,  3.80it/s]
test: 100%|██████████| 313/313 [01:25<00:00,  3.65it/s]


features.17 0.964
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:09<00:00,  3.27it/s]
test: 100%|██████████| 313/313 [01:26<00:00,  3.63it/s]


features.18 0.9638
Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:09<00:00,  3.43it/s]
test: 100%|██████████| 313/313 [01:26<00:00,  3.60it/s]

classifier 0.9638





In [8]:
# Rank the blocks from lowest to highest test accuracy, so the blocks whose
# quantization hurts accuracy the most come first.
df = pd.DataFrame(results).sort_values(by="test_accuracy", ascending=True)
df

Unnamed: 0,block_quantized,test_accuracy
3,features.3,0.9633
2,features.2,0.9634
0,features.0,0.9637
10,features.10,0.9637
7,features.7,0.9637
14,features.14,0.9638
11,features.11,0.9638
18,features.18,0.9638
19,classifier,0.9638
6,features.6,0.9639


In [9]:
# disable quantization for N-most sensitive blocks
N = 6

# df is sorted by ascending test accuracy, so its first N rows are the N most
# sensitive blocks. Each one is paired with None, which is the qconfig value
# used to keep a module out of quantization.
# head() is used instead of an append-then-check loop so that N = 0 selects no
# block at all (the loop form always picked at least one); max() keeps a
# negative N from being read as "all but the last |N| rows".
blocks_not_quantized = [
    (block_name, None) for block_name in df["block_quantized"].head(max(N, 0))
]

blocks_not_quantized

[('features.3', None),
 ('features.2', None),
 ('features.0', None),
 ('features.10', None),
 ('features.7', None),
 ('features.14', None)]

In [11]:
# Final mixed model: the backend's default qconfig applies globally (""), while
# the per-module entries in blocks_not_quantized override it with None.
qconfig = {"": get_default_qconfig(backend), "module_name": blocks_not_quantized}
model_quantized = quantize_fx.prepare_fx(copy.deepcopy(model).eval(), qconfig, example_inputs)  # fuse

# Calibrate on a fresh calibration loader, then convert.
calib_dataloader = prepare_calib_dataloader(batch_size=32)
calibrate(model_quantized, calib_dataloader, n_calib_batch=32)
model_quantized = quantize_fx.convert_fx(model_quantized.eval())  # quantize

# Accuracy of the partially quantized model on the test loader.
test_accuracy = test(model_quantized, "cpu", test_dataloader)

print(test_accuracy)

Files already downloaded and verified


calib: 100%|██████████| 32/32 [00:14<00:00,  2.20it/s]
test: 100%|██████████| 313/313 [00:20<00:00, 15.35it/s]

0.9638



