In [1]:
# Device string used for checkpoint loading, CUDA device selection and
# evaluation throughout this notebook.
DEVICE = "cuda:2"
# Mini-batch size of the training loader; the test loader uses 4x this value.
BATCH_SIZE = 256

In [2]:
import os
import sys
import torch
import torchvision.datasets as Datasets
from torchvision import transforms as tfms
from torch.utils.data import DataLoader
# Put the directory two levels up on the import path (presumably the
# repository root, so that the `trailmet` package can be imported).
sys.path.append("../../")
# Select the default CUDA device by parsing the whole device string.
# Taking only the last character of DEVICE would map 'cuda:10' to GPU 0;
# torch.device() parses multi-digit indices correctly.
torch.cuda.set_device(torch.device(DEVICE))

In [3]:
# Per-channel (mean, std) pair fed to Normalize.
# NOTE(review): presumably the CIFAR-100 training-set statistics — confirm.
stats = (
    (0.5071, 0.4867, 0.4408),
    (0.2675, 0.2565, 0.2761),
)
channel_mean, channel_std = stats

# Augmenting pipeline: reflect-padded random crop and random horizontal flip,
# followed by tensor conversion and in-place normalisation.
train_augmentations = [
    tfms.RandomCrop(32, padding=4, padding_mode='reflect'),
    tfms.RandomHorizontalFlip(),
]
train_tfms = tfms.Compose(
    train_augmentations
    + [
        tfms.ToTensor(),
        tfms.Normalize(channel_mean, channel_std, inplace=True),
    ]
)

# Deterministic pipeline: tensor conversion and normalisation only.
test_tfms = tfms.Compose(
    [
        tfms.ToTensor(),
        tfms.Normalize(channel_mean, channel_std),
    ]
)

In [4]:
# Both splits are read from ./data (downloaded if missing) and share the
# deterministic `test_tfms` pipeline.
# NOTE(review): `train_tfms` is defined in the previous cell but never used;
# confirm that feeding un-augmented training images to the quantizer is the
# intended calibration setup rather than a copy-paste slip.
dataset_options = dict(root='./data', download=True, transform=test_tfms)
cifar100_train = Datasets.CIFAR100(train=True, **dataset_options)
cifar100_test = Datasets.CIFAR100(train=False, **dataset_options)

# Training batches are shuffled; test batches keep dataset order and are
# four times larger.
train_loader = DataLoader(
    cifar100_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=1,
)
test_loader = DataLoader(
    cifar100_test,
    batch_size=4 * BATCH_SIZE,
    shuffle=False,
    num_workers=1,
)

# Mapping handed to the quantization algorithm, keyed by split name.
dataloaders = {
    "train": train_loader,
    "val": test_loader,
}

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# import libraries
from trailmet.models import resnet
from trailmet.algorithms.quantize.lapq import LAPQ

In [6]:
# load model
CHECKPOINT_PATH = "./resnet50_cifar100-pretrained.pth"

# NOTE(review): 100 and 32 are presumably the class count and input
# resolution — confirm against trailmet's make_resnet50 signature.
cnn = resnet.make_resnet50(100, 32)

# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source.
checkpoint = torch.load(CHECKPOINT_PATH, map_location=DEVICE)
pretrained_weights = checkpoint['state_dict']

# Kept as the last expression so the key-matching report is shown as output.
cnn.load_state_dict(pretrained_weights)

<All keys matched successfully>

In [7]:
# test model
from trailmet.algorithms.algorithms import BaseAlgorithm

# Evaluate the network loaded above (before quantization) on the test
# loader; the value returned by test() is displayed as the cell output.
eval_device = torch.device(DEVICE)
evaluator = BaseAlgorithm()
evaluator.test(model=cnn, dataloader=test_loader, device=eval_device)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:06<00:00,  1.65it/s, acc1=72.6, acc5=91.5]


(72.61120834350587, 91.51686019897461)

In [8]:
# quantize model
# Number of samples the calibration-batch count is derived from
# (previously the bare literal 1024).
CALIB_SAMPLES = 1024

# Options forwarded to LAPQ as keyword arguments.
# NOTE(review): the per-key meanings below are inferred from the key names
# and the log output — confirm against the trailmet LAPQ documentation.
kwargs = {
    'W_BITS': 4,            # presumably weight bit-width
    'A_BITS': 8,            # presumably activation bit-width
    'ACT_QUANT': True,
    # Clamp to at least one batch so a BATCH_SIZE above CALIB_SAMPLES does
    # not floor-divide to zero calibration batches.
    'CALIB_BATCHES': max(1, CALIB_SAMPLES // BATCH_SIZE),
    'MAX_ITER': 1000,
    'MAX_FEV': 1000,
    'VERBOSE': True,
    'PRINT_FREQ': 5,
    # Parse the index from the full device string; taking only the last
    # character would turn 'cuda:10' into 0. The value is None when DEVICE
    # carries no explicit index.
    'GPU_ID': torch.device(DEVICE).index,
    'SEED': 42,
}
qnn = LAPQ(cnn, dataloaders, **kwargs)
qnn.compress_model()

==> Using seed: 42 and device: cuda:2


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:06<00:00,  1.64it/s, acc1=12.6, acc5=31.9]


==> Quantization (W4A8) accuracy before LAPQ: 12.6389 | 31.9342


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:55<00:00,  5.55s/it, loss=0.583, p_val=4]


==> using p intr : 3.16


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.17it/s, acc1=66.7, acc5=87.3, loss=1.45]


==> Quantization (W4A8) accuracy before Optimization: 66.6829 | 87.3047
==> Loss after LpNorm Quantization: 1.4529
==> Starting Powell Optimization


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [30:42<00:00,  1.84s/it, curr_loss=0.194, min_loss=0.194]


==> Layer-wise Scales :
 [0.25123631 0.23003933 0.46711149 1.63267737 0.41464745 0.15834516
 0.19356048 0.52230773 0.38443539 0.09406899 0.15986991 0.61930502
 3.3141314  0.25260699 0.10736936 0.45736727 0.50539958 0.21927392
 0.07660881 0.15374391 0.32250034 1.24971072 0.10082186 0.13913764
 0.44117871 0.35104352 0.12423531 0.22276621 0.50254854 0.45813685
 0.18309907 0.04532639 0.25169037 0.63995644 0.19961704 0.0553401
 0.08803868 0.34746182 1.42343625 0.05268827 0.09324313 0.28673273
 0.33089256 0.05356183 0.09022998 0.34158519 0.34553242 0.07120442
 0.1080137  0.30454904 0.3600522  0.09251782 0.12302133 0.26132783
 0.40595856 0.17598431 0.10362461 0.44794682 0.73541707 0.14286925
 0.02222029 0.18939885 0.39786395 0.51783663 0.0080806  0.09720036]


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.15it/s, acc1=68.9, acc5=88.7]


==> Full quantization (W4A8) accuracy: (68.88372955322265, 88.67466506958007)


In [9]:
# test quantized model
from trailmet.algorithms.algorithms import BaseAlgorithm

# Evaluate the model held by the LAPQ object on the same test loader; the
# value returned by test() is displayed as the cell output.
eval_device = torch.device(DEVICE)
evaluator = BaseAlgorithm()
evaluator.test(model=qnn.model, dataloader=test_loader, device=eval_device)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.16it/s, acc1=68.9, acc5=88.7]


(68.88372955322265, 88.67466506958007)