In [1]:
# Notebook-wide settings; edit these before running the remaining cells.
BATCH_SIZE = 64                                  # batch size handed to the data-loader builder
DATA_PATH = "/workspace/code/Akash/ImageNet"     # directory passed as data_path to the loader builder
DEVICE = "cuda:2"                                # torch device string used for evaluation/quantization

In [2]:
import os
import sys
import torch
from torchvision import transforms as transforms
from torchvision import datasets as datasets
# Put the directory two levels up on the import path
# (presumably the repository root that holds the `trailmet` package -- confirm).
sys.path.append("../../")
# Select the active CUDA device from the parsed device string. Parsing with
# torch.device (rather than reading only the last character of DEVICE) keeps
# multi-digit ids such as 'cuda:10' correct; a non-CUDA or index-less string
# still fails with a ValueError, as before.
torch.cuda.set_device(torch.device(DEVICE))

In [4]:
def build_imagenet_data(data_path: str = '', input_size: int = 224, batch_size: int = 128, workers: int = 4,
                        dist_sample: bool = False):
    """Create the training and validation ``DataLoader`` pair for an ImageFolder layout.

    Images are read from ``<data_path>/train`` and ``<data_path>/val``.

    Args:
        data_path: Directory that contains the ``train`` and ``val`` sub-folders.
        input_size: Size given to ``RandomResizedCrop`` (train) and ``CenterCrop`` (val).
        batch_size: Batch size of the training loader; the validation loader uses
            twice this value.
        workers: ``num_workers`` for both loaders.
        dist_sample: When True, both datasets get a ``DistributedSampler``;
            otherwise no sampler is used and the training loader shuffles.

    Returns:
        Tuple ``(train_loader, val_loader)``.
    """
    train_root, val_root = (os.path.join(data_path, split) for split in ('train', 'val'))

    # Fixed per-channel mean/std shared by both pipelines.
    channel_norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225])

    # The accimage backend remains disabled: torchvision.set_image_backend('accimage')
    train_pipeline = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        channel_norm,
    ])
    train_dataset = datasets.ImageFolder(train_root, train_pipeline)

    eval_pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        channel_norm,
    ])
    val_dataset = datasets.ImageFolder(val_root, eval_pipeline)

    # Samplers stay None unless distributed sampling was requested.
    train_sampler = val_sampler = None
    if dist_sample:
        make_sampler = torch.utils.data.distributed.DistributedSampler
        train_sampler = make_sampler(train_dataset)
        val_sampler = make_sampler(val_dataset)

    # Options common to both loaders.
    loader_opts = dict(num_workers=workers, pin_memory=True)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=train_sampler is None,  # shuffle only when no sampler is supplied
        sampler=train_sampler,
        **loader_opts,
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=2 * batch_size,
        shuffle=False,
        sampler=val_sampler,
        **loader_opts,
    )
    print('==> Using Imagenet Dataset')

    return train_loader, val_loader

In [5]:
# Build both loaders once and expose them under the 'train' / 'val' keys.
trainloader, valloader = build_imagenet_data(batch_size=BATCH_SIZE, data_path=DATA_PATH)
dataloaders = dict(train=trainloader, val=valloader)

==> Using Imagenet Dataset


In [6]:
# import libraries
from trailmet.models import resnet
from trailmet.algorithms.quantize.lapq import LAPQ

In [7]:
# load model
# Build a pretrained ResNet-50 through trailmet's resnet helper.
# NOTE(review): the positional 1000 and 224 are presumably the number of classes
# and the input resolution -- confirm against get_resnet_model's signature.
cnn = resnet.get_resnet_model('resnet50', 1000, 224, pretrained=True)

In [8]:
# test model
# Baseline check: evaluate the unquantized `cnn` on the validation loader, on DEVICE,
# before any compression is applied. The value returned by test() is displayed as the
# cell output; the progress bar labels the two reported numbers acc1 and acc5.
from trailmet.algorithms.algorithms import BaseAlgorithm
BaseAlgorithm().test(model=cnn, dataloader=dataloaders['val'], device=torch.device(DEVICE))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [02:54<00:00,  2.16it/s, acc1=80.6, acc5=95.2]


(80.57368279009346, 95.23171871600164)

In [9]:
# quantize model
# Take the GPU index from the parsed device string rather than from the last
# character of DEVICE, so multi-digit ids such as 'cuda:10' are not truncated.
_gpu_id = torch.device(DEVICE).index
if _gpu_id is None:
    # An explicit integer id is passed to LAPQ below; fail early and clearly without one.
    raise ValueError(f"DEVICE must include a device index (e.g. 'cuda:0'), got {DEVICE!r}")

kwargs = {
    'W_BITS': 8,       # bit-widths; the run log reports this setting as "W8A8"
    'A_BITS': 8,
    'ACT_QUANT': True,
    # 1024 // BATCH_SIZE calibration batches (presumably ~1024 calibration images -- confirm);
    # clamped so a BATCH_SIZE above 1024 does not request zero batches.
    'CALIB_BATCHES': max(1, 1024 // BATCH_SIZE),
    # NOTE(review): MAX_ITER / MAX_FEV look like limits for the Powell optimization
    # step reported in the log -- confirm against the LAPQ implementation.
    'MAX_ITER': 100,
    'MAX_FEV': 100,
    'VERBOSE': True,
    'PRINT_FREQ': 20,
    'GPU_ID': _gpu_id,
    'SEED': 42,
}
# LAPQ receives the full {'train', 'val'} loader dict plus the settings above.
qnn = LAPQ(cnn, dataloaders, **kwargs)
qnn.compress_model()

==> Using seed: 42 and device: cuda:2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [05:22<00:00,  1.17it/s, acc1=79.6, acc5=94.8]


==> Quantization (W8A8) accuracy before LAPQ: 79.5825 | 94.7696


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [02:59<00:00, 17.93s/it, loss=1.47, p_val=4]


==> using p intr : 2.26


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [05:29<00:00,  1.15it/s, acc1=78.6, acc5=94.5, loss=1.49]


==> Quantization (W8A8) accuracy before Optimization: 78.5940 | 94.4722
==> Loss after LpNorm Quantization: 1.4926
==> Starting Powell Optimization


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [13:17<00:00,  7.97s/it, curr_loss=1.47, min_loss=1.47]


==> Layer-wise Scales :
 [ 1.08419957  0.66990249  3.11701354 23.20393628  2.29466365  5.01725674
  0.88589263  2.71039438 13.73392868  1.71813965  1.08358109  4.9899826
 13.23023605  0.99806833  0.23113976  1.4500705  21.30290604  1.43157232
  0.60028028  0.52513981  2.32189894 11.73145962  0.63942772  0.90473968
  2.75055075 11.44539928  0.41463852  0.57082433  1.68877172 18.08906937
  0.8276599   0.3363277   1.09530067 23.4847126   0.85443014  0.61002809
  0.82910395  1.98843825 14.02664661  0.42910361  0.84472984  2.2858727
 13.89950275  0.38925895  0.81029177  1.74581611 27.58506393  0.51952708
  0.53956091  1.79805577 29.66171265  0.35620919  0.51285464  1.15933418
 18.59424782  0.53665894  0.21605009  1.12820017 20.25750923  0.93485618
  0.27749813  0.37784204  1.60273576 15.03666401  0.41211852  0.68783522]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [04:09<00:00,  1.51it/s, acc1=79.3, acc5=94.9]


==> Full quantization (W8A8) accuracy: (79.28890613788636, 94.85007988805796)


In [10]:
# test quantized model
# Re-evaluate on the same validation loader and device, this time using `qnn.model`
# after compress_model() has run, so the result can be compared with the baseline cell.
# The value returned by test() is displayed as the cell output.
from trailmet.algorithms.algorithms import BaseAlgorithm
BaseAlgorithm().test(model=qnn.model, dataloader=dataloaders['val'], device=torch.device(DEVICE))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [04:12<00:00,  1.49it/s, acc1=79.3, acc5=94.9]


(79.28890613788636, 94.85007988805796)