In [1]:
import os
import sys

# Extend the import search path two directories up (presumably the repository
# root that holds the `trailmet` package imported later -- confirm).
# Guarded so that re-running this cell does not pile up duplicate entries.
_EXTRA_IMPORT_ROOT = "../../"
if _EXTRA_IMPORT_ROOT not in sys.path:
    sys.path.append(_EXTRA_IMPORT_ROOT)

In [2]:
import torch
from torchvision import datasets, transforms

# Select GPU index 7 as the current CUDA device for this kernel.
torch.cuda.set_device(7)

In [3]:
def build_imagenet_data(data_path: str = '', input_size: int = 224, batch_size: int = 64, workers: int = 4,
                        dist_sample: bool = False):
    """Create the training and validation data loaders for an ImageNet-style dataset.

    ``data_path`` must contain ``train`` and ``val`` sub-directories that
    ``torchvision.datasets.ImageFolder`` can read.

    Args:
        data_path: Root directory holding the ``train`` and ``val`` folders.
        input_size: Side length of the square crop produced by both pipelines.
        batch_size: Batch size used by both loaders.
        workers: Number of loader worker processes per loader.
        dist_sample: If True, both datasets are wrapped in a
            ``DistributedSampler``; otherwise the training loader shuffles
            on its own and the validation loader iterates sequentially.

    Returns:
        A ``(train_loader, val_loader)`` tuple of ``torch.utils.data.DataLoader``.
    """
    print('==> Using Imagenet Dataset')

    # Per-channel mean / std applied after ToTensor() in both pipelines.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Training: random crop + horizontal flip augmentation.
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # Validation: deterministic resize followed by a centre crop.
    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.ImageFolder(os.path.join(data_path, 'train'), train_transform)
    val_dataset = datasets.ImageFolder(os.path.join(data_path, 'val'), val_transform)

    if dist_sample:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        # DistributedSampler shuffles by default; evaluation should visit the
        # samples in a fixed order, matching the loader's shuffle=False below.
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False)
    else:
        train_sampler = None
        val_sampler = None

    # DataLoader rejects shuffle=True together with a sampler, so shuffling is
    # only requested when no sampler is in use.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
        num_workers=workers, pin_memory=True, sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=workers, pin_memory=True, sampler=val_sampler)
    return train_loader, val_loader

In [4]:
# Build the ImageNet loaders and key them by split name
dataloaders = dict(zip(
    ('train', 'val'),
    build_imagenet_data(data_path='/workspace/code/Akash/ImageNet'),
))

==> Using Imagenet Dataset


In [5]:
# import libraries
from trailmet.models import resnet
from trailmet.algorithms.quantize.brecq import BRECQ
from trailmet.algorithms.quantize.quantize import BaseQuantization

In [6]:
# load model
# Requests a pretrained 'resnet50' from trailmet's resnet module. The two
# positional integers are presumably the number of classes (1000) and the
# input resolution (224) -- TODO confirm against get_resnet_model's signature.
cnn = resnet.get_resnet_model('resnet50', 1000, 224, pretrained=True)

In [7]:
# Baseline: evaluate the unquantized (full-precision) model on the validation loader
eval_device = torch.device('cuda', 7)
bq = BaseQuantization()
bq.test(model=cnn, dataloader=dataloaders['val'], device=eval_device)

(80.53082833429231, 95.22303150693048, 0.0)

In [8]:
# Quantize the model with BRECQ (4-bit weights, 8-bit activations)
kwargs = dict(
    W_BITS=4,
    A_BITS=8,
    CHANNEL_WISE=True,
    ACT_QUANT=True,
    NUM_SAMPLES=1024,
    # Increase both iteration counts from 2000 to 20000 to achieve full
    # optimization potential.
    ITERS_W=2000,
    ITERS_A=2000,
    CALIB_BS=64,
    GPU_ID=7,
)
qnn = BRECQ(cnn, dataloaders, **kwargs)
qnn.compress_model()

==> Setting the first and the last layer to 8-bit
==> Initializing weight quantization parameters


Quantized accuracy before brecq: (55.30476949069481, 77.8277628010717, 0.0)
==> Starting weight calibration
Ignore reconstruction of layer conv1
Reconstruction for block 0
Total loss:	483.689 (rec:0.644, round:483.046)	b=13.25	count=1000
Total loss:	199.292 (rec:0.959, round:198.333)	b=2.00	count=2000
Reconstruction for block 1
Total loss:	509.780 (rec:40.379, round:469.401)	b=13.25	count=1000
Total loss:	254.552 (rec:37.905, round:216.647)	b=2.00	count=2000
Reconstruction for block 2
Total loss:	489.750 (rec:42.663, round:447.087)	b=13.25	count=1000
Total loss:	222.848 (rec:45.072, round:177.776)	b=2.00	count=2000
Reconstruction for block 0
Total loss:	2423.465 (rec:53.310, round:2370.155)	b=13.25	count=1000
Total loss:	964.340 (rec:54.799, round:909.542)	b=2.00	count=2000
Reconstruction for block 1
Total loss:	1784.104 (rec:65.987, round:1718.117)	b=13.25	count=1000
Total loss:	705.271 (rec:62.737, round:642.534)	b=2.00	count=2000
Reconstruction for block 2
Total loss:	1730.266 (rec:

Weight quantization accuracy: (80.07952153334884, 95.056438122251, 0.0)
Ignore reconstruction of layer conv1
Reconstruction for block 0
Total loss:	73.976 (rec:73.976, round:0.000)	b=0.00	count=1000
Total loss:	72.456 (rec:72.456, round:0.000)	b=0.00	count=2000
Reconstruction for block 1
Total loss:	62.294 (rec:62.294, round:0.000)	b=0.00	count=1000
Total loss:	61.987 (rec:61.987, round:0.000)	b=0.00	count=2000
Reconstruction for block 2
Total loss:	23.575 (rec:23.575, round:0.000)	b=0.00	count=1000
Total loss:	23.473 (rec:23.473, round:0.000)	b=0.00	count=2000
Reconstruction for block 0
Total loss:	89.514 (rec:89.514, round:0.000)	b=0.00	count=1000
Total loss:	92.396 (rec:92.396, round:0.000)	b=0.00	count=2000
Reconstruction for block 1
Total loss:	35.816 (rec:35.816, round:0.000)	b=0.00	count=1000
Total loss:	36.481 (rec:36.481, round:0.000)	b=0.00	count=2000
Reconstruction for block 2
Total loss:	17.994 (rec:17.994, round:0.000)	b=0.00	count=1000
Total loss:	26.201 (rec:26.201, roun

Full quantization (W4A8) accuracy: (80.08159381451594, 94.97354687556664, 0.0)


In [17]:
# Ask PyTorch's caching allocator to release GPU memory it holds but is not
# currently using, so other processes (and nvidia-smi) see it as free.
torch.cuda.empty_cache()

In [18]:
# Shell escape: print the driver's per-GPU utilisation and memory report.
!nvidia-smi

Fri Jul 15 19:23:23 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.119.04   Driver Version: 450.119.04   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   51C    P0   212W / 300W |  31301MiB / 32510MiB |     99%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   57C    P0   199W / 300W |    670MiB / 32510MiB |     93%      Default |
|       