In [1]:
import math
import sys

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet18,resnet50,vgg16
from tqdm import tqdm

# Must run before the project-local imports below so they resolve from the parent directory.
sys.path.insert(0,'..')
import datasets
import net_wrap
from quantization.bf16_quantize import bf16_quantize_model
from quantization.quantizer import quant_calib,EasyQuant


In [2]:
# Build the ImageNet loader generator and derive the two loaders used by the cells below.
# NOTE(review): the positional arguments (128, 256, 8) are passed straight to the project's
# ImageNetLoaderGenerator; presumably batch sizes / worker count — confirm against its signature.
g=datasets.ImageNetLoaderGenerator('/datasets/imagenet','imagenet',128,256,8)
# Loader iterated by test_classification.
test_loader=g.test_loader()
# Calibration loader requested with num=128; passed to quant_net in later cells.
calib_loader=g.calib_loader(num=128)

In [8]:
def get_net(name):
    """Build a torchvision classifier by name, move it to the GPU and put it in eval mode.

    Args:
        name: Architecture identifier; one of 'resnet18', 'resnet50' or 'vgg16'.

    Returns:
        The constructed model after ``.cuda()`` and ``.eval()`` have been applied.

    Raises:
        ValueError: If ``name`` is not one of the supported architectures.
    """
    # `True` is the first positional argument of the torchvision constructors
    # (the pretrained-weights flag in the torchvision API this notebook targets).
    if name=='resnet18':
        net=resnet18(True)
    elif name=='resnet50':
        net=resnet50(True)
    elif name=='vgg16':
        net=vgg16(True)
    else:
        # Without this branch an unknown name fell through and crashed later with
        # an UnboundLocalError on `net`, hiding the real cause.
        raise ValueError(
            f"unsupported network name: {name!r} "
            "(expected 'resnet18', 'resnet50' or 'vgg16')"
        )
    net=net.cuda()
    net=net.eval()
    return net

def quant_net(quant_method,net,calib_loader,rows,cols):
    """Wrap the modules of ``net.layer1`` into crossbar-mapped quantized modules.

    Args:
        quant_method: Quantization recipe name. Only
            'crossbarwise_easy_quant_debug' is configured here.
        net: Model exposing a ``layer1`` attribute; only that sub-module is wrapped.
        calib_loader: Calibration data loader. Currently unused because the
            ``quant_calib`` call below is disabled.
        rows: Crossbar row count forwarded to ``net_wrap.wrap_modules_to_crossbar``.
        cols: Crossbar column count forwarded to ``net_wrap.wrap_modules_to_crossbar``.

    Returns:
        A ``(wrapped_modules, net)`` tuple: the value returned by
        ``net_wrap.wrap_modules_to_crossbar`` and the model that was passed in.

    Raises:
        ValueError: If ``quant_method`` is not a configured recipe.
    """
    if quant_method!='crossbarwise_easy_quant_debug':
        # Previously an unknown method left layer_quantizer / quantizer_kwargs
        # unbound and failed later with an UnboundLocalError.
        raise ValueError(
            f"unsupported quant_method: {quant_method!r} "
            "(expected 'crossbarwise_easy_quant_debug')"
        )

    layer_quantizer=EasyQuant
    quantizer_kwargs={'a_bit':8,'w_bit':8,'a_channel_wise':False,'w_channel_wise':False,'output_quant':False,'eq_n':50}

    wrapped_modules=net_wrap.wrap_modules_to_crossbar(net.layer1,rows,cols,layer_quantizer,quantizer_kwargs,fuse_bn=True)
    # Calibration is left disabled on this debug path:
    # quant_calib(net,wrapped_modules,calib_loader)

    return wrapped_modules,net

In [4]:
def test_classification(net,loader=None):
    """Measure top-1 classification accuracy of ``net`` over a data loader.

    Args:
        net: Callable model. It is invoked as ``net(inp)`` on inputs moved to
            CUDA, and its output must support ``argmax(1)``.
        loader: Iterable yielding ``(input, target)`` batches. Defaults to the
            notebook-level ``test_loader`` so existing one-argument calls keep working.

    Returns:
        The fraction of correctly classified samples as a float, or ``nan``
        when the loader yields no samples. The value is also printed.
    """
    if loader is None:
        loader=test_loader
    pos=0
    tot=0
    with torch.no_grad():
        q=tqdm(loader)
        for inp,target in q:
            inp=inp.cuda()
            target=target.cuda()
            out=net(inp)
            # Count predictions whose arg-max class equals the label.
            pos_num=torch.sum(out.argmax(1)==target).item()
            pos+=pos_num
            tot+=inp.size(0)
            # Running accuracy shown on the progress bar.
            q.set_postfix({"acc":pos/tot})
    # An empty loader used to raise ZeroDivisionError here.
    acc=pos/tot if tot else float('nan')
    print(acc)
    return acc


In [9]:
# Crossbar-wise EasyQuant debug run on ResNet-18.
net=get_net('resnet18')
# rows=64*9 (=576) and cols=64; the log printed by this cell reports each wrapped conv
# mapped to 1 crossbar (R=576 C=64). quant_net returns a (wrapped_modules, net) tuple.
wrapped_modules,net=quant_net('crossbarwise_easy_quant_debug',net,calib_loader,64*9,64)
test_classification(net)
        

Map CrossbarWiseQuantMappedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) to 1 crossbars (R=576 C=64)
Map CrossbarWiseQuantMappedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) to 1 crossbars (R=576 C=64)
Map CrossbarWiseQuantMappedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) to 1 crossbars (R=576 C=64)
Map CrossbarWiseQuantMappedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) to 1 crossbars (R=576 C=64)
Layer Fuse: 0.bn1->0.conv1; 0.bn2->0.conv2; 1.bn1->1.conv1; 1.bn2->1.conv2; 


  4%|▍         | 8/196 [00:04<01:56,  1.62it/s, acc=0.801]


KeyboardInterrupt: 

In [5]:
# NOTE(review): stale cell. The quant_net visible in this notebook takes
# (quant_method, net, calib_loader, rows, cols), only configures
# 'crossbarwise_easy_quant_debug', and returns a (wrapped_modules, net) tuple, so this
# three-argument call cannot run against it. The saved output presumably came from an
# earlier definition — TODO restore that configuration or remove the cell.
net=get_net('resnet18')
net=quant_net('channelwise_easy_quant',net,calib_loader)
test_classification(net)

Layer Fuse: bn1->conv1; layer1.0.bn1->layer1.0.conv1; layer1.0.bn2->layer1.0.conv2; layer1.1.bn1->layer1.1.conv1; layer1.1.bn2->layer1.1.conv2; layer2.0.bn1->layer2.0.conv1; layer2.0.bn2->layer2.0.conv2; layer2.0.downsample.1->layer2.0.downsample.0; layer2.1.bn1->layer2.1.conv1; layer2.1.bn2->layer2.1.conv2; layer3.0.bn1->layer3.0.conv1; layer3.0.bn2->layer3.0.conv2; layer3.0.downsample.1->layer3.0.downsample.0; layer3.1.bn1->layer3.1.conv1; layer3.1.bn2->layer3.1.conv2; layer4.0.bn1->layer4.0.conv1; layer4.0.bn2->layer4.0.conv2; layer4.0.downsample.1->layer4.0.downsample.0; layer4.1.bn1->layer4.1.conv1; layer4.1.bn2->layer4.1.conv2; 
prepare calibration for ['conv1', 'layer1.0.conv1', 'layer1.0.conv2', 'layer1.1.conv1', 'layer1.1.conv2', 'layer2.0.conv1', 'layer2.0.conv2', 'layer2.0.downsample.0', 'layer2.1.conv1', 'layer2.1.conv2', 'layer3.0.conv1', 'layer3.0.conv2', 'layer3.0.downsample.0', 'layer3.1.conv1', 'layer3.1.conv2', 'layer4.0.conv1', 'layer4.0.conv2', 'layer4.0.downsample.

In [6]:
# NOTE(review): stale cell. The quant_net visible in this notebook requires rows and cols,
# only configures 'crossbarwise_easy_quant_debug', and returns a (wrapped_modules, net)
# tuple, so this call cannot run against it. The saved output presumably came from an
# earlier definition — TODO restore that configuration or remove the cell.
net=get_net('resnet18')
net=quant_net('channelwise_easy_quant_inq',net,calib_loader)
test_classification(net)

Layer Fuse: bn1->conv1; layer1.0.bn1->layer1.0.conv1; layer1.0.bn2->layer1.0.conv2; layer1.1.bn1->layer1.1.conv1; layer1.1.bn2->layer1.1.conv2; layer2.0.bn1->layer2.0.conv1; layer2.0.bn2->layer2.0.conv2; layer2.0.downsample.1->layer2.0.downsample.0; layer2.1.bn1->layer2.1.conv1; layer2.1.bn2->layer2.1.conv2; layer3.0.bn1->layer3.0.conv1; layer3.0.bn2->layer3.0.conv2; layer3.0.downsample.1->layer3.0.downsample.0; layer3.1.bn1->layer3.1.conv1; layer3.1.bn2->layer3.1.conv2; layer4.0.bn1->layer4.0.conv1; layer4.0.bn2->layer4.0.conv2; layer4.0.downsample.1->layer4.0.downsample.0; layer4.1.bn1->layer4.1.conv1; layer4.1.bn2->layer4.1.conv2; 
prepare calibration for ['conv1', 'layer1.0.conv1', 'layer1.0.conv2', 'layer1.1.conv1', 'layer1.1.conv2', 'layer2.0.conv1', 'layer2.0.conv2', 'layer2.0.downsample.0', 'layer2.1.conv1', 'layer2.1.conv2', 'layer3.0.conv1', 'layer3.0.conv2', 'layer3.0.downsample.0', 'layer3.1.conv1', 'layer3.1.conv2', 'layer4.0.conv1', 'layer4.0.conv2', 'layer4.0.downsample.

In [7]:
# NOTE(review): stale cell. The quant_net visible in this notebook requires rows and cols,
# only configures 'crossbarwise_easy_quant_debug', and returns a (wrapped_modules, net)
# tuple, so this call cannot run against it. The saved output presumably came from an
# earlier definition — TODO restore that configuration or remove the cell.
net=get_net('resnet18')
net=quant_net('layerwise_easy_quant_inq',net,calib_loader)
test_classification(net)

Layer Fuse: bn1->conv1; layer1.0.bn1->layer1.0.conv1; layer1.0.bn2->layer1.0.conv2; layer1.1.bn1->layer1.1.conv1; layer1.1.bn2->layer1.1.conv2; layer2.0.bn1->layer2.0.conv1; layer2.0.bn2->layer2.0.conv2; layer2.0.downsample.1->layer2.0.downsample.0; layer2.1.bn1->layer2.1.conv1; layer2.1.bn2->layer2.1.conv2; layer3.0.bn1->layer3.0.conv1; layer3.0.bn2->layer3.0.conv2; layer3.0.downsample.1->layer3.0.downsample.0; layer3.1.bn1->layer3.1.conv1; layer3.1.bn2->layer3.1.conv2; layer4.0.bn1->layer4.0.conv1; layer4.0.bn2->layer4.0.conv2; layer4.0.downsample.1->layer4.0.downsample.0; layer4.1.bn1->layer4.1.conv1; layer4.1.bn2->layer4.1.conv2; 
prepare calibration for ['conv1', 'layer1.0.conv1', 'layer1.0.conv2', 'layer1.1.conv1', 'layer1.1.conv2', 'layer2.0.conv1', 'layer2.0.conv2', 'layer2.0.downsample.0', 'layer2.1.conv1', 'layer2.1.conv2', 'layer3.0.conv1', 'layer3.0.conv2', 'layer3.0.downsample.0', 'layer3.1.conv1', 'layer3.1.conv2', 'layer4.0.conv1', 'layer4.0.conv2', 'layer4.0.downsample.

In [6]:
# NOTE(review): stale cell. The quant_net visible in this notebook requires rows and cols,
# only configures 'crossbarwise_easy_quant_debug', and returns a (wrapped_modules, net)
# tuple, so this call cannot run against it. The saved output presumably came from an
# earlier definition — TODO restore that configuration or remove the cell.
net=get_net('resnet18')
net=quant_net('layerwise_easy_quant',net,calib_loader)
test_classification(net)

Layer Fuse: bn1->conv1; layer1.0.bn1->layer1.0.conv1; layer1.0.bn2->layer1.0.conv2; layer1.1.bn1->layer1.1.conv1; layer1.1.bn2->layer1.1.conv2; layer2.0.bn1->layer2.0.conv1; layer2.0.bn2->layer2.0.conv2; layer2.0.downsample.1->layer2.0.downsample.0; layer2.1.bn1->layer2.1.conv1; layer2.1.bn2->layer2.1.conv2; layer3.0.bn1->layer3.0.conv1; layer3.0.bn2->layer3.0.conv2; layer3.0.downsample.1->layer3.0.downsample.0; layer3.1.bn1->layer3.1.conv1; layer3.1.bn2->layer3.1.conv2; layer4.0.bn1->layer4.0.conv1; layer4.0.bn2->layer4.0.conv2; layer4.0.downsample.1->layer4.0.downsample.0; layer4.1.bn1->layer4.1.conv1; layer4.1.bn2->layer4.1.conv2; 
prepare calibration for ['conv1', 'layer1.0.conv1', 'layer1.0.conv2', 'layer1.1.conv1', 'layer1.1.conv2', 'layer2.0.conv1', 'layer2.0.conv2', 'layer2.0.downsample.0', 'layer2.1.conv1', 'layer2.1.conv2', 'layer3.0.conv1', 'layer3.0.conv2', 'layer3.0.downsample.0', 'layer3.1.conv1', 'layer3.1.conv2', 'layer4.0.conv1', 'layer4.0.conv2', 'layer4.0.downsample.

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Start calibration step=2
Set weight_interval=tensor([0.0029], device='cuda:0')
Set weight_interval=tensor([0.0027], device='cuda:0')
Set weight_interval=tensor([0.0065], device='cuda:0')
Set weight_interval=tensor([0.0022], device='cuda:0')
Set weight_interval=tensor([0.0074], device='cuda:0')
Set weight_interval=tensor([0.0014], device='cuda:0')
Set weight_interval=tensor([0.0045], device='cuda:0')
Set weight_interval=tensor([0.0053], device='cuda:0')
Set weight_interval=tensor([0.0022], device='cuda:0')
Set weight_interval=tensor([0.0055], device='cuda:0')
Set weight_interval=tensor([0.0015], device='cuda:0')
Set weight_interval=tensor([0.0029], device='cuda:0')
Set weight_interval=tensor([0.0028], device='cuda:0')
Set weight_interval=tensor([0.0017], device='cuda:0')
Set weight_interval=tensor([0.0048], device='cuda:0')
Set weight_interval=tensor([0.0012], device='cuda:0')
Set weight_interval=tensor([0.0045], device='cuda:0')
Set weight_interval=tensor([0.0067], device='cuda:0')
Set

 48%|████▊     | 95/196 [00:31<00:22,  4.46it/s, acc=0.741]

In [12]:
# NOTE(review): `net_name` and `quantize_model` are not defined or imported in any cell
# visible here (the import cell only brings in `bf16_quantize_model`), so this cell depends
# on hidden kernel state. The saved output lists ResNet-style layer names
# (layer1.0 ... layer4.1), so net_name was presumably a ResNet variant — TODO confirm.
net=get_net(net_name)
quantize_model('channelwise_weightonly_bf16',net)
test_classification(net)

  0%|          | 0/196 [00:00<?, ?it/s]fuse BN ['bn1', 'layer1.0.bn1', 'layer1.0.bn2', 'layer1.1.bn1', 'layer1.1.bn2', 'layer2.0.bn1', 'layer2.0.bn2', 'layer2.0.downsample.1', 'layer2.1.bn1', 'layer2.1.bn2', 'layer3.0.bn1', 'layer3.0.bn2', 'layer3.0.downsample.1', 'layer3.1.bn1', 'layer3.1.bn2', 'layer4.0.bn1', 'layer4.0.bn2', 'layer4.0.downsample.1', 'layer4.1.bn1', 'layer4.1.bn2']
quantize_layers ['conv1', 'layer1.0.conv1', 'layer1.0.conv2', 'layer1.1.conv1', 'layer1.1.conv2', 'layer2.0.conv1', 'layer2.0.conv2', 'layer2.0.downsample.0', 'layer2.1.conv1', 'layer2.1.conv2', 'layer3.0.conv1', 'layer3.0.conv2', 'layer3.0.downsample.0', 'layer3.1.conv1', 'layer3.1.conv2', 'layer4.0.conv1', 'layer4.0.conv2', 'layer4.0.downsample.0', 'layer4.1.conv1', 'layer4.1.conv2']
100%|██████████| 196/196 [00:58<00:00,  3.35it/s, acc=0.695]0.69462



In [15]:
# Baseline: evaluate VGG-16 as returned by get_net; no quantization function is called in this cell.
net=get_net('vgg16')
test_classification(net)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /home/yzh/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=553433881.0), HTML(value='')))


  0%|          | 0/196 [00:00<?, ?it/s]fuse BN []
100%|██████████| 196/196 [01:28<00:00,  2.20it/s, acc=0.716]0.71592



In [16]:
# VGG-16 evaluated after applying the 'bf16' quantization mode.
# NOTE(review): `quantize_model` is not defined or imported in any cell visible here
# (the import cell only brings in `bf16_quantize_model`) — TODO confirm where it comes from.
net=get_net('vgg16')
quantize_model('bf16',net)
test_classification(net)

  0%|          | 0/196 [00:00<?, ?it/s]fuse BN []
quantize features.0
quantize features.2
quantize features.5
quantize features.7
quantize features.10
quantize features.12
quantize features.14
quantize features.17
quantize features.19
quantize features.21
quantize features.24
quantize features.26
quantize features.28
100%|██████████| 196/196 [02:11<00:00,  1.49it/s, acc=0.716]0.71568



In [17]:
# VGG-16 evaluated after applying the 'channelwise_bf16' quantization mode.
# NOTE(review): `quantize_model` is not defined or imported in any cell visible here
# (the import cell only brings in `bf16_quantize_model`) — TODO confirm where it comes from.
net=get_net('vgg16')
quantize_model('channelwise_bf16',net)
test_classification(net)


  0%|          | 0/196 [00:00<?, ?it/s]fuse BN []
transform weight features.0 exponent tensor([-0., -1., -0., -1., -0., -0., -0.,  1., -1., -1., -1., -1., -0.,  1.,
        -0., -2., -2., -1., -0., -0., -1., -1., -2., -2., -1., -1., -1., -1.,
        -0., -1., -2., -1., -2., -1., -0., -2., -1., -1., -1., -2., -3., -2.,
        -0., -0., -0., -2., -2., -2., -2., -1., -1., -1., -1., -2., -1., -0.,
        -1., -0., -0., -0., -1., -0., -1., -0., -0., -1., -1., -1., -1., -1.,
        -1., -1., -1., -2., -1., -1., -1., -2., -2., -1., -1., -2., -1., -2.,
        -1., -1., -1., -1., -1., -1., -1., -2., -2., -1., -2., -2., -0., -0.,
        -1., -0., -0., -1., -0.,  1., -1., -2., -2., -2., -1., -0., -1., -1.,
        -2., -3., -1., -1., -1., -1., -0., -1., -1., -0., -1., -2., -1., -2.,
        -0., -0., -1., -2., -2., -2., -2., -1., -2., -2., -1., -2., -1., -1.,
        -1., -1., -1., -2., -2., -1., -1., -1., -2., -2., -2., -3., -2., -0.,
         1., -1., -2., -2., -2., -0., -0., -0., -5., -3

In [18]:
# VGG-16 evaluated after applying the 'channelwise_weightonly_bf16' quantization mode.
# NOTE(review): `quantize_model` is not defined or imported in any cell visible here
# (the import cell only brings in `bf16_quantize_model`) — TODO confirm where it comes from.
net=get_net('vgg16')
quantize_model('channelwise_weightonly_bf16',net)
test_classification(net)


., -2., -3., -3., -3., -3.,
        -3., -2., -2., -3., -3., -3., -3., -2., -3., -3., -3., -3., -3., -2.,
        -2., -2., -2., -3., -2., -2., -3., -2., -3., -2., -3., -2., -3., -3.,
        -3., -3., -2., -3., -2., -3., -3., -2., -1., -2., -3., -3., -3., -2.,
        -3., -2., -2., -3., -3., -2., -3., -3., -3., -2., -3., -3., -3., -3.,
        -3., -3., -3., -2., -2., -3., -3., -3., -2., -2., -2., -2., -3., -3.,
        -2., -3., -2., -3., -2., -3., -3., -3., -3., -2., -3., -3., -3., -2.,
        -3., -2., -3., -3., -2., -3., -3., -3., -3., -3., -3., -3., -2., -2.,
        -2., -3., -3., -3., -3., -3., -3., -2., -2., -3., -3., -3., -3., -3.,
        -2., -3., -3., -3., -3., -3., -3., -3., -1., -3., -3., -3., -3., -2.,
        -1., -2., -3., -2., -3., -2., -3., -3., -3., -3., -3., -2., -2., -3.,
        -3., -3., -2., -3., -2., -3., -3., -1., -3., -3., -2., -3., -3., -3.,
        -2., -2., -3., -3.], device='cuda:0')
transform weight features.14 exponent tensor([-2., -2., -2., -3., -3

In [13]:
# VGG-16 evaluated after applying the 'layerwise_bf16' quantization mode.
# NOTE(review): `quantize_model` is not defined or imported in any cell visible here
# (the import cell only brings in `bf16_quantize_model`) — TODO confirm where it comes from.
net=get_net('vgg16')
quantize_model('layerwise_bf16',net)
test_classification(net)


  0%|          | 0/196 [00:00<?, ?it/s]fuse BN []
quantize_layers ['features.0', 'features.2', 'features.5', 'features.7', 'features.10', 'features.12', 'features.14', 'features.17', 'features.19', 'features.21', 'features.24', 'features.26', 'features.28']
100%|██████████| 196/196 [02:21<00:00,  1.38it/s, acc=0.714]0.71446

