<a href="https://colab.research.google.com/github/meghbhalerao/nrlpq/blob/main/NLRPQ_quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch.nn as nn
import torch
import torchvision
import numpy as np
from torch.utils.data import Subset
import torchvision.transforms as transforms
import torch
import torch.nn as nn
from typing import Type, Any, Callable, Union, List, Optional
import os
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
from torchvision import models
import torch.nn.functional as F
import copy

Mounted at /content/gdrive


In [3]:
num_classes = 10

In [4]:
#os.listdir("/content/gdrive/MyDrive/Imagenet1k_R50_sh1_mh0_ns3_ff2048/")

In [5]:
nesting_start=3

class BlurPoolConv2d(torch.nn.Module):
    def __init__(self, conv):
        super().__init__()
        default_filter = torch.tensor([[[[1, 2, 1], [2, 4, 2], [1, 2, 1]]]]) / 16.0
        filt = default_filter.repeat(conv.in_channels, 1, 1, 1)
        self.conv = conv
        self.register_buffer('blur_filter', filt)

    def forward(self, x):
        blurred = F.conv2d(x, self.blur_filter, stride=1, padding=(1, 1),
                           groups=self.conv.in_channels, bias=None)
        return self.conv.forward(blurred)

class Model():
    def __init__(self, gpu, nesting, single_head, fixed_feature, use_blurpool):
        super().__init__()
        self.gpu = gpu
        self.nesting = nesting
        self.sh = single_head
        self.ff = fixed_feature
        self.use_blurpool = use_blurpool


    def load_model(self, model, model_weights_disk, modify_keys = True):
        if os.path.isfile(model_weights_disk):
            print("=> loading checkpoint '{}'".format(model_weights_disk))
            if self.gpu is None:
                checkpoint = torch.load(model_weights_disk)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(self.gpu)
                checkpoint = torch.load(model_weights_disk, map_location=loc)
            if modify_keys:
              checkpoint = self.change_str_dict(checkpoint)
            


            try:
              model.load_state_dict(checkpoint)
            except:
              proxy_layer = nn.Linear(2048, num_classes)
              print("randomly init last fc layer")
              checkpoint["fc.bias"] = proxy_layer.bias
              checkpoint["fc.weight"] = proxy_layer.weight
              print(checkpoint.keys())
              model.load_state_dict(checkpoint)

            print("=> loaded checkpoint '{}' "
                  .format(model_weights_disk))
        else:
            print("=> no model found at '{}'".format(model_weights_disk))

        return model


    def change_str_dict(self, x):
      x_ = {}
      for key, val in x.items():
        key_new = str(key).replace("module.","")
        x_[key_new] = copy.copy(val)
      return x_

    def initModel(self):
        print("Model init: nesting=%d, sh=%d, ff=%d" %(self.nesting, self.sh, self.ff))
        model = models.resnet50(pretrained=True)
        nesting_list = [2**i for i in range(nesting_start, 12)] if self.nesting else None

        # Nesting/Fixed Feature Modification code block
        if self.nesting:
            ff= "Single Head" if self.sh else "Multi Head"
            print("Using Nesting of type - {}".format(ff))
            print("Nesting Starts from {}".format(2**nesting_start))
            if self.sh:
                model.fc =  SingleHeadNestedLinear(nesting_list, num_classes=num_classes)
            else:
                model.fc =  MultiHeadNestedLinear(nesting_list, num_classes=num_classes)
        elif self.ff != 2048:
            print(f"Using Fixed Features = {self.ff}")
            model.fc =  FixedFeatureLayer(self.ff, num_classes)

        def apply_blurpool(mod: torch.nn.Module):
            for (name, child) in mod.named_children():
                if isinstance(child, torch.nn.Conv2d) and (np.max(child.stride) > 1 and child.in_channels >= 16):
                    setattr(mod, name, BlurPoolConv2d(child))
                else: apply_blurpool(child)
        if self.use_blurpool: apply_blurpool(model)

        model = model.to(memory_format=torch.channels_last)
        model = model.to(self.gpu)

        return model


import torch
import torch.nn as nn
from typing import Type, Any, Callable, Union, List, Optional

class SingleHeadNestedLinear(nn.Linear):
	def __init__(self, nesting_list: List, num_classes=num_classes, **kwargs):
		super(SingleHeadNestedLinear, self).__init__(nesting_list[-1], num_classes, **kwargs)
		self.nesting_list=nesting_list
		self.num_classes=num_classes # Number of classes for classification

	def forward(self, x):
		nesting_logits = ()
		for i, num_feat in enumerate(self.nesting_list):
			if not (self.bias is None):
				logit = torch.matmul(x[:, :num_feat], (self.weight[:, :num_feat]).t()) + self.bias
			else:
				logit = torch.matmul(x[:, :num_feat], (self.weight[:, :num_feat]).t())
			nesting_logits+= (logit,)
		return nesting_logits

class MultiHeadNestedLinear(nn.Module):
	def __init__(self, nesting_list: List, num_classes=num_classes, **kwargs):
		super(MultiHeadNestedLinear, self).__init__()
		self.nesting_list=nesting_list
		self.num_classes=num_classes # Number of classes for classification
		for i, num_feat in enumerate(self.nesting_list):
			setattr(self, f"nesting_classifier_{i}", nn.Linear(num_feat, self.num_classes, **kwargs))		

	def forward(self, x):
		nesting_logits = ()
		for i, num_feat in enumerate(self.nesting_list):
			nesting_logits +=  (getattr(self, f"nesting_classifier_{i}")(x[:, :num_feat]),)
		return nesting_logits

		
class FixedFeatureLayer(nn.Linear):
    # This layer just takes the first "K" Features for the classification. 
    # Creating a separate layer and customized fwd pass helps to not change the base codes at all.
    def __init__(self, in_features, out_features, **kwargs):
        super(FixedFeatureLayer, self).__init__(in_features, out_features, **kwargs)

    def forward(self, x):
        if not (self.bias is None):
            out = torch.matmul(x[:, :self.in_features], self.weight.t()) + self.bias
        else:
            out = torch.matmul(x[:, :self.in_features], self.weight.t())
        return out

class NestedCELoss(nn.Module):
	def __init__(self, **kwargs):
		super(NestedCELoss, self).__init__()
		self.criterion = nn.CrossEntropyLoss(**kwargs)
	def forward(self, output, target):
		loss=0
		for o in output:
			loss+= self.criterion(o, target)

		return loss

In [6]:

def training_loop(model, criterion, optimizer, train_loader, valid_loader, epochs, device):
    train_losses = []
    valid_losses = []

    for epoch in range(0, epochs):
        # training
        print(f'EPOCH:{epoch}')
        print('Computing Train Loss..')
        model, optimizer, train_loss = train(train_loader, model, criterion, optimizer, device)
      
        print(train_loss)
        train_losses.append(train_loss)

        # validation
        with torch.no_grad():
            print('Computing Validation Loss..')
            model, valid_loss = validate(valid_loader, model, criterion, device)
            print(valid_loss)
            valid_losses.append(valid_loss)
        print('--------------------------------')
  
    return model, train_losses, valid_losses

def train(train_loader, model, criterion, optimizer, device):
    model.train()
    running_loss = 0
    for X, y in train_loader:
        optimizer.zero_grad()
        X = X.to(device)
        y = y.to(device)
        pred = model(X) 
        loss = criterion(pred, y) 
        running_loss += loss.item() * X.size(0)
        loss.backward()
        optimizer.step()
      #  print(loss.cpu().data.item())
        
    epoch_loss = running_loss / len(train_loader.dataset)
    return model, optimizer, epoch_loss

def validate(valid_loader, model, criterion, device):
    model.eval()
    running_loss = 0
    correct_count, all_count = 0, 0
    
    for X, y in valid_loader:
        X = X.to(device)
        y = y.to(device)
        pred = model(X) 
        
        loss = criterion(pred, y) 
        running_loss += loss.item() * X.size(0)

    epoch_loss = running_loss / len(valid_loader.dataset)
    return model, epoch_loss

def get_accuracy(model,data_loader,rep_no, device):
  correct_count, all_count = 0, 0
  model.to(device)
  for images,labels in data_loader:
    images,labels = images.to(device), labels.to(device)
    images = images.to(device)
    true_label = labels.to(device)
    pred = model(images) 
    pred = pred[rep_no]
    pred_label = torch.argmax(pred, dim=1)
    correct_count += torch.eq(true_label, pred_label).sum().item()
    all_count +=len(true_label)
  return correct_count/all_count

def unabstract_model(model):
  for no in range(2,5):
    l1 = getattr(getattr(getattr(getattr(getattr(model,f'layer{no}'),'0'),'downsample'),'0'),'conv')
    l2 = getattr(getattr(getattr(getattr(model,f'layer{no}'),'0'),'downsample'),'1')
    setattr(getattr(getattr(model,f'layer{no}'),'0'),'downsample',nn.Sequential(l1,l2))

    conv_layer = getattr(getattr(getattr(getattr(model,f'layer{no}'),'0'),'conv2'),'conv')
    setattr(getattr(getattr(model,f'layer{no}'),'0'),'conv2',conv_layer)

class QuantizedModel(nn.Module):
    def __init__(self, model_fp32):
        super(QuantizedModel, self).__init__()
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()
        self.model_fp32 = model_fp32

    def forward(self, x):
        x = self.quant(x)
        x = self.model_fp32(x)
        x = self.dequant(x)
        return x

def set_random_seeds(random_seed=0):
    import random
    torch.manual_seed(random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)
    random.seed(random_seed)

In [7]:
mdl_wts = os.path.join("/content/gdrive/MyDrive/nrlpq/Imagenet1k_R50_sh1_mh0_ns3_ff2048/final_weights.pt")
model_wts_path = os.path.join(mdl_wts)
nesting = 1
single_head = 1
fixed_feature = 2048
model = Model(0, nesting, single_head, fixed_feature, use_blurpool=1)
set_random_seeds(random_seed=0)
model_init = model.initModel()
model = model.load_model(model_init, model_wts_path)
print("Loaded pretrained model: " + str(model_wts_path))
unabstract_model(model)
batch_size = 128
print("batch size is", batch_size)
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset_all = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
n_alltrain = len(trainset_all)
print("len of all train data is", n_alltrain)

n_train = int(n_alltrain * 0.8)
n_val = n_alltrain - n_train
print("len of train val split is ", n_train, n_val, "respectively")

val_idxs = np.random.choice(n_alltrain, size = n_val ,replace=False)

trainset = Subset(trainset_all, list(set(range(len(trainset_all))) -  set(val_idxs)))
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

valset = Subset(trainset_all, val_idxs)
val_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)


print("length of train, val and test DataLoader is ", len(train_loader), len(val_loader), len(test_loader))

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = NestedCELoss()
epochs = 1
device = torch.device('cuda')

import torch.quantization

#quantized_model = torch.quantization.quantize_dynamic(model,dtype=torch.float16)

model,_,_ = training_loop(model, criterion, optimizer, train_loader, val_loader, epochs, device)


res = get_accuracy(model, test_loader,3, device)
print(res)

Model init: nesting=1, sh=1, ff=2048
Using Nesting of type - Single Head
Nesting Starts from 8
=> loading checkpoint '/content/gdrive/MyDrive/nrlpq/Imagenet1k_R50_sh1_mh0_ns3_ff2048/final_weights.pt'
randomly init last fc layer
dict_keys(['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.bn1.num_batches_tracked', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.bn2.num_batches_tracked', 'layer1.0.conv3.weight', 'layer1.0.bn3.weight', 'layer1.0.bn3.bias', 'layer1.0.bn3.running_mean', 'layer1.0.bn3.running_var', 'layer1.0.bn3.num_batches_tracked', 'layer1.0.downsample.0.weight', 'layer1.0.downsample.1.weight', 'layer1.0.downsample.1.bias', 'layer1.0.downsample.1.running_mean', 'layer1.0.downsample.1.running_var'

In [8]:
#layers = [["conv1", "bn1"], ["conv2", "bn2"], ["conv3", "bn3", 'relu']]

In [9]:
model.eval()
fused_model = copy.deepcopy(model)
fused_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [10]:
quantized_model = QuantizedModel(model_fp32=fused_model)
quantization_config = torch.quantization.get_default_qconfig("fbgemm")
quantized_model.qconfig = quantization_config
torch.quantization.prepare(quantized_model, inplace=True)
quantized_model = quantized_model.cpu()
quantized_model = torch.quantization.convert(quantized_model, inplace=True)
quantized_model.eval()
print('Finished Quanization!')

  reduce_range will be deprecated in a future release of PyTorch."
  Returning default scale and zero point "


Finished Quanization!


In [16]:
def calibrate_model(model, loader, device=torch.device("cpu:0")):

    model.to(device)
    model.eval()

    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        _ = model(inputs)

In [225]:
#get_accuracy(model, test_loader,8, device)

0.7475

In [17]:
#get_accuracy(quantized_model, test_loader,8, device=torch.device("cpu:0"))

calibrate_model(quantized_model, train_loader, device=torch.device("cpu:0"))

NotImplementedError: ignored

In [232]:
#quantized_model

In [184]:
# import time
# #time.start()
# get_accuracy(quantized_model, test_loader,0, device)


In [13]:
def get_accuracy(model,data_loader,rep_no):
  correct_count, all_count = 0, 0
  for images,labels in data_loader:
    images,labels = images, labels
    images = images
    true_label = labels
    pred = model(images) 
    pred = pred[rep_no]
    pred_label = torch.argmax(pred, dim=1)
    correct_count += torch.eq(true_label, pred_label).sum().item()
    all_count +=len(true_label)
  return correct_count/all_count

In [15]:
get_accuracy(quantized_model, test_loader,0)

NotImplementedError: ignored

In [186]:
# relu_block = getattr(model,'relu')
# setattr(model,'relu1',relu_block)
# model

In [187]:

#layers = [["conv1", "bn1","conv2", "bn2","conv3", "bn3", 'relu']]
#layers = [["conv1", "bn1", "relu"], ["conv2", "bn2"]]

In [188]:
#fused_model

In [189]:
#fused_model

In [190]:
# model.to(device)
# fused_model = copy.deepcopy(model)
# model.eval()
# fused_model.eval()
# fused_model = torch.quantization.fuse_modules(fused_model, [["conv1", "bn1", "relu"]], inplace=False)


In [191]:


# fused_model = torch.quantization.fuse_modules(fused_model, [["conv1", "bn1", "relu"]], inplace=True)
# for module_name, module in fused_model.named_children():
#     if "layer" in module_name:
#         if module_name=='layer3':
#           break
#         for basic_block_name, basic_block in module.named_children():
#             torch.quantization.fuse_modules(basic_block, layers, inplace=True)
            # for sub_block_name, sub_block in basic_block.named_children():
            #     if sub_block_name == "downsample":
            #         torch.quantization.fuse_modules(sub_block, [["0", "1"]], inplace=True)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [192]:


#get_accuracy(quantized_model, test_loader,5, device)

  reduce_range will be deprecated in a future release of PyTorch."
  Returning default scale and zero point "


QuantizedModel(
  (quant): Quantize(scale=tensor([1.]), zero_point=tensor([0]), dtype=torch.quint8)
  (dequant): DeQuantize()
  (model_fp32): ResNet(
    (conv1): QuantizedConv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=1.0, zero_point=0, padding=(3, 3), bias=False)
    (bn1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): QuantizedConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0, bias=False)
        (bn1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=1.0, zero_point=0, padding=(1, 1), bias=False)
        (bn2): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      

0.7995

In [203]:
quantized_model

QuantizedModel(
  (quant): Quantize(scale=tensor([1.]), zero_point=tensor([0]), dtype=torch.quint8)
  (dequant): DeQuantize()
  (model_fp32): ResNet(
    (conv1): QuantizedConv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=1.0, zero_point=0, padding=(3, 3), bias=False)
    (bn1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): QuantizedConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0, bias=False)
        (bn1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=1.0, zero_point=0, padding=(1, 1), bias=False)
        (bn2): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      

NotImplementedError: ignored

In [109]:
# def model_equivalence(model_1, model_2, device, rtol=1e-05, atol=1e-08, num_tests=100, input_size=(1,3,32,32)):

#     model_1.to(device)
#     model_2.to(device)

#     for _ in range(num_tests):
#         x = torch.rand(size=input_size).to(device)
#         y1 = model_1(x)[8].detach().cpu().numpy()
#         y2 = model_2(x)[8].detach().cpu().numpy()
#         if np.allclose(a=y1, b=y2, rtol=rtol, atol=atol, equal_nan=False) == False:
#             print("Model equivalence test sample failed: ")
#             print(y1)
#             print(y2)
#             return False

#     return True

# assert model_equivalence(model_1=model, model_2=fused_model, device=device, rtol=1e-03, atol=1e-06, num_tests=100, input_size=(128,3,32,32))

Model equivalence test sample failed: 
[[-1.3497736  -1.9306124   0.809975   ... -1.540997   -2.0340204
  -1.6975423 ]
 [ 0.35886928 -2.5413606   1.459985   ... -0.74503696 -0.8438016
  -2.3565965 ]
 [-0.76082635 -2.4551046   1.4766833  ... -1.074823   -1.7633088
  -2.3552608 ]
 ...
 [-0.03687507 -2.4396904   1.4542462  ... -0.99596477 -1.1657438
  -2.3178477 ]
 [-0.87402976 -2.1000443   1.2193356  ... -1.2010355  -1.7229868
  -1.9780126 ]
 [ 0.11102991 -2.0676823   1.0886701  ... -1.0281341  -0.77912706
  -1.9582287 ]]
[[-282.73883   -22.689774 -114.752914 ...  -86.11989  -180.01921
  -173.47017 ]
 [-271.13123   -29.372564 -112.232864 ...  -85.584785 -173.45412
  -186.00728 ]
 [-278.8739    -21.408386 -122.76468  ...  -91.03527  -187.70753
  -168.32854 ]
 ...
 [-286.6249    -17.682814 -126.0453   ...  -88.72858  -187.74008
  -160.5738  ]
 [-284.482     -32.275623 -124.43947  ...  -87.67357  -184.02524
  -196.73003 ]
 [-286.13937   -23.976547 -124.62892  ...  -91.41022  -185.43211
  -1

AssertionError: ignored