## Implementing Resnet from Scratch to Add custom number of layers 

Resnet has two main components BasicBlock and make layers

### Basic Block

In [1]:
import torch.nn as nn
import torch
import torch.optim as optim
from torch import Tensor
from typing import Type
import torchvision
import torchvision.transforms as transforms
import time
import copy
import os
import numpy as np

In [2]:
class CustomBasicBlock(nn.Module):
    def __init__(self, inplanes, filters, stride=1, downsample=None):
        super().__init__()
        
        self.First_Conv = nn.Conv2d(inplanes, filters, kernel_size=3, stride=stride,
                     padding=1, bias=False)
        self.Batch_Norm_1 = nn.BatchNorm2d(filters)
        self.Relu1 = nn.ReLU(inplace=True)
        self.Second_Conv = nn.Conv2d(filters, filters, kernel_size=3, stride=1,
                     padding=1, bias=False)
        self.Batch_Norm_2 = nn.BatchNorm2d(filters)
        
        self.downsample = downsample
        self.stride = stride
        
        # Normal addition is replaced with Skip addition for quantization purposes
        self.skip_addition = nn.quantized.FloatFunctional()
        
        self.Relu2 = nn.ReLU(inplace=True)

    def forward(self, x):
        identity = x
        
        # 2 Convolutional layers are stacked together
        
        z = self.First_Conv(x)
        z = self.Batch_Norm_1(z)
        z = self.Relu1(z)

        z = self.Second_Conv(z)
        z = self.Batch_Norm_2(z)

        # Downsample the input if the size of the input and the output is different
        
        if self.downsample is not None:
            identity = self.downsample(x)

        #Skip connection (output = F(x) + x) 
        
        z = self.skip_addition.add(identity, z)
        z = self.Relu2(z)

        return z

### Make Layer Block

In [3]:
def make_layers(block_type, inplanes, filters, num_blocks, stride =1):
    downsample = None
    
    if stride != 1 or inplanes != filters:
        downsample = nn.Sequential(
        nn.Conv2d(inplanes, filters, 1, stride, bias=False),
        nn.BatchNorm2s(filters),
        )
        
    layers_list = []
    layers_list.append(block_type(inplanes,filters, stride, downsample))
    inplanes = filters
    for _ in range(1, blocks):
        layers.append(block_type(inplanes,filters))
    return nn.Sequential(*layers_list)

We finally implement the Custom Resnet Class using the components defined above

### Custom Resnet Class

In [4]:
class ResNetCustom(nn.Module):
    def __init__(self, block_type , layers_list, num_classes):
        
        super().__init__()

        self.inplanes = 64
        
        self.First_Conv = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.Batch_Norm_1 = nn.BatchNorm2d(self.inplanes)
        self.Relu = nn.ReLU(inplace=True)
        self.MaxPool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.Layer_1 = self.make_layers(block_type, 64, layers_list[0])
        self.Layer_2 = self.make_layers(block_type, 128, layers_list[1], stride=2)
        self.Layer_3 = self.make_layers(block_type, 256, layers_list[2], stride=2)
        self.Layer_4 = self.make_layers(block_type, 512, layers_list[3], stride=2)

        self.AvgPool = nn.AdaptiveAvgPool2d((1, 1))
        
        self.Fully_Connected_1 = nn.Linear(512, num_classes)
        
    def make_layers(self, block_type, filters, num_blocks, stride =1):
        downsample = None
    
        if stride != 1 or self.inplanes != filters:
            downsample = nn.Sequential(
            nn.Conv2d(self.inplanes, filters, 1, stride, bias=False),
            nn.BatchNorm2d(filters),
            )
        
        layers = []
        layers.append(block_type(self.inplanes,filters, stride, downsample))
        self.inplanes = filters
        for _ in range(1, num_blocks):
            layers.append(block_type(self.inplanes,filters))
            
        return nn.Sequential(*layers)
        
        
    def forward(self, x):
        # First Conv Layer
        
        z = self.First_Conv(x)
        z = self.Batch_Norm_1(z)
        z = self.Relu(z)
        z = self.MaxPool(z)
        
        # Collection of Basic Blocks which implements skip connection and each basic block has 2 convolutional layers
        
        z = self.Layer_1(z)          
        z = self.Layer_2(z)         
        z = self.Layer_3(z)         
        z = self.Layer_4(z)
    
        # Here we have an average pooling layer + flattening + fully connected layer
        
        z = self.AvgPool(z)         
        z = torch.flatten(z, 1)     
        z = self.Fully_Connected_1(z)
        
        
        return z

Passing a Random Tensor to our Custom Resnet Model

In [5]:
tensor = torch.rand([1, 3, 224, 224])
layers=[2, 2, 4, 2]
model = ResNetCustom(CustomBasicBlock, layers, 10)
print(model)
    
    # Total parameters and trainable parameters.
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")
output = model(tensor)

ResNetCustom(
  (First_Conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (Batch_Norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (Relu): ReLU(inplace=True)
  (MaxPool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (Layer_1): Sequential(
    (0): CustomBasicBlock(
      (First_Conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (Batch_Norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (Relu1): ReLU(inplace=True)
      (Second_Conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (Batch_Norm_2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (skip_addition): FloatFunctional(
        (activation_post_process): Identity()
      )
      (Relu2): ReLU(inplace=True)
    )
    (1): CustomBasicBlock(
      (First_Conv): Conv2d(64,

### Resnet 18

In [6]:
layers=[2, 2, 2, 2]
resnet18 = ResNetCustom(CustomBasicBlock, layers, 10)

### Resnet 22

In [7]:
layers=[2, 2, 4, 2]
resnet22 = ResNetCustom(CustomBasicBlock, layers, 10)

### Resnet 26

In [8]:
layers=[2, 4, 4, 2]
resnet26 = ResNetCustom(CustomBasicBlock, layers, 10)

### Resnet 30

In [9]:
layers=[3, 4, 4, 3]
resnet30 = ResNetCustom(CustomBasicBlock, layers, 10)

### Resnet 34

In [10]:
layers=[3, 4, 6, 3]
resnet34 = ResNetCustom(CustomBasicBlock, layers, 10)

### Setting up the dataloader for the CIFAR-10 dataset

In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=256, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


### Training code

In [12]:
model = resnet22
model = model.to(device)

In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr = 0.01,momentum = 0.9, weight_decay = 5e-4)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

In [14]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


In [15]:
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [16]:

def train(train_loader, model, criterion, optimizer, epoch):
    """
        Run one train epoch
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda()
        input_var = input.cuda()
        target_var = target
        

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        output = output.float()
        loss = loss.float()
        # measure accuracy and record loss
        prec1 = accuracy(output.data, target)[0]
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1))

### Test code

In [17]:
def validate(val_loader, model, criterion, device):
    """
    Run evaluation
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            target = target.to(device)
            input_var = input.to(device)
            target_var = target.to(device)

            

            # compute output
            output = model(input_var)
            loss = criterion(output, target_var)

            output = output.float()
            loss = loss.float()

            # measure accuracy and record loss
            prec1 = accuracy(output.data, target)[0]
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time, loss=losses,
                          top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'
          .format(top1=top1))

    return top1.avg

In [18]:
print_freq = 50
best_prec1 = 0
for epoch in range(0, 10):

        # train for one epoch
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        train(trainloader, model, criterion, optimizer, epoch)
        lr_scheduler.step()

        # evaluate on validation set
        prec1 = validate(testloader, model, criterion, device)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

current lr 1.00000e-02
Epoch: [0][0/196]	Time 1.365 (1.365)	Data 0.146 (0.146)	Loss 2.4358 (2.4358)	Prec@1 14.844 (14.844)
Epoch: [0][50/196]	Time 0.054 (0.061)	Data 0.032 (0.017)	Loss 1.7824 (1.9029)	Prec@1 39.062 (30.653)
Epoch: [0][100/196]	Time 0.019 (0.049)	Data 0.001 (0.018)	Loss 1.4561 (1.7797)	Prec@1 43.359 (34.704)
Epoch: [0][150/196]	Time 0.025 (0.045)	Data 0.001 (0.017)	Loss 1.5983 (1.7025)	Prec@1 42.578 (37.779)
Test: [0/100]	Time 0.118 (0.118)	Loss 1.4243 (1.4243)	Prec@1 51.000 (51.000)
Test: [50/100]	Time 0.005 (0.012)	Loss 1.5195 (1.5123)	Prec@1 50.000 (47.922)
 * Prec@1 47.260
current lr 9.99753e-03
Epoch: [1][0/196]	Time 0.179 (0.179)	Data 0.152 (0.152)	Loss 1.4110 (1.4110)	Prec@1 50.391 (50.391)
Epoch: [1][50/196]	Time 0.021 (0.036)	Data 0.002 (0.015)	Loss 1.2553 (1.3732)	Prec@1 56.250 (49.602)
Epoch: [1][100/196]	Time 0.019 (0.037)	Data 0.001 (0.017)	Loss 1.1820 (1.3346)	Prec@1 61.719 (51.245)
Epoch: [1][150/196]	Time 0.019 (0.038)	Data 0.001 (0.018)	Loss 1.1013 (1.3

### Inserting qunatization operators in the model

In [52]:
class ModelQuantization(nn.Module):
    def __init__(self,model):
        super(ModelQuantization,self).__init__()
        
        # Inserting a quantization operator before the input
        self.quantstub = torch.quantization.QuantStub()
        
        # Inserting a dequantization operator after the output
        self.dequantstub = torch.quantization.DeQuantStub()
        
        # Original floating point model
        self.model = model

    def forward(self, x):
        # Floating values to integer values
        z = self.quantstub(x)
        
        z = self.model(z)
        
        # Integer values to floating values
        z = self.dequantstub(z)
        return z

### Fusing the Conv + Batch Norm Layers for correct quantization

In [53]:
cpu_dev = torch.device('cpu:0')
model.to(cpu_dev)
# Deep copy of the model for layer fusion
fused_model = copy.deepcopy(model)

model.eval()
# Swithcing model in evaluation mode before layer fusion
fused_model.eval()
fused_model = torch.quantization.fuse_modules(fused_model, [["First_Conv", "Batch_Norm_1", "Relu"]], inplace=True)
for module_name, module in fused_model.named_children():
        if "layer" in module_name:
            for basic_block_name, basic_block in module.named_children():
                torch.quantization.fuse_modules(basic_block, [["First_Conv", "Batch_Norm_1", "Relu1"], ["Second_Conv", "Batch_Norm_2"]], inplace=True)
                for sub_block_name, sub_block in basic_block.named_children():
                    if sub_block_name == "downsample":
                        torch.quantization.fuse_modules(sub_block, [["0", "1"]], inplace=True)


In [54]:
def model_equivalence(model_1, model_2, device, rtol=1e-05, atol=1e-08, num_tests=100, input_size=(1,3,32,32)):

    model_1.to(device)
    model_2.to(device)

    for _ in range(num_tests):
        x = torch.rand(size=input_size).to(device)
        y1 = model_1(x).detach().cpu().numpy()
        y2 = model_2(x).detach().cpu().numpy()
        if np.allclose(a=y1, b=y2, rtol=rtol, atol=atol, equal_nan=False) == False:
            print("Model equivalence test sample failed: ")
            print(y1)
            print(y2)
            return False

    return True


In [55]:
assert model_equivalence(model_1=model, model_2=fused_model, device=cpu_dev, rtol=1e-03, atol=1e-06, num_tests=100, input_size=(1,3,32,32)), "Fused model is not equivalent to the original model!"


### Setting up the model for quantization by inserting quantization operators and model calibration

In [56]:
# Inserting qunatization operators on the fused model
quantized_model = ModelQuantization(model=fused_model)
  
# Selecting quantization schemes on the model
backend = 'fbgemm' # This backend is used when we want to quantize the model to work on Android devices
#backend = 'fbgemm' # This backend is used to convert for X86 devices
quantization_config = torch.quantization.get_default_qconfig(backend)
   
quantized_model.qconfig = quantization_config
    
# Print quantization configurations
print(quantized_model.qconfig)

# Preparing model for calibration
torch.quantization.prepare(quantized_model, inplace=True)



QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})


ModelQuantization(
  (quantstub): QuantStub(
    (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
  )
  (dequantstub): DeQuantStub()
  (model): ResNetCustom(
    (First_Conv): ConvReLU2d(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (1): ReLU(inplace=True)
      (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
    )
    (Batch_Norm_1): Identity()
    (Relu): Identity()
    (MaxPool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (Layer_1): Sequential(
      (0): CustomBasicBlock(
        (First_Conv): Conv2d(
          64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
        )
        (Batch_Norm_1): BatchNorm2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
 

### Calibration of the values of the quantization equation

In [57]:
def model_calibration(model, data, device):
    
    model = model.to(device)
    model.eval()
    
    for input_values, target_values in data:
        input_values,target_values = input_values.to(device), target_values.to(device)
        
        _ = model(input_values)

In [58]:
model_calibration(quantized_model, trainloader, cpu_dev)

### Finally we quantize the model

In [59]:
# We finally qunatize the model
quant_model_final = torch.quantization.convert(quantized_model, inplace=True)

    
# We put the model in evaluation mode
quant_model_final.eval()

# Print quantized model.
print(quant_model_final)

ModelQuantization(
  (quantstub): Quantize(scale=tensor([0.0408]), zero_point=tensor([60]), dtype=torch.quint8)
  (dequantstub): DeQuantize()
  (model): ResNetCustom(
    (First_Conv): QuantizedConvReLU2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=0.0534542053937912, zero_point=0, padding=(3, 3))
    (Batch_Norm_1): Identity()
    (Relu): Identity()
    (MaxPool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (Layer_1): Sequential(
      (0): CustomBasicBlock(
        (First_Conv): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.10369772464036942, zero_point=73, padding=(1, 1), bias=False)
        (Batch_Norm_1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (Relu1): ReLU(inplace=True)
        (Second_Conv): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.052992481738328934, zero_point=72, padding=(1, 1), bias=False)
        (Batch_Norm_2): QuantizedBatchNorm2d

### Evaluating the floating point model and quantized model

In [60]:
floating_point_prec = validate(testloader, model, criterion, cpu_dev)
integer_point_prec = validate(testloader, quant_model_final, criterion, cpu_dev)

print("The original model accuracy is ", floating_point_prec)
print("Quantized model accuracy is", integer_point_prec)

Test: [0/100]	Time 0.250 (0.250)	Loss 0.7609 (0.7609)	Prec@1 73.000 (73.000)
Test: [50/100]	Time 0.027 (0.032)	Loss 0.6579 (0.7720)	Prec@1 77.000 (73.725)
 * Prec@1 73.690
Test: [0/100]	Time 0.192 (0.192)	Loss 0.7480 (0.7480)	Prec@1 73.000 (73.000)
Test: [50/100]	Time 0.013 (0.017)	Loss 0.6548 (0.7713)	Prec@1 76.000 (73.647)
 * Prec@1 73.590
The original model accuracy is  73.69
Quantized model accuracy is 73.59


### Measure Inference Latency

In [61]:
def measure_inference_latency(model,
                              device,
                              input_size=(1, 3, 32, 32),
                              num_samples=100,
                              num_warmups=10):

    model.to(device)
    model.eval()

    x = torch.rand(size=input_size).to(device)

    with torch.no_grad():
        for _ in range(num_warmups):
            _ = model(x)
    torch.cuda.synchronize()

    with torch.no_grad():
        start_time = time.time()
        for _ in range(num_samples):
            _ = model(x)
            torch.cuda.synchronize()
        end_time = time.time()
    elapsed_time = end_time - start_time
    elapsed_time_ave = elapsed_time / num_samples

    return elapsed_time_ave

In [66]:
inpp = torch.rand(size=(1, 3, 32, 32))
with torch.autograd.profiler.profile() as prof:
    output = model(inpp)
print(prof.key_averages().table(sort_by="self_cpu_time_total"))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
       aten::_slow_conv2d_forward        46.14%       3.722ms        49.00%       3.953ms     171.870us            23  
          aten::native_batch_norm        13.97%       1.127ms        17.55%       1.416ms      59.000us            24  
         aten::mkldnn_convolution         5.89%     475.000us         6.25%     504.000us     504.000us             1  
                      aten::relu_         5.84%     471.000us         7.72%     623.000us      29.667us            21  
                      aten::empty         4.71%     380.000us         4.71%     380.000us       1.979us           192  
                aten::convolution       

STAGE:2023-03-25 12:15:26 31746:31746 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-03-25 12:15:26 31746:31746 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-03-25 12:15:26 31746:31746 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [69]:
inpp = torch.rand(size=(1, 3, 32, 32))
with torch.autograd.profiler.profile() as prof:
    output = quant_model_final(inpp)
print(prof.key_averages().table(sort_by="self_cpu_time_total"))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                quantized::conv2d        69.73%       6.350ms        70.93%       6.459ms     280.826us            23  
          quantized::batch_norm2d         7.06%     643.000us        11.86%       1.080ms      46.957us            23  
           quantized::conv2d_relu         4.55%     414.000us         5.67%     516.000us     516.000us             1  
    aten::_empty_affine_quantized         3.33%     303.000us         3.33%     303.000us       4.967us            61  
                   quantized::add         3.06%     279.000us         3.42%     311.000us      31.100us            10  
                      aten::relu_       

STAGE:2023-03-25 12:16:29 31746:31746 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-03-25 12:16:29 31746:31746 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-03-25 12:16:29 31746:31746 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [63]:
measure_inference_latency(quant_model_final, device = cpu_dev)*1000

7.464544773101807

In [64]:
measure_inference_latency(model, device = cpu_dev)*1000

3.8365817070007324