In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from timeit import default_timer as timer
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.models as models
import torch.nn.utils.prune as prune
from help import helper_functions
import os
import copy
import torchsummary

In [2]:
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [3]:
trainset = datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=16, pin_memory=True)

testset = datasets.CIFAR10(root='./data', train=False,
                                        download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                          shuffle=True, num_workers=16, pin_memory= True)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [5]:
model = models.resnet18(pretrained= True)

In [6]:
model.load_state_dict(torch.load('resnet_18_state_dict.pth', map_location=device))

<All keys matched successfully>

In [7]:
helper_functions.test(model, testloader, device)

Accuracy of the network on the 10000 test images: 9 %
 Evaluation time :208.727


In [10]:
for_caliberation = helper_functions.slice_dataloader(testloader, 0, 1000)

In [11]:
model = torch.nn.Sequential(torch.quantization.QuantStub(), model, torch.quantization.DeQuantStub())

In [12]:
model.qconfig = torch.quantization.default_qconfig
model = torch.quantization.prepare(model, inplace=True)

In [13]:
helper_functions.caliberate(model, for_caliberation, device)

In [14]:
torch.quantization.convert(model, inplace=True)

Sequential(
  (0): Quantize(scale=tensor([0.0157]), zero_point=tensor([64]), dtype=torch.quint8)
  (1): ResNet(
    (conv1): QuantizedConv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=0.072036512196064, zero_point=59, padding=(3, 3), bias=False)
    (bn1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): QuantizedReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.1452377438545227, zero_point=70, padding=(1, 1), bias=False)
        (bn1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): QuantizedReLU(inplace=True)
        (conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.10540495067834854, zero_point=70, padding=(1, 1), bias=False)
        (bn2): QuantizedBatchNo

In [15]:
helper_functions.test(model, testloader, device)

RuntimeError: Could not run 'aten::add_.Tensor' with arguments from the 'QuantizedCPU' backend. 'aten::add_.Tensor' is only available for these backends: [CPU, MkldnnCPU, SparseCPU, Named, Autograd, Profiler, Tracer].

In [16]:
helper_functions.print_size_of_model(model)

size (KB) : 11814.097
