In [11]:
import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
from tqdm import tqdm 
from pathlib import Path
import os
import copy
import torchvision.models.quantization as models

In [2]:
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [3]:
trainset = datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=16, pin_memory=True)

testset = datasets.CIFAR10(root='./data', train=False,
                                        download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                          shuffle=True, num_workers=16, pin_memory= True)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
model = models.resnet18(pretrained = True).to('cpu')



In [22]:
def is_quantized_model(model):
    # Iterate through all modules in the model
    for module in model.modules():
        # Check if the module belongs to the quantized namespace
        if isinstance(module, (torch.nn.quantized.Conv2d, torch.nn.quantized.Linear)):
            return True
    return False

In [23]:
print("Is the model quantized? ", is_quantized_model(model))

Is the model quantized?  False


In [5]:
def train(model, dataloader):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    model.train()
    for epoch in range(5):  # loop over the dataset multiple times
        running_loss = 0
        for i, data in enumerate(dataloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        running_loss /= len(dataloader.dataset) / inputs.size(0)
        print(f'epochs :{epoch}, loss :{running_loss}')
train(model, testloader)


epochs :0, loss :0.6033951090693473
epochs :1, loss :0.14049217935204505
epochs :2, loss :0.059439822801947595
epochs :3, loss :0.029434860871732235
epochs :4, loss :0.01681124524921179


In [44]:
def test(model, dataloader):
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    return 100 * correct / total

In [7]:
def slice_dataloader(dataloader, start, end):
    sliced_data = []
    current_index = 0
    for inputs, labels in dataloader:
        batch_size = inputs.size(0)
        if current_index + batch_size > start:
            # Find the start index within the current batch
            start_idx = max(start - current_index, 0)
            # Find the end index within the current batch
            end_idx = min(end - current_index, batch_size)
            sliced_inputs = inputs[start_idx:end_idx]
            sliced_labels = labels[start_idx:end_idx]
            sliced_data.append((sliced_inputs, sliced_labels))
            if current_index + batch_size >= end:
                break
        current_index += batch_size
    return sliced_data

# Example usage: Slice the first 150 images from the DataLoader
sliced_data = slice_dataloader(trainloader, start=0, end=150)

In [9]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp_delme.p")
    print('size (KB) :',os.path.getsize("temp_delme.p")/1e3)
    os.remove('temp_delme.p')

In [8]:
for inputs, labels in sliced_data:
    print(f"Batch size: {inputs.size(0)}")

Batch size: 64
Batch size: 64
Batch size: 22


In [10]:
score = test(model, sliced_data)
print('Accuracy of fp32 on the test images: {}% '.format(score))

print_size_of_model(model)

Accuracy of fp32 on the test images: 90.66666666666667% 
size (KB) : 46836.408


In [24]:
print("Is the model quantized? ", is_quantized_model(model))

Is the model quantized?  False


In [30]:
quant_model = copy.deepcopy(model)

state_dict = model.state_dict()
quant_model.load_state_dict(state_dict)

<All keys matched successfully>

In [31]:
print("Is the model quantized? ", is_quantized_model(quant_model))

Is the model quantized?  False


In [32]:
quant_model.qconfig = torch.quantization.default_qconfig
torch.quantization.prepare(quant_model, inplace=True) # Inserting Observers'

QuantizableResNet(
  (conv1): Conv2d(
    3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (bn1): BatchNorm2d(
    64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): QuantizableBasicBlock(
      (conv1): Conv2d(
        64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
        (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
      )
      (bn1): BatchNorm2d(
        64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
        (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
      )
      (relu): ReLU()
      (conv2): Conv2d(
        64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)

In [33]:
print("Is the model quantized? ", is_quantized_model(quant_model))

Is the model quantized?  False


In [34]:
caliberate_data = slice_dataloader(trainloader, start=160, end=400)

In [35]:
for inputs, labels in caliberate_data:
    print(f"Batch size: {inputs.size(0)}")

Batch size: 32
Batch size: 64
Batch size: 64
Batch size: 64
Batch size: 16


In [36]:
test(quant_model, caliberate_data)

90.41666666666667

In [37]:
print("Is the model quantized? ", is_quantized_model(quant_model))

Is the model quantized?  False


In [38]:
torch.quantization.convert(quant_model, inplace=True)

QuantizableResNet(
  (conv1): QuantizedConv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=0.12996891140937805, zero_point=64, padding=(3, 3), bias=False)
  (bn1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): QuantizableBasicBlock(
      (conv1): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.12118503451347351, zero_point=78, padding=(1, 1), bias=False)
      (bn1): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
      (conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.07170393317937851, zero_point=78, padding=(1, 1), bias=False)
      (bn2): QuantizedBatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (add_relu): QFunctional(
        scale=0.038576602935791016, zer

In [39]:
print("Is the model quantized? ", is_quantized_model(quant_model))

Is the model quantized?  True


In [40]:
final_testing = slice_dataloader(trainloader, start=400, end=700)
for inputs, labels in final_testing:
    print(f"Batch size: {inputs.size(0)}")

Batch size: 48
Batch size: 64
Batch size: 64
Batch size: 64
Batch size: 60


In [45]:
score = test(quant_model, final_testing)
print('Accuracy : {}% - INT8'.format(score))

print_size_of_model(quant_model)

Accuracy : 87.66666666666667% - INT8
size (KB) : 11829.916
