In [1]:
import os
import gc
import cv2
import copy
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms

#Pytorch Quantization
import torch.quantization

# Utils
from tqdm import tqdm
from collections import defaultdict

# Model Import
from ResNet20 import resnet20, quantized_resnet20

In [2]:
CONFIG = dict(
    seed = 42,
    train_batch_size = 128,
    valid_batch_size = 256,
    num_calibration_batches = 32,
    num_classes = 10,
    device = torch.device("cpu")
)

In [3]:
MODEL_PATHS = 'ResNet20 final.bin'

In [4]:
def set_seed(seed = 42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(CONFIG['seed'])

In [5]:
train_loader = torch.utils.data.DataLoader(
        torchvision.datasets.CIFAR10(root='./data', train=True, transform=transforms.Compose([
            transforms.Pad(4),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]), download=True),
        batch_size=CONFIG['train_batch_size'], shuffle=True,
        num_workers=2)

Files already downloaded and verified


In [6]:
validation_loader = torch.utils.data.DataLoader(
        torchvision.datasets.CIFAR10(root='./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])),
        batch_size=CONFIG['valid_batch_size'], shuffle=False,
        num_workers=2)

In [7]:
def criterion(outputs, labels):
    loss = nn.CrossEntropyLoss()
    return loss(outputs, labels)

In [8]:
@torch.no_grad()
def valid_fn(model, dataloader, device, neval_batches):
    model.eval()
    
    dataset_size = 0
    running_loss = 0.0
    correct = 0.0
    PREDS = []
    count = 0
    
    bar = tqdm(enumerate(dataloader), total=len(dataloader), ncols=100)
    for step, data in bar:        
        inputs, targets = data
        inputs = inputs.to(device)
        targets = targets.to(device)
            
        # рачсет вывода
        output = model(inputs)
        loss = criterion(output, targets)
        
        _, preds = output.max(1)
        correct += preds.eq(targets).sum()

        batch_size = inputs.size(0)
        
        count += 1
        
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        sum_loss = running_loss / dataset_size
        
        sum_score = correct.cpu().detach().numpy() / dataset_size
        
        bar.set_postfix({'Valid_Loss':sum_loss, 'Valid_Score':sum_score})
        PREDS.append(output.view(-1).cpu().detach().numpy()) 
        if count >= neval_batches:
            PREDS = np.concatenate(PREDS)
            return sum_loss, sum_score, PREDS
        
    PREDS = np.concatenate(PREDS)
    
    gc.collect()
    return sum_loss, sum_score, PREDS

In [9]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

In [10]:
def performance_inference(model, dataloader, device):
    
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
    
    start = time.time()
    history = defaultdict(list)
    
    start = time.time()
        
    val_loss, val_score, preds = valid_fn(model, dataloader, CONFIG['device'], CONFIG['valid_batch_size'])
    
    end = time.time()
    
    history['Valid Loss'].append(val_loss)
    history['Valid Score'].append(val_score)
    
    time_elapsed = end - start
    print('Validation complete in {:.0f}ms'.format(
        time_elapsed * 1000))
    print("Validation Loss: {:.4f}".format(val_loss))
    print("Validation Score: {:.4f}".format(val_score))
    
    
    return model, history

In [11]:
modules_to_fuse = [['conv', 'bn'],
                   ['layer1.0.conv1', 'layer1.0.bn1'],
                   ['layer1.0.conv2', 'layer1.0.bn2'],
                   ['layer1.1.conv1', 'layer1.1.bn1'],
                   ['layer1.1.conv2', 'layer1.1.bn2'],
                   ['layer1.2.conv1', 'layer1.2.bn1'],
                   ['layer1.2.conv2', 'layer1.2.bn2'],
                   
                   ['layer2.0.conv1', 'layer2.0.bn1'],
                   ['layer2.0.conv2', 'layer2.0.bn2'],
                   ['layer2.0.downsample.0', 'layer2.0.downsample.1'],
                   ['layer2.1.conv1', 'layer2.1.bn1'],
                   ['layer2.1.conv2', 'layer2.1.bn2'],
                   ['layer2.2.conv1', 'layer2.2.bn1'],
                   ['layer2.2.conv2', 'layer2.2.bn2'],
                   
                   ['layer3.0.conv1', 'layer3.0.bn1'],
                   ['layer3.0.conv2', 'layer3.0.bn2'],
                   ['layer3.0.downsample.0', 'layer3.0.downsample.1'],
                   ['layer3.1.conv1', 'layer3.1.bn1'],
                   ['layer3.1.conv2', 'layer3.1.bn2'],
                   ['layer3.2.conv1', 'layer3.2.bn1'],
                   ['layer3.2.conv2', 'layer3.2.bn2']]

In [12]:
def quantization_model(model, dataloader):
    model.eval()

    model = torch.quantization.fuse_modules(model, modules_to_fuse)

    model.qconfig = torch.quantization.default_qconfig
    print(model.qconfig)
    torch.quantization.prepare(model, inplace=True)

    # Calibrate first
    print('Post Training Quantization Prepare: Inserting Observers')
    print('\n Inverted Residual Block:After observer insertion \n\n', model.conv)

    # Calibrate with the training set
    valid_fn(model, dataloader, CONFIG['device'], CONFIG['valid_batch_size'])
    print('Post Training Quantization: Calibration done')

    # Convert to quantized model
    torch.quantization.convert(model, inplace=True)
    print('Post Training Quantization: Convert done')
    print('\n Inverted Residual Block: After fusion and quantization, note fused modules: \n\n',model.conv)
    
    return model

In [13]:
quantized_model = quantized_resnet20()
quantized_model.to(CONFIG['device'])
quantized_model.load_state_dict(torch.load(MODEL_PATHS))

<All keys matched successfully>

In [14]:
model = resnet20()
model.to(CONFIG['device'])
model.load_state_dict(torch.load(MODEL_PATHS))

<All keys matched successfully>

In [15]:
model, history = performance_inference(model, validation_loader, CONFIG['device'])

[INFO] Using GPU: NVIDIA GeForce RTX 2080



100%|██████████████████████████| 40/40 [00:04<00:00,  8.69it/s, Valid_Loss=0.265, Valid_Score=0.918]

Validation complete in 4696ms
Validation Loss: 0.2653
Validation Score: 0.9183





In [16]:
print_size_of_model(model)

Size (MB): 1.217537


In [17]:
quantized_model =  quantization_model(quantized_model, train_loader)

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric){})
Post Training Quantization Prepare: Inserting Observers

 Inverted Residual Block:After observer insertion 

 Conv2d(
  3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
  (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
)


 65%|███████████████        | 255/391 [00:14<00:07, 17.67it/s, Valid_Loss=0.0674, Valid_Score=0.979]


Post Training Quantization: Calibration done
Post Training Quantization: Convert done

 Inverted Residual Block: After fusion and quantization, note fused modules: 

 QuantizedConv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.17423537373542786, zero_point=52, padding=(1, 1))




In [18]:
print("Size of model after quantization")
print_size_of_model(quantized_model)

Size of model after quantization
Size (MB): 0.335621


In [19]:
quantized_model, history = performance_inference(quantized_model, validation_loader, CONFIG['device'])

[INFO] Using GPU: NVIDIA GeForce RTX 2080



100%|██████████████████████████| 40/40 [00:02<00:00, 15.97it/s, Valid_Loss=0.286, Valid_Score=0.916]


Validation complete in 2590ms
Validation Loss: 0.2858
Validation Score: 0.9156
