In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets 
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's default random generator for reproducibility. Because this is
# the last expression of the cell, the returned Generator is echoed as output.
torch.manual_seed(42)

<torch._C.Generator at 0x112d42390>

# Loading the MNIST dataset

In [3]:
# Number of samples per batch, shared by the training and evaluation loaders.
BATCH_SIZE = 10

# Convert each image to a tensor, then standardise it with the mean / std
# commonly quoted for the MNIST training set.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

# Training split (downloaded into ./data if it is not there yet); shuffled so
# that every epoch sees the samples in a different order.
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(mnist_trainset, batch_size=BATCH_SIZE, shuffle=True)

# Test split. Evaluation does not benefit from shuffling, and a fixed order
# makes a partial evaluation (a capped number of batches) cover the same
# samples on every run.
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(mnist_testset, batch_size=BATCH_SIZE, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:03<00:00, 2637044.78it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 38455775.82it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1080387.73it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 13322048.09it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






# Defining the model

In [4]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    The linear layers are registered as ``ln1``, ``ln2`` and ``ln3``. These
    names are the ``state_dict`` keys, so they must stay stable for saved
    weights to remain loadable.

    Args:
        hidden_size_1: Width of the first hidden layer.
        hidden_size_2: Width of the second hidden layer.
        in_features: Number of values in one flattened sample
            (defaults to 28*28).
        num_classes: Number of output scores per sample (defaults to 10).
    """

    def __init__(self, hidden_size_1=100, hidden_size_2=100, in_features=28*28, num_classes=10):
        super().__init__()
        self.in_features = in_features
        # Layers are created in a fixed order so that seeded initialisation
        # stays reproducible.
        self.ln1 = nn.Linear(in_features, hidden_size_1)
        self.ln2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.ln3 = nn.Linear(hidden_size_2, num_classes)
        self.relu = nn.ReLU()

    def forward(self, img):
        """Return the raw class scores for a batch of inputs.

        Args:
            img: Tensor whose elements can be regrouped into rows of
                ``in_features`` values (for example ``(batch, 1, 28, 28)``).

        Returns:
            Tensor of shape ``(batch, num_classes)``; no softmax is applied.
        """
        # reshape() behaves like view() for contiguous tensors but also
        # accepts non-contiguous ones (e.g. a transposed image batch).
        x = img.reshape(-1, self.in_features)
        x = self.relu(self.ln1(x))
        x = self.relu(self.ln2(x))
        x = self.ln3(x)
        return x

In [5]:
# Instantiate the baseline network with its default layer sizes and move it to `device`.
model = NeuralNetwork().to(device)

# Training the model

In [6]:
def train(train_loader, model, epochs=5, total_iterations_limit=None, lr=0.001):
    """Train ``model`` in place with Adam and cross-entropy loss.

    Args:
        train_loader: Iterable yielding ``(inputs, labels)`` batches. Each
            input batch is flattened to rows of 28*28 values before the
            forward pass.
        model: Network to optimise. Batches are moved to the device that
            holds the model's parameters.
        epochs: Maximum number of passes over ``train_loader``.
        total_iterations_limit: Optional cap on the total number of optimiser
            steps across all epochs. ``None`` means no cap; a value of zero
            or less performs no training at all.
        lr: Learning rate passed to Adam.

    Returns:
        None. The running average loss is shown in the progress bar.
    """
    # A non-positive budget means "do nothing"; without this guard one step
    # would run before the limit check at the end of the loop body.
    if total_iterations_limit is not None and total_iterations_limit <= 0:
        return

    cross_entropy = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Follow the model instead of a notebook-level global so data and weights
    # always end up on the same device.
    model_device = next(model.parameters()).device

    total_iterations = 0

    for epoch in range(epochs):
        model.train()

        loss_sum = 0
        num_iterations = 0

        data_iterator = tqdm(train_loader, desc=f'Epoch {epoch+1}')
        if total_iterations_limit is not None:
            # The bar should end where this epoch actually ends: either the
            # loader is exhausted or the remaining step budget is used up.
            remaining = total_iterations_limit - total_iterations
            known_total = data_iterator.total
            data_iterator.total = remaining if known_total is None else min(known_total, remaining)
        for data in data_iterator:
            num_iterations += 1
            total_iterations += 1
            x, y = data
            x = x.to(model_device)
            y = y.to(model_device)
            optimizer.zero_grad()
            output = model(x.view(-1, 28*28))
            loss = cross_entropy(output, y)
            loss_sum += loss.item()
            avg_loss = loss_sum / num_iterations
            data_iterator.set_postfix(loss=avg_loss)
            loss.backward()
            optimizer.step()

            if total_iterations_limit is not None and total_iterations >= total_iterations_limit:
                return
            
def print_size_of_model(model):
    """Print the on-disk size of ``model``'s ``state_dict`` in kilobytes.

    The state dict is written to a scratch file inside a private temporary
    directory. An existing ``temp.p`` in the working directory is therefore
    never overwritten or deleted, and the scratch file is removed even when
    saving or measuring fails.

    Args:
        model: Module whose ``state_dict()`` is serialised and measured.

    Returns:
        None. The size is printed as ``Size (KB): <value>``.
    """
    import tempfile  # local import: only this helper needs it

    with tempfile.TemporaryDirectory() as scratch_dir:
        # The file name is kept unchanged; only its directory moved.
        scratch_file = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), scratch_file)
        print('Size (KB):', os.path.getsize(scratch_file)/1e3)

In [7]:
# Train the baseline model for a single pass over the training loader.
train(train_loader, model, epochs=1)

Epoch 1: 100%|██████████| 6000/6000 [00:46<00:00, 129.35it/s, loss=0.221]


# Defining the testing loop

In [8]:
def test(model: nn.Module, total_iterations: int = None, data_loader=None):
    """Evaluate ``model`` and print its classification accuracy.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        total_iterations: Optional maximum number of batches to evaluate.
            ``None`` evaluates every batch of the loader.
        data_loader: Iterable of ``(inputs, labels)`` batches. When omitted,
            the notebook-level ``test_loader`` is used.

    Returns:
        None. Prints ``Accuracy: <value>`` rounded to three decimals, or a
        short notice when no sample was evaluated.
    """
    if data_loader is None:
        data_loader = test_loader

    # Keep inputs on the same device as the model's weights. A model that
    # exposes no parameters (presumably the case after quantized conversion)
    # is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0
    iterations = 0

    model.eval()

    with torch.no_grad():
        for x, y in tqdm(data_loader, desc='Testing'):
            x = x.to(eval_device)
            y = y.to(eval_device)
            output = model(x.view(-1, 784))
            # Score the whole batch at once instead of one sample at a time.
            predictions = output.argmax(dim=1)
            correct += (predictions == y).sum().item()
            total += predictions.numel()
            iterations += 1
            if total_iterations is not None and iterations >= total_iterations:
                break

    if total == 0:
        # An empty loader would otherwise end in a ZeroDivisionError.
        print('Accuracy: n/a (no samples were evaluated)')
        return
    print(f'Accuracy: {round(correct/total, 3)}')

# Weights and size of the model before quantization

In [9]:
# Inspect the first layer's weight tensor and its dtype before quantization.
print('Weights before quantization')
print(model.ln1.weight)
print(model.ln1.weight.dtype)

Weights before quantization
Parameter containing:
tensor([[ 0.0341,  0.0364, -0.0016,  ..., -0.0074,  0.0161,  0.0202],
        [ 0.0067, -0.0099,  0.0442,  ...,  0.0149,  0.0254,  0.0370],
        [-0.0146, -0.0122, -0.0093,  ..., -0.0265, -0.0327, -0.0080],
        ...,
        [ 0.0006,  0.0474,  0.0372,  ...,  0.0321, -0.0047,  0.0567],
        [-0.0401, -0.0162, -0.0096,  ...,  0.0238, -0.0427,  0.0226],
        [-0.0044,  0.0254, -0.0188,  ..., -0.0092,  0.0315, -0.0207]],
       requires_grad=True)
torch.float32


In [10]:
# Report the serialised size of the unquantized model's state_dict.
print('Size of the model before quantization')
print_size_of_model(model)

Size of the model before quantization
Size (KB): 360.81


In [11]:
# Baseline accuracy of the unquantized model, measured over test_loader.
print('Accuracy of the model before quantization:')
test(model)

Accuracy of the model before quantization:


Testing: 100%|██████████| 1000/1000 [00:03<00:00, 284.46it/s]

Accuracy: 0.962





# Inserting min-max observers in the model

In [12]:
# https://pytorch.org/docs/stable/_modules/torch/ao/quantization/stubs.html#QuantStub

class QuantizedNeuralNetwork(nn.Module):
    """Two-hidden-layer classifier wrapped in quantization stubs.

    ``quant`` marks where inputs enter the quantized part of the network and
    ``dequant`` where the scores leave it. The linear layers are registered
    as ``ln1``, ``ln2`` and ``ln3`` so that a ``state_dict`` using those keys
    can be loaded directly.

    Args:
        hidden_size_1: Width of the first hidden layer.
        hidden_size_2: Width of the second hidden layer.
        in_features: Number of values in one flattened sample
            (defaults to 28*28).
        num_classes: Number of output scores per sample (defaults to 10).
    """

    def __init__(self, hidden_size_1=100, hidden_size_2=100, in_features=28*28, num_classes=10):
        super().__init__()
        self.in_features = in_features
        # Stubs come from torch.ao.quantization, the same namespace the
        # notebook uses for qconfig / prepare / convert.
        self.quant = torch.ao.quantization.QuantStub()
        self.ln1 = nn.Linear(in_features, hidden_size_1)
        self.ln2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.ln3 = nn.Linear(hidden_size_2, num_classes)
        self.relu = nn.ReLU()
        self.dequant = torch.ao.quantization.DeQuantStub()

    def forward(self, img):
        """Return the class scores for a batch of inputs.

        Args:
            img: Float tensor whose elements can be regrouped into rows of
                ``in_features`` values.

        Returns:
            Tensor of shape ``(batch, num_classes)`` produced after the
            ``dequant`` stub.
        """
        # Flatten while the input is still a regular float tensor; reshape()
        # also accepts non-contiguous inputs, unlike view().
        x = img.reshape(-1, self.in_features)
        x = self.quant(x)
        x = self.relu(self.ln1(x))
        x = self.relu(self.ln2(x))
        x = self.ln3(x)
        x = self.dequant(x)
        return x

In [13]:
# Build the stub-wrapped network. Its linear layers use the same names as the
# float model's, so the trained state_dict can be loaded below.
# NOTE(review): the quantized kernels used by this eager-mode flow are, as far
# as I know, CPU backends; if `device` is CUDA the later convert / inference
# steps are expected to fail. Confirm, and consider keeping this model and its
# inputs on the CPU.
model_quantized = QuantizedNeuralNetwork().to(device)

# Copying weights from unquantized model
model_quantized.load_state_dict(model.state_dict())
# Put the model in eval mode before the observers are inserted.
model_quantized.eval()

# Attach the default quantization config, then let prepare() add the
# observers. The bare name on the last line displays the prepared model.
model_quantized.qconfig = torch.ao.quantization.default_qconfig
model_quantized = torch.ao.quantization.prepare(model_quantized) # Inserting observers
model_quantized

QuantizedNeuralNetwork(
  (quant): QuantStub(
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (ln1): Linear(
    in_features=784, out_features=100, bias=True
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (ln2): Linear(
    in_features=100, out_features=100, bias=True
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (ln3): Linear(
    in_features=100, out_features=10, bias=True
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (relu): ReLU()
  (dequant): DeQuantStub()
)

# Calibrating the model using the test set

In [14]:
# Calibration pass: running the test loop pushes batches through the prepared
# model so the inserted observers can record activation ranges; the printed
# accuracy is a by-product.
# NOTE(review): this calibrates on the same split that is later used to report
# accuracy - consider calibrating on training data instead.
test(model_quantized)

Testing: 100%|██████████| 1000/1000 [00:04<00:00, 204.00it/s]

Accuracy: 0.962





In [15]:
# Display the prepared model again: its observers now show the min / max
# values recorded during calibration.
print('Statistics of the various layers')
model_quantized

Statistics of the various layers


QuantizedNeuralNetwork(
  (quant): QuantStub(
    (activation_post_process): MinMaxObserver(min_val=-0.4242129623889923, max_val=2.821486711502075)
  )
  (ln1): Linear(
    in_features=784, out_features=100, bias=True
    (activation_post_process): MinMaxObserver(min_val=-42.804134368896484, max_val=35.08749008178711)
  )
  (ln2): Linear(
    in_features=100, out_features=100, bias=True
    (activation_post_process): MinMaxObserver(min_val=-25.888654708862305, max_val=23.114397048950195)
  )
  (ln3): Linear(
    in_features=100, out_features=10, bias=True
    (activation_post_process): MinMaxObserver(min_val=-28.19929313659668, max_val=23.274927139282227)
  )
  (relu): ReLU()
  (dequant): DeQuantStub()
)

# Quantizing the model using the statistics collected

In [16]:
# Swap the observed modules for their quantized counterparts, using the
# statistics collected during calibration.
model_quantized = torch.ao.quantization.convert(model_quantized)

In [17]:
# Display the converted model: each layer now reports a scale and zero_point
# in place of the observer min / max values.
print('Statistics of the various layers')
model_quantized

Statistics of the various layers


QuantizedNeuralNetwork(
  (quant): Quantize(scale=tensor([0.0256]), zero_point=tensor([17]), dtype=torch.quint8)
  (ln1): QuantizedLinear(in_features=784, out_features=100, scale=0.6133198738098145, zero_point=70, qscheme=torch.per_tensor_affine)
  (ln2): QuantizedLinear(in_features=100, out_features=100, scale=0.38585078716278076, zero_point=67, qscheme=torch.per_tensor_affine)
  (ln3): QuantizedLinear(in_features=100, out_features=10, scale=0.4053088128566742, zero_point=70, qscheme=torch.per_tensor_affine)
  (relu): ReLU()
  (dequant): DeQuantize()
)

# Weights of the model after quantization

In [18]:
# int_repr() shows the integer values stored for the first layer's quantized weight.
print('Weights after quantization')
print(torch.int_repr(model_quantized.ln1.weight()))

Weights after quantization
tensor([[  8,   8,   0,  ...,  -2,   4,   5],
        [  2,  -2,  10,  ...,   3,   6,   9],
        [ -3,  -3,  -2,  ...,  -6,  -8,  -2],
        ...,
        [  0,  11,   9,  ...,   7,  -1,  13],
        [ -9,  -4,  -2,  ...,   6, -10,   5],
        [ -1,   6,  -4,  ...,  -2,   7,  -5]], dtype=torch.int8)


# Comparing the dequantized weights and the original weights

In [19]:
# Print the float weights next to the quantized weights mapped back to float,
# so the rounding introduced by quantization can be compared by eye.
print('Original weights: ')
print(model.ln1.weight)
print('')
# Plain string literal: the previous f-string had no placeholders.
print('Dequantized weights: ')
print(torch.dequantize(model_quantized.ln1.weight()))
print('')

Original weights: 
Parameter containing:
tensor([[ 0.0341,  0.0364, -0.0016,  ..., -0.0074,  0.0161,  0.0202],
        [ 0.0067, -0.0099,  0.0442,  ...,  0.0149,  0.0254,  0.0370],
        [-0.0146, -0.0122, -0.0093,  ..., -0.0265, -0.0327, -0.0080],
        ...,
        [ 0.0006,  0.0474,  0.0372,  ...,  0.0321, -0.0047,  0.0567],
        [-0.0401, -0.0162, -0.0096,  ...,  0.0238, -0.0427,  0.0226],
        [-0.0044,  0.0254, -0.0188,  ..., -0.0092,  0.0315, -0.0207]],
       requires_grad=True)

Dequantized weights: 
tensor([[ 0.0346,  0.0346,  0.0000,  ..., -0.0086,  0.0173,  0.0216],
        [ 0.0086, -0.0086,  0.0432,  ...,  0.0130,  0.0259,  0.0389],
        [-0.0130, -0.0130, -0.0086,  ..., -0.0259, -0.0346, -0.0086],
        ...,
        [ 0.0000,  0.0475,  0.0389,  ...,  0.0302, -0.0043,  0.0562],
        [-0.0389, -0.0173, -0.0086,  ...,  0.0259, -0.0432,  0.0216],
        [-0.0043,  0.0259, -0.0173,  ..., -0.0086,  0.0302, -0.0216]])



# Size and accuracy of the quantized model

In [20]:
# Report the serialised size of the converted model's state_dict, for
# comparison with the size printed before quantization.
print('Size of the model after quantization')
print_size_of_model(model_quantized)

Size of the model after quantization
Size (KB): 95.094


In [21]:
# Accuracy of the converted model over test_loader, for comparison with the
# baseline accuracy printed before quantization.
print('Testing the model after quantization')
test(model_quantized)

Testing the model after quantization


Testing: 100%|██████████| 1000/1000 [00:03<00:00, 276.62it/s]

Accuracy: 0.961



