In [53]:
pip install torch torchvision torchaudio --upgrade


Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [54]:
pip install numpy

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [55]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.quantization import QuantStub, DeQuantStub, convert
import numpy as np
import torch
import json
print(torch.backends.quantized.supported_engines)

['qnnpack', 'none']


Save weights in binary format for compatibility with CPP library

In [56]:
#Save weights compatible with the cpp library
def save_weights_compatible_with_cpp(model, filepath):
    with open(filepath, 'wb') as f:
        # First, write the total number of parameters
        total_params = sum(p.numel() for p in model.parameters())
        f.write(np.array([total_params], dtype=np.int32).tobytes())

        # Now, write each parameter as a flat array of floats
        for param_tensor in model.parameters():
            # Ensure it's a CPU tensor and then convert to numpy
            param_numpy = param_tensor.data.cpu().numpy().ravel().astype(np.float32)
            f.write(param_numpy.tobytes())


def save_quantized_weights_compatible_with_cpp(model, filepath):
    with open(filepath, 'wb') as f:
        # Write each quantized parameter as a flat array of integers, along with scale and zero-point
        for name, module in model.named_modules():
            if hasattr(module, 'weight'):
                # Extract scale and zero-point
                scale = np.array([module.weight().q_scale()], dtype=np.float32)
                zero_point = np.array([module.weight().q_zero_point()], dtype=np.int32)

                # Extract and convert weights to int8
                weight = module.weight().int_repr().cpu().numpy().ravel().astype(np.int8)

                # Write scale, zero-point, and weight to file
                f.write(scale.tobytes())
                f.write(zero_point.tobytes())
                f.write(weight.tobytes())


In [57]:
# Prepare MNIST dataset with transformations
transform_mnist = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

train_dataset_mnist = datasets.MNIST(root='./data', train=True, transform=transform_mnist, download=True)
train_loader_mnist = DataLoader(dataset=train_dataset_mnist, batch_size=64, shuffle=True)
test_dataset_mnist = datasets.MNIST(root='./data', train=False, transform=transform_mnist, download=True)
test_loader = DataLoader(dataset=test_dataset_mnist, batch_size=64, shuffle=True)

In [58]:
#Basic LeNET architecture competible with CPP library
class LeNet(nn.Module):
# Add quant and dequant layers for post-training quantization
    def __init__(self, quantization = False):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, padding=2),  # Padding is 2 to match the C++ code
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5),  # No padding here to match the C++ code
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2)
        )

        # Calculate the size of the flattened feature maps.
        # With padding=2 in the first conv layer and input images being 32x32,
        # the output of the second conv layer would be 16x5x5 feature maps
        self.fc = nn.Sequential(
            nn.Linear(16*5*5, 120),  # Adjusted for the output of 5x5 feature maps
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )
        self.quantization =  quantization
        # Quantization and Dequantization stubs
        if  quantization:
          self.quant = QuantStub()
          self.dequant = DeQuantStub()

    def forward(self, x):
        if self.quantization:
          x = self.quant(x)  # Quantize input
        x = self.conv(x)
        x = x.reshape(x.shape[0], -1) #Dont use view
        x = self.fc(x)
        if self. quantization:
          x = self.dequant(x)  # Dequantize output
        return x



In [59]:
def train(model: nn.Module, dataloader: DataLoader, cuda=False, q=False):
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    # Train LeNet on MNIST
    num_epochs = 10
    for epoch in range(num_epochs):
        total_loss = 0.0
        for i, (images, labels) in enumerate(train_loader_mnist):
            images, labels = images, labels
            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            # Print loss every 100 batches
            if (i + 1) % 500 == 0:
                print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader_mnist)}], Loss: {total_loss/100:.4f}")
                total_loss = 0.0
    print('Finished Training')

def test(model: nn.Module, dataloader: DataLoader, cuda=False) -> float:
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data

            if cuda:
              inputs = inputs
              labels = labels

            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total

1.1 No quantization training
Here the training is done without any quantization.

In [60]:
# Assuming the LeNet model and MNIST DataLoaders are initialized
lenet = LeNet()

# Train the model
train(lenet, train_loader_mnist, cuda=True, q=False)

# Test the model
accuracy = test(lenet, test_loader, cuda=True)
print(f'Test Accuracy: {accuracy}%')

# Save the model weights
save_weights_compatible_with_cpp(lenet, 'lenet_weights.pth')


Epoch [1/10], Step [500/938], Loss: 11.4775
Epoch [2/10], Step [500/938], Loss: 3.6309
Epoch [3/10], Step [500/938], Loss: 1.7005
Epoch [4/10], Step [500/938], Loss: 1.2263
Epoch [5/10], Step [500/938], Loss: 1.0180
Epoch [6/10], Step [500/938], Loss: 0.8547
Epoch [7/10], Step [500/938], Loss: 0.7332
Epoch [8/10], Step [500/938], Loss: 0.6244
Epoch [9/10], Step [500/938], Loss: 0.5549
Epoch [10/10], Step [500/938], Loss: 0.4930
Finished Training
Test Accuracy: 97.0%


In [61]:

'''
    # Save the quantized model
    #model_save_path = '/content/drive/My Drive/lenet_mnist_weights_quantized.pth'
    #torch.save(lenet.state_dict(), model_save_path)
    #print(f'Quantized model weights saved to {model_save_path}')

    # Save the weights of the model in a format compatible with C++
    #save_path = '/content/drive/My Drive/lenet_weights_quantized.pth'
    #save_weights_compatible_with_cpp(lenet, save_path)

quantization_info = {}

for name, module in lenet.named_modules():
    if isinstance(module, torch.nn.quantized.Conv2d) or isinstance(module, torch.nn.quantized.Linear):
        layer_info = {
            "scale": float(module.scale),
            "zero_point": int(module.zero_point)
            }

        if isinstance(module, torch.nn.quantized.Conv2d):
            layer_info.update({
                    "kernel_size": module.kernel_size,
                    "stride": module.stride,
                    "padding": module.padding
                })
        elif isinstance(module, torch.nn.quantized.Linear):
            layer_info.update({
                    "in_features": module.in_features,
                    "out_features": module.out_features
                })

        quantization_info[name] = layer_info

    # Optionally, save the information to a file
    #with open("quantization_info.json", "w") as file:
    #    json.dump(quantization_info, file, indent=4)

'''

'\n    # Save the quantized model\n    #model_save_path = \'/content/drive/My Drive/lenet_mnist_weights_quantized.pth\'\n    #torch.save(lenet.state_dict(), model_save_path)\n    #print(f\'Quantized model weights saved to {model_save_path}\')\n\n    # Save the weights of the model in a format compatible with C++\n    #save_path = \'/content/drive/My Drive/lenet_weights_quantized.pth\'\n    #save_weights_compatible_with_cpp(lenet, save_path)\n\nquantization_info = {}\n\nfor name, module in lenet.named_modules():\n    if isinstance(module, torch.nn.quantized.Conv2d) or isinstance(module, torch.nn.quantized.Linear):\n        layer_info = {\n            "scale": float(module.scale),\n            "zero_point": int(module.zero_point)\n            }\n\n        if isinstance(module, torch.nn.quantized.Conv2d):\n            layer_info.update({\n                    "kernel_size": module.kernel_size,\n                    "stride": module.stride,\n                    "padding": module.padding\n   

**1.2 post-training quantization**

In [62]:
def load_model(quantized_model, model):
    """ Loads in the weights into an object meant for quantization """
    state_dict = model.state_dict()
    model = model.to('cpu')
    quantized_model.load_state_dict(state_dict)


#torch.save(lenet.state_dict(), 'lenet_full_precision.pth')
torch.backends.quantized.engine = 'qnnpack'

# Instantiate the LeNet model
lenet_quant = LeNet(quantization=True)
load_model(lenet_quant, lenet)
#lenet_quant.load_state_dict(lenet.state_dict())
lenet_quant.eval()
# Fuse Conv, bn and relu
#net.fuse_model()

# Specify quantization configuration
lenet_quant.qconfig = torch.quantization.get_default_qconfig('qnnpack')

# Prepare the model for static quantization
torch.quantization.prepare(lenet_quant, inplace=True)

# Calibrate the model and collect statistics
for inputs, _ in train_loader_mnist:
    lenet_quant(inputs)

# Convert to a quantized model
torch.quantization.convert(lenet_quant, inplace=True)
# Print scale and zero-point information
for name, module in lenet_quant.named_modules():
    if hasattr(module, 'scale') and hasattr(module, 'zero_point'):
        print(f"{name} - Scale: {module.scale}, Zero Point: {module.zero_point}")

    # Training process
score = test(lenet_quant, test_loader, cuda=True)
print(f'Accuracy of the network on the test images: {score}%')
save_quantized_weights_compatible_with_cpp(lenet_quant, 'postquantization_lenet.pth')


conv.0 - Scale: 0.07743089646100998, Zero Point: 121
conv.3 - Scale: 0.1501348316669464, Zero Point: 107
fc.0 - Scale: 0.13934408128261566, Zero Point: 110
fc.2 - Scale: 0.08970557153224945, Zero Point: 108
fc.4 - Scale: 0.1313752382993698, Zero Point: 134
quant - Scale: tensor([0.0078]), Zero Point: tensor([128])
Accuracy of the network on the test images: 97.04%


1.3 Quantization Aware Training

In [67]:
torch.backends.quantized.engine = 'qnnpack'
qnet = LeNet(quantization=True)
qnet.qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')
torch.quantization.prepare_qat(qnet, inplace=True)
train(qnet, train_loader_mnist, cuda=True)
qnet = qnet.cpu()
torch.quantization.convert(qnet, inplace=True)
score = test(qnet, test_loader, cuda=False)
print(f'Accuracy of the fused and quantized network (trained quantized) on the test images:- INT8{score}')
save_quantized_weights_compatible_with_cpp(qnet, 'qat_lenet.pth')



Epoch [1/10], Step [500/938], Loss: 0.1390
Epoch [2/10], Step [500/938], Loss: 0.1381
Epoch [3/10], Step [500/938], Loss: 0.1464
Epoch [4/10], Step [500/938], Loss: 0.1353
Epoch [5/10], Step [500/938], Loss: 0.1295
Epoch [6/10], Step [500/938], Loss: 0.1278
Epoch [7/10], Step [500/938], Loss: 0.1221
Epoch [8/10], Step [500/938], Loss: 0.1154
Epoch [9/10], Step [500/938], Loss: 0.1200
Epoch [10/10], Step [500/938], Loss: 0.1109
Finished Training




Accuracy of the fused and quantized network (trained quantized) on the test images:- INT89.8
