<a href="https://colab.research.google.com/github/mtds20/mtds20.ML/blob/main/HW2_U93384434.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initial Setup

Before beginning the assignment, we import the CIFAR dataset, and train a simple convolutional neural network (CNN) to classify it.

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

**Reminder:** set the runtime type to "GPU", or your code will run much more slowly on a CPU.

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Load training and test data from the CIFAR10 dataset.

In [None]:
# Convert each image to a tensor, then normalize every RGB channel with
# mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: downloaded into ./data if needed and reshuffled on every pass.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# Test split: same preprocessing, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


Define a simple CNN that classifies CIFAR images.
The network provided is similar to LeNet-5, and it has the following architecture:

**Layer** | **Type** | **Input Shape** | **Output Shape** | **Activation**
--- | --- | --- | --- | ---
conv1 | Convolutional | 3x32x32 | 6x28x28 | ReLU
pool1 | Max pool | 6x28x28 | 6x14x14 | None                
conv2 | Convolutional | 6x14x14 | 16x10x10 | ReLU                
pool2 | Max pool | 16x10x10 | 16x5x5 | None                
fc1 | Fully-connected | 400 | 120 | ReLU                
fc2 | Fully-connected | 120 | 84 | ReLU                
fc3 | Fully-connected | 84 | 10 | None                

In [None]:
class Net(nn.Module):
    '''
    Small LeNet-5-style CNN for 3x32x32 images with 10 output classes.

    Two 5x5 convolutions (each followed by ReLU and a shared 2x2 max pool)
    feed three fully-connected layers. Every layer is created without a bias
    term.
    '''

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; the same pooling module is applied
        # after both convolutions.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, bias=False)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, bias=False)
        # Classifier head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120, bias=False)
        self.fc2 = nn.Linear(in_features=120, out_features=84, bias=False)
        self.fc3 = nn.Linear(in_features=84, out_features=10, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        '''Return the raw (un-normalized) class scores for a batch of images.'''
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Flatten the 16 feature maps of 5x5 into one 400-element row per image.
        flat = features.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Instantiate the network, then move its parameters to the selected device.
net = Net()
net = net.to(device)

Train this CNN on the training dataset (this may take a few moments).

In [None]:
from torch.utils.data import DataLoader

def train(model: nn.Module, dataloader: DataLoader, *, epochs: int = 2,
          lr: float = 0.001, momentum: float = 0.9, log_every: int = 2000):
    '''
    Train `model` in place using cross-entropy loss and SGD with momentum.

    Each batch is moved to the notebook-level `device` before the forward pass.
    The mean loss over the last `log_every` mini-batches is printed every
    `log_every` mini-batches, and 'Finished Training' is printed at the end.

    Parameters:
    model (nn.Module): The network to optimize; its parameters are updated in place.
    dataloader (DataLoader): Iterable yielding (inputs, labels) batches.
    epochs (int): Number of full passes over `dataloader`.
    lr (float): SGD learning rate.
    momentum (float): SGD momentum factor.
    log_every (int): Number of mini-batches between progress printouts.

    Raises:
    ValueError: If `log_every` is smaller than 1.
    '''
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()

            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % log_every == 0:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / log_every))
                running_loss = 0.0

    print('Finished Training')

def test(model: nn.Module, dataloader: DataLoader, max_samples=None) -> float:
    '''
    Return the top-1 accuracy of `model` on `dataloader`, as a percentage.

    Each batch is moved to the notebook-level `device`, and gradients are
    disabled for the whole evaluation.

    Parameters:
    model (nn.Module): The network to evaluate.
    dataloader (DataLoader): Iterable yielding (images, labels) batches.
    max_samples (int, optional): If set (and non-zero), evaluation stops as soon
        as at least this many samples have been scored. The batch that reaches
        the limit is scored in full. None or 0 means "score everything".

    Returns:
    float: 100 * correct / total, or 0.0 when the dataloader yields no samples.
    '''
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)  # one row of class scores per image
            # torch.max over dim 1 returns (values, indices); the index of the
            # largest score is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Stop once the cap is reached (>=, not >, so that hitting the cap
            # exactly does not pull in one extra batch).
            if max_samples and total >= max_samples:
                break

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return 100 * correct / total

In [None]:
# Fit the full-precision network on the CIFAR10 training split.
train(model=net, dataloader=trainloader)

[1,  2000] loss: 2.168
[1,  4000] loss: 1.851
[1,  6000] loss: 1.720
[1,  8000] loss: 1.607
[1, 10000] loss: 1.539
[1, 12000] loss: 1.504
[2,  2000] loss: 1.436
[2,  4000] loss: 1.413
[2,  6000] loss: 1.380
[2,  8000] loss: 1.357
[2, 10000] loss: 1.334
[2, 12000] loss: 1.345
Finished Training


Now that the CNN has been trained, let's test it on our test dataset.

In [None]:
# Evaluate the trained network on the whole test split and report its accuracy.
score = test(model=net, dataloader=testloader)
print('Accuracy of the network on the test images: {}%'.format(score))

Accuracy of the network on the test images: 52.5%


Define a convenience function which we use to copy CNNs.

In [None]:
from copy import deepcopy

def copy_model(model: nn.Module) -> nn.Module:
    '''
    Return an independent deep copy of `model`, including quantization metadata.

    On top of `copy.deepcopy`, the following attributes are re-attached
    explicitly when present on the original (the original author notes that
    deepcopy alone does not capture them):
      * `model.input_activations`
      * for every direct Conv2d / Linear child: `weight.scale`, `activations`
        and `output_scale`

    Parameters:
    model (nn.Module): The network to duplicate.

    Returns:
    nn.Module: The copy; modifying it does not affect `model`.
    '''
    clone = deepcopy(model)

    if hasattr(model, 'input_activations'):
        clone.input_activations = deepcopy(model.input_activations)

    quantizable = (nn.Conv2d, nn.Linear)
    # deepcopy keeps the child order, so the two iterators pair up layer by layer.
    for dst, src in zip(clone.children(), model.children()):
        if not isinstance(dst, quantizable):
            continue
        if hasattr(src.weight, 'scale'):
            dst.weight.scale = deepcopy(src.weight.scale)
        for attr in ('activations', 'output_scale'):
            if hasattr(src, attr):
                setattr(dst, attr, deepcopy(getattr(src, attr)))

    return clone

# Quantize Weights

Any convolution or fully-connected layer pass, without a bias, can be described by the equation:

$$W*In = Out$$

where $W$ is the weight tensor, $In$ is the input tensor, and $Out$ is the output tensor.

For this question, your task is to find a *scaling factor*, called $n_W$, for each convolutional and fully connected layer,
such that the scaled weights $n_WW$ fit inside an 8-bit signed integer.
This equation can now be described as the following:

$$n_WW*In = n_WOut$$

You might wonder: "Isn't it a problem that the output of the layer has now changed? Wouldn't quantizing the weights change the output of the neural net?"

The answer, of course, is: "Yes".
However, what we care about is not the *absolute* values output by the CNN, but the relative difference between the probabilities it assigns to different classes for its predictions.
Quantizing the weights only scales this relative difference up or down, but it does not affect which class the network assigns the most probability to.

Copy the old model into three new models.

In [None]:
# Three independent copies of the trained network, one per target bit width.
net_q8, net_q4, net_q2 = (copy_model(net) for _ in range(3))

## Question 1:

Fill in three copies of the `quantized_weights` function to quantize weights with N=8, N=4, N=2. Rename your copies accordingly.

A template code will then call this function on the weights of every layer in the CNN that we just trained at 32-bit floating point precision, **to lower them into N-bit signed integer precisions.** Remember to make multiple (three) copies of the template code as well, so you can call `quantized_weights` for the different values of N.

In [None]:
from typing import Tuple

def quantized_weights_8(weights: torch.Tensor) -> Tuple[torch.Tensor, float]:
    '''
    Quantize the weights to integers that fit in an 8-bit signed integer.

    The weights are scaled symmetrically so that the largest magnitude maps to
    127, rounded to the nearest integer, and clamped to the two's complement
    range [-128, 127].

    Parameters:
    weights (Tensor): The unquantized weights

    Returns:
    (Tensor, float): A tuple with the following elements:
                        * The weights in quantized form, where every value is an integer in [-128, 127].
                          The "dtype" will still be "float", but the values themselves should all be integers.
                        * The scaling factor that the weights were multiplied by
                          (a 0-dim tensor; 1 when all weights are zero).
    '''
    num_bits = 8
    q_min = -(2 ** (num_bits - 1))     # -128
    q_max = 2 ** (num_bits - 1) - 1    # 127

    max_abs = torch.max(torch.abs(weights))

    # Parenthesised on purpose: the factor is q_max / max|w|, not 2**N - 1/max|w|.
    # All-zero weights would divide by zero, so use a neutral factor of 1 there.
    scale = q_max / max_abs if max_abs > 0 else torch.ones_like(max_abs)

    quantized = torch.round(weights * scale)

    # Clamp guards against float round-off pushing a value just past the range.
    return torch.clamp(quantized, min=q_min, max=q_max), scale

In [None]:
from typing import Tuple

def quantized_weights_4(weights: torch.Tensor) -> Tuple[torch.Tensor, float]:
    '''
    Quantize the weights to integers that fit in a 4-bit signed integer.

    The weights are scaled symmetrically so that the largest magnitude maps to
    7, rounded to the nearest integer, and clamped to the two's complement
    range [-8, 7].

    Parameters:
    weights (Tensor): The unquantized weights

    Returns:
    (Tensor, float): A tuple with the following elements:
                        * The weights in quantized form, where every value is an integer in [-8, 7].
                          The "dtype" will still be "float", but the values themselves should all be integers.
                        * The scaling factor that the weights were multiplied by
                          (a 0-dim tensor; 1 when all weights are zero).
    '''
    num_bits = 4
    q_min = -(2 ** (num_bits - 1))     # -8
    q_max = 2 ** (num_bits - 1) - 1    # 7

    max_abs = torch.max(torch.abs(weights))

    # Parenthesised on purpose: the factor is q_max / max|w|, not 2**N - 1/max|w|.
    # All-zero weights would divide by zero, so use a neutral factor of 1 there.
    scale = q_max / max_abs if max_abs > 0 else torch.ones_like(max_abs)

    quantized = torch.round(weights * scale)

    # Clamp guards against float round-off pushing a value just past the range.
    return torch.clamp(quantized, min=q_min, max=q_max), scale

In [None]:
from typing import Tuple

def quantized_weights_2(weights: torch.Tensor) -> Tuple[torch.Tensor, float]:
    '''
    Quantize the weights to integers that fit in a 2-bit signed integer.

    The weights are scaled symmetrically so that the largest magnitude maps to
    1, rounded to the nearest integer, and clamped to the two's complement
    range [-2, 1].

    Parameters:
    weights (Tensor): The unquantized weights

    Returns:
    (Tensor, float): A tuple with the following elements:
                        * The weights in quantized form, where every value is an integer in [-2, 1].
                          The "dtype" will still be "float", but the values themselves should all be integers.
                        * The scaling factor that the weights were multiplied by
                          (a 0-dim tensor; 1 when all weights are zero).
    '''
    num_bits = 2
    q_min = -(2 ** (num_bits - 1))     # -2
    q_max = 2 ** (num_bits - 1) - 1    # 1

    max_abs = torch.max(torch.abs(weights))

    # Parenthesised on purpose: the factor is q_max / max|w|, not 2**N - 1/max|w|.
    # All-zero weights would divide by zero, so use a neutral factor of 1 there.
    scale = q_max / max_abs if max_abs > 0 else torch.ones_like(max_abs)

    quantized = torch.round(weights * scale)

    # Clamp guards against float round-off pushing a value just past the range.
    return torch.clamp(quantized, min=q_min, max=q_max), scale

In [None]:
def quantize_layer_weights_8(model: nn.Module):
    '''
    Quantize, in place, the weights of every direct Conv2d / Linear child of `model`.

    For each such layer the weights are replaced by the output of
    `quantized_weights_8`, the scaling factor is stored on `layer.weight.scale`,
    and the quantized tensor is printed.

    Raises:
    Exception: If any quantized value lies outside [-2**N, 2**N - 1] with N = 8,
               or is not an integer.
    '''
    N = 8
    for layer in model.children():
        if not isinstance(layer, (nn.Conv2d, nn.Linear)):
            continue

        quantized, factor = quantized_weights_8(layer.weight.data)
        quantized = quantized.to(device)

        layer.weight.data = quantized
        layer.weight.scale = factor

        print(quantized)

        # Sanity checks on the quantizer's output.
        too_small = (quantized < -2**N).any()
        too_large = (quantized > 2**N-1).any()
        if too_small or too_large:
            raise Exception("Quantized weights of {} layer include values out of bounds for an 8-bit signed integer".format(layer.__class__.__name__))
        if (quantized != quantized.round()).any():
            raise Exception("Quantized weights of {} layer include non-integer values".format(layer.__class__.__name__))

# Quantize the 8-bit copy of the network.
quantize_layer_weights_8(net_q8)

tensor([[[[ -34.,    9.,    2.,  -18.,   -2.],
          [ -55.,  -62.,  -35.,   -3.,  -45.],
          [ -49., -119., -128., -106.,  -64.],
          [ -61.,  -50.,  -34.,  -10.,  -65.],
          [  54.,   84.,   62.,   75.,   25.]],

         [[   1.,    0.,   22.,   49.,    7.],
          [   4.,   22.,  -12.,  -24.,  -14.],
          [ -49.,  -38.,  -79.,  -69.,  -63.],
          [  -5.,    4.,   16.,  -26.,  -37.],
          [  61.,   82.,  112.,   75.,   34.]],

         [[  17.,   73.,   46.,   20.,   50.],
          [  39.,   37.,   42.,    8.,   51.],
          [  -4.,  -42.,  -36.,  -63.,  -17.],
          [   4.,    8.,  -39.,    1.,   -4.],
          [  58.,   27.,   22.,   17.,    7.]]],


        [[[  94.,  111.,  128.,  109.,   59.],
          [  96.,   97.,  109.,   58.,   76.],
          [  -9.,    1.,  -76.,  -58.,  -22.],
          [ -47.,  -54.,  -72.,  -75.,  -36.],
          [ -66.,  -57.,  -35.,  -43.,  -54.]],

         [[ -18.,   38.,   44.,   10.,    9.],
   

In [None]:
def quantize_layer_weights_4(model: nn.Module):
    '''
    Quantize, in place, the weights of every direct Conv2d / Linear child of `model`.

    For each such layer the weights are replaced by the output of
    `quantized_weights_4`, the scaling factor is stored on `layer.weight.scale`,
    and the quantized tensor is printed.

    Raises:
    Exception: If any quantized value lies outside [-2**N, 2**N - 1] with N = 4,
               or is not an integer.
    '''
    N = 4
    for layer in model.children():
        if not isinstance(layer, (nn.Conv2d, nn.Linear)):
            continue

        q_layer_data, scale = quantized_weights_4(layer.weight.data)
        q_layer_data = q_layer_data.to(device)

        layer.weight.data = q_layer_data
        layer.weight.scale = scale

        print(q_layer_data)

        # NOTE: these bounds follow the assignment template (-2**N .. 2**N-1),
        # which is looser than the two's complement range of an N-bit integer.
        if (q_layer_data < -2**N).any() or (q_layer_data > 2**N-1).any():
            # Report the bit width actually being checked (was hard-coded to "8-bit").
            raise Exception("Quantized weights of {} layer include values out of bounds for a {}-bit signed integer".format(layer.__class__.__name__, N))
        if (q_layer_data != q_layer_data.round()).any():
            raise Exception("Quantized weights of {} layer include non-integer values".format(layer.__class__.__name__))

# Quantize the 4-bit copy of the network.
quantize_layer_weights_4(net_q4)

tensor([[[[ -2.,   0.,   0.,  -1.,  -0.],
          [ -3.,  -4.,  -2.,  -0.,  -3.],
          [ -3.,  -7.,  -7.,  -6.,  -4.],
          [ -3.,  -3.,  -2.,  -1.,  -4.],
          [  3.,   5.,   4.,   4.,   1.]],

         [[  0.,   0.,   1.,   3.,   0.],
          [  0.,   1.,  -1.,  -1.,  -1.],
          [ -3.,  -2.,  -5.,  -4.,  -4.],
          [ -0.,   0.,   1.,  -1.,  -2.],
          [  3.,   5.,   6.,   4.,   2.]],

         [[  1.,   4.,   3.,   1.,   3.],
          [  2.,   2.,   2.,   0.,   3.],
          [ -0.,  -2.,  -2.,  -4.,  -1.],
          [  0.,   0.,  -2.,   0.,  -0.],
          [  3.,   2.,   1.,   1.,   0.]]],


        [[[  5.,   6.,   7.,   6.,   3.],
          [  5.,   5.,   6.,   3.,   4.],
          [ -1.,   0.,  -4.,  -3.,  -1.],
          [ -3.,  -3.,  -4.,  -4.,  -2.],
          [ -4.,  -3.,  -2.,  -2.,  -3.]],

         [[ -1.,   2.,   2.,   1.,   1.],
          [ -1.,  -1.,  -1.,  -0.,  -4.],
          [ -2.,  -4.,  -4.,  -5.,  -4.],
          [ -1.,  -2.,  

In [None]:
def quantize_layer_weights_2(model: nn.Module):
    '''
    Quantize, in place, the weights of every direct Conv2d / Linear child of `model`.

    For each such layer the weights are replaced by the output of
    `quantized_weights_2`, the scaling factor is stored on `layer.weight.scale`,
    and the quantized tensor is printed.

    Raises:
    Exception: If any quantized value lies outside [-2**N, 2**N - 1] with N = 2,
               or is not an integer.
    '''
    N = 2
    for layer in model.children():
        if not isinstance(layer, (nn.Conv2d, nn.Linear)):
            continue

        q_layer_data, scale = quantized_weights_2(layer.weight.data)
        q_layer_data = q_layer_data.to(device)

        layer.weight.data = q_layer_data
        layer.weight.scale = scale

        print(q_layer_data)

        # NOTE: these bounds follow the assignment template (-2**N .. 2**N-1),
        # which is looser than the two's complement range of an N-bit integer.
        if (q_layer_data < -2**N).any() or (q_layer_data > 2**N-1).any():
            # Report the bit width actually being checked (was hard-coded to "8-bit").
            raise Exception("Quantized weights of {} layer include values out of bounds for a {}-bit signed integer".format(layer.__class__.__name__, N))
        if (q_layer_data != q_layer_data.round()).any():
            raise Exception("Quantized weights of {} layer include non-integer values".format(layer.__class__.__name__))

# Quantize the 2-bit copy of the network.
quantize_layer_weights_2(net_q2)

tensor([[[[-0.,  0.,  0., -0., -0.],
          [-1., -1., -0., -0., -0.],
          [-0., -1., -1., -1., -1.],
          [-1., -0., -0., -0., -1.],
          [ 1.,  1.,  1.,  1.,  0.]],

         [[ 0.,  0.,  0.,  0.,  0.],
          [ 0.,  0., -0., -0., -0.],
          [-0., -0., -1., -1., -1.],
          [-0.,  0.,  0., -0., -0.],
          [ 1.,  1.,  1.,  1.,  0.]],

         [[ 0.,  1.,  0.,  0.,  0.],
          [ 0.,  0.,  0.,  0.,  0.],
          [-0., -0., -0., -1., -0.],
          [ 0.,  0., -0.,  0., -0.],
          [ 1.,  0.,  0.,  0.,  0.]]],


        [[[ 1.,  1.,  1.,  1.,  1.],
          [ 1.,  1.,  1.,  1.,  1.],
          [-0.,  0., -1., -1., -0.],
          [-0., -1., -1., -1., -0.],
          [-1., -1., -0., -0., -1.]],

         [[-0.,  0.,  0.,  0.,  0.],
          [-0., -0., -0., -0., -1.],
          [-0., -1., -1., -1., -1.],
          [-0., -0., -1., -0., -0.],
          [-0.,  0.,  0.,  0.,  0.]],

         [[-0., -0., -0.,  0., -0.],
          [ 0.,  0.,  0., 

## Question 2:

Using previous examples from the class on how to measure the performance of CNN on hardware, record the **energy (J), runtime (s), and test accuracy (%)** of the original, full-precision network and all of the quantized versions of the network. Run your test 5 times for the original/each quantized network and report the average results for each network.

Note: To access the accuracy of the network use the example below:

In [None]:
!pip install nvidia-ml-py
import psutil
import pynvml
import time

# ADD YOUR CODE HERE FOR ENERGY, RUNTIME, AND TEST ACCURACY OF ORIGINAL AND EACH QUANTIZED NETWORK
def get_current_energy():
    '''
    Return the current total-energy counter reading of GPU 0, as reported by NVML.

    NVML is (re)initialised on every call before the device handle is looked up.

    NOTE(review): NVML documents this counter as millijoules accumulated since
    the driver was last reloaded; divide a difference of two readings by 1000
    to report joules. Confirm against the installed NVML version.
    '''
    pynvml.nvmlInit()
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(
        pynvml.nvmlDeviceGetHandleByIndex(0))

def print_res(timeBeg,timeEnd,energyBeg,energyEnd):
    '''
    Print the energy and time consumed between two measurement points.

    Parameters:
    timeBeg, timeEnd: Timestamps taken before and after the measured work.
    energyBeg, energyEnd: Energy readings taken before and after the measured work.

    The differences are printed as-is, in whatever units the inputs use
    (NOTE(review): readings from `get_current_energy` are presumably
    millijoules, not joules; confirm before reporting).
    '''
    print("Energy", energyEnd - energyBeg)
    print("Time", timeEnd - timeBeg)

# Score each network once on the test set. Every run is bracketed by a
# wall-clock timestamp and a GPU energy-counter reading; the deltas and the
# resulting accuracy are printed under the network's header.
for _header, _network in (
    ("\nOriginal: \n", net),
    ("\nN = 8: \n", net_q8),
    ("\nN = 4: \n", net_q4),
    ("\nN = 2: \n", net_q2),
):
    timeBeg = time.time()
    energyBeg = get_current_energy()
    accuracy = test(_network, testloader)
    timeEnd = time.time()
    energyEnd = get_current_energy()

    print(_header)
    print_res(timeBeg, timeEnd, energyBeg, energyEnd)
    print("Accuracy of the network: {}%".format(accuracy))



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/

Original: 

Energy 297077
Time 9.109869241714478
Accuracy of the network: 52.5%

N = 8: 

Energy 291340
Time 8.954063415527344
Accuracy of the network: 52.87%

N = 4: 

Energy 272107
Time 8.298606634140015
Accuracy of the network: 52.05%

N = 2: 

Energy 284931
Time 8.673094034194946
Accuracy of the network: 10.0%
