# **Basic Setup**

In [1]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import easydict
from torchsummary import summary
from google.colab import drive
from copy import deepcopy
import json

# argument parser
import easydict

args = easydict.EasyDict({
        "batch_size": 32,
        "epochs": 10,
        "lr": 0.01,
})
# Hyper Parameters
input_size = 784
num_classes = 10
num_epochs = args.epochs
batch_size = args.batch_size
learning_rate = args.lr

# MNIST Dataset (Images and Labels)
train_set = dsets.FashionMNIST(
    root = './data/FashionMNIST',
    train = True,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()
    ])
)
test_set = dsets.FashionMNIST(
    root = './data/FashionMNIST',
    train = False,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()
    ])
)


# Dataset Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset = train_set,
        batch_size = batch_size,
        shuffle = True)

test_loader = torch.utils.data.DataLoader(dataset = test_set,
        batch_size = batch_size,
        shuffle = False)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:02<00:00, 10.9MB/s]


Extracting ./data/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 170kB/s]


Extracting ./data/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:01<00:00, 2.85MB/s]


Extracting ./data/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 5.52MB/s]

Extracting ./data/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/FashionMNIST/raw






# **Neural Net**

# Original net

In [18]:
class MyConvNet(nn.Module):
    def __init__(self, args):
        super(MyConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.act1  = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.act2  = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.lin2  = nn.Linear(7*7*32, 10, bias=False)

    def forward(self, x):
        x = self.conv1(x)
        x = self.act1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.act2(x)
        x = self.pool2(x)
        x = x.view(x.size(0), -1)
        x = self.lin2(x)
        return x

    # Export weights as a json file
    def save_weights(self, path):
        #torch.save(self.state_dict(), path)
        stateDictionary = self.state_dict()
        stateDictionary_export = {}
        for name,_ in model.named_parameters():
            layer_stateDictionary = stateDictionary[name]
            #print("Orig size: ", layer_stateDictionary.size())
            layer_stateDictionary_listView = layer_stateDictionary.tolist()
            #print("List size: ", len(layer_stateDictionary_listView))
            stateDictionary_export[name] = layer_stateDictionary_listView

        # Export weights as a json file
        with open(path, "w") as outfile:
            json.dump(stateDictionary_export, outfile)

model = MyConvNet(args)
model = model.cuda()

criterion = nn.CrossEntropyLoss()
criterion=criterion.cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 5e-4, momentum=0.9)

# Pruned Net

In [3]:
class MyConvNet_pruned(nn.Module):
    def __init__(self, args):
        numChannels_conv1_output = 12
        numChannels_conv2_output = 11

        super(MyConvNet_pruned, self).__init__()
        self.conv1 = nn.Conv2d(1, numChannels_conv1_output, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.act1  = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(numChannels_conv1_output, numChannels_conv2_output, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.act2  = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.lin2  = nn.Linear(7*7*numChannels_conv2_output, 10, bias=False)

    def forward(self, x):
        x = self.conv1(x)
        x = self.act1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.act2(x)
        x = self.pool2(x)
        x = x.view(x.size(0), -1)
        x = self.lin2(x)
        return x

model_pruned = MyConvNet_pruned(args)
model_pruned = model_pruned.cuda()

In [4]:
print("Initial model summary")
summary(model, (1, 28, 28))

print("Pruned model summary")
summary(model_pruned, (1, 28, 28))

Initial model summary
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             144
              ReLU-2           [-1, 16, 28, 28]               0
         MaxPool2d-3           [-1, 16, 14, 14]               0
            Conv2d-4           [-1, 32, 14, 14]           4,608
              ReLU-5           [-1, 32, 14, 14]               0
         MaxPool2d-6             [-1, 32, 7, 7]               0
            Linear-7                   [-1, 10]          15,680
Total params: 20,432
Trainable params: 20,432
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.32
Params size (MB): 0.08
Estimated Total Size (MB): 0.40
----------------------------------------------------------------
Pruned model summary
----------------------------------------------------------------


# **Functions**

## General Functions

In [3]:
def get_modelAccuracy(model, dataset_loader):
    correct = 0
    total = 0

    for images, labels in dataset_loader:
        images = images.cuda()
        labels = labels.cuda()

        outputs = model(images)

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    return (100 * correct / total)


def loadModel():
    drive.mount('/content/drive')

    !cp "/content/drive/My Drive/saved_myconvnet_lab4.pt" "./pretrainedModel"

    drive.flush_and_unmount()

    global load_model
    load_model = MyConvNet(args)
    load_model.load_state_dict(torch.load('./pretrainedModel'))

    load_model = load_model.cuda()
    load_model.eval()


def pruneModel(module, dim, amount):
    # dim = 0 --> channel pruning
    # dim = 2 --> filter row pruning
    # dim = 3 --> filter column pruning

    prune.ln_structured(module, name="weight", amount=amount,  n=1, dim=dim)
    #print("Post pruning")
    #print(list(module.named_parameters()))
    #print(module.weight)

def print_testAccuracy(test_loader):
    accuracy = get_modelAccuracy(test_loader).data.item()
    print('Accuracy for test images: % d %%' % accuracy)

def as_list(x):
    if type(x) is list:
        return x
    else:
        return [x]

def evaluateModelAccuracy(model, stateDictionary, test_loader):
    # Load state dict
    model.load_state_dict(stateDictionary)
    # Test the Model
    accuracy = get_modelAccuracy(model, test_loader)
    return accuracy

## Quantization Functions

In [4]:
def USquantize(x,bits=4):
  # Uniform symetric quantizer that
  # quantize x into sf * qx
  # sf: scaling factor
  # qx: integer in range [-2^(bits-1)+1, 2^(bits-1)-1]
  # note: only 2^bits - 1 different values can be represented, bits >= 2

  max_value = torch.max(torch.abs(x))
  sf = max_value / (2**(bits-1) - 1)
  qx = torch.round(x/sf)
  qx = torch.clip(qx,min=-2**(bits-1) + 1,max=2**(bits-1) - 1)
  dqx = qx * sf
  return dqx

def UASquantize(x,bits=4):
  # Uniform Asymetric quantizer that
  # quantize x into min_value + sf * qx
  # sf: scaling factor
  # qx: integer in range [0,2^bits-1]
  # note: 2^bits different values can be represented, bits >= 1

  max_value = torch.max(x)
  min_value = torch.min(x)
  sf = (max_value - min_value) / (2**bits - 1)
  qx = torch.round((x-min_value)/sf)
  qx = torch.clip(qx,min=0,max=2**(bits) - 1)
  dqx = min_value + qx * sf
  return dqx

def UASquantizeMinOffset(x,p,bits=4):
  # Uniform Asymetric quantizer that
  # quantize x into min_value + sf * qx
  # sf: scaling factor
  # p: percent offset away from the native min. Can be negative or positive
  # qx: integer in range [0,2^bits-1]
  # note: 2^bits different values can be represented, bits >= 1

  max_value = torch.max(x)
  min_value = torch.min(x)
  # update min_value to be set by p
  min_value = min_value * p
  sf = (max_value - min_value) / (2**bits - 1)
  qx = torch.round((x-min_value)/sf)
  qx = torch.clip(qx,min=0,max=2**(bits) - 1)
  dqx = min_value + qx * sf
  return dqx


def USquantize_clipped(x,bits=4,quantile=0.999):

  # Uniform Symetric quantizer with clipped representation range
  # range covers 'quantile' percent of FP32 x
  max_value = torch.quantile(x, 1 - 0.5 * (1 - quantile))  # Upper quantile
  min_value = torch.quantile(x, 0.5 * (1 - quantile))      # Lower quantile
  quantile_max = max(abs(max_value), abs(min_value))  # Ensure symmetry around zero
  sf = quantile_max / (2 ** (bits - 1) - 1)
  qx = torch.round(x/sf)
  qx = torch.clip(qx,min=-2**(bits-1) + 1,max=2**(bits-1) - 1)
  dqx = qx * sf
  return dqx

def UASquantize_clipped(x,bits=4,quantile=0.999):

  # Uniform Asymetric quantizer with clipped representation range
  # range covers 'quantile' percent of FP32 x

  max_value = torch.quantile(x,1-0.5*(1-quantile))
  min_value = torch.quantile(x,0.5*(1-quantile))
  sf = (max_value - min_value) / (2**bits - 1)
  qx = torch.round((x-min_value)/sf)
  qx = torch.clip(qx,min=0,max=2**(bits) - 1)
  dqx = min_value + qx * sf
  return dqx

def quantize(scheme, values, bits=4):
    if scheme == "FullRange_Symmetric":
        values_quantized = USquantize(values, bits=bits)
    elif scheme == "FullRange_Asymmetric":
        values_quantized = UASquantize(values, bits=bits)
    elif scheme == "OptimalRange_Symmetric":
        values_quantized = USquantize_clipped(values, bits=bits)
    elif scheme == "OptimalRange_Asymmetric":
        values_quantized = UASquantize_clipped(values, bits=bits)
    else:
        print("ERROR:: Unexpected quantization scheme: ", scheme)

    return values_quantized

# **Training**

In [None]:
print("---Training started")
# Training the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.cuda()
        labels = Variable(labels).cuda()

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        L1norm = model.parameters()
        arr = []
        for name,param in model.named_parameters():
          if 'weight' in name.split('.'):
            arr.append(param)
        L1loss = 0
        for Losstmp in arr:
          L1loss = L1loss+Losstmp.abs().mean()

        loss.backward()
        optimizer.step()

        if (i + 1) % 600 == 0:
            print('Epoch: [% d/% d], Step: [% d/% d], Loss: %.4f'
                    % (epoch + 1, num_epochs, i + 1,
                       len(train_set) // batch_size, loss.data.item()))

correct = 0
total = 0
model.eval()
for images, labels in test_loader:
    images = images.cuda()
    labels = labels.cuda()
    outputs = model(images)
    testloss = criterion(outputs, labels)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Accuracy for test images: % d %%' % (100 * correct / total))

---Training started
Epoch: [ 1/ 10], Step: [ 600/ 1875], Loss: 0.5599
Epoch: [ 1/ 10], Step: [ 1200/ 1875], Loss: 0.2484
Epoch: [ 1/ 10], Step: [ 1800/ 1875], Loss: 0.3158
Epoch: [ 2/ 10], Step: [ 600/ 1875], Loss: 0.5228
Epoch: [ 2/ 10], Step: [ 1200/ 1875], Loss: 0.2680
Epoch: [ 2/ 10], Step: [ 1800/ 1875], Loss: 0.4812
Epoch: [ 3/ 10], Step: [ 600/ 1875], Loss: 0.2206
Epoch: [ 3/ 10], Step: [ 1200/ 1875], Loss: 0.3994
Epoch: [ 3/ 10], Step: [ 1800/ 1875], Loss: 0.1739
Epoch: [ 4/ 10], Step: [ 600/ 1875], Loss: 0.2520
Epoch: [ 4/ 10], Step: [ 1200/ 1875], Loss: 0.4312
Epoch: [ 4/ 10], Step: [ 1800/ 1875], Loss: 0.2988
Epoch: [ 5/ 10], Step: [ 600/ 1875], Loss: 0.2379
Epoch: [ 5/ 10], Step: [ 1200/ 1875], Loss: 0.2737
Epoch: [ 5/ 10], Step: [ 1800/ 1875], Loss: 0.1225
Epoch: [ 6/ 10], Step: [ 600/ 1875], Loss: 0.2011
Epoch: [ 6/ 10], Step: [ 1200/ 1875], Loss: 0.3740
Epoch: [ 6/ 10], Step: [ 1800/ 1875], Loss: 0.1236
Epoch: [ 7/ 10], Step: [ 600/ 1875], Loss: 0.2469
Epoch: [ 7/ 10], S

# **Save Model**

In [None]:
PATH = "./saved_myconvnet_lab4.pt"
torch.save(model.state_dict(), PATH)

from google.colab import drive
drive.mount('/content/drive')

!cp "./saved_myconvnet_lab4.pt" "/content/drive/My Drive"

drive.flush_and_unmount()

# **Load Model**

In [5]:
load_model = None
loadModel()

Mounted at /content/drive


  load_model.load_state_dict(torch.load('./pretrainedModel'))


In [6]:
# # Validate pruning implementation
# loadModel()
# module = load_model.conv1
# weights = module.weight.data
# size = module.weight.size()
# dims = [i for i in range(len(size))]

# sum_index = {}
# for i0 in range(size[3]):
#     innerSum = 0
#     for i1 in range(size[1]):
#         for i2 in range(size[0]):
#             for i3 in range(size[2]):
#                 innerSum += abs(weights[i2][i1][i3][i0].item())

#     #print(i0, ": ", innerSum)
#     sum_index[innerSum] = i0

# sortedSums = sorted(sum_index.keys())
# print("Total #sums = ", len(sortedSums))
# for sum in sortedSums:
#     index = sum_index[sum]
#     print(index, " : ", sum)


# **Test**

In [None]:
accuracy = get_modelAccuracy(test_loader).data.item()
print('Accuracy for test images: % d %%' % accuracy)
#summary(model, (1, 28, 28))

Accuracy for test images:  90 %


# **Prune**

## Testbench

In [None]:
testVectorsConv1_dim_amount = [
    [0, 1/12],
    [2, 1/3],
    [3, 1/3]
]

testVectorsConv2_dim_amount = [
    [0, 1/15],
    [2, 1/3],
    [3, 1/3]
]

testVectorsOfLayer = {
    "conv1" : testVectorsConv1_dim_amount,
    "conv2" : testVectorsConv2_dim_amount
}

for layer in ["conv2", "conv1"]:
    print("Pruning layer: ", layer)

    for dim, amount in testVectorsOfLayer[layer]:
        loadModel()
        #pruneModel(module=load_model.conv2, dim=0, amount=5/32)
        #pruneModel(module=load_model.conv1, dim=0, amount=1/16)
        #pruneModel(module=load_model.conv2, dim=0, amount=12/32)
        pruneModel(module=load_model.conv1, dim=0, amount=4/16)
        pruneModel(module=load_model.conv2, dim=0, amount=17/32)
        accuracy_baseline = get_modelAccuracy(load_model, test_loader)

        if layer == "conv1":
            module = load_model.conv1
        elif layer == "conv2":
            module = load_model.conv2

        pruneModel(module=module, dim=dim, amount=amount)
        print("Pruned dim = ", dim, ", amount = ", amount)
        accuracy_pruned = get_modelAccuracy(load_model, test_loader)
        print("Accuracy for test images: %.2f%% --> %.2f%%" % (accuracy_baseline, accuracy_pruned))

Pruning layer:  conv2
Mounted at /content/drive


  load_model.load_state_dict(torch.load('./pretrainedModel'))


Pruned dim =  0 , amount =  0.06666666666666667
Accuracy for test images: 86.70% --> 85.54%
Mounted at /content/drive
Pruned dim =  2 , amount =  0.3333333333333333
Accuracy for test images: 86.70% --> 80.45%
Mounted at /content/drive
Pruned dim =  3 , amount =  0.3333333333333333
Accuracy for test images: 86.70% --> 81.75%
Pruning layer:  conv1
Mounted at /content/drive
Pruned dim =  0 , amount =  0.08333333333333333
Accuracy for test images: 86.70% --> 83.44%
Mounted at /content/drive
Pruned dim =  2 , amount =  0.3333333333333333
Accuracy for test images: 86.70% --> 75.55%
Mounted at /content/drive
Pruned dim =  3 , amount =  0.3333333333333333
Accuracy for test images: 86.70% --> 65.46%


## Final Pruned Model

In [None]:
loadModel()
accuracy_baseline = get_modelAccuracy(test_loader).data.item()
print("Initial model summary")
summary(load_model, (1, 28, 28))

pruneModel(module=load_model.conv1, dim=0, amount=4/16)
pruneModel(module=load_model.conv2, dim=0, amount=17/32)
accuracy_pruned = get_modelAccuracy(test_loader).data.item()
print("Accuracy for test images: %d%% --> %d%%" % (accuracy_baseline, accuracy_pruned))

prune.remove(module=load_model.conv1, name='weight')
prune.remove(module=load_model.conv2, name='weight')
print("Pruned model summary")
summary(load_model, (1, 28, 28))


Mounted at /content/drive


  load_model.load_state_dict(torch.load('./pretrainedModel'))


Initial model summary
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             144
              ReLU-2           [-1, 16, 28, 28]               0
         MaxPool2d-3           [-1, 16, 14, 14]               0
            Conv2d-4           [-1, 32, 14, 14]           4,608
              ReLU-5           [-1, 32, 14, 14]               0
         MaxPool2d-6             [-1, 32, 7, 7]               0
            Linear-7                   [-1, 10]          15,680
Total params: 20,432
Trainable params: 20,432
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.32
Params size (MB): 0.08
Estimated Total Size (MB): 0.40
----------------------------------------------------------------
Accuracy for test images: 90% --> 86%
Pruned model summary
---------------------------

# **Quantize**

## Weight Quantization

In [23]:
q_model = None

def quantize_weights(load_model):
    global q_model
    q_model = deepcopy(load_model)
    sd = load_model.state_dict()
    q_sd = q_model.state_dict()
    cuda_available =torch.cuda.is_available()

    # Switches for quantization scheme:
                #   FullRange_Symmetric, FullRange_Asymmetric, OptimalRange_Symmetric
    config = {
        "conv1.weight" : {
            "enable_quantization" : True,
            #"numBits" : [4, 8, 12, 16, 32],
            "numBits" : 8,
            #"quantizationSchemes" : ["FullRange_Symmetric", "FullRange_Asymmetric", "OptimalRange_Symmetric", "OptimalRange_Asymmetric"]
            "quantizationSchemes" : "FullRange_Asymmetric"
        },

        "conv2.weight" : {
            "enable_quantization" : True,
            #"numBits" : [4, 8, 12, 16, 32],
            "numBits" : 4,
            #"quantizationSchemes" : ["FullRange_Symmetric", "FullRange_Asymmetric", "OptimalRange_Symmetric", "OptimalRange_Asymmetric"]
            "quantizationSchemes" : "OptimalRange_Symmetric"
        },

        "lin2.weight" : {
            "enable_quantization" : True,
            "numBits" : 4,
            #"quantizationSchemes" : ["FullRange_Symmetric", "FullRange_Asymmetric", "OptimalRange_Symmetric", "OptimalRange_Asymmetric"]
            "quantizationSchemes" : "FullRange_Asymmetric"
        }
    }

    # Iterate over layers
    for name in ["lin2.weight", "conv2.weight", "conv1.weight"]:
        print()
        if (not config[name]["enable_quantization"]):
            continue
        print("Quantizing ", name)
        numBits             = as_list(config[name]["numBits"])
        quantizationSchemes = as_list(config[name]["quantizationSchemes"])

        # Iterate over quantization schemes:
        for scheme in quantizationSchemes:
            print("  ", scheme)

            # Iterate over number of bits
            for n_bits in numBits:
                print("    ", n_bits, " bits")
                q_sd[name] = quantize(scheme, sd[name], bits=n_bits)

                if len(numBits) > 1:
                    accuracy = evaluateModelAccuracy(q_model, q_sd, test_loader)
                    print('        Test accuracy: %.2f %%' % (accuracy))

            if len(quantizationSchemes) > 1:
                accuracy = evaluateModelAccuracy(q_model, q_sd, test_loader)
                print('      Test accuracy: %.2f %%' % (accuracy))

    accuracy = evaluateModelAccuracy(q_model, q_sd, test_loader)
    print('Test accuracy: %.2f %%' % (accuracy))

load_model = None
for i in range(2):
    loadModel()
    quantize_weights(load_model)

    # Save weights after quantization
    q_model.save_weights("./lab3_quantized")

    pruneModel(module=q_model.conv2, dim=0, amount=21/32)
    pruneModel(module=q_model.conv1, dim=0, amount=4/16)

    # Make the pruning 'permanent' so it can be saved
    prune.remove(q_model.conv2, 'weight')
    prune.remove(q_model.conv1, 'weight')
    q_model.save_weights("./lab3_quantized_pruned")

    accuracy_pruned = get_modelAccuracy(q_model, test_loader)
    print('Test accuracy after pruning: %.2f %%' % (accuracy_pruned))

    if i == 0:
        pruneModel(module=q_model.conv2, dim=0, amount=1/11)
        accuracy_pruned = get_modelAccuracy(q_model, test_loader)
        print('Test accuracy after further pruning conv2: %.2f %%' % (accuracy_pruned))
    elif i == 1:
        pruneModel(module=q_model.conv1, dim=0, amount=1/12)
        accuracy_pruned = get_modelAccuracy(q_model, test_loader)
        print('Test accuracy after further pruning conv1: %.2f %%' % (accuracy_pruned))

# Save models to Drive
drive.mount('/content/drive')

!cp "./lab3_quantized" "/content/drive/My Drive"
!cp "./lab3_quantized_pruned" "/content/drive/My Drive"

drive.flush_and_unmount()

Mounted at /content/drive


  load_model.load_state_dict(torch.load('./pretrainedModel'))



Quantizing  lin2.weight
   FullRange_Asymmetric
     4  bits

Quantizing  conv2.weight
   OptimalRange_Symmetric
     4  bits

Quantizing  conv1.weight
   FullRange_Asymmetric
     8  bits
Test accuracy: 90.08 %
Test accuracy after pruning: 80.13 %
Test accuracy after further pruning conv2: 80.13 %
Mounted at /content/drive

Quantizing  lin2.weight
   FullRange_Asymmetric
     4  bits

Quantizing  conv2.weight
   OptimalRange_Symmetric
     4  bits

Quantizing  conv1.weight
   FullRange_Asymmetric
     8  bits
Test accuracy: 90.08 %
Test accuracy after pruning: 80.13 %
Test accuracy after further pruning conv1: 80.13 %
Mounted at /content/drive


## Activation and Weight Quantization

In [None]:
#ACTIVATION AND Wright Quantization
# Deepcopy the model for quantization
quantized_model = deepcopy(load_model)

# Simulate quantization on the deepcopy (for evaluation)
def apply_quantization_to_model(model, bits=4):
    model.eval()  # Set to evaluation mode

    # Quantize the weights of the model
    with torch.no_grad():  # No need to track gradients during quantization
        # Quantize the weights of each layer
        model.conv1.weight.data = UASquantize(model.conv1.weight.data, bits)
        model.conv2.weight.data = UASquantize(model.conv2.weight.data, bits)
        model.lin2.weight.data = UASquantize(model.lin2.weight.data, bits)

    # Apply quantization to activations after each layer
    def quantized_forward(x):
        x = model.conv1(x)
        x = model.act1(x)
        x = UASquantize(x, bits)  # Quantize activations after conv1
        x = model.pool1(x)

        x = model.conv2(x)
        x = model.act2(x)
        x = UASquantize(x, bits)  # Quantize activations after conv2
        x = model.pool2(x)

        x = x.view(x.size(0), -1)
        x = model.lin2(x)
        x = UASquantize(x, bits)  # Quantize activations after FC layer
        return x

    # Replace the model's forward pass with the quantized forward pass
    model.forward = quantized_forward

for n_bits in (4, 8, 12, 16):
    # Apply quantization to the copied model
    apply_quantization_to_model(quantized_model, bits=n_bits)

    # Now you can evaluate both models
    # Example: Evaluate the original model and the quantized model on some test data
    def evaluate_model(model, test_loader):
        model.eval()  # Set to evaluation mode

        accuracy = get_modelAccuracy(model, test_loader)
        return accuracy

    # Assuming `test_loader` is your DataLoader for test data
    original_accuracy = evaluate_model(load_model, test_loader)
    quantized_accuracy = evaluate_model(quantized_model, test_loader)

    print("With %d-bit quantization:" % (n_bits))
    print(f"  Original Model Accuracy: {original_accuracy:.2f}%")
    print(f"  Quantized Model Accuracy: {quantized_accuracy:.2f}%")

With 4-bit quantization:
  Original Model Accuracy: 90.39%
  Quantized Model Accuracy: 88.92%
With 8-bit quantization:
  Original Model Accuracy: 90.39%
  Quantized Model Accuracy: 89.79%
With 12-bit quantization:
  Original Model Accuracy: 90.39%
  Quantized Model Accuracy: 89.75%
With 16-bit quantization:
  Original Model Accuracy: 90.39%
  Quantized Model Accuracy: 89.75%
