This notebook implements a custom AlexNet and measures:


1.   Sparsity of the weights (static, measured once)
2.   Layer-wise CONV-layer activation sparsities
3.   Accuracy of the model
4.   Layer-wise number of MAC operations



# imports

In [17]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
import time

# Device configuration
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Pinned to CPU: the custom convolution routines in this notebook allocate
# their output buffers with plain torch.zeros / torch.empty (no device
# argument) and pass tensors to NumPy helpers.
device = 'cpu'

# Data

In [18]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True):
    """Build CIFAR-10 training and validation loaders from the training split.

    Both loaders read the same `train=True` dataset; disjoint index subsets
    are drawn through `SubsetRandomSampler`.

    Args:
        data_dir: Directory where CIFAR-10 is stored / downloaded.
        batch_size: Number of samples per batch for both loaders.
        augment: If true, apply random crop + horizontal flip to the
            training images before resizing.
        random_seed: Seed passed to `np.random.seed` before shuffling the
            indices (only used when `shuffle` is true). Note that this
            reseeds NumPy's global generator.
        valid_size: Fraction of the training split used for validation,
            in the range [0, 1].
        shuffle: Whether to shuffle the indices before splitting.

    Returns:
        Tuple `(train_loader, valid_loader)`.

    Raises:
        ValueError: If `valid_size` is outside [0, 1].
    """
    if not 0 <= valid_size <= 1:
        raise ValueError("valid_size should be in the range [0, 1]")

    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            # Augment at the native 32x32 CIFAR resolution first ...
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            # ... then resize so augmented batches have the same 227x227
            # size as the validation and non-augmented pipelines.
            transforms.Resize((227, 227)),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize((227, 227)),
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset (same underlying split, different transforms)
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=train_transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=valid_transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # The first `split` indices go to validation, the remainder to training.
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


def get_test_loader(data_dir,
                    batch_size,
                    shuffle=True):
    """Build a loader over the CIFAR-10 test split.

    Images are resized to 227x227 and converted to tensors. No mean/std
    normalization is applied (presumably to match the
    'alexnet_unnormalised.pth' checkpoint loaded later in this notebook —
    confirm before re-enabling normalization).

    Args:
        data_dir: Directory where CIFAR-10 is stored / downloaded.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader shuffles the test set.

    Returns:
        A `torch.utils.data.DataLoader` over the test split.
    """
    # define transform
    transform = transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
    ])

    dataset = datasets.CIFAR10(
        root=data_dir, train=False,
        download=True, transform=transform,
    )

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle
    )

    return data_loader


# CIFAR10 dataset
# batch_size = 1: the custom predictive convolution used by the model reads
# only sample 0 of its input (img[0]) and allocates a batch-of-one output.
train_loader, valid_loader = get_train_valid_loader(data_dir = './data', batch_size = 1, augment = False, random_seed = 1)
test_loader = get_test_loader(data_dir = './data', batch_size = 1)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


# Model

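**Custom conv2d functions** (dense baseline, sparse, predictive, sparse + predictive, and two-sided sparse + predictive variants)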

In [52]:
def myconv2d(input, weight, bias=None, stride=(1,1), padding=(0,0), dilation=(1,1), groups=1):
    """
    Function to process an input with a standard (dense) convolution.

    The convolution is computed as unfold + matrix multiply and the number
    of multiplications of the dense computation is returned alongside it.

    Args:
        input: Tensor of shape (batch, in_channels, height, width).
        weight: Tensor of shape (out_channels, in_channels, kh, kw).
        bias: Optional tensor of shape (out_channels,).
        stride, padding, dilation: (vertical, horizontal) pairs.
        groups: Accepted for signature compatibility; not used by this
            implementation.

    Returns:
        Tuple `(out, mul_count)` where `out` is a float32 tensor of shape
        (batch, out_channels, out_h, out_w) and `mul_count` is
        batch * out_channels * out_h * out_w * in_channels * kh * kw.
    """
    batch_size, _, in_h, in_w = input.shape
    out_channels, in_channels, kh, kw = weight.shape

    # Effective kernel extent once dilation is taken into account; Unfold
    # applies the same dilation, so the output size must agree with it.
    eff_kh = dilation[0] * (kh - 1) + 1
    eff_kw = dilation[1] * (kw - 1) + 1
    out_h = (in_h + 2 * padding[0] - eff_kh) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - eff_kw) // stride[1] + 1

    unfold = torch.nn.Unfold(kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride)
    inp_unf = unfold(input)                       # (batch, in_channels*kh*kw, out_h*out_w)
    w_ = weight.view(weight.size(0), -1).t()      # (in_channels*kh*kw, out_channels)

    out_unf = inp_unf.transpose(1, 2).matmul(w_)  # (batch, out_h*out_w, out_channels)
    if bias is not None:
        out_unf = out_unf + bias
    out_unf = out_unf.transpose(1, 2)

    # The dense multiplication count does not depend on the bias.
    mul_count = batch_size*out_channels*out_h*out_w*in_channels*kh*kw
    out = out_unf.view(batch_size, out_channels, out_h, out_w)
    return (out.float(), mul_count)

##############################################################################################

class comp_vector():
    """Compressed per-channel representation of a 3-D tensor.

    For every channel of `arr` (indexed along dim 0) the flat indices of the
    non-zero entries and the corresponding values are stored.

    Attributes (set in __init__):
        x: size of dim 2 (number of columns / width).
        y: size of dim 1 (number of rows / height).
        c: size of dim 0 (number of channels).
        index_vector: list with one NumPy array of flat (row-major)
            non-zero indices per channel.
        data_vector: list with one 1-D tensor of non-zero values per channel.
    """
    def __init__(self, arr):
        self.x = arr.size(dim=2)
        self.y = arr.size(dim=1)
        self.c = arr.size(dim=0)
        self.index_vector = []
        self.data_vector = []
        for i in range(self.c):
            # detach() so the NumPy conversion also works on tensors that
            # track gradients.
            channel = arr[i].detach().cpu()
            nonzero_idx = np.flatnonzero(channel.numpy())
            self.index_vector.append(nonzero_idx)
            self.data_vector.append(channel.reshape(-1)[torch.from_numpy(nonzero_idx)])

    def get_index_vector(self):
        """Return the list of per-channel flat non-zero indices."""
        return self.index_vector

    def get_data_vector(self):
        """Return the list of per-channel non-zero values."""
        return self.data_vector


def conv_compressed(comp_inp, comp_wt, stride=1):
    """Convolve one compressed input with one compressed filter.

    Only (non-zero input, non-zero weight) pairs are visited; each pair that
    lands on a valid output position costs one multiplication.

    Args:
        comp_inp: `comp_vector` of the (already padded) input, shape (C, H, W).
        comp_wt: `comp_vector` of one filter, shape (C, kh, kw).
        stride: Either a single int used for both axes or a
            (vertical, horizontal) pair.

    Returns:
        Tuple `(acc_buf, mult_count)`: the (out_h, out_w) float accumulator
        and the number of multiplications performed.
    """
    if isinstance(stride, (tuple, list)):
        stride_h, stride_w = stride
    else:
        stride_h = stride_w = stride

    # comp.y is the height (rows) and comp.x the width (columns).
    out_rows = (comp_inp.y - comp_wt.y) // stride_h + 1
    out_cols = (comp_inp.x - comp_wt.x) // stride_w + 1
    acc_buf = torch.zeros(out_rows, out_cols)
    mult_count = 0

    inp_index_vector = comp_inp.get_index_vector()
    inp_data_vector = comp_inp.get_data_vector()
    wt_index_vector = comp_wt.get_index_vector()
    wt_data_vector = comp_wt.get_data_vector()

    for c in range(comp_wt.c):
        # Flat index = row * width + col, so both row and column are decoded
        # with the WIDTH of the respective tensor. Weight positions are
        # decoded once per channel instead of once per input element.
        wt_positions = [divmod(int(idx), comp_wt.x) for idx in wt_index_vector[c]]
        for i, flat_idx in enumerate(inp_index_vector[c]):
            inp_r, inp_c = divmod(int(flat_idx), comp_inp.x)
            for j, (wt_r, wt_c) in enumerate(wt_positions):
                d_r = inp_r - wt_r
                d_c = inp_c - wt_c
                # The pair contributes only if it aligns with a stride step.
                if d_r % stride_h != 0 or d_c % stride_w != 0:
                    continue
                o_r = d_r // stride_h
                o_c = d_c // stride_w
                if 0 <= o_r < out_rows and 0 <= o_c < out_cols:
                    acc_buf[o_r, o_c] += float(inp_data_vector[c][i] * wt_data_vector[c][j])
                    mult_count += 1

    return (acc_buf, mult_count)


def myconv2d_sparse(input, weight, bias=None, stride=(1,1), padding=(0,0), dilation=(1,1), groups=1):
    """Sparse convolution that skips every product with a zero operand.

    Args:
        input: Tensor of shape (batch, in_channels, height, width).
        weight: Tensor of shape (out_channels, in_channels, kh, kw).
        bias: Optional tensor of shape (out_channels,).
        stride, padding: (vertical, horizontal) pairs.
        dilation, groups: Accepted for signature compatibility; not used.

    Returns:
        Tuple `(out, mult_count)`: the (batch, out_channels, out_h, out_w)
        result (allocated on the CPU) and the number of multiplications
        actually performed.
    """
    input = torch.nn.functional.pad(input, (padding[1], padding[1], padding[0], padding[0]), "constant", 0)
    batch_size, _, in_h, in_w = input.shape
    k, _, wt_h, wt_w = weight.shape
    out_h = (in_h - wt_h) // stride[0] + 1
    out_w = (in_w - wt_w) // stride[1] + 1
    out = torch.empty(size=(batch_size, k, out_h, out_w))

    # The compressed filters do not depend on the sample, build them once.
    comp_wts = [comp_vector(weight[i]) for i in range(k)]

    mult_count = 0
    for n in range(batch_size):
        comp_in = comp_vector(input[n])
        for i, comp_wt in enumerate(comp_wts):
            acc, num = conv_compressed(comp_in, comp_wt, stride)
            if bias is not None:
                acc = acc + bias[i]
            out[n, i] = acc
            mult_count += num
    return (out, mult_count)

######################################################################################################

# ALGO 3
def compute_weight_list(kernel):
    """Collect the negative weights of every filter, most negative first.

    Args:
        kernel: Weight tensor of shape (filters, depth, height, width).

    Returns:
        A list with one entry per filter; each entry is a list of
        `(weight, channel, row, col)` tuples sorted by ascending weight.
    """
    kernels = []
    filter_count, depth, height, width = kernel.shape
    for f in range(filter_count):
        weight_list = []
        for k in range(depth):
            for i in range(height):
                for j in range(width):
                    w = kernel[f][k][i][j]
                    if w < 0:
                        weight_list.append((w, k, i, j))
        kernels.append(sorted(weight_list, key=lambda x: x[0]))
    return kernels


def compute_conv_onlypred(img, weight_list, weights, r, c, bias=0):
    """Compute one output cell with early termination on the negative weights.

    All positive-weight products and the bias are accumulated first. The
    negative-weight products follow in `weight_list` order and the loop
    stops as soon as the running sum drops below zero.

    NOTE: stopping early only preserves the post-ReLU result when the input
    values are non-negative (then further negative-weight products cannot
    raise the sum again).

    Args:
        img: Padded input of shape (1, depth, H, W); only sample 0 is read.
        weight_list: Sorted negative-weight tuples of this filter, as built
            by `compute_weight_list`.
        weights: One filter of shape (depth, height, width).
        r, c: Top-left row / column of the window in `img`.
        bias: Bias added after the positive products.

    Returns:
        Tuple `(value, mult_count)`; `value` may be a negative partial sum
        when the loop terminated early.
    """
    img_out_cell = 0
    conv_mult_count = 0
    depth, height, width = weights.shape
    for k in range(depth):
        # i walks rows (dim 1) and j walks columns (dim 2) of the filter.
        for i in range(height):
            for j in range(width):
                if weights[k][i][j] > 0:
                    conv_mult_count += 1
                    img_out_cell += img[0][k][r+i][c+j]*weights[k][i][j]

    img_out_cell += bias

    for tup in weight_list:
        conv_mult_count += 1
        img_out_cell += tup[0]*img[0][tup[1]][r+tup[2]][c+tup[3]]
        if img_out_cell < 0:
            break
    return img_out_cell, conv_mult_count


def compute_filter_conv_onlypred(img, weight_list, weights, kernel_id,stride=(1,1), padding=(0,0), bias=0):
    """Slide one filter over the (already padded) image.

    Args:
        img: Padded input of shape (1, depth, H, W).
        weight_list: Sorted negative-weight tuples of this filter.
        weights: One filter of shape (depth, height, width).
        kernel_id: Index of the filter; not used here.
        stride: (vertical, horizontal) step.
        padding: Not used here; the caller pads `img` beforehand.
        bias: Bias of this filter.

    Returns:
        Tuple `(channel, mult_count)` where `channel` has shape
        (out_h, out_w).
    """
    height_out = (img.shape[2]-weights.shape[1])//stride[0]+1
    width_out = (img.shape[3]-weights.shape[2])//stride[1]+1
    # Rows first: the buffer is indexed as [r_out][c_out] below.
    img_out_channel = torch.zeros(height_out, width_out)
    filter_mult_count = 0
    for r in range(0,img.shape[2]-weights.shape[1]+1,stride[0]):
        for c in range(0,img.shape[3]-weights.shape[2]+1,stride[1]):
            r_out = r//stride[0]
            c_out = c//stride[1]
            img_out_channel[r_out][c_out], mult_count = compute_conv_onlypred(img, weight_list, weights, r, c, bias)
            filter_mult_count += mult_count
    return img_out_channel, filter_mult_count


def myconv2d_onlypred(img, weights, bias=None, stride=(1,1), padding=(0,0), dilation=(1,1), groups=1):
    """Predictive convolution: dense in the inputs, early exit on negatives.

    Only sample 0 of `img` is processed and the result always has a batch
    dimension of 1. Progress is printed every 8 filters.

    Args:
        img: Tensor of shape (batch, in_channels, height, width).
        weights: Tensor of shape (out_channels, in_channels, kh, kw).
        bias: Optional tensor of shape (out_channels,).
        stride, padding: (vertical, horizontal) pairs.
        dilation, groups: Accepted for signature compatibility; not used.

    Returns:
        Tuple `(out, mult_count)` with `out` of shape
        (1, out_channels, out_h, out_w).
    """
    img = torch.nn.functional.pad(img, (padding[1], padding[1], padding[0], padding[0]), "constant", 0)
    layer_mult_count = 0
    filter_count, _, height, width = weights.shape
    height_out = (img.shape[2]-height)//stride[0]+1
    width_out = (img.shape[3]-width)//stride[1]+1
    img_conv_output = torch.zeros(1, filter_count, height_out, width_out)
    filters_list = compute_weight_list(weights)
    for kernel_id in range(filter_count):
        if kernel_id%8==0:
            print("kernel_id", kernel_id)
        weight_list = filters_list[kernel_id]
        # bias defaults to None in the signature; treat that as "no bias".
        kernel_bias = 0 if bias is None else bias[kernel_id]
        img_conv_channel, mult_count = compute_filter_conv_onlypred(img, weight_list, weights[kernel_id], kernel_id, stride, padding, kernel_bias)
        img_conv_output[0][kernel_id] = img_conv_channel
        layer_mult_count += mult_count
    return (img_conv_output, layer_mult_count)


######################################################################################


# ALGO 4
class comp_vector_pred():
    """Split one filter into its positive and (sorted) negative weights.

    `x` is the size of dim 1, `y` the size of dim 2 and `c` the size of
    dim 0 of `arr`. Every stored entry is a `(weight, channel, i, j)` tuple
    with `i` indexing dim 1 and `j` indexing dim 2; zero weights are dropped.
    Negative entries are ordered by ascending weight.
    """
    def __init__(self, arr):
        self.y = arr.size(dim=2)
        self.x = arr.size(dim=1)
        self.c = arr.size(dim=0)

        positives = []
        negatives = []
        for channel in range(self.c):
            for row in range(self.x):
                for col in range(self.y):
                    value = arr[channel][row][col]
                    entry = (value, channel, row, col)
                    if value > 0:
                        positives.append(entry)
                    elif value < 0:
                        negatives.append(entry)

        # Most negative weights first.
        negatives.sort(key=lambda entry: entry[0])
        self.pos_vector = positives
        self.neg_vector = negatives

    def get_pos_vector(self):
        """Return the positive-weight entries in scan order."""
        return self.pos_vector

    def get_neg_vector(self):
        """Return the negative-weight entries sorted by ascending weight."""
        return self.neg_vector


def compute_conv_sparsepred(input, weight, comp_wt, r, c, bias=0):
    """Compute one output cell from the non-zero weights with early exit.

    Positive-weight products and the bias are accumulated first; the
    negative-weight products follow and the loop stops once the running sum
    is already negative.

    Args:
        input: Padded input of shape (1, channels, H, W); only sample 0 is read.
        weight: One filter of shape (channels, kh, kw).
        comp_wt: Object exposing `get_pos_vector()` / `get_neg_vector()`
            with `(weight, channel, i, j)` tuples for this filter.
        r, c: Top-left row / column of the window in `input`.
        bias: Bias added after the positive products.

    Returns:
        Tuple `(value, mult_count, mult_nonzero)`:
        `mult_count` is the number of multiplications performed here and
        `mult_nonzero` is the number of window positions where both the
        input and the weight are non-zero.
    """
    img_out_cell = 0
    conv_mult_count = 0
    pos = comp_wt.get_pos_vector()
    neg = comp_wt.get_neg_vector()

    k, x, y = weight.shape

    # Count positions where input AND weight are non-zero. Pairing the two
    # non-zero index lists positionally (zip) does not give that count, so
    # compare the masks element-wise instead.
    window = input[0][:k, r:r+x, c:c+y]
    mult_nonzero = int(torch.count_nonzero((window != 0) & (weight != 0)))

    for tup in pos:
        conv_mult_count += 1
        img_out_cell += tup[0]*input[0][tup[1]][r+tup[2]][c+tup[3]]

    img_out_cell += bias

    for tup in neg:
        if img_out_cell < 0:
            break
        conv_mult_count += 1
        img_out_cell += tup[0]*input[0][tup[1]][r+tup[2]][c+tup[3]]

    return img_out_cell, conv_mult_count, mult_nonzero


def compute_filter_conv_sparsepred(input, weights, comp_wt, width_out, height_out,stride=(1,1), padding=(0,0), bias=0):
    """Slide one filter over the padded input using `compute_conv_sparsepred`.

    The result buffer has shape (width_out, height_out) and is indexed by
    (row step, column step). `padding` is not used here.

    Returns:
        Tuple `(channel, mult_count, calc_mult)` with the per-cell counters
        summed over all output positions.
    """
    result = torch.zeros(width_out, height_out)
    total_mults = 0
    total_calc = 0
    row_starts = range(0, input.shape[2] - weights.shape[1] + 1, stride[0])
    col_starts = range(0, input.shape[3] - weights.shape[2] + 1, stride[1])
    for row_step, r in enumerate(row_starts):
        for col_step, c in enumerate(col_starts):
            cell, mults, calc = compute_conv_sparsepred(input, weights, comp_wt, r, c, bias)
            result[row_step][col_step] = cell
            total_mults += mults
            total_calc += calc
    return result, total_mults, total_calc


def myconv2d_sparse_pred(input, weight, bias=None, stride=(1,1), padding=(0,0), dilation=(1,1), groups=1):
    """Weight-sparse predictive convolution.

    Zero weights are skipped and the negative-weight accumulation stops
    early. Only sample 0 of `input` is processed; the result has a batch
    dimension of 1.

    Args:
        input: Tensor of shape (batch, in_channels, height, width).
        weight: Tensor of shape (out_channels, in_channels, kh, kw).
        bias: Optional tensor of shape (out_channels,).
        stride, padding: (vertical, horizontal) pairs.
        dilation, groups: Accepted for signature compatibility; not used.

    Returns:
        Tuple `(out, mult_count)`; `mult_count` is the predictive,
        weight-sparse multiplication count.
    """
    input = torch.nn.functional.pad(input, (padding[1], padding[1], padding[0], padding[0]), "constant", 0)
    filter_count = weight.shape[0]
    rows_out = (input.shape[2]-weight.shape[2])//stride[0]+1
    cols_out = (input.shape[3]-weight.shape[3])//stride[1]+1
    out = torch.empty(size=(1, filter_count, rows_out, cols_out))

    mult_count = 0
    for i in range(filter_count):
        comp_wt = comp_vector_pred(weight[i])
        # bias defaults to None in the signature; treat that as "no bias".
        filter_bias = 0 if bias is None else bias[i]
        # The third value (sparse, non-predictive count) is not reported.
        out[0][i], num1, _ = compute_filter_conv_sparsepred(input, weight[i], comp_wt, rows_out, cols_out, stride, padding, filter_bias)
        mult_count += num1
    return (out, mult_count)
###################################################################################

# ALGO 5
class comp_vector_pred_twosided():
    """Split one filter into its positive and (sorted) negative weights.

    `x` is the size of dim 2, `y` the size of dim 1 and `c` the size of
    dim 0 of `arr`. Every stored entry is a `(weight, channel, i, j)` tuple
    with `i` indexing dim 1 and `j` indexing dim 2; zero weights are dropped.
    Negative entries are ordered by ascending weight.
    """
    def __init__(self, arr):
        self.x = arr.size(dim=2)
        self.y = arr.size(dim=1)
        self.c = arr.size(dim=0)

        positives = []
        negatives = []
        for channel in range(self.c):
            for row in range(self.y):
                for col in range(self.x):
                    value = arr[channel][row][col]
                    entry = (value, channel, row, col)
                    if value > 0:
                        positives.append(entry)
                    elif value < 0:
                        negatives.append(entry)

        # Most negative weights first.
        negatives.sort(key=lambda entry: entry[0])
        self.pos_vector = positives
        self.neg_vector = negatives

    def get_pos_vector(self):
        """Return the positive-weight entries in scan order."""
        return self.pos_vector

    def get_neg_vector(self):
        """Return the negative-weight entries sorted by ascending weight."""
        return self.neg_vector


def compute_conv_sparsepred_twosided(input, weight, comp_wt, r, c,bias=0):
    """Compute one output cell, skipping zero inputs as well as zero weights.

    Positive-weight products (with non-zero input) and the bias are
    accumulated first; the negative-weight products follow and the loop
    stops once the running sum is already negative.

    Args:
        input: Padded input of shape (1, channels, H, W); only sample 0 is read.
        weight: One filter of shape (channels, kh, kw).
        comp_wt: Object exposing `get_pos_vector()` / `get_neg_vector()`
            with `(weight, channel, i, j)` tuples for this filter.
        r, c: Top-left row / column of the window in `input`.
        bias: Bias added after the positive products.

    Returns:
        Tuple `(value, mult_count, mult_nonzero)`:
        `mult_count` is the number of multiplications performed here and
        `mult_nonzero` is the number of window positions where both the
        input and the weight are non-zero.
    """
    img_out_cell = 0
    conv_mult_count = 0
    pos = comp_wt.get_pos_vector()
    neg = comp_wt.get_neg_vector()

    k, x, y = weight.shape

    # Count positions where input AND weight are non-zero. Pairing the two
    # non-zero index lists positionally (zip) does not give that count, so
    # compare the masks element-wise instead.
    window = input[0][:k, r:r+x, c:c+y]
    mult_nonzero = int(torch.count_nonzero((window != 0) & (weight != 0)))

    for tup in pos:
        activation = input[0][tup[1]][r+tup[2]][c+tup[3]]
        if activation == 0:
            continue  # zero input: no multiplication needed
        conv_mult_count += 1
        img_out_cell += tup[0]*activation

    img_out_cell += bias

    for tup in neg:
        activation = input[0][tup[1]][r+tup[2]][c+tup[3]]
        if activation == 0:
            continue  # zero input: no multiplication needed
        if img_out_cell < 0:
            break
        conv_mult_count += 1
        img_out_cell += tup[0]*activation

    return img_out_cell, conv_mult_count, mult_nonzero


def compute_filter_conv_sparsepred_twosided(input, weights, comp_wt, width_out, height_out,stride=(1,1), padding=(0,0), bias=0):
    """Slide one filter over the padded input using the two-sided cell routine.

    The result buffer has shape (width_out, height_out) and is indexed by
    (row step, column step). The bias is added inside
    `compute_conv_sparsepred_twosided`; `padding` is not used here.

    Returns:
        Tuple `(channel, mult_count, calc_mult)` with the per-cell counters
        summed over all output positions.
    """
    result = torch.zeros(width_out, height_out)
    total_mults = 0
    total_calc = 0
    row_starts = range(0, input.shape[2] - weights.shape[1] + 1, stride[0])
    col_starts = range(0, input.shape[3] - weights.shape[2] + 1, stride[1])
    for row_step, r in enumerate(row_starts):
        for col_step, c in enumerate(col_starts):
            cell, mults, calc = compute_conv_sparsepred_twosided(input, weights, comp_wt, r, c, bias)
            result[row_step][col_step] = cell
            total_mults += mults
            total_calc += calc
    return result, total_mults, total_calc


def myconv2d_sparse_pred_twosided(input, weight, bias=None, stride=(1,1), padding=(0,0), dilation=(1,1), groups=1):
    """Two-sided sparse predictive convolution.

    Products with a zero weight or a zero input are skipped and the
    negative-weight accumulation stops early. Only sample 0 of `input` is
    processed; the result has a batch dimension of 1.

    Args:
        input: Tensor of shape (batch, in_channels, height, width).
        weight: Tensor of shape (out_channels, in_channels, kh, kw).
        bias: Optional tensor of shape (out_channels,).
        stride, padding: (vertical, horizontal) pairs.
        dilation, groups: Accepted for signature compatibility; not used.

    Returns:
        Tuple `(out, mult_count)`.
    """
    input = torch.nn.functional.pad(input, (padding[1], padding[1], padding[0], padding[0]), "constant", 0)
    filter_count = weight.shape[0]
    rows_out = (input.shape[2]-weight.shape[2])//stride[0]+1
    cols_out = (input.shape[3]-weight.shape[3])//stride[1]+1
    out = torch.empty(size=(1, filter_count, rows_out, cols_out))

    mult_count = 0
    for i in range(filter_count):
        comp_wt = comp_vector_pred_twosided(weight[i])
        # bias defaults to None in the signature; treat that as "no bias".
        filter_bias = 0 if bias is None else bias[i]
        # The third value (sparse, non-predictive count) is not reported.
        out[0][i], num1, _ = compute_filter_conv_sparsepred_twosided(input, weight[i], comp_wt, rows_out, cols_out, stride=stride, padding=padding, bias=filter_bias)
        mult_count += num1

    return (out, mult_count)


**Defining custom Conv2D Layer**

In [53]:
class Custom_Conv2d(torch.nn.modules.conv._ConvNd):
    """
    Convolution layer, usable as a regular module, whose forward pass is
    delegated to `myconv2d_onlypred`.

    Scalar `kernel_size`, `stride`, `padding` and `dilation` arguments are
    expanded to 2-tuples. `forward` returns whatever the delegated function
    returns, i.e. an `(output, multiplication_count)` tuple.
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros'):
        def as_pair(value):
            return (value, value)

        super().__init__(
            in_channels,
            out_channels,
            as_pair(kernel_size),
            as_pair(stride),
            as_pair(padding),
            as_pair(dilation),
            False,    # transposed
            (0, 0),   # output_padding
            groups,
            bias,
            padding_mode,
        )

    def conv2d_forward(self, input, weight):
        """Run the custom convolution with this layer's hyper-parameters."""
        conv_args = (self.bias, self.stride, self.padding, self.dilation, self.groups)
        return myconv2d_onlypred(input, weight, *conv_args)

    def forward(self, input):
        """Apply the layer using its own weight tensor."""
        return self.conv2d_forward(input, self.weight)


**Defining custom AlexNet**

In [54]:
# Per-layer logs of input activation sparsity for CONV1..CONV5; each list
# receives one value per forward pass.
c1, c2, c3, c4, c5 = ([] for _ in range(5))

In [55]:
# Per-layer logs of the #MAC ops reported by CONV1..CONV5; each list
# receives one value per forward pass.
m1, m2, m3, m4, m5 = ([] for _ in range(5))

In [56]:
class CustomAlexNet(nn.Module):
    """AlexNet built from `Custom_Conv2d` layers with per-layer bookkeeping.

    During `forward`, the sparsity of the tensor entering each conv layer is
    appended to the module-level lists c1..c5 and the multiplication count
    returned by each conv layer is appended to m1..m5.
    """
    def __init__(self, num_classes=10):
        super().__init__()
        feature_layers = [
            Custom_Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            Custom_Conv2d(64, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            Custom_Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            Custom_Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            Custom_Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(6, 6))

        classifier_layers = [
            nn.Dropout(0.5),
            nn.Linear(9216, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 1024),
            nn.ReLU(),
            nn.Linear(1024, num_classes),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run the network and record sparsity / multiplication counts."""
        # Index of each conv layer inside `self.features`, mapped to the
        # (sparsity log, multiplication log) it writes to. Looked up on every
        # call so the current module-level lists are always used.
        conv_logs = {
            0: (c1, m1),
            3: (c2, m2),
            6: (c3, m3),
            8: (c4, m4),
            10: (c5, m5),
        }

        out = x
        for position, layer in enumerate(self.features):
            logs = conv_logs.get(position)
            if logs is None:
                out = layer(out)
                continue
            sparsity_log, macs_log = logs
            # Fraction of zeros in the tensor fed to this conv layer.
            sparsity_log.append((1 - torch.count_nonzero(out)/torch.numel(out)).item())
            out, macops = layer(out)
            macs_log.append(macops)
            out = out.to(device)

        out = self.avgpool(out)
        out = out.reshape(out.size(0), -1)
        return self.classifier(out)



**Instantiating a custom AlexNet**

In [57]:
# Ten output classes (the CIFAR-10 loaders defined above are used for evaluation).
cust_alexnet = CustomAlexNet(num_classes=10)

**Loading pretrained AlexNet weights into our custom AlexNet**

In [58]:
# map_location remaps the checkpoint's tensors onto `device`, so loading also
# works on a CPU-only machine if the file was saved from a GPU.
cust_alexnet.load_state_dict(torch.load(r'alexnet_unnormalised.pth', map_location=device))
cust_alexnet.to(device)
cust_alexnet.eval()

CustomAlexNet(
  (features): Sequential(
    (0): Custom_Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Custom_Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Custom_Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Custom_Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Custom_Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU()
    

# Measurements

**Accuracy & Layer-wise Activation Sparsities(Dynamic)**

In [59]:
def test_model(cust_alexnet, max_images=1, loader=None):
    """Evaluate a model and print accuracy, #MAC ops and activation sparsity.

    The module-level logs m1..m5 and c1..c5 are cleared first and are filled
    by the model's forward pass.

    Args:
        cust_alexnet: Model to evaluate.
        max_images: Stop once at least this many images were evaluated
            (checked before each batch). `None` evaluates the whole loader.
            Defaults to 1.
        loader: Iterable of `(images, labels)` batches; defaults to the
            module-level `test_loader`.

    Returns:
        None. Results are printed.
    """
    if loader is None:
        loader = test_loader

    cust_alexnet.to(device)
    cust_alexnet.eval()
    correct = 0
    total = 0

    mac_logs = (m1, m2, m3, m4, m5)
    sparsity_logs = (c1, c2, c3, c4, c5)
    for log in mac_logs + sparsity_logs:
        log.clear()

    with torch.no_grad():
        for data in loader:
            if max_images is not None and total >= max_images:
                break
            images, labels = data[0].to(device), data[1].to(device)
            outputs = cust_alexnet(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated, so every average below would divide by zero.
        print('No test images were evaluated; nothing to report.')
        return

    print('Accuracy of the network on the test images: %.2f %%' % (100 * correct / total))
    for layer_no, log in enumerate(mac_logs, start=1):
        print("CONV%d #MACops(avg):" % layer_no, sum(log)/len(log))

    for layer_no, log in enumerate(sparsity_logs, start=1):
        print("CONV%d activation sparsity(avg):" % layer_no, sum(log)/len(log))
    # Number of forward passes that were logged.
    print(len(c1))

**Weight Sparsity (static)**

In [60]:
def get_alex_w_sparsities(cust_alexnet):
    """Print the percentage of exactly-zero weights in each conv layer.

    The conv layers are read from `cust_alexnet.features` at indices
    0, 3, 6, 8 and 10 and reported as conv1..conv5.
    """
    for conv_no, layer_index in enumerate((0, 3, 6, 8, 10), start=1):
        layer_weight = cust_alexnet.features[layer_index].weight
        zero_count = float(torch.sum(layer_weight == 0))
        zero_percent = 100. * zero_count / float(layer_weight.nelement())
        print("Sparsity in conv{:}.weight: {:.2f}%".format(conv_no, zero_percent))

In [61]:
# Baseline: weight sparsity of the network as loaded, before any pruning.
get_alex_w_sparsities(cust_alexnet)

Sparsity in conv1.weight: 0.00%
Sparsity in conv2.weight: 0.00%
Sparsity in conv3.weight: 0.00%
Sparsity in conv4.weight: 0.00%
Sparsity in conv5.weight: 0.00%


# Pruning

In [62]:
import torch.nn.utils.prune as prune

In [33]:
alex_90 = CustomAlexNet(10)
# NOTE(review): this cell loads 'alexnet.pth' while the other cells load
# 'alexnet_unnormalised.pth' — confirm which checkpoint is intended.
# map_location remaps the checkpoint's tensors onto `device`.
alex_90.load_state_dict(torch.load(r'alexnet.pth', map_location=device))

for name, module in alex_90.named_modules():
    # Prune the 25% smallest-magnitude (L1) connections in every custom
    # 2D-conv layer. Despite the `alex_90` name, amount=0.25 removes 25%,
    # not 90%, of the weights per layer.
    if isinstance(module, Custom_Conv2d):
        prune.l1_unstructured(module, name='weight', amount=0.25)

CustomAlexNet(
  (features): Sequential(
    (0): Custom_Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Custom_Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Custom_Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Custom_Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Custom_Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU()
    

In [36]:
# Per-layer weight sparsity after the layer-wise pruning above.
get_alex_w_sparsities(alex_90)

Sparsity in conv1.weight: 25.00%
Sparsity in conv2.weight: 25.00%
Sparsity in conv3.weight: 25.00%
Sparsity in conv4.weight: 25.00%
Sparsity in conv5.weight: 25.00%


In [37]:
# NOTE(review): the saved output of this cell ("on the 10000 test images")
# does not match the message printed by the test_model defined in this
# notebook — it looks like it came from an earlier version; re-run to refresh.
test_model(alex_90)

Accuracy of the network on the 10000 test images: 83.35 %
CONV1 #MACops(avg): 1.0
CONV2 #MACops(avg): 1.0
CONV3 #MACops(avg): 1.0
CONV4 #MACops(avg): 1.0
CONV5 #MACops(avg): 1.0
CONV1 activation sparsity(avg): 0.0
CONV2 activation sparsity(avg): 0.4914952580094337
CONV3 activation sparsity(avg): 0.7596199704170227
CONV4 activation sparsity(avg): 0.8848602611422539
CONV5 activation sparsity(avg): 0.8694982788264751
10000


### Global Pruning

In [63]:
model = CustomAlexNet(10)
# map_location remaps the checkpoint's tensors onto `device`, so loading also
# works on a CPU-only machine if the file was saved from a GPU.
model.load_state_dict(torch.load(r'alexnet_unnormalised.pth', map_location=device))
model.to(device)
model.eval()

# Weight tensors of the five conv layers; they are ranked together so the
# pruning budget is shared across layers.
parameters_to_prune = (
    (model.features[0], 'weight'),
    (model.features[3], 'weight'),
    (model.features[6], 'weight'),
    (model.features[8], 'weight'),
    (model.features[10], 'weight'),
)

# amount=0.0 removes no weights (unpruned baseline run); raise it to prune
# that fraction of all conv weights globally.
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.0,
)

In [64]:
# Per-layer weight sparsity after the global pruning step above.
get_alex_w_sparsities(model)

Sparsity in conv1.weight: 0.00%
Sparsity in conv2.weight: 0.00%
Sparsity in conv3.weight: 0.00%
Sparsity in conv4.weight: 0.00%
Sparsity in conv5.weight: 0.00%


In [65]:
# Wall-clock time (seconds) of one test_model run. The "kernel_id N" lines in
# the output are progress messages printed by myconv2d_onlypred.
start = time.time()
test_model(model)
print(time.time()-start)

kernel_id 0
kernel_id 8
kernel_id 16
kernel_id 24
kernel_id 32
kernel_id 40
kernel_id 48
kernel_id 56
kernel_id 0
kernel_id 8
kernel_id 16
kernel_id 24
kernel_id 32
kernel_id 40
kernel_id 48
kernel_id 56
kernel_id 64
kernel_id 72
kernel_id 80
kernel_id 88
kernel_id 96
kernel_id 104
kernel_id 112
kernel_id 120
kernel_id 128
kernel_id 136
kernel_id 144
kernel_id 152
kernel_id 160
kernel_id 168
kernel_id 176
kernel_id 184
kernel_id 0
kernel_id 8
kernel_id 16
kernel_id 24
kernel_id 32
kernel_id 40
kernel_id 48
kernel_id 56
kernel_id 64
kernel_id 72
kernel_id 80
kernel_id 88
kernel_id 96
kernel_id 104
kernel_id 112
kernel_id 120
kernel_id 128
kernel_id 136
kernel_id 144
kernel_id 152
kernel_id 160
kernel_id 168
kernel_id 176
kernel_id 184
kernel_id 192
kernel_id 200
kernel_id 208
kernel_id 216
kernel_id 224
kernel_id 232
kernel_id 240
kernel_id 248
kernel_id 256
kernel_id 264
kernel_id 272
kernel_id 280
kernel_id 288
kernel_id 296
kernel_id 304
kernel_id 312
kernel_id 320
kernel_id 328
kern