<a href="https://colab.research.google.com/github/nazimorhan/Channel-Pruning-Guided-by-Classification-Loss-and-Feature-Importance/blob/master/Channel%20Pruning%20Guided%20by%20Classification%20Loss%20and%20Feature%20Importance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1 Import Required Modules

In [1]:
import matplotlib.pyplot as plt 
import numpy as np              
import time                     
import random                   
import copy

import torch
import torchvision
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

!pip install torchinfo
from torchinfo import summary

# Ask PyTorch to release any cached CUDA memory before the notebook starts
# allocating tensors.
torch.cuda.empty_cache()

# Render matplotlib figures inline in the notebook and set the default
# figure size / resolution used by every plot below.
%matplotlib inline
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['figure.dpi'] = 100 # 200 e.g. is really fine, but slower

# NOTE(review): second call in the same cell; nothing visible in between
# allocates GPU memory, so this looks redundant — confirm before removing.
torch.cuda.empty_cache()



# 2 Prepare Data

We will use the CIFAR-10 dataset to test the CPLI method, which is described in [this paper](https://arxiv.org/pdf/2003.06757.pdf). The original paper uses a batch size of 256, so we will use the same value. First, however, we define a function that creates the data loaders for a given batch size.

In [2]:
def create_batches(batchSize):
  """
    Build the CIFAR-10 train and test data loaders.

    Every image is resized to 256, center-cropped to 224, converted to a
    tensor and normalized with the mean/std listed below (the preprocessing
    described at https://pytorch.org/hub/pytorch_vision_vgg/).

    batchSize: number of samples per batch for both loaders.

    Returns: (trainloader, testloader). The train loader shuffles, the test
    loader does not. The data is stored under './data' and downloaded if
    it is not already there.
  """
  preprocess = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])

  def _make_loader(is_train):
    # The training split is shuffled; the test split keeps its order.
    split = torchvision.datasets.CIFAR10(root='./data', train=is_train,
                                         download=True, transform=preprocess)
    return torch.utils.data.DataLoader(split, batch_size=batchSize,
                                       shuffle=is_train, num_workers=2)

  # Tuple elements are evaluated left to right: train split first, then test.
  return _make_loader(True), _make_loader(False)

# Human-readable class names (presumably in CIFAR-10 label-index order — confirm).
CIFAR10_classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

## 2.1 Enable GPU

From "Edit -> Notebook Settings -> Hardware accelerator" select GPU. With the following we will specify to PyTorch that we want to use the GPU.

In [3]:
# Pick the compute device once; later cells move tensors and models to `device`.
if not torch.cuda.is_available():
  # No CUDA runtime detected: fall back to the CPU.
  print("Cuda (GPU support) is not available :(")
  device = torch.device("cpu")
else:
  print("Cuda (GPU support) is available and enabled!")
  device = torch.device("cuda")

Cuda (GPU support) is available and enabled!


# 3 Download, Finetune and Test Original Pretrained VGG19

In [4]:
# Create an instance of the original VGG19 with its pretrained weights.
# NOTE(review): newer torchvision releases reportedly deprecate `pretrained=`
# in favour of a `weights=` argument — confirm against the installed version.
origVGG = torchvision.models.vgg19(pretrained=True)
# Print the module tree so the layer indices used in later cells can be checked.
print(origVGG)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

## 3.1 Finetune Pre-Trained VGG19 on CIFAR-10

Since VGG19 is pretrained on the ImageNet dataset, we have to fine-tune the network for CIFAR-10.

In [5]:
# Work on a deep copy so the pretrained origVGG stays untouched, then freeze
# every parameter of the copy in one call (requires_grad becomes False).
newVGG = copy.deepcopy(origVGG)
newVGG.requires_grad_(False)

In [6]:
# Drop the original final classifier layer and install a fresh 10-output
# linear layer (one output per CIFAR-10 class).
newVGG.classifier[6] = None
newVGG.classifier[6] = nn.Linear(4096, 10)

# Re-enable gradients for classifier entries 0, 3 and 6 only
# (presumably the Linear layers of the VGG classifier — confirm).
for position in (0, 3, 6):
  for weight in newVGG.classifier[position].parameters():
    weight.requires_grad = True

# Show the trainable flag of every parameter, in model order.
for param in newVGG.parameters():
  print(param.requires_grad)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
True
True
True
True
True
True


Define a $\textbf{train}$ function to train the network.

In [7]:
def train(model, criterion, optimizer, epochs, dataloader, scheduler, verbose=True):
  """
    Train `model` for `epochs` passes over `dataloader`.

    Each batch is moved to the device that holds the model's parameters, run
    through the model, scored with `criterion` and used for one
    `optimizer.step()`. `scheduler.step()` is called once at the end of every
    epoch. The model's train/eval mode is left exactly as the caller set it.

    Args:
      model: network to optimise; called as `model(inputs)`.
      criterion: loss callable, invoked as `criterion(outputs, labels)`.
      optimizer: optimizer providing `zero_grad()` and `step()`.
      epochs: number of passes over `dataloader`.
      dataloader: iterable yielding `(inputs, labels)` batches.
      scheduler: learning-rate scheduler, stepped once per epoch.
      verbose: when True, print the mean of the most recent (up to 5)
        batch losses after every epoch.

    Returns: the loss history (one Python float per processed batch).
  """
  # Follow the model's own device instead of relying on a notebook-global
  # `device`; a model without parameters leaves the batches where they are.
  first_param = next(model.parameters(), None)
  target = None if first_param is None else first_param.device

  loss_history = []
  for epoch in range(epochs):
    for inputs, labels in dataloader:
      if target is not None:
        inputs = inputs.to(target)
        labels = labels.to(target)

      # zero the gradients as PyTorch accumulates them
      optimizer.zero_grad()

      # Forward pass, loss, backward pass, parameter update
      outputs = model(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      loss_history.append(loss.item())

    scheduler.step()

    if verbose:
      # Average over the losses that actually exist: dividing by a fixed 5
      # under-reports the mean when fewer than 5 iterations have run.
      recent = loss_history[-5:]
      avg = float(np.mean(recent)) if recent else float('nan')
      print(f'Epoch {epoch} / {epochs}: avg. loss of last 5 iterations {avg}')

  return loss_history

Collect the learnable parameters and pass them to the SGD optimizer. Also create the loss function and the learning-rate scheduler, and move the model to the selected device (the GPU, when available).

In [None]:
def get_learnable_parameters(model):
    """
    Collect the parameters of `model` that are currently trainable.

    model: object exposing `named_parameters()` that yields (name, parameter)
      pairs.

    Returns: a list of the parameters whose `requires_grad` flag is set, in
    the order `named_parameters()` yields them (empty if none are trainable).
    """
    return [param for _, param in model.named_parameters() if param.requires_grad]

# Fine-tuning configuration: data loaders with a batch size of 256.
batch_finetune = 256
trainloader, testloader = create_batches(batch_finetune)
weight_decay = 0.0001
# Only parameters with requires_grad set are handed to the optimizer.
parameters_to_update = get_learnable_parameters(newVGG)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(parameters_to_update, lr=0.1, momentum=0.9, weight_decay=weight_decay)
# StepLR with step_size=1: train() steps it once per epoch, so the rate changes every epoch.
# NOTE(review): gamma is left at the library default (0.1 per the PyTorch docs — confirm),
# which would shrink the learning rate tenfold per epoch; check this is intended.
scheduler = optim.lr_scheduler.StepLR(optimizer,1)

# Move the model to the selected device, then run the fine-tuning loop.
newVGG = newVGG.to(device)
epochs = 10
loss_history = train(newVGG, criterion, optimizer, epochs, trainloader, scheduler)

Files already downloaded and verified
Files already downloaded and verified


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch 0 / 10: avg. loss of last 5 iterations 0.848323392868042
Epoch 1 / 10: avg. loss of last 5 iterations 0.5005512177944184
Epoch 2 / 10: avg. loss of last 5 iterations 0.3965842664241791
Epoch 3 / 10: avg. loss of last 5 iterations 0.361676287651062
Epoch 4 / 10: avg. loss of last 5 iterations 0.3926823943853378
Epoch 5 / 10: avg. loss of last 5 iterations 0.38514016270637513
Epoch 6 / 10: avg. loss of last 5 iterations 0.42957322001457215
Epoch 7 / 10: avg. loss of last 5 iterations 0.4373959720134735
Epoch 8 / 10: avg. loss of last 5 iterations 0.41702077984809877


In [None]:
# Sanity check: run one test batch through the fine-tuned network and show
# the raw class scores.
testiter = iter(testloader)
data, label = next(testiter)

# Inspect in inference mode (no dropout randomness, no autograd graph) and
# restore the previous train/eval mode afterwards so later cells are unaffected.
was_training = newVGG.training
newVGG.eval()
try:
    with torch.no_grad():
        output = newVGG(data.to(device))
finally:
    newVGG.train(was_training)
print(output.data)


In [None]:
# Top-1 accuracy of the fine-tuned network over the whole test loader.
correct = 0
total = 0

# Evaluate in eval mode so training-only layers such as dropout do not
# perturb the predictions; the previous mode is restored afterwards so any
# later training cell sees the model exactly as it was.
was_training = newVGG.training
newVGG.eval()
try:
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = newVGG(images)
            # Index of the highest score per sample is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
finally:
    newVGG.train(was_training)

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))