In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader


import torchvision
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision import models
from torchmetrics import Accuracy

import quantus
import captum
from captum.attr import Saliency, IntegratedGradients, NoiseTunnel
from cleverhans.torch.attacks.projected_gradient_descent import (projected_gradient_descent)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import random
import copy
import gc
import math

import warnings
warnings.filterwarnings('ignore')

In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [29]:
epochs = 120

batch_size = 128

classes = ('plane', 'car' , 'bird','cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [31]:
# normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                                      std=[0.229, 0.224, 0.225])
# cifar_transforms = transforms.Compose([transforms.RandomHorizontalFlip(),
#             transforms.RandomCrop(32, 4),
#             transforms.ToTensor(),
#             normalize,])

cifar_transforms = transforms.Compose([transforms.ToTensor()])

In [33]:
train_dataset = torchvision.datasets.CIFAR10(root= './datasets', train = True, download=True, transform = cifar_transforms)
test_dataset =  torchvision.datasets.CIFAR10(root= './datasets', train = False, download=True, transform = cifar_transforms)

Files already downloaded and verified
Files already downloaded and verified


In [34]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True) # num_workers=4,
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, pin_memory=True)

In [37]:
class VGG(nn.Module):
    def __init__(self, features):
        super(VGG,self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512,512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512,512),
            nn.ReLU(True),
            nn.Linear(512,10)
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                m.bias.data.zero_()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

def make_layers(cfg):
    layers = []
    in_channels =3
    for out_channels in cfg:
        if out_channels == 'M':
            layers += [nn.MaxPool2d(kernel_size = 2, stride =2)]
        else:
            conv2d = nn.Conv2d(in_channels, out_channels, kernel_size = 3, padding =1)
            layers += [conv2d, nn.ReLU(inplace = True)]
            in_channels = out_channels
    return nn.Sequential(*layers)

cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']

def vgg16():
    return VGG(make_layers(cfg))

In [39]:
model = vgg16()
learning_rate = 0.05
criterion = nn.CrossEntropyLoss(reduction="mean")
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 5e-4)

In [41]:
def evaluate_model(model, data, device):
    model.eval()
    logits = torch.Tensor().to(device)
    targets = torch.LongTensor().to(device)

    with torch.no_grad():
        for x_batch, y_batch in data:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            logits = torch.cat([logits, model(x_batch)])
            targets = torch.cat([targets, y_batch])
    
    return torch.nn.functional.softmax(logits, dim=1), targets

In [43]:
def train_normal(model, epochs):
    model.train()
    for epoch in range(epochs):
        for x_batch, y_batch in train_dataloader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            logits = model(x_batch)
            loss = criterion(logits, y_batch)
            loss.backward()
            optimizer.step()

        # Evaluate model!
        if epochs%10==0:
            predictions, labels = evaluate_model(model, test_dataloader, device)
            test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())
            print(f"Epoch {epoch+1}/{epochs} - test accuracy: {(100 * test_acc):.2f}% and CE loss {loss.item():.2f}")
    return model

In [None]:
model_normal = train_normal(model = model.to(device), epochs = epochs)

Epoch 1/120 - test accuracy: 13.47% and CE loss 2.21
Epoch 2/120 - test accuracy: 34.14% and CE loss 1.58
Epoch 3/120 - test accuracy: 43.01% and CE loss 1.49
Epoch 4/120 - test accuracy: 53.46% and CE loss 1.07
Epoch 5/120 - test accuracy: 59.72% and CE loss 1.23


In [None]:
# Model to GPU and eval mode.
model_normal.to(device)
model_normal.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_normal, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

In [None]:
from pathlib import Path

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "vgg_cifar.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

print(f"Saving the model: {MODEL_SAVE_PATH}")
torch.save(obj=model_normal.state_dict(), f=MODEL_SAVE_PATH)

# Adversarial Training

In [73]:
model = vgg16()
learning_rate = 0.05
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9, weight_decay = 5e-4)
eps= [0.01, 0.03, 0.06, 0.1, 0.3, 0.5]

In [75]:
def evaluate_model(model, data, device):
    model.eval()
    logits = torch.Tensor().to(device)
    targets = torch.LongTensor().to(device)

    with torch.no_grad():
        for x_batch, y_batch in data:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            logits = torch.cat([logits, model(x_batch)])
            targets = torch.cat([targets, y_batch])
    
    return torch.nn.functional.softmax(logits, dim=1), targets

In [77]:
def train_adv(model, epsilon, epochs):
    model.train()
    eps = epsilon
    for epoch in range(epochs):
        for x_batch, y_batch in train_dataloader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            x_batch = projected_gradient_descent(model, x_batch, eps, eps/10, 40, np.inf)
            optimizer.zero_grad()
            logits = model(x_batch)
            loss = criterion(logits, y_batch)
            loss.backward()
            optimizer.step()

        # Evaluate model!
        if epochs%10==0:
            predictions, labels = evaluate_model(model, test_dataloader, device)
            test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())
            print(f"Epoch {epoch+1}/{epochs} - test accuracy: {(100 * test_acc):.2f}% and CE loss {loss.item():.2f}")
    return model

In [83]:
model_adversarial = train_adv(model=model.to(device),
                    epsilon = eps[0], 
                    epochs=epochs)

Epoch 1/100 - test accuracy: 21.61% and CE loss 1.97
Epoch 2/100 - test accuracy: 33.43% and CE loss 1.86
Epoch 3/100 - test accuracy: 34.10% and CE loss 1.79
Epoch 4/100 - test accuracy: 53.47% and CE loss 1.36
Epoch 5/100 - test accuracy: 61.15% and CE loss 1.08
Epoch 6/100 - test accuracy: 67.06% and CE loss 1.02
Epoch 7/100 - test accuracy: 69.84% and CE loss 0.79
Epoch 8/100 - test accuracy: 70.26% and CE loss 0.79
Epoch 9/100 - test accuracy: 74.50% and CE loss 0.68
Epoch 10/100 - test accuracy: 76.50% and CE loss 0.62
Epoch 11/100 - test accuracy: 77.97% and CE loss 0.81
Epoch 12/100 - test accuracy: 77.82% and CE loss 0.66
Epoch 13/100 - test accuracy: 75.05% and CE loss 0.64
Epoch 14/100 - test accuracy: 75.97% and CE loss 0.68
Epoch 15/100 - test accuracy: 79.45% and CE loss 0.62
Epoch 16/100 - test accuracy: 77.86% and CE loss 0.55
Epoch 17/100 - test accuracy: 80.09% and CE loss 0.72
Epoch 18/100 - test accuracy: 78.69% and CE loss 0.75
Epoch 19/100 - test accuracy: 82.92% 

In [85]:
# Model to GPU and eval mode.
model_adversarial.to(device)
model_adversarial.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_adversarial, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

Model test accuracy: 86.29%


In [87]:
from pathlib import Path

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "vgg_cifar_adv.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

print(f"Saving the model: {MODEL_SAVE_PATH}")
torch.save(obj=model_adversarial.state_dict(), f=MODEL_SAVE_PATH)

Saving the model: models\vgg_cifar_adv.pth


# L1 Unstructured

In [128]:
model = vgg16()
epochs = 100
criterion = nn.CrossEntropyLoss(reduction="mean")
optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum = 0.9, weight_decay = 5e-4)

In [72]:
def count_params(model):
    total_params = 0
    for layer_names, param in model.named_parameters():
        total_params += torch.count_nonzero(param.data)
    return total_params

In [74]:
orig_params = count_params(model)
print(f"Unpruned VGG-16 model has {orig_params} trainable parameters")

Unpruned VGG-16 model has 15240904 trainable parameters


In [76]:
for layer, param in model.named_parameters():
    print(f"layer.name: {layer} & param.shape = {param.shape}")

layer.name: features.0.weight & param.shape = torch.Size([64, 3, 3, 3])
layer.name: features.0.bias & param.shape = torch.Size([64])
layer.name: features.2.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: features.2.bias & param.shape = torch.Size([64])
layer.name: features.5.weight & param.shape = torch.Size([128, 64, 3, 3])
layer.name: features.5.bias & param.shape = torch.Size([128])
layer.name: features.7.weight & param.shape = torch.Size([128, 128, 3, 3])
layer.name: features.7.bias & param.shape = torch.Size([128])
layer.name: features.10.weight & param.shape = torch.Size([256, 128, 3, 3])
layer.name: features.10.bias & param.shape = torch.Size([256])
layer.name: features.12.weight & param.shape = torch.Size([256, 256, 3, 3])
layer.name: features.12.bias & param.shape = torch.Size([256])
layer.name: features.14.weight & param.shape = torch.Size([256, 256, 3, 3])
layer.name: features.14.bias & param.shape = torch.Size([256])
layer.name: features.17.weight & param.shap

In [80]:
for layer_name in model.state_dict().keys():
    print(layer_name, model.state_dict()[layer_name].shape)

features.0.weight torch.Size([64, 3, 3, 3])
features.0.bias torch.Size([64])
features.2.weight torch.Size([64, 64, 3, 3])
features.2.bias torch.Size([64])
features.5.weight torch.Size([128, 64, 3, 3])
features.5.bias torch.Size([128])
features.7.weight torch.Size([128, 128, 3, 3])
features.7.bias torch.Size([128])
features.10.weight torch.Size([256, 128, 3, 3])
features.10.bias torch.Size([256])
features.12.weight torch.Size([256, 256, 3, 3])
features.12.bias torch.Size([256])
features.14.weight torch.Size([256, 256, 3, 3])
features.14.bias torch.Size([256])
features.17.weight torch.Size([512, 256, 3, 3])
features.17.bias torch.Size([512])
features.19.weight torch.Size([512, 512, 3, 3])
features.19.bias torch.Size([512])
features.21.weight torch.Size([512, 512, 3, 3])
features.21.bias torch.Size([512])
features.24.weight torch.Size([512, 512, 3, 3])
features.24.bias torch.Size([512])
features.26.weight torch.Size([512, 512, 3, 3])
features.26.bias torch.Size([512])
features.28.weight t

In [100]:
def compute_sparsity(model):
    conv1_sparsity = (torch.sum(model.features[0].weight == 0) / model.features[0].weight.nelement()) * 100
    conv2_sparsity = (torch.sum(model.features[2].weight == 0) / model.features[2].weight.nelement()) * 100
    conv3_sparsity = (torch.sum(model.features[5].weight == 0) / model.features[5].weight.nelement()) * 100
    conv4_sparsity = (torch.sum(model.features[7].weight == 0) / model.features[7].weight.nelement()) * 100
    conv5_sparsity = (torch.sum(model.features[10].weight == 0) / model.features[10].weight.nelement()) * 100
    conv6_sparsity = (torch.sum(model.features[12].weight == 0) / model.features[12].weight.nelement()) * 100
    conv7_sparsity = (torch.sum(model.features[14].weight == 0) / model.features[14].weight.nelement()) * 100
    conv8_sparsity = (torch.sum(model.features[17].weight == 0) / model.features[17].weight.nelement()) * 100
    conv9_sparsity = (torch.sum(model.features[19].weight == 0) / model.features[19].weight.nelement()) * 100
    conv10_sparsity = (torch.sum(model.features[21].weight == 0) / model.features[21].weight.nelement()) * 100
    conv11_sparsity = (torch.sum(model.features[24].weight == 0) / model.features[24].weight.nelement()) * 100
    conv12_sparsity = (torch.sum(model.features[26].weight == 0) / model.features[26].weight.nelement()) * 100
    conv13_sparsity = (torch.sum(model.features[28].weight == 0) / model.features[28].weight.nelement()) * 100
    fc1_sparsity = (torch.sum(model.classifier[1].weight == 0) / model.classifier[1].weight.nelement()) * 100
    fc2_sparsity = (torch.sum(model.classifier[4].weight == 0) / model.classifier[4].weight.nelement()) * 100
    op_sparsity = (torch.sum(model.classifier[6].weight == 0) / model.classifier[6].weight.nelement()) * 100

    num = torch.sum(model.features[0].weight == 0) + torch.sum(model.features[2].weight == 0) + torch.sum(model.features[5].weight == 0) + torch.sum(model.features[7].weight == 0) + torch.sum(model.features[10].weight == 0) + torch.sum(model.features[12].weight == 0) + torch.sum(model.features[14].weight == 0) + torch.sum(model.features[17].weight == 0) + torch.sum(model.features[19].weight == 0) + torch.sum(model.features[21].weight == 0)+ torch.sum(model.features[24].weight == 0) + torch.sum(model.features[26].weight == 0) + torch.sum(model.features[28].weight == 0) + torch.sum(model.classifier[1].weight == 0) + torch.sum(model.classifier[4].weight == 0) + torch.sum(model.classifier[6].weight == 0)
    denom = model.features[0].weight.nelement() + model.features[2].weight.nelement() + model.features[5].weight.nelement() + model.features[7].weight.nelement() + model.features[10].weight.nelement() + model.features[12].weight.nelement() + model.features[14].weight.nelement() + model.features[17].weight.nelement() + model.features[19].weight.nelement() + model.features[21].weight.nelement() + model.features[24].weight.nelement() + model.features[26].weight.nelement() + model.features[28].weight.nelement() + model.classifier[1].weight.nelement() + model.classifier[4].weight.nelement() + model.classifier[6].weight.nelement()
    global_sparsity = num/denom * 100
    return global_sparsity

In [102]:
print(f"VGG-16 global sparsity = {compute_sparsity(model):.2f}%")

VGG-16 global sparsity = 1.91%


In [120]:
import torch.nn.utils.prune as prune
for name, module in model.named_modules():
    # prune 20% of weights/connections in for all hidden layaers-
    if isinstance(module, torch.nn.Linear) and name != 'classifier.6':
        prune.l1_unstructured(module = module, name = 'weight', amount = 0.5)
    
    # prune 10% of weights/connections for output layer-
    elif isinstance(module, torch.nn.Linear) and name == 'classifier.6':
        prune.l1_unstructured(module = module, name = 'weight', amount = 0.5)

In [122]:
print(f"VGG-16 global sparsity = {compute_sparsity(model):.2f}%")

VGG-16 global sparsity = 3.14%


In [124]:
def evaluate_model(model, data, device):
    model.eval()
    logits = torch.Tensor().to(device)
    targets = torch.LongTensor().to(device)

    with torch.no_grad():
        for x_batch, y_batch in data:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            logits = torch.cat([logits, model(x_batch)])
            targets = torch.cat([targets, y_batch])
    
    return torch.nn.functional.softmax(logits, dim=1), targets

In [126]:
def train_l1_prune(model, epochs):
    model.train()
    for epoch in range(epochs):
        for x_batch, y_batch in train_dataloader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            logits = model(x_batch)
            loss = criterion(logits, y_batch)
            loss.backward()
            optimizer.step()

        # Evaluate model!
        if epochs%10==0:
            predictions, labels = evaluate_model(model, test_dataloader, device)
            test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())
            print(f"Epoch {epoch+1}/{epochs} - test accuracy: {(100 * test_acc):.2f}% and CE loss {loss.item():.2f}")
    return model

In [None]:
model_l1_unstructured = train_l1_prune(model = model.to(device), epochs = epochs)

In [None]:
# Model to GPU and eval mode.
model_l1_unstructured.to(device)
model_l1_unstructured.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_l1_unstructured, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

In [None]:
from pathlib import Path

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "vgg_cifar_l1_unstructured.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

print(f"Saving the model: {MODEL_SAVE_PATH}")
torch.save(obj=model_l1_unstructured.state_dict(), f=MODEL_SAVE_PATH)

# Global Pruning

In [131]:
model = vgg16()
epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum = 0.9, weight_decay = 5e-4)

In [133]:
orig_params = count_params(model)
print(f"Unpruned VGG-16 model has {orig_params} trainable parameters")

Unpruned VGG-16 model has 15240906 trainable parameters


In [135]:
for layer, param in model.named_parameters():
    print(f"layer.name: {layer} & param.shape = {param.shape}")

layer.name: features.0.weight & param.shape = torch.Size([64, 3, 3, 3])
layer.name: features.0.bias & param.shape = torch.Size([64])
layer.name: features.2.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: features.2.bias & param.shape = torch.Size([64])
layer.name: features.5.weight & param.shape = torch.Size([128, 64, 3, 3])
layer.name: features.5.bias & param.shape = torch.Size([128])
layer.name: features.7.weight & param.shape = torch.Size([128, 128, 3, 3])
layer.name: features.7.bias & param.shape = torch.Size([128])
layer.name: features.10.weight & param.shape = torch.Size([256, 128, 3, 3])
layer.name: features.10.bias & param.shape = torch.Size([256])
layer.name: features.12.weight & param.shape = torch.Size([256, 256, 3, 3])
layer.name: features.12.bias & param.shape = torch.Size([256])
layer.name: features.14.weight & param.shape = torch.Size([256, 256, 3, 3])
layer.name: features.14.bias & param.shape = torch.Size([256])
layer.name: features.17.weight & param.shap

In [137]:
print(f"VGG-16 global sparsity = {compute_sparsity(model):.2f}%")

VGG-16 global sparsity = 0.00%


In [139]:
parameters_to_prune = (
    (model.features[0], 'weight'),
    (model.features[2], 'weight'),
    (model.features[5], 'weight'),
    (model.features[7], 'weight'),
    (model.features[10], 'weight'),
    (model.features[12], 'weight'),
    (model.features[14], 'weight'),
    (model.features[17], 'weight'),
    (model.features[19], 'weight'),
    (model.features[21], 'weight'),
    (model.features[24], 'weight'),
    (model.features[26], 'weight'),
    (model.features[28], 'weight'),
    (model.classifier[1], 'weight'),
    (model.classifier[4], 'weight'),
    (model.classifier[6], 'weight')
)

prune_rates_global = [0.2, 0.3, 0.4, 0.5, 0.6]

In [141]:
def evaluate_model(model, data, device):
    model.eval()
    logits = torch.Tensor().to(device)
    targets = torch.LongTensor().to(device)

    with torch.no_grad():
        for x_batch, y_batch in data:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            logits = torch.cat([logits, model(x_batch)])
            targets = torch.cat([targets, y_batch])
    
    return torch.nn.functional.softmax(logits, dim=1), targets

In [143]:
def train_global_pruned(model, epochs):
    for iter_prune_round in range(1):
        print(f"\n\nIterative Global pruning round = {iter_prune_round + 1}")
        
        # Prune layer-wise in a structured manner-
        prune.global_unstructured(
            parameters_to_prune,
            pruning_method = prune.L1Unstructured,
            amount = prune_rates_global[iter_prune_round]
            
        )
    
        # Print current global sparsity level-
        print(f"VGG global sparsity = {compute_sparsity(model):.2f}%")
        
        
        # Fine-training loop-
        print("\nFine-tuning pruned model to recover model's performance\n")
        model.train()
        for epoch in range(epochs):
            for x_batch, y_batch in train_dataloader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                logits = model(x_batch)
                loss = criterion(logits, y_batch)
                loss.backward()
                optimizer.step()
    
            # Evaluate model!
            if epochs%10==0:
                predictions, labels = evaluate_model(model, test_dataloader, device)
                test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())
                print(f"Epoch {epoch+1}/{epochs} - test accuracy: {(100 * test_acc):.2f}% and CE loss {loss.item():.2f}")
    return model

In [None]:
model_global = train_global_pruned(model = model.to(device), epochs = epochs)

In [None]:
# Model to GPU and eval mode.
model_global.to(device)
model_global.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_global, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

In [None]:
from pathlib import Path

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "vgg_cifar_global.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

print(f"Saving the model: {MODEL_SAVE_PATH}")
torch.save(obj=model_global.state_dict(), f=MODEL_SAVE_PATH)

# Layered Structure

In [146]:
model = vgg16()
epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum = 0.9, weight_decay = 5e-4)

In [148]:
orig_params = count_params(model)
print(f"Unpruned VGG-16 model has {orig_params} trainable parameters")

Unpruned VGG-16 model has 15240906 trainable parameters


In [150]:
for layer, param in model.named_parameters():
    print(f"layer.name: {layer} & param.shape = {param.shape}")

layer.name: features.0.weight & param.shape = torch.Size([64, 3, 3, 3])
layer.name: features.0.bias & param.shape = torch.Size([64])
layer.name: features.2.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: features.2.bias & param.shape = torch.Size([64])
layer.name: features.5.weight & param.shape = torch.Size([128, 64, 3, 3])
layer.name: features.5.bias & param.shape = torch.Size([128])
layer.name: features.7.weight & param.shape = torch.Size([128, 128, 3, 3])
layer.name: features.7.bias & param.shape = torch.Size([128])
layer.name: features.10.weight & param.shape = torch.Size([256, 128, 3, 3])
layer.name: features.10.bias & param.shape = torch.Size([256])
layer.name: features.12.weight & param.shape = torch.Size([256, 256, 3, 3])
layer.name: features.12.bias & param.shape = torch.Size([256])
layer.name: features.14.weight & param.shape = torch.Size([256, 256, 3, 3])
layer.name: features.14.bias & param.shape = torch.Size([256])
layer.name: features.17.weight & param.shap

In [152]:
print(f"VGG-16 global sparsity = {compute_sparsity(model):.2f}%")

VGG-16 global sparsity = 0.00%


In [154]:
def train_layered_pruned(model, epochs):
    for iter_prune_round in range(1):
        print(f"\n\nIterative Global pruning round = {iter_prune_round + 1}")
        
        # Prune layer-wise in a structured manner-
        prune.ln_structured(model.features[0], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[2], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[5], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[7], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[10], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[12], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[14], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[17], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[19], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[21], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[24], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[26], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.features[28], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.classifier[1], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.classifier[4], name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.classifier[6], name = "weight", amount = 0.1, n = 2, dim = 0)
        
        # Print current global sparsity level-
        print(f"VGG global sparsity = {compute_sparsity(model):.2f}%")
        
        
        # Fine-training loop-
        print("\nFine-tuning pruned model to recover model's performance\n")
        model.train()
        for epoch in range(epochs):
            for x_batch, y_batch in train_dataloader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                logits = model(x_batch)
                loss = criterion(logits, y_batch)
                loss.backward()
                optimizer.step()
    
            # Evaluate model!
            if epochs%10==0:
                predictions, labels = evaluate_model(model, test_dataloader, device)
                test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())
                print(f"Epoch {epoch+1}/{epochs} - test accuracy: {(100 * test_acc):.2f}% and CE loss {loss.item():.2f}")
    return model

In [156]:
model_layered_structured = train_layered_pruned(model = model.to(device), epochs = epochs)



Iterative Global pruning round = 1
VGG global sparsity = 9.98%

Fine-tuning pruned model to recover model's performance



KeyboardInterrupt: 

In [None]:
# Model to GPU and eval mode.
model_layered_structured.to(device)
model_layered_structured.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_layered_structured, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

In [None]:
from pathlib import Path

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "vgg_cifar_structured.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

print(f"Saving the model: {MODEL_SAVE_PATH}")
torch.save(obj=model_layered_structured.state_dict(), f=MODEL_SAVE_PATH)