In [2]:
import os
from ranger import Ranger

import torch
from torch import nn
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torch.nn.utils.prune as prune

import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import Normalize
from torchmetrics import Accuracy

import torch.optim as optim
from cleverhans.torch.attacks.projected_gradient_descent import (projected_gradient_descent)

import quantus
import captum
from captum.attr import Saliency, IntegratedGradients, NoiseTunnel

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import random
import copy
import gc

import warnings
warnings.filterwarnings('ignore')

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
batch_size = 8
epochs = 100
train_path = 'datasets/imagenette2/train'
val_path =  'datasets/imagenette2/val'
train_dataloader = torch.utils.data.DataLoader(datasets.ImageFolder(train_path, 
                                                                   transform = transforms.Compose([
                                                                        transforms.RandomResizedCrop(224),
                                                                        transforms.RandomHorizontalFlip(),
                                                                        transforms.ToTensor(),
                                                                        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                                             std=[0.229, 0.224, 0.225])
                                                                   ])), batch_size = batch_size, shuffle=True)

test_dataloader = torch.utils.data.DataLoader(datasets.ImageFolder(val_path,
                                                               transform=transforms.Compose([
                                                                   transforms.ToTensor(),
                                                                   transforms.Resize([224, 224]),
                                                                   transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                                        std=[0.229, 0.224, 0.225])
                                                               ])),batch_size=batch_size, shuffle=True)

In [5]:
classes = ('tench', 'springer', 'casette_player', 'chain_saw','church', 'French_horn', 'garbage_truck', 'gas_pump', 'golf_ball', 'parachute')

In [33]:
%run models.ipynb
%run utils.ipynb
%run metrics.ipynb

In [35]:
model_normal = resnet_18(filter='None', filter_layer=0)
learning_rate = 1e-04
criterion = nn.CrossEntropyLoss(reduction="mean").cuda()
optimizer = Ranger(model_normal.parameters(), lr = learning_rate, eps = 1e-06)
# lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience = 5, verbose=True)

Ranger optimizer loaded. 
Gradient Centralization usage = True
GC applied to both conv and fc layers


In [16]:
def train_model(model, epochs):
    model.train()
    for epoch in range(epochs):
        for x_batch, y_batch in train_dataloader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(x_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()

        # Evaluate model!
        if epochs%10==0:
            predictions, labels = evaluate_model(model, test_dataloader, device)
            test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())
            print(f"Epoch {epoch+1}/{epochs} - test accuracy: {(100 * test_acc):.2f}% and CE loss {loss.item():.2f}")
    return model

In [50]:
model_normal = train_model(model = model_normal.to(device), epochs = epochs)

Epoch 1/100 - test accuracy: 35.08% and CE loss 2.12
Epoch 2/100 - test accuracy: 48.41% and CE loss 1.66
Epoch 3/100 - test accuracy: 56.51% and CE loss 1.58
Epoch 4/100 - test accuracy: 60.05% and CE loss 1.13
Epoch 5/100 - test accuracy: 64.59% and CE loss 1.18
Epoch 6/100 - test accuracy: 66.06% and CE loss 1.12
Epoch 7/100 - test accuracy: 66.55% and CE loss 1.08
Epoch 8/100 - test accuracy: 67.75% and CE loss 0.92
Epoch 9/100 - test accuracy: 70.11% and CE loss 1.24
Epoch 10/100 - test accuracy: 68.94% and CE loss 0.77
Epoch 11/100 - test accuracy: 70.50% and CE loss 1.15
Epoch 12/100 - test accuracy: 71.85% and CE loss 1.05
Epoch 13/100 - test accuracy: 73.02% and CE loss 1.05
Epoch 14/100 - test accuracy: 73.50% and CE loss 0.77
Epoch 15/100 - test accuracy: 73.81% and CE loss 0.76
Epoch 16/100 - test accuracy: 73.10% and CE loss 0.59
Epoch 17/100 - test accuracy: 74.55% and CE loss 0.69
Epoch 18/100 - test accuracy: 74.57% and CE loss 0.72
Epoch 19/100 - test accuracy: 74.70% 

In [51]:
# Model to GPU and eval mode.
model_normal.to(device)
model_normal.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_normal, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

Model test accuracy: 81.43%


In [52]:
from pathlib import Path

model_path = Path("models")
model_path.mkdir(parents=True, exist_ok=True)

model_name = "resnet_imagenette.pth"
model_save_path = model_path / model_name

print(f"Saving the model: {model_save_path}")
torch.save(obj=model_normal.state_dict(), f=model_save_path)

Saving the model: models\resnet_imagenette.pth


# Adversarial Train

In [37]:
model = resnet_18(filter='None', filter_layer=0)
adversary = PGDAttack(model)
learning_rate = 1e-04
criterion = nn.CrossEntropyLoss(reduction="mean").cuda()
optimizer = Ranger(model.parameters(), lr = learning_rate, eps = 1e-06)

Ranger optimizer loaded. 
Gradient Centralization usage = True
GC applied to both conv and fc layers


In [44]:
def train_adv(model, epochs):
    model.train()
    for epoch in range(epochs):
        for x_batch, y_batch in train_dataloader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            r =random.uniform(0, 1)
            if r >= 0.5:
                adv = adversary.perturb(x_batch, y_batch)
                output = model(adv)
            else:
                output = model(x_batch)
            # x_batch = projected_gradient_descent(model, x_batch, eps, eps/10, 40, np.inf)
            # logits = model(x_batch)
            loss = criterion(output, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluate model!
        if epochs%10==0:
            predictions, labels = evaluate_model(model, test_dataloader, device)
            test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())
            print(f"Epoch {epoch+1}/{epochs} - test accuracy: {(100 * test_acc):.2f}% and CE loss {loss.item():.2f}")
    return model

In [33]:
model_adversarial = train_adv(model=model.to(device),
                    epochs=epochs)

Epoch 1/100 - test accuracy: 19.13% and CE loss 2.05
Epoch 2/100 - test accuracy: 55.67% and CE loss 0.95
Epoch 3/100 - test accuracy: 63.24% and CE loss 1.18
Epoch 4/100 - test accuracy: 64.25% and CE loss 2.86
Epoch 5/100 - test accuracy: 66.98% and CE loss 1.43
Epoch 6/100 - test accuracy: 68.64% and CE loss 1.34
Epoch 7/100 - test accuracy: 72.43% and CE loss 0.82
Epoch 8/100 - test accuracy: 72.38% and CE loss 1.49
Epoch 9/100 - test accuracy: 71.46% and CE loss 1.68
Epoch 10/100 - test accuracy: 74.37% and CE loss 0.11
Epoch 11/100 - test accuracy: 73.94% and CE loss 1.14
Epoch 12/100 - test accuracy: 74.34% and CE loss 0.20
Epoch 13/100 - test accuracy: 74.57% and CE loss 0.56
Epoch 14/100 - test accuracy: 75.49% and CE loss 0.28
Epoch 15/100 - test accuracy: 77.12% and CE loss 0.98
Epoch 16/100 - test accuracy: 77.66% and CE loss 0.85
Epoch 17/100 - test accuracy: 77.83% and CE loss 1.08
Epoch 18/100 - test accuracy: 76.54% and CE loss 0.14
Epoch 19/100 - test accuracy: 74.93% 

KeyboardInterrupt: 

In [None]:
# Model to GPU and eval mode.
model_adversarial.to(device)
model_adversarial.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_adversarial, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

In [None]:
from pathlib import Path

model_path = Path("models")
model_path.mkdir(parents=True, exist_ok=True)

model_name = "resnet_imagenette_adv.pth"
model_save_path = model_path / model_name

print(f"Saving the model: {model_save_path}")
torch.save(obj=model_adversarial.state_dict(), f=model_save_path)

# L1 Unstructured

In [39]:
model = resnet_18(filter='None', filter_layer=0)
learning_rate = 1e-03
criterion = nn.CrossEntropyLoss(reduction="mean").cuda()
optimizer = Ranger(model.parameters(), lr = learning_rate, eps = 1e-06)

Ranger optimizer loaded. 
Gradient Centralization usage = True
GC applied to both conv and fc layers


In [33]:
orig_params = count_params(model)
print(f"Unpruned RESNET-18 model has {orig_params} trainable parameters")

Unpruned RESNET-18 model has 11176842 trainable parameters


In [35]:
for layer, param in model.named_parameters():
    print(f"layer.name: {layer} & param.shape = {param.shape}")

layer.name: conv1.weight & param.shape = torch.Size([64, 3, 7, 7])
layer.name: bn1.weight & param.shape = torch.Size([64])
layer.name: bn1.bias & param.shape = torch.Size([64])
layer.name: layer1.0.conv1.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.0.bn1.weight & param.shape = torch.Size([64])
layer.name: layer1.0.bn1.bias & param.shape = torch.Size([64])
layer.name: layer1.0.conv2.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.0.bn2.weight & param.shape = torch.Size([64])
layer.name: layer1.0.bn2.bias & param.shape = torch.Size([64])
layer.name: layer1.1.conv1.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.1.bn1.weight & param.shape = torch.Size([64])
layer.name: layer1.1.bn1.bias & param.shape = torch.Size([64])
layer.name: layer1.1.conv2.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.1.bn2.weight & param.shape = torch.Size([64])
layer.name: layer1.1.bn2.bias & param.shape = torch.Size([64])
lay

In [37]:
for layer_name in model.state_dict().keys():
    print(layer_name, model.state_dict()[layer_name].shape)

conv1.weight torch.Size([64, 3, 7, 7])
bn1.weight torch.Size([64])
bn1.bias torch.Size([64])
bn1.running_mean torch.Size([64])
bn1.running_var torch.Size([64])
bn1.num_batches_tracked torch.Size([])
layer1.0.conv1.weight torch.Size([64, 64, 3, 3])
layer1.0.bn1.weight torch.Size([64])
layer1.0.bn1.bias torch.Size([64])
layer1.0.bn1.running_mean torch.Size([64])
layer1.0.bn1.running_var torch.Size([64])
layer1.0.bn1.num_batches_tracked torch.Size([])
layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
layer1.0.bn2.weight torch.Size([64])
layer1.0.bn2.bias torch.Size([64])
layer1.0.bn2.running_mean torch.Size([64])
layer1.0.bn2.running_var torch.Size([64])
layer1.0.bn2.num_batches_tracked torch.Size([])
layer1.1.conv1.weight torch.Size([64, 64, 3, 3])
layer1.1.bn1.weight torch.Size([64])
layer1.1.bn1.bias torch.Size([64])
layer1.1.bn1.running_mean torch.Size([64])
layer1.1.bn1.running_var torch.Size([64])
layer1.1.bn1.num_batches_tracked torch.Size([])
layer1.1.conv2.weight torch.Size([64, 6

In [279]:
model.state_dict().keys()

odict_keys(['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.bn1.num_batches_tracked', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.bn2.num_batches_tracked', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.bn1.running_mean', 'layer1.1.bn1.running_var', 'layer1.1.bn1.num_batches_tracked', 'layer1.1.conv2.weight', 'layer1.1.bn2.weight', 'layer1.1.bn2.bias', 'layer1.1.bn2.running_mean', 'layer1.1.bn2.running_var', 'layer1.1.bn2.num_batches_tracked', 'layer2.0.conv1.weight', 'layer2.0.bn1.weight', 'layer2.0.bn1.bias', 'layer2.0.bn1.running_mean', 'layer2.0.bn1.running_var', 'layer2.0.bn1.num_batches_tracked', 'layer2.0.conv2.weight', 'layer2.0.bn2.weight', 'layer2.0.bn2.bias', '

In [45]:
print(f"RESNET-18 global sparsity = {compute_sparsity_resnet(model):.2f}%")

RESNET-18 global sparsity = 0.00%


In [47]:
import torch.nn.utils.prune as prune
for name, module in model.named_modules():
    # prune 20% of weights/connections in for all hidden layaers-
    if isinstance(module, torch.nn.Conv2d):
        prune.l1_unstructured(module = module, name = 'weight', amount = 0.2)
    
    # prune 10% of weights/connections for output layer-
    elif isinstance(module, torch.nn.Linear):
        prune.l1_unstructured(module = module, name = 'weight', amount = 0.1)

In [48]:
print(f"RESNET-18 global sparsity = {compute_sparsity_resnet(model):.2f}%")

RESNET-18 global sparsity = 19.99%


In [291]:
model_l1_unstructured = train_model(model = model.to(device), epochs = epochs)

Epoch 1/100 - test accuracy: 46.37% and CE loss 1.50
Epoch 2/100 - test accuracy: 52.84% and CE loss 1.52
Epoch 3/100 - test accuracy: 66.04% and CE loss 1.23
Epoch 4/100 - test accuracy: 62.62% and CE loss 1.20
Epoch 5/100 - test accuracy: 67.49% and CE loss 0.92
Epoch 6/100 - test accuracy: 74.06% and CE loss 1.06
Epoch 7/100 - test accuracy: 71.26% and CE loss 0.67
Epoch 8/100 - test accuracy: 71.95% and CE loss 0.56
Epoch 9/100 - test accuracy: 76.89% and CE loss 0.83
Epoch 10/100 - test accuracy: 74.22% and CE loss 0.86
Epoch 11/100 - test accuracy: 75.59% and CE loss 0.81
Epoch 12/100 - test accuracy: 78.17% and CE loss 0.93
Epoch 13/100 - test accuracy: 75.54% and CE loss 0.78
Epoch 14/100 - test accuracy: 76.10% and CE loss 0.80
Epoch 15/100 - test accuracy: 80.89% and CE loss 0.60
Epoch 16/100 - test accuracy: 79.26% and CE loss 0.63
Epoch 17/100 - test accuracy: 77.71% and CE loss 0.62
Epoch 18/100 - test accuracy: 80.10% and CE loss 1.06
Epoch 19/100 - test accuracy: 79.46% 

In [293]:
# Model to GPU and eval mode.
model_l1_unstructured.to(device)
model_l1_unstructured.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_l1_unstructured, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

Model test accuracy: 85.94%


In [295]:
from pathlib import Path

model_path = Path("models")
model_path.mkdir(parents=True, exist_ok=True)

model_name = "resnet_imagenette_l1_unstructured.pth"
model_save_path = model_path / model_name

print(f"Saving the model: {model_save_path}")
torch.save(obj=model_l1_unstructured.state_dict(), f=model_save_path)

Saving the model: models\resnet_imagenette_l1_unstructured.pth


# Global Pruning

In [41]:
model = resnet_18(filter='None', filter_layer=0)
learning_rate = 1e-03
criterion = nn.CrossEntropyLoss(reduction="mean").cuda()
optimizer = Ranger(model.parameters(), lr = learning_rate, eps = 1e-06)

Ranger optimizer loaded. 
Gradient Centralization usage = True
GC applied to both conv and fc layers


In [56]:
orig_params = count_params(model)
print(f"Unpruned RESNET-18 model has {orig_params} trainable parameters")

Unpruned RESNET-18 model has 11176842 trainable parameters


In [58]:
for layer, param in model.named_parameters():
    print(f"layer.name: {layer} & param.shape = {param.shape}")

layer.name: conv1.weight & param.shape = torch.Size([64, 3, 7, 7])
layer.name: bn1.weight & param.shape = torch.Size([64])
layer.name: bn1.bias & param.shape = torch.Size([64])
layer.name: layer1.0.conv1.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.0.bn1.weight & param.shape = torch.Size([64])
layer.name: layer1.0.bn1.bias & param.shape = torch.Size([64])
layer.name: layer1.0.conv2.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.0.bn2.weight & param.shape = torch.Size([64])
layer.name: layer1.0.bn2.bias & param.shape = torch.Size([64])
layer.name: layer1.1.conv1.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.1.bn1.weight & param.shape = torch.Size([64])
layer.name: layer1.1.bn1.bias & param.shape = torch.Size([64])
layer.name: layer1.1.conv2.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.1.bn2.weight & param.shape = torch.Size([64])
layer.name: layer1.1.bn2.bias & param.shape = torch.Size([64])
lay

In [60]:
print(f"RESNET-18 global sparsity = {compute_sparsity_resnet(model):.2f}%")

RESNET-18 global sparsity = 0.00%


In [62]:
model.state_dict().keys()

odict_keys(['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.bn1.num_batches_tracked', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.bn2.num_batches_tracked', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.bn1.running_mean', 'layer1.1.bn1.running_var', 'layer1.1.bn1.num_batches_tracked', 'layer1.1.conv2.weight', 'layer1.1.bn2.weight', 'layer1.1.bn2.bias', 'layer1.1.bn2.running_mean', 'layer1.1.bn2.running_var', 'layer1.1.bn2.num_batches_tracked', 'layer2.0.conv1.weight', 'layer2.0.bn1.weight', 'layer2.0.bn1.bias', 'layer2.0.bn1.running_mean', 'layer2.0.bn1.running_var', 'layer2.0.bn1.num_batches_tracked', 'layer2.0.conv2.weight', 'layer2.0.bn2.weight', 'layer2.0.bn2.bias', '

In [64]:
parameters_to_prune = (
    (model.conv1, 'weight'),
    (model.bn1, 'weight'),
    (model.layer1[0].conv1, 'weight'),
    (model.layer1[0].bn1, 'weight'),
    (model.layer1[0].conv2, 'weight'),
    (model.layer1[0].bn2, 'weight'),
    (model.layer1[1].conv1, 'weight'),
    (model.layer1[1].bn1, 'weight'),
    (model.layer1[1].conv2, 'weight'),
    (model.layer1[1].bn2, 'weight'),
    (model.layer2[0].conv1, 'weight'),
    (model.layer2[0].bn1, 'weight'),
    (model.layer2[0].conv2, 'weight'),
    (model.layer2[0].bn2, 'weight'),
    (model.layer2[1].conv1, 'weight'),
    (model.layer2[1].bn1, 'weight'),
    (model.layer2[1].conv2, 'weight'),
    (model.layer2[1].bn2, 'weight'),
    (model.layer3[0].conv1, 'weight'),
    (model.layer3[0].bn1, 'weight'),
    (model.layer3[0].conv2, 'weight'),
    (model.layer3[0].bn2, 'weight'),
    (model.layer3[1].conv1, 'weight'),
    (model.layer3[1].bn1, 'weight'),
    (model.layer3[1].conv2, 'weight'),
    (model.layer3[1].bn2, 'weight'),
    (model.layer4[0].conv1, 'weight'),
    (model.layer4[0].bn1, 'weight'),
    (model.layer4[0].conv2, 'weight'),
    (model.layer4[0].bn2, 'weight'),
    (model.layer4[1].conv1, 'weight'),
    (model.layer4[1].bn1, 'weight'),
    (model.layer4[1].conv2, 'weight'),
    (model.layer4[1].bn2, 'weight'),
    (model.fc, 'weight')
)

prune_rates_global = [0.2, 0.3, 0.4, 0.5, 0.6]

In [48]:
def train_global_pruned(model, epochs):
    for iter_prune_round in range(1):
        print(f"\n\nIterative Global pruning round = {iter_prune_round + 1}")
        
        # Prune layer-wise in a structured manner-
        prune.global_unstructured(
            parameters_to_prune,
            pruning_method = prune.L1Unstructured,
            amount = prune_rates_global[iter_prune_round]
            
        )
    
        # Print current global sparsity level-
        print(f" RESNET-18 global sparsity = {compute_sparsity_resnet(model):.2f}%")
        
        
        # Fine-training loop-
        print("\nFine-tuning pruned model to recover model's performance\n")
        model.train()
        for epoch in range(epochs):
            for x_batch, y_batch in train_dataloader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                output = model(x_batch)
                loss = criterion(output, y_batch)
                loss.backward()
                optimizer.step()
    
            # Evaluate model!
            if epochs%10==0:
                predictions, labels = evaluate_model(model, test_dataloader, device)
                test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())
                print(f"Epoch {epoch+1}/{epochs} - test accuracy: {(100 * test_acc):.2f}% and CE loss {loss.item():.2f}")
    return model

In [312]:
model_global = train_global_pruned(model = model.to(device), epochs = epochs)



Iterative Global pruning round = 1
 RESNET-18 global sparsity = 20.00%

Fine-tuning pruned model to recover model's performance

Epoch 1/100 - test accuracy: 53.38% and CE loss 1.48
Epoch 2/100 - test accuracy: 60.41% and CE loss 1.33
Epoch 3/100 - test accuracy: 65.78% and CE loss 1.31
Epoch 4/100 - test accuracy: 66.14% and CE loss 1.33
Epoch 5/100 - test accuracy: 68.99% and CE loss 0.92
Epoch 6/100 - test accuracy: 73.40% and CE loss 0.68
Epoch 7/100 - test accuracy: 71.67% and CE loss 1.02
Epoch 8/100 - test accuracy: 72.51% and CE loss 1.00
Epoch 9/100 - test accuracy: 77.15% and CE loss 0.98
Epoch 10/100 - test accuracy: 68.00% and CE loss 0.85
Epoch 11/100 - test accuracy: 76.54% and CE loss 0.66
Epoch 12/100 - test accuracy: 79.52% and CE loss 0.57
Epoch 13/100 - test accuracy: 73.12% and CE loss 0.70
Epoch 14/100 - test accuracy: 78.83% and CE loss 0.63
Epoch 15/100 - test accuracy: 81.32% and CE loss 0.80
Epoch 16/100 - test accuracy: 79.31% and CE loss 0.59
Epoch 17/100 -

In [313]:
# Model to GPU and eval mode.
model_global.to(device)
model_global.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_global, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

Model test accuracy: 85.76%


In [314]:
from pathlib import Path

model_path = Path("models")
model_path.mkdir(parents=True, exist_ok=True)

model_name = "resnet_imagenette_global.pth"
model_save_path = model_path / model_name

print(f"Saving the model: {model_save_path}")
torch.save(obj=model_global.state_dict(), f=model_save_path)

Saving the model: models\resnet_imagenette_global.pth


# Layered Structured

In [43]:
model = resnet_18(filter='None', filter_layer=0)
learning_rate = 1e-04
criterion = nn.CrossEntropyLoss(reduction="mean").cuda()
optimizer = Ranger(model.parameters(), lr = learning_rate, eps = 1e-06)

Ranger optimizer loaded. 
Gradient Centralization usage = True
GC applied to both conv and fc layers


In [70]:
orig_params = count_params(model)
print(f"Unpruned  RESNET-18 model has {orig_params} trainable parameters")

Unpruned  RESNET-18 model has 11176841 trainable parameters


In [72]:
for layer, param in model.named_parameters():
    print(f"layer.name: {layer} & param.shape = {param.shape}")

layer.name: conv1.weight & param.shape = torch.Size([64, 3, 7, 7])
layer.name: bn1.weight & param.shape = torch.Size([64])
layer.name: bn1.bias & param.shape = torch.Size([64])
layer.name: layer1.0.conv1.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.0.bn1.weight & param.shape = torch.Size([64])
layer.name: layer1.0.bn1.bias & param.shape = torch.Size([64])
layer.name: layer1.0.conv2.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.0.bn2.weight & param.shape = torch.Size([64])
layer.name: layer1.0.bn2.bias & param.shape = torch.Size([64])
layer.name: layer1.1.conv1.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.1.bn1.weight & param.shape = torch.Size([64])
layer.name: layer1.1.bn1.bias & param.shape = torch.Size([64])
layer.name: layer1.1.conv2.weight & param.shape = torch.Size([64, 64, 3, 3])
layer.name: layer1.1.bn2.weight & param.shape = torch.Size([64])
layer.name: layer1.1.bn2.bias & param.shape = torch.Size([64])
lay

In [74]:
print(f" RESNET-18 global sparsity = {compute_sparsity_resnet(model):.2f}%")

 RESNET-18 global sparsity = 0.00%


In [50]:
def train_layered_pruned(model, epochs):
    for iter_prune_round in range(1):
        print(f"\n\nIterative Global pruning round = {iter_prune_round + 1}")
        
        # Prune layer-wise in a structured manner-
        prune.ln_structured(model.conv1, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.bn1, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer1[0].conv1, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer1[0].bn1, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer1[0].conv2, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer1[0].bn2, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer1[1].conv1, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer1[1].bn1, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer1[1].conv2, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer1[1].bn2, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer2[0].conv1, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer2[0].bn1, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer2[0].conv2, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer2[0].bn2, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer2[1].conv1, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer2[1].bn1, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer2[1].conv2, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer2[1].bn2, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer3[0].conv1, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer3[0].bn1, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer3[0].conv2, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer3[0].bn2, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer3[1].conv1, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer3[1].bn1, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer3[1].conv2, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer3[1].bn2, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer4[0].conv1, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer4[0].bn1, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer4[0].conv2, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer4[0].bn2, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer4[1].conv1, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer4[1].bn1, name = "weight", amount = 0.1, n = 2, dim = 0)
        prune.ln_structured(model.layer4[1].conv2, name = "weight", amount = 0.1, n = 2, dim = 0)
        # prune.ln_structured(model.layer4[1].bn2, name = "weight", amount = 0.1, n = 2, dim = 0)

        prune.ln_structured(model.fc, name = "weight", amount = 0.1, n = 2, dim = 0)
        
        # Print current global sparsity level-
        print(f" RESNET-18 global sparsity = {compute_sparsity_resnet(model):.2f}%")
        
        
        # Fine-training loop-
        print("\nFine-tuning pruned model to recover model's performance\n")
        model.train()
        for epoch in range(epochs):
            for x_batch, y_batch in train_dataloader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                output = model(x_batch)
                loss = criterion(output, y_batch)
                loss.backward()
                optimizer.step()
    
            # Evaluate model!
            if epochs%10==0:
                predictions, labels = evaluate_model(model, test_dataloader, device)
                test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())
                print(f"Epoch {epoch+1}/{epochs} - test accuracy: {(100 * test_acc):.2f}% and CE loss {loss.item():.2f}")
    return model

In [377]:
model_layered_structured = train_layered_pruned(model = model.to(device), epochs = epochs)



Iterative Global pruning round = 1
 RESNET-18 global sparsity = 9.99%

Fine-tuning pruned model to recover model's performance

Epoch 1/100 - test accuracy: 27.92% and CE loss 2.22
Epoch 2/100 - test accuracy: 46.96% and CE loss 1.55
Epoch 3/100 - test accuracy: 54.90% and CE loss 1.63
Epoch 4/100 - test accuracy: 57.99% and CE loss 1.32
Epoch 5/100 - test accuracy: 61.32% and CE loss 1.21
Epoch 6/100 - test accuracy: 61.35% and CE loss 1.34
Epoch 7/100 - test accuracy: 63.92% and CE loss 1.27
Epoch 8/100 - test accuracy: 65.78% and CE loss 1.12
Epoch 9/100 - test accuracy: 67.24% and CE loss 1.24
Epoch 10/100 - test accuracy: 67.26% and CE loss 1.09
Epoch 11/100 - test accuracy: 68.66% and CE loss 1.07
Epoch 12/100 - test accuracy: 69.68% and CE loss 1.02
Epoch 13/100 - test accuracy: 70.90% and CE loss 1.04
Epoch 14/100 - test accuracy: 70.85% and CE loss 1.07
Epoch 15/100 - test accuracy: 71.18% and CE loss 0.58
Epoch 16/100 - test accuracy: 71.75% and CE loss 0.98
Epoch 17/100 - 

In [379]:
# Model to GPU and eval mode.
model_layered_structured.to(device)
model_layered_structured.eval()

# Check test set performance.
predictions, labels = evaluate_model(model_layered_structured, test_dataloader, device)
test_acc = np.mean(np.argmax(predictions.cpu().numpy(), axis=1) == labels.cpu().numpy())        
print(f"Model test accuracy: {(100 * test_acc):.2f}%")

Model test accuracy: 80.64%


In [380]:
from pathlib import Path

model_path = Path("models")
model_path.mkdir(parents=True, exist_ok=True)

model_name = "resnet_imagenette_structured.pth"
model_save_path = model_path / model_name

print(f"Saving the model: {model_save_path}")
torch.save(obj=model_layered_structured.state_dict(), f=model_save_path)

Saving the model: models\resnet_imagenette_structured.pth
