===============================\
   Compression Pipeline\
===============================

In [1]:
!apt-get install python3.11-distutils
!update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Note, selecting 'python3-distutils' instead of 'python3.11-distutils'
python3-distutils is already the newest version (3.10.8-1~22.04).
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.


In [2]:
# Install sconce directly from its GitHub repository into the kernel's environment.
# NOTE(review): no branch, tag, or commit is pinned in the URL, so the installed
# version can change between runs -- consider pinning a ref for reproducibility.
%pip install -q git+https://github.com/satabios/sconce

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m76.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.7/57.7 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.4/96.4 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.3/125.3 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.0/236.0 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00

In [3]:
from collections import defaultdict, OrderedDict
from sconce import sconce

import copy
import numpy as np
import torch
from torch import nn
from torch.optim import *
from torch.optim.lr_scheduler import *
from torch.utils.data import DataLoader
from torchvision.datasets import *
from torchvision.transforms import *
import torch.optim as optim

# Fail fast when no GPU is attached: a later cell moves the model to CUDA.
# The message parts are separated by a space so they do not run together
# ("...CUDA support.Please go to...") when concatenated.
assert torch.cuda.is_available(), (
    "The current runtime does not have CUDA support. "
    "Please go to menu bar (Runtime - Change runtime type) and select GPU"
)



Load the Pre-Trained Model Weights

In [4]:
!wget https://huggingface.co/satabios/pre-trained_cifar10/resolve/main/vgg.cifar.pretrained.pth?download=true -O vgg.cifar.pretrained.pth

--2025-01-31 18:53:30--  https://huggingface.co/satabios/pre-trained_cifar10/resolve/main/vgg.cifar.pretrained.pth?download=true
Resolving huggingface.co (huggingface.co)... 13.35.202.34, 13.35.202.97, 13.35.202.40, ...
Connecting to huggingface.co (huggingface.co)|13.35.202.34|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.hf.co/repos/55/fb/55fb4781756edef0ec8cbd6e84a7a05c9478cb5c1d83647b7e7d0a7b31bf7e7d/356e478a06348b2497dbaddddbfd09a4c454d6df7c9bcdf36e767b38a93ff43a?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27vgg.cifar.pretrained.pth%3B+filename%3D%22vgg.cifar.pretrained.pth%22%3B&Expires=1738353210&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczODM1MzIxMH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzU1L2ZiLzU1ZmI0NzgxNzU2ZWRlZjBlYzhjYmQ2ZTg0YTdhMDVjOTQ3OGNiNWMxZDgzNjQ3YjdlN2QwYTdiMzFiZjdlN2QvMzU2ZTQ3OGEwNjM0OGIyNDk3ZGJhZGRkZGJmZDA5YTRjNDU0ZDZkZjdj

In [5]:

class VGG(nn.Module):
  """VGG-style CNN: a conv/bn/relu + max-pool backbone followed by a linear head.

  ``ARCH`` lists the backbone stages in order: an integer is the output channel
  count of a 3x3 conv-bn-relu group, and ``'M'`` is a 2x2 max-pool.
  Backbone layers are named ``conv0, bn0, relu0, conv1, ..., pool0, ...`` with a
  separate running index per layer kind.
  """
  ARCH = [64, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']

  def __init__(self) -> None:
    """Build the named backbone from ``ARCH`` and the 512 -> 10 classifier."""
    super().__init__()

    named_layers = OrderedDict()
    next_index = defaultdict(int)  # per-kind counter used as the name suffix

    def register(kind: str, module: nn.Module) -> None:
      """Store ``module`` under ``<kind><running index>`` and bump the index."""
      named_layers[f"{kind}{next_index[kind]}"] = module
      next_index[kind] += 1

    channels_in = 3
    for spec in self.ARCH:
      if spec == 'M':
        # down-sampling stage
        register("pool", nn.MaxPool2d(2))
        continue
      # convolution stage: conv (no bias) -> batch norm -> in-place ReLU
      register("conv", nn.Conv2d(channels_in, spec, 3, padding=1, bias=False))
      register("bn", nn.BatchNorm2d(spec))
      register("relu", nn.ReLU(True))
      channels_in = spec

    self.backbone = nn.Sequential(named_layers)
    self.classifier = nn.Linear(512, 10)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Return class logits of shape [N, 10] for an image batch ``x``."""
    # backbone: [N, 3, 32, 32] => [N, 512, 2, 2]
    features = self.backbone(x)
    # global average pool over the two spatial axes: => [N, 512]
    pooled = features.mean(dim=(2, 3))
    # classifier: [N, 512] => [N, 10]
    return self.classifier(pooled)


# Restore the pretrained weights into a fresh VGG placed on the GPU.
model_path = "vgg.cifar.pretrained.pth"

# weights_only=True is passed so torch.load uses its restricted unpickler.
checkpoint = torch.load(model_path, weights_only=True)

model = VGG()
model = model.cuda()
# Left as the cell's last expression so the key-matching report is displayed.
model.load_state_dict(checkpoint)

<All keys matched successfully>

Setup the Dataset

In [6]:
image_size = 32

# Per-split preprocessing. Stored as `cifar_transforms` (not `transforms`) so the
# torchvision `transforms` module name is never rebound to a dict; with the old
# name, running this cell a second time failed on `transforms.Compose`.
cifar_transforms = {
    # Training split: random crop (4-pixel padding) and horizontal flip as augmentation.
    "train": Compose([
        RandomCrop(image_size, padding=4),
        RandomHorizontalFlip(),
        ToTensor(),
    ]),
    # Test split: tensor conversion only.
    "test": ToTensor(),
}

# CIFAR-10 datasets keyed by split name, stored under data/cifar10.
dataset = {}
for split in ["train", "test"]:
    dataset[split] = CIFAR10(
        root="data/cifar10",
        train=(split == "train"),
        download=True,
        transform=cifar_transforms[split],
    )

# One DataLoader per split; only the training split is shuffled.
dataloader = {}
for split in ["train", "test"]:
    dataloader[split] = DataLoader(
        dataset[split],
        batch_size=512,
        shuffle=(split == "train"),
        num_workers=0,
        pin_memory=True,
    )




Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar10/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:13<00:00, 12.5MB/s]


Extracting data/cifar10/cifar-10-python.tar.gz to data/cifar10
Files already downloaded and verified


sconce Configurations

**Channel-Wise Pruning**

In [7]:
# Configure a sconce run for channel-wise pruning (CWP) of the pretrained VGG.
sconces = sconce()
# Deep copy, so sconce operates on its own independent copy of `model`.
sconces.model= copy.deepcopy(model)
sconces.criterion = nn.CrossEntropyLoss() # Loss function
# Adam over the copied model's parameters, with a cosine-annealing LR schedule.
sconces.optimizer= optim.Adam(sconces.model.parameters(), lr=1e-4)
sconces.scheduler = optim.lr_scheduler.CosineAnnealingLR(sconces.optimizer, T_max=200)
# Dict of DataLoaders keyed by "train" and "test".
sconces.dataloader = dataloader
sconces.epochs = 1 # Number of times we iterate over the data
# presumably the number of fine-tuning epochs run after pruning -- confirm against sconce
sconces.num_finetune_epochs = 1
sconces.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
sconces.experiment_name = "vgg-cwp"
sconces.prune_mode = "CWP" # Supports Automated Pruning Ratio Detection


In [None]:
# Compress the model channel-wise (prune_mode "CWP" from the configuration cell above).
sconces.compress()


Original Dense Model Size Model=35.20 MiB




Original Model Validation Accuracy: 93.13627254509018 %

 Channel-Wise Pruning




Sensitivity Scan Time(mins): 6.284129293759664 

Sparsity for each Layer:  dict_items([('backbone.conv0', 0.15000000000000002), ('backbone.conv1', 0.0), ('backbone.conv2', 0.0), ('backbone.conv3', 0.0), ('backbone.conv4', 0.0), ('backbone.conv5', 0.0), ('backbone.conv6', 0.1), ('backbone.conv7', 0.40000000000000013)])

Pruning Time Consumed (mins): 0.0014287829399108887
Total Pruning Time Consumed (mins): 6.285559240976969

Pruned Model has size=29.90 MiB(non-zeros) = 84.92% of Original model size





Pruned Model has Accuracy=92.07 % = -1.06% of Original model Accuracy

 




Epoch:1 Train Loss: 0.00000 Validation Accuracy: 92.83567




Epoch:2 Train Loss: 0.00000 Validation Accuracy: 92.93587




Epoch:3 Train Loss: 0.00000 Validation Accuracy: 92.90581




Epoch:4 Train Loss: 0.00000 Validation Accuracy: 93.01603




Epoch:5 Train Loss: 0.00000 Validation Accuracy: 92.98597




Fine-Tuned Sparse model has size=29.90 MiB = 84.92% of Original model size
Fine-Tuned Pruned Model Validation Accuracy: 92.98597194388778

 


train:  17%|█▋        | 17/98 [00:07<00:34,  2.35it/s]

In [None]:


# Configure a fresh sconce run on the pretrained VGG with prune mode "GMP".
sconces = sconce()
# Deep copy, so sconce operates on its own independent copy of `model`.
sconces.model= copy.deepcopy(model)
sconces.criterion = nn.CrossEntropyLoss() # Loss function
# Adam over the copied model's parameters, with a cosine-annealing LR schedule.
sconces.optimizer= optim.Adam(sconces.model.parameters(), lr=1e-4)
sconces.scheduler = optim.lr_scheduler.CosineAnnealingLR(sconces.optimizer, T_max=200)
sconces.dataloader = dataloader
sconces.epochs = 1 # Number of times we iterate over the data
# NOTE(review): num_finetune_epochs is not set here, while the CWP configuration
# sets it to 1 -- confirm that relying on sconce's default is intended.
sconces.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
sconces.experiment_name = "vgg-gmp"
sconces.prune_mode = "GMP" # Supports Automated Pruning Ratio Detection


Easy function calls to train and validate the model on the given dataset

In [None]:
# Train the model
sconces.train()
# Evaluate the model
sconces.evaluate()

The magic happens here: compress the model (GMP pruning was set as the prune mode via `sconces.prune_mode` above).

In [None]:
# Run compression with the prune mode configured on `sconces` ("GMP" in the configuration cell above).
sconces.compress()

**Venum Pruning: a better version of Wanda Pruning**

In [None]:
# Disabled example (every line below is commented out): configures a sconce run
# on a copy of the pretrained VGG with prune_mode "venum" and compresses it.
# from sconce import sconce

# sconces = sconce()
# sconces.model = copy.deepcopy(model)
# sconces.criterion = nn.CrossEntropyLoss()  # Loss
# sconces.optimizer = optim.Adam(sconces.model.parameters(), lr=1e-4)
# sconces.scheduler = optim.lr_scheduler.CosineAnnealingLR(sconces.optimizer, T_max=200)
# sconces.dataloader = dataloader
# sconces.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# sconces.experiment_name = "vgg-venum"
# sconces.prune_mode = "venum"  # Supports Automated Pruning Ratio Detection
# sconces.compress()

Spiking Neural Network Compression


In [None]:
!pip install snntorch -q

In [None]:
# Import snntorch libraries
import snntorch as snn
from snntorch import surrogate
from snntorch import backprop
from snntorch import functional as SF
from snntorch import utils
from snntorch import spikeplot as splt
from torch import optim

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F

import matplotlib.pyplot as plt
import numpy as np
import itertools



In [None]:

# MNIST data for the spiking-network example

# DataLoader arguments
batch_size = 128
data_path = "./data/mnist"

dtype = torch.float
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessing: resize to 28x28, convert to grayscale, turn into a tensor,
# then normalize with mean 0 and std 1.
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Normalize((0,), (1,)),
])

# Training split first, then test split; both are downloaded when missing.
mnist_train, mnist_test = (
    datasets.MNIST(data_path, train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Both loaders shuffle and drop the final incomplete batch.
train_loader, test_loader = (
    DataLoader(split_data, batch_size=batch_size, shuffle=True, drop_last=True)
    for split_data in (mnist_train, mnist_test)
)


In [None]:
from sconce import sconce

# Fresh sconce instance for the spiking-network experiment.
sconces = sconce()

# Hand sconce the MNIST loaders under the "train" / "test" keys.
dataloader = {"train": train_loader, "test": test_loader}
sconces.dataloader = dataloader

In [None]:
# Enable SNN handling in sconce
sconces.snn = True

# Define the spiking model: two conv -> max-pool -> Leaky stages and a linear read-out.
spike_grad = surrogate.fast_sigmoid(slope=25)
beta = 0.5
snn_model = nn.Sequential(
    nn.Conv2d(1, 12, 5),
    nn.MaxPool2d(2),
    snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True),
    nn.Conv2d(12, 64, 5),
    nn.MaxPool2d(2),
    snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True),
    nn.Flatten(),
    # 28x28 input -> conv5: 24 -> pool: 12 -> conv5: 8 -> pool: 4, with 64 channels
    nn.Linear(64 * 4 * 4, 10),
    snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True, output=True),
).to(device)  # `device` from the data cell, so the CPU fallback chosen there is honoured

# Load the pretrained weights
# NOTE(review): the path is relative and points into a Google Drive folder; no
# mount step is visible in this notebook -- confirm the file is reachable.
snn_pretrained_model_path = "drive/MyDrive/Efficientml/Efficientml.ai/snn_model.pth"
# map_location remaps the checkpoint onto `device` (needed on a CPU-only runtime);
# weights_only=True matches how the VGG checkpoint is loaded earlier.
snn_model.load_state_dict(
    torch.load(snn_pretrained_model_path, map_location=device, weights_only=True)
)
sconces.model = snn_model

In [None]:

# Adam over the SNN's parameters, with a cosine-annealing LR schedule.
sconces.optimizer = optim.Adam(sconces.model.parameters(), lr=1e-4)
sconces.scheduler = optim.lr_scheduler.CosineAnnealingLR(sconces.optimizer, T_max=200)

# Loss taken from snntorch.functional (imported as SF).
sconces.criterion = SF.ce_rate_loss()

sconces.epochs = 10  # Number of times we iterate over the data
sconces.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
sconces.experiment_name = "snn-gmp"  # Define your experiment name here
sconces.prune_mode = "GMP"
# presumably the number of fine-tuning epochs run after pruning -- confirm against sconce
sconces.num_finetune_epochs = 1


In [None]:
# Compress the spiking model using the settings assigned to `sconces` above (prune_mode "GMP").
sconces.compress()