# Pruning

In [1]:
import os
import copy
import torch
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F

from indiv_utils import load_yaml, size_on_disk, get_layers, measure_inference_latency, param_count, FLOPs_count, save_model_weights, start_train, Sparsity
from models import FFNN
from data_processing import MNISTDataProcessor

## Preliminaries & Setup

| hyperparameter  | MNIST |
| --------------- | ----- |
| learning rate   | 0.001 |
| batch size      | 64    |
| hidden size     | 1024  |
| # hidden layers | 2     |
| input size      | 20x20 |
| output size     | 10    |

In [2]:
"""Setup"""
# reproducibility: seed torch's RNG before the model is constructed so the
# random initialization (saved below for iterative pruning) can be regenerated
# from a fresh kernel
SEED = 42
torch.manual_seed(SEED)

# hyperparameters (mirror the table in "Preliminaries & Setup")
lr = 0.001
batch_size = 64
num_hidden = 2
hidden_dim = 1024
out_dim = 10  # 10 MNIST classes
epochs = 2
input_dim = 20 * 20  # 20x20 input, flattened to 400 features

# config
config = load_yaml('config')

# device
device = torch.device(config['device'])

# criterion
criterion = torch.nn.CrossEntropyLoss()

# model
model = FFNN(input_dim=input_dim, hidden_dim=hidden_dim, out_dim=out_dim, num_hidden=num_hidden, bias=True).to(device)

In [3]:
"""Model initialization:
Before training, SAVE the model's initial (random) weights. 
You will use them later for iterative pruning."""

initial_weights_path = "out/FFNN_weights_initial.pth"
if os.path.exists(initial_weights_path):
    # A cached initialization already exists. Load it into `model` so that any
    # training done below starts from the same weights that are stored on disk,
    # instead of from a new random draw that was never saved.
    model.load_state_dict(torch.load(initial_weights_path, map_location=device))
else:
    # First run: persist the random initialization.
    # NOTE(review): presumably this writes the path checked above — confirm in indiv_utils.
    # `initial_random_weights` is only bound on this branch.
    initial_random_weights = save_model_weights(model, fname="initial")

In [4]:
"""Train"""
trained_weights_path = "out/FFNN_weights_trained.pth"
if os.path.exists(trained_weights_path):
    # Cached run: restore the trained checkpoint so `model` holds trained
    # weights on either branch, not an untrained network.
    model.load_state_dict(torch.load(trained_weights_path, map_location=device))
else:
    # No checkpoint yet: train, then save the result.
    # `trained_weights` is only bound on this branch.
    start_train(model, device, criterion, epochs, batch_size, lr)
    trained_weights = save_model_weights(model, fname="trained")

In [5]:
"""load initial model"""
model_initial = FFNN(input_dim=input_dim, hidden_dim=hidden_dim, out_dim=out_dim, num_hidden=num_hidden, bias=True).to(device)
# map_location keeps the load working when the checkpoint was saved from a
# different device than the one configured for this run
model_initial.load_state_dict(torch.load("out/FFNN_weights_initial.pth", map_location=device))
print("Loaded initial model weights")

# load trained model: same architecture, so copy the initial model and
# overwrite its weights with the trained checkpoint
model_trained = copy.deepcopy(model_initial)
model_trained.load_state_dict(torch.load("out/FFNN_weights_trained.pth", map_location=device))
print("Loaded trained model weights")

# test dataset
test_dataset = MNISTDataProcessor().vision_test_dataset()

Loaded initial model weights
Loaded trained model weights
Center Cropping images from 28x28 to 20x20
new image size:  (400,)
Center Cropping images from 28x28 to 20x20
new image size:  (400,)
parsing test features...
The number of test labels: 10000


In [6]:
"""Inference Latency of Trained Model"""
# Inference Latency
# Measure the model restored from the trained checkpoint: it holds the trained
# weights regardless of whether training ran in this kernel session.
measure_inference_latency(model=model_trained, test_dataset=test_dataset, device=device, warmup_itr=100)

Measuring inference latency of trained FFNN on cuda...


1it [00:00,  5.83it/s]

Warm-up begins...


10000it [00:03, 3204.26it/s]


Mean inference latency: 0.129ms


In [7]:
"""Parameter Count, FLOPs, and Disk Storage of Trained Model"""
# layers
# Use the model restored from the trained checkpoint so these figures describe
# the same network as the rest of the notebook.
# NOTE(review): `layers` presumably references model_trained's live modules — confirm
# nothing later expects it to stay unpruned.
layers = get_layers(model=model_trained)

# Parameter Count (per layer and total)
param_count(model=model_trained, layers=layers)

# FLOPs
FLOPs_count(model=model_trained, layers=layers)

# Disk Storage (kept as the last expression so its return value is displayed)
size_on_disk(model=model_trained)

The number of parameters in each layer of FFNN:
	Linear: 400 * 1024 + 1024 = 410,624
	ReLU: = 0
	Linear: 1024 * 1024 + 1024 = 1,049,600
	ReLU: = 0
	Linear: 1024 * 10 + 10 = 10,250
The total number of parameters in FFNN: 1,470,474
The total FLOPs in FFNN: 0.002937 GFLOPs
Model Size on Disk: 5.883903 MB


5883903

## Magnitude pruning on MNIST

### Trained Model

In [8]:
"""Trained Model's architecture"""
# Print the module tree of the model that was loaded from the trained checkpoint.
print(model_trained)

FFNN(
  (model): Sequential(
    (0): Sequential(
      (0): Linear(in_features=400, out_features=1024, bias=True)
      (1): ReLU()
    )
    (1): Sequential(
      (0): Linear(in_features=1024, out_features=1024, bias=True)
      (1): ReLU()
    )
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


In [9]:
"""Buffers of Trained Model"""
# Baseline before any pruning is applied: list the (name, tensor) buffer pairs
# registered on the trained model.
trained_buffers = list(model_trained.named_buffers())
print(trained_buffers)

[]


In [10]:
"""Get all layers of Trained Model"""
# Collect the trained model's layers as a flat list; the cells below index into
# this list and hand it to the sparsity and pruning utilities.
model_layers = get_layers(model=model_trained)
# Header uses the model's class name, followed by the list's repr on the next line.
print(f"{model_trained.__class__.__name__} layers:\n{model_layers}")

FFNN layers:
[Linear(in_features=400, out_features=1024, bias=True), ReLU(), Linear(in_features=1024, out_features=1024, bias=True), ReLU(), Linear(in_features=1024, out_features=10, bias=True)]


In [11]:
"""Weight and Bias of the first layer of Trained Model"""
# Inspect the parameters of the first entry in model_layers (the input Linear
# layer) before pruning.
print(f"Weight of the first layer:\n{model_layers[0].weight}")
print(f"Bias of the first layer:\n{model_layers[0].bias}")

Weight of the first layer:
Parameter containing:
tensor([[ 0.0661,  0.0115,  0.0095,  ...,  0.0792,  0.0295,  0.0438],
        [ 0.0417,  0.0243,  0.0168,  ...,  0.0155,  0.0438,  0.0155],
        [ 0.0864,  0.0559,  0.0999,  ...,  0.0503,  0.0688,  0.0363],
        ...,
        [-0.0159,  0.0330, -0.0122,  ...,  0.0150,  0.0357, -0.0115],
        [-0.0207,  0.0613, -0.0179,  ..., -0.0278,  0.0120, -0.0376],
        [ 0.0525,  0.0021,  0.0082,  ..., -0.0028,  0.0136,  0.0405]],
       device='cuda:0', requires_grad=True)
Bias of the first layer:
Parameter containing:
tensor([ 0.0228, -0.0110, -0.0333,  ..., -0.0355, -0.0043, -0.0440],
       device='cuda:0', requires_grad=True)


### Global Unstructured Magnitude (L1) Pruning 

In [12]:
"""Sparsity of Trained Model"""
# Report one sparsity figure over all of model_layers, before pruning.
# NOTE(review): presumably the percentage of zero-valued weights — confirm in indiv_utils.
Sparsity(model_layers=model_layers).global_level()

Global sparsity: 0.0%


In [13]:
"""Sparsity of each layer of Trained Model"""
# Report sparsity separately for each layer, before pruning (the recorded run
# lists only the Linear layers).
Sparsity(model_layers=model_layers).each_layer()

	Linear: 0.0%
	Linear: 0.0%
	Linear: 0.0%


In [14]:
"""Global Unstructured Pruning"""
sparsity_level = 0.3  # fraction of the selected weights to prune, taken over all layers combined

# Select the weight of every Linear layer rather than hard-coding list indices,
# so the selection stays correct if the number of hidden layers changes.
# Biases are not pruned.
example_prune_params = [(layer, 'weight') for layer in model_layers if isinstance(layer, nn.Linear)]

# NOTE: this modifies the selected modules in place; re-running the cell prunes
# again on top of the masks that already exist.
prune.global_unstructured(example_prune_params, pruning_method=prune.L1Unstructured, amount=sparsity_level)

In [15]:
"""Sparsity of pruned Trained Model"""
# Re-check overall sparsity after global pruning; it should be close to the
# requested sparsity_level (0.3).
Sparsity(model_layers=model_layers).global_level()

Global sparsity: 30.00001362011855%


In [16]:
"""Sparsity of each layer of pruned Trained Model"""
# Re-check per-layer sparsity after global pruning. Individual layers need not
# each hit sparsity_level, because the pruning call was made once over all
# selected layers together.
Sparsity(model_layers=model_layers).each_layer()

	Linear: 23.748291015625%
	Linear: 32.4275016784668%
	Linear: 31.494140625%


### Iterative magnitude pruning (IMP)