In [1]:
import os
import numpy as np
import cv2
import pickle
from tqdm import tqdm
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary
import torch.nn.utils as utils

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
!ls
!tar -xvzf dataset.tar.gz

dataset.tar.gz	my_model_weights_1.pt  sample_data
train_images.pkl
train_labels.pkl
val_images.pkl
val_labels.pkl


In [4]:
# load train
train_images = pickle.load(open('train_images.pkl', 'rb'))
train_labels = pickle.load(open('train_labels.pkl', 'rb'))
# load val
val_images = pickle.load(open('val_images.pkl', 'rb'))
val_labels = pickle.load(open('val_labels.pkl', 'rb'))

In [5]:
train_images = torch.tensor(train_images, dtype=torch.float32)
val_images = torch.tensor(val_images, dtype=torch.float32)

train_images = train_images.permute(0, 3, 1, 2)
val_images = val_images.permute(0, 3, 1, 2)

In [6]:
train_dataset = TensorDataset(train_images,
                              torch.tensor(train_labels.squeeze(), dtype=torch.long))
val_dataset = TensorDataset(val_images,
                            torch.tensor(val_labels.squeeze(), dtype=torch.long))

In [7]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [8]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()

        self.model = nn.Sequential(
            # First block: Conv -> ReLU -> Conv -> ReLU -> MaxPool -> Dropout
            nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=0, bias=True),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.25),

            # Second block: Conv -> ReLU -> Conv -> ReLU -> MaxPool -> Dropout
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=0, bias=True),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.25),

            # Flatten layer
            nn.Flatten(),

            # Fully connected block: Dense -> ReLU -> Dropout -> Dense -> Softmax
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 5),
        )

    def forward(self, x):
        return self.model(x)

# Running Iterative Pruning

Given some "sparsity" value, we will set the weights of the model for the values to 0 train for n steps and then update the process iteratively.

In [32]:
model = ConvNet()

model.load_state_dict(torch.load('my_model_weights_1.pt'))
model.to(device)

criterion = torch.nn.CrossEntropyLoss()

In [33]:
sparsity = 0.85
N = 500 # number of steps of training
lr = 0.001
num_iter = 100
lamb = 1e-3

optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=lamb)

In [34]:
# getting lists of weights in the model
weights = []
with torch.no_grad():  # disable gradient tracking for efficiency
  for name, param in model.named_parameters():
    if "weight" in name:  # only apply to weights, skip biases
      weights.append(name)

In [35]:
def train_n_steps(model, train_loader, optimizer, criterion, device,
                  n_steps = N, s = sparsity):


    # setting sparsity
    with torch.no_grad():  # disable gradient tracking for efficiency
      # weight threshold to prune
      threshold = torch.cat([torch.flatten(torch.absolute(model.get_parameter(i))) for i in weights]).quantile(s)
      #print(threshold)
      for name, param in model.named_parameters():
          if "weight" in name:  # only apply to weights, skip biases
              param[torch.absolute(param) < threshold] = 0

    model.train()  # Set model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    steps = 0



    # Progress bar for the training loop
    train_loader_tqdm = tqdm(train_loader, desc="Training", leave=False, total=n_steps)

    for inputs, labels in train_loader_tqdm:

        steps += 1

        if steps > n_steps:
            break

        # Zero the parameter gradients
        optimizer.zero_grad()  # Zero the parameter gradients
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Track loss and accuracy
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        # Update tqdm description with current loss and accuracy
        train_loader_tqdm.set_postfix(loss=running_loss / n_steps, accuracy=100 * correct / total)

    train_accuracy = 100 * correct / total
    train_loss = running_loss / n_steps
    return train_loss, train_accuracy

In [36]:
def validate(model, val_loader, criterion, device):
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    correct = 0
    total = 0

    # Progress bar for the validation loop
    val_loader_tqdm = tqdm(val_loader, desc="Validation", leave=False)

    with torch.no_grad():  # Disable gradient calculations for validation
        for inputs, labels in val_loader_tqdm:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Track loss and accuracy
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            # Update tqdm description with current validation loss and accuracy
            val_loader_tqdm.set_postfix(loss=val_loss / total, accuracy=100 * correct / total)

    val_accuracy = 100 * correct / total
    val_loss = val_loss / len(val_loader)
    return val_loss, val_accuracy

In [37]:
# Main training loop

train_loss_hist = []
train_acc_hist = []
val_loss_hist = []
val_acc_hist = []

for iter in range(num_iter):
    print(f"Pruning Iteration {iter+1}/{num_iter}")

    # Training
    train_loss, train_accuracy = train_n_steps(model, train_loader, optimizer, criterion, device)

    # Validation
    val_loss, val_accuracy = validate(model, val_loader, criterion, device)

    # Keep History of Output
    train_loss_hist.append(train_loss)
    train_acc_hist.append(train_accuracy)
    val_loss_hist.append(val_loss)
    val_acc_hist.append(val_accuracy)

    # Print epoch results
    print(f'Iter [{iter+1}/{num_iter}], '
          f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, '
          f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%')

Pruning Iteration 1/100




Iter [1/100], Train Loss: 1.0582, Train Acc: 58.65%, Val Loss: 0.9663, Val Acc: 61.66%
Pruning Iteration 2/100




Iter [2/100], Train Loss: 1.0327, Train Acc: 59.76%, Val Loss: 0.9187, Val Acc: 64.40%
Pruning Iteration 3/100




Iter [3/100], Train Loss: 1.0195, Train Acc: 60.32%, Val Loss: 0.9439, Val Acc: 63.68%
Pruning Iteration 4/100




Iter [4/100], Train Loss: 0.9988, Train Acc: 61.13%, Val Loss: 0.9274, Val Acc: 62.65%
Pruning Iteration 5/100




Iter [5/100], Train Loss: 0.9896, Train Acc: 61.39%, Val Loss: 0.8776, Val Acc: 65.07%
Pruning Iteration 6/100




Iter [6/100], Train Loss: 0.9645, Train Acc: 61.95%, Val Loss: 0.9004, Val Acc: 63.13%
Pruning Iteration 7/100




Iter [7/100], Train Loss: 0.9608, Train Acc: 62.78%, Val Loss: 0.8755, Val Acc: 66.38%
Pruning Iteration 8/100




Iter [8/100], Train Loss: 0.9404, Train Acc: 63.62%, Val Loss: 0.9037, Val Acc: 65.50%
Pruning Iteration 9/100




Iter [9/100], Train Loss: 0.9244, Train Acc: 64.47%, Val Loss: 0.9318, Val Acc: 62.77%
Pruning Iteration 10/100




Iter [10/100], Train Loss: 0.9249, Train Acc: 64.42%, Val Loss: 0.9509, Val Acc: 61.94%
Pruning Iteration 11/100




Iter [11/100], Train Loss: 0.9114, Train Acc: 64.84%, Val Loss: 0.8343, Val Acc: 68.55%
Pruning Iteration 12/100




Iter [12/100], Train Loss: 0.9021, Train Acc: 65.23%, Val Loss: 0.8539, Val Acc: 67.05%
Pruning Iteration 13/100




Iter [13/100], Train Loss: 0.8943, Train Acc: 65.56%, Val Loss: 0.8458, Val Acc: 67.29%
Pruning Iteration 14/100




Iter [14/100], Train Loss: 0.8746, Train Acc: 66.48%, Val Loss: 0.8050, Val Acc: 69.66%
Pruning Iteration 15/100




Iter [15/100], Train Loss: 0.8616, Train Acc: 67.08%, Val Loss: 0.7942, Val Acc: 69.82%
Pruning Iteration 16/100




Iter [16/100], Train Loss: 0.8608, Train Acc: 67.52%, Val Loss: 0.8548, Val Acc: 67.21%
Pruning Iteration 17/100




Iter [17/100], Train Loss: 0.8430, Train Acc: 67.76%, Val Loss: 0.8445, Val Acc: 67.56%
Pruning Iteration 18/100




Iter [18/100], Train Loss: 0.8533, Train Acc: 67.72%, Val Loss: 0.8140, Val Acc: 68.55%
Pruning Iteration 19/100




Iter [19/100], Train Loss: 0.8335, Train Acc: 68.71%, Val Loss: 0.7971, Val Acc: 69.54%
Pruning Iteration 20/100




Iter [20/100], Train Loss: 0.8328, Train Acc: 67.94%, Val Loss: 0.8134, Val Acc: 68.28%
Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.8279, Train Acc: 68.77%, Val Loss: 0.8334, Val Acc: 68.04%
Pruning Iteration 22/100




Iter [22/100], Train Loss: 0.8173, Train Acc: 69.18%, Val Loss: 0.7775, Val Acc: 70.77%
Pruning Iteration 23/100




Iter [23/100], Train Loss: 0.8074, Train Acc: 69.69%, Val Loss: 0.8165, Val Acc: 69.39%
Pruning Iteration 24/100




Iter [24/100], Train Loss: 0.8183, Train Acc: 68.98%, Val Loss: 0.7634, Val Acc: 70.53%
Pruning Iteration 25/100




Iter [25/100], Train Loss: 0.7971, Train Acc: 69.70%, Val Loss: 0.7882, Val Acc: 70.53%
Pruning Iteration 26/100




Iter [26/100], Train Loss: 0.7932, Train Acc: 70.21%, Val Loss: 0.7823, Val Acc: 71.29%
Pruning Iteration 27/100




Iter [27/100], Train Loss: 0.7963, Train Acc: 69.81%, Val Loss: 0.7468, Val Acc: 72.32%
Pruning Iteration 28/100




Iter [28/100], Train Loss: 0.7864, Train Acc: 70.17%, Val Loss: 0.7537, Val Acc: 71.76%
Pruning Iteration 29/100




Iter [29/100], Train Loss: 0.7841, Train Acc: 70.44%, Val Loss: 0.7573, Val Acc: 70.77%
Pruning Iteration 30/100




Iter [30/100], Train Loss: 0.7817, Train Acc: 70.69%, Val Loss: 0.7547, Val Acc: 70.93%
Pruning Iteration 31/100




Iter [31/100], Train Loss: 0.7716, Train Acc: 71.03%, Val Loss: 0.7509, Val Acc: 71.68%
Pruning Iteration 32/100




Iter [32/100], Train Loss: 0.7685, Train Acc: 70.63%, Val Loss: 0.7420, Val Acc: 71.64%
Pruning Iteration 33/100




Iter [33/100], Train Loss: 0.7666, Train Acc: 71.17%, Val Loss: 0.7695, Val Acc: 71.01%
Pruning Iteration 34/100




Iter [34/100], Train Loss: 0.7532, Train Acc: 71.88%, Val Loss: 0.7378, Val Acc: 72.04%
Pruning Iteration 35/100




Iter [35/100], Train Loss: 0.7593, Train Acc: 71.62%, Val Loss: 0.7552, Val Acc: 72.32%
Pruning Iteration 36/100




Iter [36/100], Train Loss: 0.7455, Train Acc: 71.84%, Val Loss: 0.7357, Val Acc: 72.51%
Pruning Iteration 37/100




Iter [37/100], Train Loss: 0.7552, Train Acc: 71.76%, Val Loss: 0.7529, Val Acc: 71.05%
Pruning Iteration 38/100




Iter [38/100], Train Loss: 0.7527, Train Acc: 71.43%, Val Loss: 0.7740, Val Acc: 69.94%
Pruning Iteration 39/100




Iter [39/100], Train Loss: 0.7490, Train Acc: 71.99%, Val Loss: 0.7120, Val Acc: 72.44%
Pruning Iteration 40/100




Iter [40/100], Train Loss: 0.7410, Train Acc: 71.54%, Val Loss: 0.7820, Val Acc: 71.13%
Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.7296, Train Acc: 72.46%, Val Loss: 0.7345, Val Acc: 71.33%
Pruning Iteration 42/100




Iter [42/100], Train Loss: 0.7428, Train Acc: 71.86%, Val Loss: 0.7207, Val Acc: 73.39%
Pruning Iteration 43/100




Iter [43/100], Train Loss: 0.7330, Train Acc: 72.06%, Val Loss: 0.7111, Val Acc: 73.15%
Pruning Iteration 44/100




Iter [44/100], Train Loss: 0.7251, Train Acc: 72.64%, Val Loss: 0.7668, Val Acc: 70.81%
Pruning Iteration 45/100




Iter [45/100], Train Loss: 0.7281, Train Acc: 72.58%, Val Loss: 0.6967, Val Acc: 73.50%
Pruning Iteration 46/100




Iter [46/100], Train Loss: 0.7197, Train Acc: 72.56%, Val Loss: 0.7372, Val Acc: 71.88%
Pruning Iteration 47/100




Iter [47/100], Train Loss: 0.7249, Train Acc: 72.89%, Val Loss: 0.7206, Val Acc: 72.24%
Pruning Iteration 48/100




Iter [48/100], Train Loss: 0.7125, Train Acc: 73.21%, Val Loss: 0.7010, Val Acc: 73.74%
Pruning Iteration 49/100




Iter [49/100], Train Loss: 0.7212, Train Acc: 72.94%, Val Loss: 0.7486, Val Acc: 72.08%
Pruning Iteration 50/100




Iter [50/100], Train Loss: 0.7028, Train Acc: 73.68%, Val Loss: 0.7445, Val Acc: 71.92%
Pruning Iteration 51/100




Iter [51/100], Train Loss: 0.7135, Train Acc: 73.08%, Val Loss: 0.6947, Val Acc: 73.86%
Pruning Iteration 52/100




Iter [52/100], Train Loss: 0.7161, Train Acc: 73.33%, Val Loss: 0.7221, Val Acc: 72.40%
Pruning Iteration 53/100




Iter [53/100], Train Loss: 0.7094, Train Acc: 73.51%, Val Loss: 0.7391, Val Acc: 72.32%
Pruning Iteration 54/100




Iter [54/100], Train Loss: 0.7034, Train Acc: 73.62%, Val Loss: 0.6942, Val Acc: 73.94%
Pruning Iteration 55/100




Iter [55/100], Train Loss: 0.6952, Train Acc: 73.88%, Val Loss: 0.6888, Val Acc: 73.90%
Pruning Iteration 56/100




Iter [56/100], Train Loss: 0.6975, Train Acc: 73.66%, Val Loss: 0.7177, Val Acc: 73.27%
Pruning Iteration 57/100




Iter [57/100], Train Loss: 0.6936, Train Acc: 73.90%, Val Loss: 0.7168, Val Acc: 73.58%
Pruning Iteration 58/100




Iter [58/100], Train Loss: 0.6971, Train Acc: 73.56%, Val Loss: 0.7390, Val Acc: 71.80%
Pruning Iteration 59/100




Iter [59/100], Train Loss: 0.6933, Train Acc: 74.09%, Val Loss: 0.6838, Val Acc: 74.65%
Pruning Iteration 60/100




Iter [60/100], Train Loss: 0.6942, Train Acc: 73.90%, Val Loss: 0.7262, Val Acc: 72.36%
Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.7003, Train Acc: 73.80%, Val Loss: 0.6849, Val Acc: 74.30%
Pruning Iteration 62/100




Iter [62/100], Train Loss: 0.6878, Train Acc: 74.26%, Val Loss: 0.6815, Val Acc: 74.18%
Pruning Iteration 63/100




Iter [63/100], Train Loss: 0.6820, Train Acc: 74.42%, Val Loss: 0.7089, Val Acc: 73.31%
Pruning Iteration 64/100




Iter [64/100], Train Loss: 0.6692, Train Acc: 74.70%, Val Loss: 0.8549, Val Acc: 69.39%
Pruning Iteration 65/100




Iter [65/100], Train Loss: 0.6768, Train Acc: 74.43%, Val Loss: 0.7103, Val Acc: 73.19%
Pruning Iteration 66/100




Iter [66/100], Train Loss: 0.6754, Train Acc: 74.57%, Val Loss: 0.7766, Val Acc: 71.13%
Pruning Iteration 67/100




Iter [67/100], Train Loss: 0.6766, Train Acc: 74.60%, Val Loss: 0.6993, Val Acc: 74.81%
Pruning Iteration 68/100




Iter [68/100], Train Loss: 0.6667, Train Acc: 75.14%, Val Loss: 0.6982, Val Acc: 73.03%
Pruning Iteration 69/100




Iter [69/100], Train Loss: 0.6809, Train Acc: 74.51%, Val Loss: 0.7040, Val Acc: 72.67%
Pruning Iteration 70/100




Iter [70/100], Train Loss: 0.6707, Train Acc: 74.92%, Val Loss: 0.6766, Val Acc: 75.09%
Pruning Iteration 71/100




Iter [71/100], Train Loss: 0.6644, Train Acc: 74.69%, Val Loss: 0.6783, Val Acc: 75.01%
Pruning Iteration 72/100




Iter [72/100], Train Loss: 0.6643, Train Acc: 75.01%, Val Loss: 0.7491, Val Acc: 71.60%
Pruning Iteration 73/100




Iter [73/100], Train Loss: 0.6663, Train Acc: 75.10%, Val Loss: 0.6864, Val Acc: 74.50%
Pruning Iteration 74/100




Iter [74/100], Train Loss: 0.6710, Train Acc: 74.64%, Val Loss: 0.7186, Val Acc: 73.19%
Pruning Iteration 75/100




Iter [75/100], Train Loss: 0.6588, Train Acc: 75.44%, Val Loss: 0.7110, Val Acc: 72.55%
Pruning Iteration 76/100




Iter [76/100], Train Loss: 0.6624, Train Acc: 75.30%, Val Loss: 0.7638, Val Acc: 71.56%
Pruning Iteration 77/100




Iter [77/100], Train Loss: 0.6614, Train Acc: 75.58%, Val Loss: 0.6919, Val Acc: 73.98%
Pruning Iteration 78/100




Iter [78/100], Train Loss: 0.6558, Train Acc: 75.27%, Val Loss: 0.6578, Val Acc: 75.13%
Pruning Iteration 79/100




Iter [79/100], Train Loss: 0.6602, Train Acc: 75.34%, Val Loss: 0.7764, Val Acc: 69.54%
Pruning Iteration 80/100




Iter [80/100], Train Loss: 0.6573, Train Acc: 75.59%, Val Loss: 0.6883, Val Acc: 74.65%
Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.6634, Train Acc: 75.24%, Val Loss: 0.6753, Val Acc: 74.65%
Pruning Iteration 82/100




Iter [82/100], Train Loss: 0.6487, Train Acc: 75.92%, Val Loss: 0.6856, Val Acc: 74.42%
Pruning Iteration 83/100




Iter [83/100], Train Loss: 0.6638, Train Acc: 75.17%, Val Loss: 0.6843, Val Acc: 74.85%
Pruning Iteration 84/100




Iter [84/100], Train Loss: 0.6539, Train Acc: 75.79%, Val Loss: 0.6784, Val Acc: 74.18%
Pruning Iteration 85/100




Iter [85/100], Train Loss: 0.6506, Train Acc: 75.54%, Val Loss: 0.6830, Val Acc: 74.53%
Pruning Iteration 86/100




Iter [86/100], Train Loss: 0.6500, Train Acc: 75.83%, Val Loss: 0.6829, Val Acc: 74.46%
Pruning Iteration 87/100




Iter [87/100], Train Loss: 0.6535, Train Acc: 75.50%, Val Loss: 0.6715, Val Acc: 75.41%
Pruning Iteration 88/100




Iter [88/100], Train Loss: 0.6462, Train Acc: 75.64%, Val Loss: 0.6907, Val Acc: 74.81%
Pruning Iteration 89/100




Iter [89/100], Train Loss: 0.6484, Train Acc: 75.88%, Val Loss: 0.6542, Val Acc: 75.76%
Pruning Iteration 90/100




Iter [90/100], Train Loss: 0.6425, Train Acc: 76.12%, Val Loss: 0.6883, Val Acc: 74.18%
Pruning Iteration 91/100




Iter [91/100], Train Loss: 0.6405, Train Acc: 75.88%, Val Loss: 0.6673, Val Acc: 75.92%
Pruning Iteration 92/100




Iter [92/100], Train Loss: 0.6435, Train Acc: 75.74%, Val Loss: 0.7126, Val Acc: 73.15%
Pruning Iteration 93/100




Iter [93/100], Train Loss: 0.6503, Train Acc: 75.59%, Val Loss: 0.6652, Val Acc: 74.93%
Pruning Iteration 94/100




Iter [94/100], Train Loss: 0.6250, Train Acc: 76.94%, Val Loss: 0.6838, Val Acc: 73.47%
Pruning Iteration 95/100




Iter [95/100], Train Loss: 0.6426, Train Acc: 76.24%, Val Loss: 0.7266, Val Acc: 73.19%
Pruning Iteration 96/100




Iter [96/100], Train Loss: 0.6438, Train Acc: 76.10%, Val Loss: 0.7302, Val Acc: 73.31%
Pruning Iteration 97/100




Iter [97/100], Train Loss: 0.6373, Train Acc: 76.33%, Val Loss: 0.6616, Val Acc: 75.84%
Pruning Iteration 98/100




Iter [98/100], Train Loss: 0.6390, Train Acc: 76.05%, Val Loss: 0.6775, Val Acc: 74.65%
Pruning Iteration 99/100




Iter [99/100], Train Loss: 0.6335, Train Acc: 76.41%, Val Loss: 0.7021, Val Acc: 72.63%
Pruning Iteration 100/100


                                                                                       

Iter [100/100], Train Loss: 0.6392, Train Acc: 75.99%, Val Loss: 0.6706, Val Acc: 74.34%




In [38]:
 # forcing low weight connections to be set to 0
with torch.no_grad():  # disable gradient tracking for efficiency
  # weight threshold to prune
  threshold = torch.cat([torch.flatten(torch.absolute(model.get_parameter(i))) for i in weights]).quantile(sparsity)
  for name, param in model.named_parameters():
      if "weight" in name:  # only apply to weights, skip biases
          param[torch.absolute(param) < threshold] = 0

In [39]:
validate(model, val_loader, criterion, device)



(0.6444189482474629, 75.60396039603961)

In [40]:
def calculate_model_sparsity(model):
    total_elements = 0
    zero_elements = 0

    for name, param in model.named_parameters():
        if "weight" in name:  # Consider only trainable parameters
            num_elements = param.numel()
            num_zeros = torch.sum(param <= 1e-4 * 4).item()

            total_elements += num_elements
            zero_elements += num_zeros

    if total_elements == 0:
        return 0.0  # Handle case of no trainable parameters

    sparsity = zero_elements / total_elements
    return sparsity

In [41]:
calculate_model_sparsity(model)

0.9319615550872643

In [42]:
model_name = f'pruning_l1_iterative_sparisity{sparsity}_N{N}_lr{lr}_iter{iter}_forced_sparsity.pt'
torch.save(model.state_dict(), model_name, _use_new_zipfile_serialization=False)

# Creating Pareto Frontiers

In [121]:
from google.colab import files

In [134]:
def create_pareto_point(sparsity_p, reg=1e-3, N_p=500, lr_p=0.001, num_iter_p=100):
  model = ConvNet()

  model.load_state_dict(torch.load('my_model_weights_1.pt'))
  model.to(device)

  criterion = torch.nn.CrossEntropyLoss()

  optim = torch.optim.Adam(model.parameters(), lr=lr_p, weight_decay=reg)
  print(optim)

  # Main training loop

  for iter in range(num_iter_p):
    if iter % 20 == 0:
      print(f"Pruning Iteration {iter+1}/{num_iter_p}")

    # Training
    train_loss, train_accuracy = train_n_steps(model, train_loader, optim, criterion, device,
                                               n_steps = N_p, s = sparsity_p)

    # Validation
    val_loss, val_accuracy = validate(model, val_loader, criterion, device)


    # Print epoch results
    if iter % 20 == 0:
      print(f'Iter [{iter+1}/{num_iter}], '
            f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, '
            f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%')

  acc = validate(model, val_loader, criterion, device)
  acc = acc[1]
  obs_sparsity = calculate_model_sparsity(model)


  #model_name = f'pruning_l1_iterative_sparsity{sparsity}_reg_{reg}_N{N}_lr{lr}_iter{iter}.pt'
  #torch.save(model.state_dict(), model_name, _use_new_zipfile_serialization=False)

  print(f'Param: {sparsity_p}, Sparsity:{obs_sparsity}, Acc: {acc}')

  # forcing low weight connections to be set to 0
  with torch.no_grad():  # disable gradient tracking for efficiency
    # weight threshold to prune
    threshold = torch.cat([torch.flatten(torch.absolute(model.get_parameter(i))) for i in weights]).quantile(sparsity_p)
    for name, param in model.named_parameters():
        if "weight" in name:  # only apply to weights, skip biases
            param[torch.absolute(param) < threshold] = 0

  man_set_acc = validate(model, val_loader, criterion, device)
  man_set_acc = man_set_acc[1]
  man_set_obs_sparsity = calculate_model_sparsity(model)



  return sparsity_p, obs_sparsity, acc, man_set_obs_sparsity, man_set_acc

In [135]:
sparsity_levels = [0.99, 0.98, 0.97, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]


In [136]:
df = pd.DataFrame(columns=['sparsity_param', 'sparsity', 'accuracy', 'man_sparsity', 'man_acc'])
l2 = 1e-7
for i in sparsity_levels:
  df.loc[len(df)] = create_pareto_point(sparsity_p = i, reg=l2)
df.to_csv('reg_1e-7_pareto_frontiers.csv', index=False)
files.download('reg_1e-7_pareto_frontiers.csv')

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 1.3851, Train Acc: 39.91%, Val Loss: 1.2770, Val Acc: 46.46%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 1.1253, Train Acc: 54.59%, Val Loss: 1.0243, Val Acc: 60.16%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 1.0868, Train Acc: 56.23%, Val Loss: 1.0099, Val Acc: 59.96%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 1.0716, Train Acc: 57.21%, Val Loss: 0.9861, Val Acc: 61.23%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 1.0676, Train Acc: 57.75%, Val Loss: 0.9863, Val Acc: 60.71%




Param: 0.99, Sparsity:0.8083461663154482, Acc: 56.99009900990099




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 1.3046, Train Acc: 45.26%, Val Loss: 1.1345, Val Acc: 54.06%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 1.0105, Train Acc: 60.77%, Val Loss: 0.9535, Val Acc: 62.46%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.9851, Train Acc: 61.63%, Val Loss: 0.8968, Val Acc: 64.63%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.9536, Train Acc: 62.58%, Val Loss: 0.9324, Val Acc: 63.17%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.9529, Train Acc: 62.51%, Val Loss: 0.9307, Val Acc: 63.45%




Param: 0.98, Sparsity:0.7163792754093046, Acc: 67.04950495049505




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 1.2123, Train Acc: 50.90%, Val Loss: 1.1126, Val Acc: 54.14%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.9786, Train Acc: 62.14%, Val Loss: 1.0237, Val Acc: 59.76%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.9387, Train Acc: 63.44%, Val Loss: 0.8818, Val Acc: 65.90%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.9218, Train Acc: 64.66%, Val Loss: 0.8724, Val Acc: 65.74%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.9092, Train Acc: 64.59%, Val Loss: 0.8653, Val Acc: 67.41%




Param: 0.97, Sparsity:0.6836855649213811, Acc: 68.07920792079207




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 1.1685, Train Acc: 52.85%, Val Loss: 1.0728, Val Acc: 56.79%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.9113, Train Acc: 64.79%, Val Loss: 0.8611, Val Acc: 67.09%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.8834, Train Acc: 65.84%, Val Loss: 0.8720, Val Acc: 65.11%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.8564, Train Acc: 67.23%, Val Loss: 0.8085, Val Acc: 68.79%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.8405, Train Acc: 67.78%, Val Loss: 0.8082, Val Acc: 68.99%




Param: 0.95, Sparsity:0.6224705516831469, Acc: 68.87128712871286




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 1.0777, Train Acc: 57.16%, Val Loss: 1.0745, Val Acc: 56.55%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.8287, Train Acc: 68.17%, Val Loss: 0.8388, Val Acc: 67.76%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.7725, Train Acc: 70.51%, Val Loss: 0.8490, Val Acc: 66.85%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.7414, Train Acc: 71.44%, Val Loss: 0.7604, Val Acc: 70.53%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.7026, Train Acc: 73.08%, Val Loss: 0.7534, Val Acc: 70.65%




Param: 0.9, Sparsity:0.5698840303668882, Acc: 70.4950495049505




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 1.0371, Train Acc: 59.06%, Val Loss: 0.9570, Val Acc: 62.53%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.7713, Train Acc: 70.49%, Val Loss: 0.9230, Val Acc: 64.04%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.6958, Train Acc: 73.22%, Val Loss: 0.7946, Val Acc: 70.06%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.6492, Train Acc: 75.17%, Val Loss: 0.7542, Val Acc: 70.97%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.6267, Train Acc: 76.12%, Val Loss: 0.7849, Val Acc: 71.64%




Param: 0.85, Sparsity:0.577296766088507, Acc: 71.48514851485149




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 1.0042, Train Acc: 60.23%, Val Loss: 0.9232, Val Acc: 63.68%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.7323, Train Acc: 72.24%, Val Loss: 0.8033, Val Acc: 69.15%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.6541, Train Acc: 74.94%, Val Loss: 0.7661, Val Acc: 70.97%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.6014, Train Acc: 77.04%, Val Loss: 0.7768, Val Acc: 70.42%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.5499, Train Acc: 79.17%, Val Loss: 0.7533, Val Acc: 72.24%




Param: 0.8, Sparsity:0.5713817744637164, Acc: 72.79207920792079




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 0.9871, Train Acc: 61.25%, Val Loss: 0.9972, Val Acc: 61.23%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.7087, Train Acc: 72.74%, Val Loss: 0.7746, Val Acc: 69.94%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.6004, Train Acc: 77.32%, Val Loss: 0.7728, Val Acc: 71.88%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.5494, Train Acc: 79.25%, Val Loss: 0.7584, Val Acc: 71.41%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.5031, Train Acc: 80.58%, Val Loss: 0.7714, Val Acc: 71.76%




Param: 0.75, Sparsity:0.5653604041713947, Acc: 73.42574257425743




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 0.9591, Train Acc: 62.56%, Val Loss: 0.8901, Val Acc: 64.32%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.6781, Train Acc: 74.00%, Val Loss: 0.7482, Val Acc: 71.37%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.5674, Train Acc: 78.82%, Val Loss: 0.7386, Val Acc: 72.40%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.4939, Train Acc: 80.99%, Val Loss: 0.7634, Val Acc: 71.21%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.4744, Train Acc: 82.33%, Val Loss: 0.7888, Val Acc: 71.09%




Param: 0.7, Sparsity:0.5757787593883396, Acc: 72.35643564356435




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 0.9560, Train Acc: 62.79%, Val Loss: 0.9659, Val Acc: 60.91%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.6571, Train Acc: 74.74%, Val Loss: 0.7651, Val Acc: 69.90%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.5300, Train Acc: 79.63%, Val Loss: 0.7871, Val Acc: 71.13%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.4792, Train Acc: 81.74%, Val Loss: 0.7806, Val Acc: 71.64%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.4407, Train Acc: 83.57%, Val Loss: 0.8123, Val Acc: 70.77%




Param: 0.65, Sparsity:0.578774247581996, Acc: 71.20792079207921




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 0.9425, Train Acc: 63.02%, Val Loss: 0.9326, Val Acc: 62.30%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.6362, Train Acc: 75.86%, Val Loss: 0.7582, Val Acc: 70.69%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.5064, Train Acc: 80.96%, Val Loss: 0.7529, Val Acc: 72.79%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.4507, Train Acc: 83.04%, Val Loss: 0.8002, Val Acc: 72.08%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.4081, Train Acc: 84.75%, Val Loss: 0.7853, Val Acc: 71.76%




Param: 0.6, Sparsity:0.5764001458907441, Acc: 71.96039603960396




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 0.9498, Train Acc: 63.05%, Val Loss: 0.9267, Val Acc: 63.52%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.6030, Train Acc: 77.35%, Val Loss: 0.7526, Val Acc: 71.52%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.4724, Train Acc: 82.63%, Val Loss: 0.7827, Val Acc: 72.04%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.4119, Train Acc: 84.75%, Val Loss: 0.8189, Val Acc: 72.48%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.3833, Train Acc: 85.75%, Val Loss: 0.7700, Val Acc: 73.31%




Param: 0.5, Sparsity:0.5799173961203868, Acc: 71.92079207920793




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 0.9513, Train Acc: 63.06%, Val Loss: 0.9018, Val Acc: 63.68%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.5716, Train Acc: 78.41%, Val Loss: 0.8159, Val Acc: 70.22%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.4438, Train Acc: 83.47%, Val Loss: 0.7628, Val Acc: 73.11%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.3907, Train Acc: 85.46%, Val Loss: 0.8158, Val Acc: 72.75%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.3511, Train Acc: 87.34%, Val Loss: 0.8146, Val Acc: 72.67%




Param: 0.4, Sparsity:0.5763326038796132, Acc: 72.0




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 0.9376, Train Acc: 63.54%, Val Loss: 0.8790, Val Acc: 65.47%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.5502, Train Acc: 78.74%, Val Loss: 0.7537, Val Acc: 72.28%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.4333, Train Acc: 83.83%, Val Loss: 0.7767, Val Acc: 73.07%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.3707, Train Acc: 86.24%, Val Loss: 0.7961, Val Acc: 73.35%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.3515, Train Acc: 87.30%, Val Loss: 0.8003, Val Acc: 73.70%




Param: 0.3, Sparsity:0.5667551467012482, Acc: 73.82178217821782




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 0.9434, Train Acc: 63.14%, Val Loss: 0.9741, Val Acc: 61.35%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.5481, Train Acc: 79.44%, Val Loss: 0.7868, Val Acc: 71.41%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.4224, Train Acc: 84.39%, Val Loss: 0.7511, Val Acc: 73.90%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.3549, Train Acc: 86.98%, Val Loss: 0.7957, Val Acc: 73.54%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.3204, Train Acc: 88.51%, Val Loss: 0.8241, Val Acc: 72.91%




Param: 0.2, Sparsity:0.5892061112011672, Acc: 73.94059405940594




Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-07
)
Pruning Iteration 1/100




Iter [1/100], Train Loss: 0.9421, Train Acc: 63.55%, Val Loss: 0.9130, Val Acc: 63.33%




Pruning Iteration 21/100




Iter [21/100], Train Loss: 0.5542, Train Acc: 79.15%, Val Loss: 0.7832, Val Acc: 71.52%




Pruning Iteration 41/100




Iter [41/100], Train Loss: 0.4191, Train Acc: 84.59%, Val Loss: 0.8262, Val Acc: 71.21%




Pruning Iteration 61/100




Iter [61/100], Train Loss: 0.3570, Train Acc: 87.03%, Val Loss: 0.8480, Val Acc: 72.51%




Pruning Iteration 81/100




Iter [81/100], Train Loss: 0.3350, Train Acc: 88.00%, Val Loss: 0.8932, Val Acc: 72.59%




Param: 0.1, Sparsity:0.578260928297401, Acc: 72.5940594059406




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [137]:
df

Unnamed: 0,sparsity_param,sparsity,accuracy,man_sparsity,man_acc
0,0.99,0.808346,56.990099,0.995362,45.623762
1,0.98,0.716379,67.049505,0.991422,59.247525
2,0.97,0.683686,68.079208,0.987636,63.089109
3,0.95,0.622471,68.871287,0.979744,67.841584
4,0.9,0.569884,70.49505,0.958571,72.594059
5,0.85,0.577297,71.485149,0.937345,72.594059
6,0.8,0.571382,72.792079,0.916223,73.306931
7,0.75,0.56536,73.425743,0.894192,73.386139
8,0.7,0.575779,72.356436,0.871827,73.584158
9,0.65,0.578774,71.207921,0.852031,72.237624


In [138]:
files.download('reg_1e-7_pareto_frontiers.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>