<a href="https://colab.research.google.com/github/shubhe25p/LeNet-Pruning-Colab/blob/master/compression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm
from torch.nn import Parameter
from torch.nn.modules.module import Module
import math
from sklearn.cluster import KMeans
from scipy.sparse import csc_matrix, csr_matrix
from collections import defaultdict, namedtuple
from heapq import heappush, heappop, heapify
import struct
from pathlib import Path
from scipy.sparse import csr_matrix, csc_matrix

In [None]:
# Hyper-parameters and run options, looked up by key throughout the notebook.
args = {
    "batch-size": 50,          # mini-batch size of the training loader
    "test-batch-size": 1000,   # mini-batch size of the evaluation loader
    "epochs": 100,             # passes over the training set per train() call
    "lr": 0.01,                # learning rate handed to the optimizer
    "seed": 42,                # value passed to torch.manual_seed
    "log-interval": 10,        # progress-bar text is refreshed every N batches
    "log": "log.txt",          # file the log() helper appends to
    "sensitivity": 2,          # multiplier passed to prune_by_std
    "no_cuda": False,          # True forces CPU even when CUDA is available
}
args["batch-size"]

50

In [None]:
# Seed the default RNG before any layer is created so initialisation is repeatable.
torch.manual_seed(args["seed"])

# CUDA is used only when it is present and has not been disabled in the config.
use_cuda = torch.cuda.is_available() and not args["no_cuda"]
device = torch.device("cuda") if use_cuda else torch.device('cpu')

if not use_cuda:
    print('No CUDA!!!')
else:
    print("Yes CUDA!")
    torch.cuda.manual_seed(args["seed"])


Yes CUDA!


In [None]:
# Mount Google Drive at /content/gdrive so the notebook can read and write files there.
# The import is kept in this cell because google.colab only exists inside Colab.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import os
os.chdir('gdrive/My Drive/kaggle/self_compress') 
!ls

huffmancoding.py


In [None]:
# Worker processes and pinned memory are requested only when CUDA is in use.
kwargs = dict(num_workers=5, pin_memory=True) if use_cuda else dict()

# Both splits go through the same preprocessing: conversion to a tensor,
# then normalisation with mean 0.1307 and std 0.3081.
_mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Only the training split asks for a download.
_train_set = datasets.MNIST('data', train=True, download=True,
                            transform=_mnist_transform)
_test_set = datasets.MNIST('data', train=False, transform=_mnist_transform)

train_loader = torch.utils.data.DataLoader(
    _train_set, batch_size=args["batch-size"], shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    _test_set, batch_size=args["test-batch-size"], shuffle=False, **kwargs)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw
Processing...


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Done!


In [None]:
class PruningModule(Module):
    """Base module offering magnitude pruning of the sub-modules named fc1, fc2 and fc3.

    Each of those sub-modules must provide a ``prune(threshold)`` method.
    """

    def _prunable_layers(self):
        """Yield ``(name, module)`` for the sub-modules named fc1, fc2 or fc3."""
        for layer_name, layer in self.named_modules():
            if layer_name in ('fc1', 'fc2', 'fc3'):
                yield layer_name, layer

    def prune_by_percentile(self, q=5.0, **kwargs):
        """Prune fc1/fc2/fc3 with one global threshold.

        The threshold is the ``q``-th percentile of the absolute values of all
        non-zero entries of every parameter whose name contains neither
        'bias' nor 'mask'. Extra keyword arguments are accepted and ignored.
        """
        # One flat array of surviving (non-zero) values per eligible parameter.
        alive_chunks = [
            values[np.nonzero(values)]
            for values in (
                param.data.cpu().numpy()
                for param_name, param in self.named_parameters()
                if not ('bias' in param_name or 'mask' in param_name)
            )
        ]
        percentile_value = np.percentile(np.abs(np.concatenate(alive_chunks)), q)
        print(f'Pruning with threshold : {percentile_value}')

        for _, layer in self._prunable_layers():
            layer.prune(threshold=percentile_value)

    def prune_by_std(self, s=0.25):
        """Prune fc1/fc2/fc3, each with its own threshold.

        A layer's threshold is the standard deviation of its full weight
        matrix multiplied by ``s``.
        """
        for layer_name, layer in self._prunable_layers():
            layer_weights = layer.weight.data.cpu().numpy()
            threshold = np.std(layer_weights) * s
            print(f'Pruning with threshold : {threshold} for layer {layer_name}')
            layer.prune(threshold)

class MaskeL(Module):
    """Fully connected layer whose weight matrix is gated by a 0/1 mask.

    The mask has the same shape as the weight, starts as all ones, is stored
    as a parameter with ``requires_grad=False`` and is multiplied into the
    weight on every forward pass. ``prune`` clears mask entries.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Registration order (weight, mask, bias) is kept as-is.
        self.weight = Parameter(torch.empty(out_features, in_features))
        self.mask = Parameter(torch.ones(out_features, in_features), requires_grad=False)
        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.empty(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill weight, then bias (if any), uniformly in +/- 1/sqrt(in_features)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        for tensor in (self.weight, self.bias):
            if tensor is not None:
                tensor.data.uniform_(-bound, bound)

    def forward(self, input):
        """Apply the affine map using the element-wise product weight * mask."""
        masked_weight = self.weight * self.mask
        return F.linear(input, masked_weight, self.bias)

    def __repr__(self):
        has_bias = self.bias is not None
        return (f'{self.__class__.__name__}(in_features={self.in_features}'
                f', out_features={self.out_features}, bias={has_bias})')

    def prune(self, threshold):
        """Zero every weight with ``abs(weight) < threshold`` and clear its mask entry.

        Mask entries that are already zero stay zero, so repeated calls only
        ever remove more connections.
        """
        weight_dev, mask_dev = self.weight.device, self.mask.device
        weights = self.weight.data.cpu().numpy()
        old_mask = self.mask.data.cpu().numpy()
        new_mask = np.where(np.abs(weights) < threshold, 0, old_mask)
        self.mask.data = torch.from_numpy(new_mask).to(mask_dev)
        self.weight.data = torch.from_numpy(weights * new_mask).to(weight_dev)







In [None]:
class LeNet(PruningModule):
    """Three fully connected layers (784 -> 300 -> 100 -> 10) with log-softmax output.

    With ``mask=True`` the layers are ``MaskeL`` instances; otherwise they are
    plain ``nn.Linear`` layers.
    """

    def __init__(self, mask=False):
        super().__init__()
        layer_cls = nn.Linear
        if mask:
            layer_cls = MaskeL
        self.fc1 = layer_cls(784, 300)
        self.fc2 = layer_cls(300, 100)
        self.fc3 = layer_cls(100, 10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return per-class log-probabilities."""
        hidden = x.view(-1, 784)
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1)

In [None]:
# Build the masked variant of the network, then place it on the selected device.
model = LeNet(mask=True)
model = model.to(device)
print(model)

LeNet(
  (fc1): MaskeL(in_features=784, out_features=300, bias=True)
  (fc2): MaskeL(in_features=300, out_features=100, bias=True)
  (fc3): MaskeL(in_features=100, out_features=10, bias=True)
)


In [None]:
# Adam over everything model.parameters() yields, with weight decay 1e-4.
optimizer = optim.Adam(model.parameters(), lr=args["lr"], weight_decay=0.0001)
# Optimizer state captured before any step has been taken; it is loaded back
# into the optimizer right before the retraining run.
initial_optimizer_state_dict = optimizer.state_dict()

def train(epochs):
    """Train the global ``model`` for ``epochs`` passes over ``train_loader``.

    Uses the notebook-level ``optimizer``, ``device`` and ``args``. After each
    backward pass, the gradient of every entry whose current value is exactly
    zero is cleared (for all parameters except those with 'mask' in their
    name), so that entries zeroed by pruning are not revived by the update.

    Args:
        epochs: number of full passes over the training loader.
    """
    model.train()
    for epoch in range(epochs):
        pbar = tqdm(enumerate(train_loader), total=len(train_loader))
        for batch_idx, (data, target) in pbar:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()

            # Clear gradients of zero-valued entries in place, on whatever
            # device the parameter lives on (no host round trip per batch).
            for name, p in model.named_parameters():
                # Masks are never trained; parameters that received no
                # gradient have nothing to clear.
                if 'mask' in name or p.grad is None:
                    continue
                p.grad.data.masked_fill_(p.data == 0, 0)

            optimizer.step()
            if batch_idx % args["log-interval"] == 0:
                pbar.set_description(f'Train Epoch: {epoch} Loss: {loss.item():.6f}')


def test():
    """Evaluate the global ``model`` on the global ``test_loader``.

    Prints the mean negative log-likelihood and the number of correct
    predictions, and returns the accuracy as a percentage.
    """
    model.eval()
    n_samples = len(test_loader.dataset)
    loss_sum, n_correct = 0, 0
    with torch.no_grad():
        for batch, labels in test_loader:
            batch, labels = batch.to(device), labels.to(device)
            log_probs = model(batch)
            # Accumulate the summed (not averaged) loss of the batch.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # Index of the largest output per row is the predicted class.
            _, predicted = log_probs.data.max(1, keepdim=True)
            n_correct += predicted.eq(labels.data.view_as(predicted)).sum().item()

    mean_loss = loss_sum / n_samples
    accuracy = 100. * n_correct / n_samples
    print(f'Test : Average loss: {mean_loss:.4f}, Accuracy: {n_correct}/{n_samples} ({accuracy:.2f}%)')
    return accuracy


In [None]:
def log(filename, content):
    """Append ``content`` plus a trailing newline to the file ``filename``.

    The file is opened in append mode, so it is created when missing and
    earlier lines are kept.
    """
    with open(filename, 'a') as handle:
        handle.write(content + "\n")

# First training run on the not-yet-pruned network, followed by an evaluation
# whose accuracy is appended to the log file as the baseline.
train(args["epochs"])
accuracy = test()
log(args["log"], f"initial_accuracy {accuracy}")


Train Epoch: 0 Loss: 0.110306: 100%|██████████| 1200/1200 [00:14<00:00, 80.61it/s]
Train Epoch: 1 Loss: 0.182219: 100%|██████████| 1200/1200 [00:14<00:00, 82.79it/s]
Train Epoch: 2 Loss: 0.370321: 100%|██████████| 1200/1200 [00:14<00:00, 80.22it/s]
Train Epoch: 3 Loss: 0.475764: 100%|██████████| 1200/1200 [00:14<00:00, 82.83it/s]
Train Epoch: 4 Loss: 0.120527: 100%|██████████| 1200/1200 [00:14<00:00, 81.23it/s]
Train Epoch: 5 Loss: 0.317719: 100%|██████████| 1200/1200 [00:14<00:00, 82.60it/s]
Train Epoch: 6 Loss: 0.347141: 100%|██████████| 1200/1200 [00:14<00:00, 81.67it/s]
Train Epoch: 7 Loss: 0.059793: 100%|██████████| 1200/1200 [00:14<00:00, 82.61it/s]
Train Epoch: 8 Loss: 0.166623: 100%|██████████| 1200/1200 [00:14<00:00, 82.15it/s]
Train Epoch: 9 Loss: 0.117478: 100%|██████████| 1200/1200 [00:14<00:00, 81.32it/s]
Train Epoch: 10 Loss: 0.107435: 100%|██████████| 1200/1200 [00:14<00:00, 81.82it/s]
Train Epoch: 11 Loss: 0.120158: 100%|██████████| 1200/1200 [00:14<00:00, 81.07it/s]
Tr

Test : Average loss: 0.2106, Accuracy: 9477/10000 (94.77%)


FileNotFoundError: ignored

In [None]:
# Create the checkpoint folder if it is missing. exist_ok keeps the cell
# re-runnable: os.mkdir would raise FileExistsError once 'saves' exists.
os.makedirs('saves', exist_ok=True)
torch.save(model, "saves/initial_model.ptmodel")
print("--- With pruning ---")
# Prune fc1/fc2/fc3, each at (sensitivity x std of its weights), then measure
# the accuracy of the pruned network before any retraining.
model.prune_by_std(args["sensitivity"])
accuracy = test()
log(args["log"], f"accuracy_after_pruning {accuracy}")

--- With pruning ---
Pruning with threshold : 0.2341970056295395 for layer fc1
Pruning with threshold : 0.1945437341928482 for layer fc2
Pruning with threshold : 0.21063780784606934 for layer fc3
Test : Average loss: 1.1871, Accuracy: 7056/10000 (70.56%)


In [None]:
print("--- Retraining ---")
# Put the optimizer back into the state captured before the first training run.
optimizer.load_state_dict(initial_optimizer_state_dict) 
train(args["epochs"])
# Save before evaluating: test() switches the model to eval mode.
torch.save(model, f"saves/model_after_retraining.ptmodel")
accuracy = test()

--- Retraining ---


Train Epoch: 0 Loss: 0.041789: 100%|██████████| 1200/1200 [00:15<00:00, 79.44it/s]
Train Epoch: 1 Loss: 0.083689: 100%|██████████| 1200/1200 [00:14<00:00, 80.24it/s]
Train Epoch: 2 Loss: 0.117890: 100%|██████████| 1200/1200 [00:15<00:00, 78.61it/s]
Train Epoch: 3 Loss: 0.237558: 100%|██████████| 1200/1200 [00:15<00:00, 79.45it/s]
Train Epoch: 4 Loss: 0.045141: 100%|██████████| 1200/1200 [00:15<00:00, 78.65it/s]
Train Epoch: 5 Loss: 0.097599: 100%|██████████| 1200/1200 [00:15<00:00, 79.49it/s]
Train Epoch: 6 Loss: 0.067162: 100%|██████████| 1200/1200 [00:15<00:00, 78.55it/s]
Train Epoch: 7 Loss: 0.078319: 100%|██████████| 1200/1200 [00:15<00:00, 79.34it/s]
Train Epoch: 8 Loss: 0.026370: 100%|██████████| 1200/1200 [00:15<00:00, 78.64it/s]
Train Epoch: 9 Loss: 0.114019: 100%|██████████| 1200/1200 [00:15<00:00, 78.20it/s]
Train Epoch: 10 Loss: 0.063769: 100%|██████████| 1200/1200 [00:15<00:00, 77.46it/s]
Train Epoch: 11 Loss: 0.071750: 100%|██████████| 1200/1200 [00:15<00:00, 79.53it/s]
Tr

Test : Average loss: 0.1190, Accuracy: 9674/10000 (96.74%)


AttributeError: ignored

In [None]:
# Append the accuracy measured after retraining to the log file.
log(args["log"], f"accuracy_after_retraining {accuracy}")
def test(model, use_cuda=True):
    """Evaluate ``model`` on the MNIST test split and return accuracy in percent.

    A fresh test loader (batch size 1000, same normalisation as training) is
    built on every call. Prints the mean negative log-likelihood and the
    number of correct predictions.

    Args:
        model: module whose output is fed to ``F.nll_loss``; it must already
            live on the device selected below.
        use_cuda: request CUDA for the input batches. When CUDA is not
            available the evaluation falls back to CPU instead of failing.

    Returns:
        Accuracy over the whole test split, as a percentage.
    """
    # The default is True, so guard against runtimes without a GPU.
    use_cuda = use_cuda and torch.cuda.is_available()
    kwargs = {'num_workers': 5, 'pin_memory': True} if use_cuda else {}
    device = torch.device("cuda" if use_cuda else 'cpu')
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=1000, shuffle=False, **kwargs)
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.data.max(1, keepdim=True)[1]  # index of the largest output per row
            correct += pred.eq(target.data.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
        print(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)')
    return accuracy

#Weight sharing by KMeans Algorithm

def weight_sharing(model, bits=5):
    """Quantise the non-zero weights of each child layer to at most 2**bits shared values.

    For every direct child of ``model`` that has a ``weight``, the non-zero
    entries are clustered with k-means (centroids initialised on an evenly
    spaced grid between the smallest and largest non-zero weight) and each
    entry is replaced by its cluster centre. Zero entries stay zero. The
    weights are modified in place.

    Args:
        model: module whose direct children are processed.
        bits: the number of clusters is ``2**bits``, capped at the number of
            non-zero weights in the layer.
    """
    for module in model.children():
        # Children without a weight matrix have nothing to share.
        if getattr(module, "weight", None) is None:
            continue
        dev = module.weight.device
        weight = module.weight.data.cpu().numpy()
        shape = weight.shape
        # The sparse form keeps only the non-zero entries in ``mat.data``.
        mat = csr_matrix(weight) if shape[0] < shape[1] else csc_matrix(weight)
        values = mat.data
        if values.size == 0:
            continue  # layer is entirely zero: nothing to cluster

        # k-means cannot fit more clusters than samples.
        n_clusters = min(2 ** bits, values.size)
        space = np.linspace(values.min(), values.max(), num=n_clusters)
        # `precompute_distances` and algorithm="full" no longer exist in
        # current scikit-learn releases, so the library defaults are used.
        kmeans = KMeans(n_clusters=n_clusters, init=space.reshape(-1, 1), n_init=1)
        kmeans.fit(values.reshape(-1, 1))
        new_weight = kmeans.cluster_centers_[kmeans.labels_].reshape(-1)
        # Keep the layer's original dtype whatever precision k-means used.
        mat.data = new_weight.astype(weight.dtype, copy=False)
        module.weight.data = torch.from_numpy(mat.toarray()).to(dev)

# List the working directory to confirm the data, checkpoints and log are in place.
!ls

data  huffmancoding.py	log.txt  saves


In [None]:
# Reload the retrained checkpoint onto the device selected at the top of the
# notebook, so a file saved from a GPU session also loads on a CPU-only one.
# NOTE: the file is a fully pickled module; only load checkpoints you trust.
# NOTE(review): newer PyTorch releases default to weights_only=True, which
# rejects pickled modules; pass weights_only=False there. Confirm against the
# installed version.
model = torch.load("saves/model_after_retraining.ptmodel", map_location=device)

In [None]:
# Compare accuracy right before and right after quantising the weights,
# then store the quantised model.
print('accuracy before weight sharing')
test(model, use_cuda)
weight_sharing(model)
print('accuracy after weight sharing')
test(model, use_cuda)
os.makedirs('saves', exist_ok=True)
torch.save(model, "saves/model_after_weight_sharing.ptmodel")

accuracy before weight sharing
Test set: Average loss: 0.1190, Accuracy: 9674/10000 (96.74%)
accuacy after weight sharing
Test set: Average loss: 0.1235, Accuracy: 9659/10000 (96.59%)


In [None]:
# huffmancoding.py sits in the project folder entered earlier, so this import
# only resolves once the working directory has been changed.
from huffmancoding import huffman_encode_model

# map_location lets a checkpoint saved from a GPU session load on a CPU-only
# runtime. The file is a fully pickled module; only load checkpoints you trust.
model = torch.load("saves/model_after_weight_sharing.ptmodel", map_location=device)
huffman_encode_model(model)


Layer           |   original compressed improvement percent
----------------------------------------------------------------------
fc1.weight      |      88164      21476       4.11x  24.36%
fc1.bias        |       1200       1200       1.00x 100.00%
fc2.weight      |      11692       3221       3.63x  27.55%
fc2.bias        |        400        400       1.00x 100.00%
fc3.weight      |        636        397       1.60x  62.42%
fc3.bias        |         40         40       1.00x 100.00%
----------------------------------------------------------------------
total           |     102132      26734       3.82x  26.18%
