In [1]:
import sys
sys.path.append('../framework')
from NetworkClass import Network
import torch.nn.functional as F
from torch import nn
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torchvision
import torch
import torch.nn as nn
import copy


In [2]:
class ReshapeTransform:
    """Callable that reshapes whatever tensor it is given to a fixed shape.

    The target shape is stored at construction time and applied on every call,
    which lets the object be used as a step in a transform pipeline.
    """

    def __init__(self, new_size):
        # Shape passed to torch.reshape on each call, e.g. ``(-1,)`` to flatten.
        self.new_size = new_size

    def __call__(self, img):
        """Return ``img`` reshaped to ``self.new_size``."""
        target_shape = self.new_size
        return torch.reshape(input=img, shape=target_shape)

In [3]:
# Define Constants
batch_size_train = 64    # mini-batch size of the training DataLoader
batch_size_test = 1000   # mini-batch size of the test DataLoader
n_epochs = 3             # epochs per training round
learning_rate = 0.01     # step size handed to the SGD optimizer
momentum = 0.5           # NOTE(review): defined but not passed to any optimizer in this notebook
log_interval = 10        # train() logs the loss every `log_interval` batches
random_seed = 1          # seed applied to PyTorch's global RNG just below

# Apply the seed so that runs are repeatable from a fresh kernel; previously
# `random_seed` was declared but never used. The trailing ';' suppresses the
# Generator repr in the cell output.
torch.manual_seed(random_seed);

In [4]:
# Model Definition
# Declarative description of the network handed to the project-local `Network`
# class: one input layer, a list of hidden layers and one output layer.
model_dict = {
        "network":{
            'input_layer': {
                # Length of the flattened input vector fed to the first layer
                # (presumably 28*28 MNIST pixels - TODO confirm).
                "units": 784,
                
                },
            'hidden_layer': [{
                    # NOTE(review): no "activation" key on this layer - confirm
                    # what `Network` applies when the key is omitted.
                    "units": 400, 
                    "type": "Linear"
                }, 
                {
                    "units": 50, 
                    "activation": "relu",
                    "type": "Linear"

                }],
            'output_layer': {
                "units": 10,
                # NOTE(review): the training loss used in this notebook is
                # nn.CrossEntropyLoss, which expects unnormalised logits and
                # applies log-softmax itself; a softmax here means the loss
                # sees already-normalised outputs - confirm this is intended.
                "activation": "softmax",
                "type": "Linear"
                }
        }
    }

# Build the model from the description above.
model = Network(model_dict)

In [6]:
# List the shape of every learnable tensor. The printed index counts parameter
# tensors (weights and biases separately), not layers.
for position, tensor in enumerate(model.parameters()):
    shape_line = "Layer {} , Parameters: {}".format(position, tensor.shape)
    print(shape_line)

Layer 0 , Parameters: torch.Size([400, 784])
Layer 1 , Parameters: torch.Size([400])
Layer 2 , Parameters: torch.Size([50, 400])
Layer 3 , Parameters: torch.Size([50])
Layer 4 , Parameters: torch.Size([10, 50])
Layer 5 , Parameters: torch.Size([10])


In [7]:
# Load Datasets

# Preprocessing shared by both splits: convert to a tensor, normalise with the
# mean/std constants below, then flatten to a 1-D vector with ReshapeTransform.
# Defined once so the train and test pipelines cannot drift apart.
mnist_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
    ReshapeTransform((-1,)),
])

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('../data/', train=True, download=True,
                               transform=mnist_transform),
    batch_size=batch_size_train, shuffle=True)

# Evaluation only aggregates over the whole test split, so the order of the
# batches does not matter and shuffling is switched off.
test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('../data/', train=False, download=True,
                               transform=mnist_transform),
    batch_size=batch_size_test, shuffle=False)

In [10]:
# Define Loss and Optimizer

# Plain SGD over every model parameter; only the learning rate is configured.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# Classification loss read as a global by the train() and test() helpers.
criterion = nn.CrossEntropyLoss()

In [12]:
# Histories appended to by train() (losses and sample counters) and test().
train_losses, train_counter = [], []
test_losses = []

# One entry per evaluation point: training samples seen after 0..n_epochs epochs.
samples_per_epoch = len(train_loader.dataset)
test_counter = [completed * samples_per_epoch for completed in range(n_epochs + 1)]

In [13]:
def train(mod, optim, epoch):
  """Run one training epoch over ``train_loader`` and record the loss history.

  Args:
    mod: model to optimise; invoked as ``mod(data)`` on every batch.
    optim: optimizer whose ``zero_grad()`` / ``step()`` perform the update.
    epoch: 1-based epoch number, used in the log line and to offset the
      sample counter stored in ``train_counter``.

  Reads the notebook globals ``train_loader``, ``criterion`` and
  ``log_interval``; appends to the globals ``train_losses`` and
  ``train_counter`` every ``log_interval`` batches.
  """
  mod.train()
  dataset_size = len(train_loader.dataset)
  # Take the batch size from the loader instead of hard-coding 64, so the
  # sample counter stays correct when the loader's batch size changes.
  batch_size = train_loader.batch_size
  for batch_idx, (data, target) in enumerate(train_loader):
    optim.zero_grad()
    output = mod(data)
    loss = criterion(output, target)
    loss.backward()
    optim.step()
    if batch_idx % log_interval == 0:
      loss_value = loss.item()
      # Samples consumed in this epoch before the current batch.
      samples_seen = batch_idx * batch_size
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, samples_seen, dataset_size,
        100. * batch_idx / len(train_loader), loss_value))
      train_losses.append(loss_value)
      train_counter.append(samples_seen + (epoch - 1) * dataset_size)

In [14]:
def test(mod):
  mod.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      output = mod(data)
      test_loss += criterion(output, target).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [15]:
# Evaluate once before any training in this cell to get a starting accuracy.
test(model)

# Initial training of the unpruned model: n_epochs epochs, evaluating after
# each one. No pruning happens in this cell; that is done later by prune_train.
for epoch in range(1, n_epochs + 1):
  train(model, optimizer, epoch)
  test(model)
    


  x = self.activation_scheme[-1](self.output_layer(x))



Test set: Avg. loss: 0.0023, Accuracy: 1209/10000 (12%)


Test set: Avg. loss: 0.0022, Accuracy: 5264/10000 (53%)




Test set: Avg. loss: 0.0018, Accuracy: 7296/10000 (73%)


Test set: Avg. loss: 0.0017, Accuracy: 7550/10000 (76%)



In [16]:
def _print_parameter_shapes(nn_model):
    """Print the shape of every parameter tensor of ``nn_model``, in order."""
    for (layer, param) in enumerate(nn_model.parameters()):
        print("Layer {} , Parameters: {}".format(layer, param.shape))


def _drop_index(tensor, index, dim):
    """Return a copy of ``tensor`` without position ``index`` along ``dim``."""
    tail_length = tensor.size(dim) - index - 1
    return torch.cat(
        (tensor.narrow(dim, 0, index), tensor.narrow(dim, index + 1, tail_length)),
        dim=dim)


def prune_neurons(nn_model):
    """Remove the weakest neuron from every non-output layer, in place.

    For each weight matrix except the last one, the neuron (row) with the
    smallest L1 norm is selected. That row is removed from the weight matrix,
    the matching entry is removed from the 1-D parameter that follows it (its
    bias), and the matching *column* is removed from the next weight matrix so
    consecutive layers stay shape-compatible. The last weight matrix (the
    output layer) keeps all of its rows.

    Assumes ``nn_model.parameters()`` yields a chain of fully connected layers
    as ``weight`` (out_features x in_features) optionally followed by ``bias``
    - TODO confirm for models other than the one built in this notebook.

    Args:
        nn_model: model whose parameters are resized in place.

    Returns:
        The same ``nn_model`` object, after pruning.
    """
    print("Pre-Pruning\n")
    _print_parameter_shapes(nn_model)

    param_list = list(nn_model.parameters())
    weight_positions = [pos for pos, p in enumerate(param_list) if p.dim() > 1]

    # 1 neuron is pruned from each layer based on the minimum L1 norm.
    # All selections are made on the untouched weights before anything is
    # resized. The output layer (last weight matrix) is never selected, and a
    # layer that is down to a single neuron is left alone so it cannot end up
    # with zero units.
    neurons_to_prune = {}
    for pos in weight_positions[:-1]:
        weights = param_list[pos].data
        if weights.size(0) > 1:
            l1_norms = weights.abs().flatten(1).sum(dim=1)
            neurons_to_prune[pos] = int(l1_norms.argmin())

    # Modify the model parameters to update the shape of the network after pruning
    removed_upstream = None  # neuron removed from the preceding weight matrix
    removed_here = None      # neuron removed from the current layer (for its bias)
    for pos, param in enumerate(param_list):
        values = param.data
        if param.dim() > 1:
            removed_here = neurons_to_prune.get(pos)
            if removed_here is not None:
                values = _drop_index(values, removed_here, dim=0)
            # Columns of this matrix are fed by the previous layer's neurons,
            # so the column to drop is the one pruned there, not this layer's.
            if removed_upstream is not None:
                values = _drop_index(values, removed_upstream, dim=1)
            removed_upstream = removed_here
        else:
            if removed_here is not None:
                values = _drop_index(values, removed_here, dim=0)
            removed_here = None
        param.data = values
        # Any stored gradient still has the pre-pruning shape; discard it.
        param.grad = None

    print("Post Pruning \n")
    _print_parameter_shapes(nn_model)

    return nn_model

In [17]:
def prune_train(model, optimizer):
    """Prune a copy of ``model``, rebuild it as a fresh ``Network`` and retrain.

    Steps: deep-copy and prune the model with ``prune_neurons``, derive the new
    hidden-layer sizes from the pruned weights, build a new ``Network`` from an
    updated copy of the global ``model_dict``, load the pruned weights into it,
    create a new SGD optimizer carrying over the previous optimizer's state,
    then train for ``n_epochs`` epochs with an evaluation after each.

    Args:
        model: trained network to prune; it is copied, not modified.
        optimizer: optimizer previously used with ``model``; only its
            ``state_dict()`` is read.

    Returns:
        Tuple ``(updated_model, updated_optimizer)`` for the pruned network.
    """
    # Get new model after pruning (on a copy, so the caller's model is untouched)
    pruned_model = prune_neurons(copy.deepcopy(model))
    pruned_state = pruned_model.state_dict()

    # Write the pruned layer widths into a copy of the network description.
    # The state-dict keys below follow the naming used by `Network`
    # ('input_layer' for the first Linear, 'hidden_layers.N' for the rest) -
    # NOTE(review): confirm against NetworkClass if that class changes.
    new_model_dict = copy.deepcopy(model_dict)
    for i, layer_spec in enumerate(new_model_dict['network']['hidden_layer']):
        if i == 0:
            weight_key = 'input_layer.weight'
        else:
            weight_key = 'hidden_layers.{}.weight'.format(i - 1)
        layer_spec['units'] = pruned_state[weight_key].shape[0]

    # Create new model based on updated network definition
    updated_model = Network(new_model_dict)

    # Load previously trained parameters into the new model
    updated_model.load_state_dict(pruned_state)

    # Update Optimizers and set state from the previous optimizer.
    # NOTE(review): if the optimizer ever keeps per-parameter buffers (e.g. SGD
    # with momentum), those buffers would still have their pre-pruning shapes.
    updated_optimizer = torch.optim.SGD(updated_model.parameters(), lr=learning_rate)
    updated_optimizer.load_state_dict(copy.deepcopy(optimizer.state_dict()))

    # Train and test the model (both helpers use the notebook-level `criterion`)
    for epoch in range(1, n_epochs + 1):
      train(updated_model, updated_optimizer, epoch)
      test(updated_model)

    return updated_model, updated_optimizer

In [18]:
# Perform Pruning 5 times in succession
for i in range(5):
    model, optimizer = prune_train(model, optimizer)


Pre-Pruning/n
Layer 0 , Parameters: torch.Size([400, 784])
Layer 1 , Parameters: torch.Size([400])
Layer 2 , Parameters: torch.Size([50, 400])
Layer 3 , Parameters: torch.Size([50])
Layer 4 , Parameters: torch.Size([10, 50])
Layer 5 , Parameters: torch.Size([10])
Post Pruning /n
Layer 0 , Parameters: torch.Size([399, 784])
Layer 1 , Parameters: torch.Size([399])
Layer 2 , Parameters: torch.Size([49, 399])
Layer 3 , Parameters: torch.Size([49])
Layer 4 , Parameters: torch.Size([10, 49])
Layer 5 , Parameters: torch.Size([10])
softmax

Test set: Avg. loss: 0.0017, Accuracy: 7547/10000 (75%)




Test set: Avg. loss: 0.0017, Accuracy: 7603/10000 (76%)


Test set: Avg. loss: 0.0017, Accuracy: 7633/10000 (76%)

Pre-Pruning/n
Layer 0 , Parameters: torch.Size([399, 784])
Layer 1 , Parameters: torch.Size([399])
Layer 2 , Parameters: torch.Size([49, 399])
Layer 3 , Parameters: torch.Size([49])
Layer 4 , Parameters: torch.Size([10, 49])
Layer 5 , Parameters: torch.Size([10])
Post Pruning /n
Layer 0 , Parameters: torch.Size([398, 784])
Layer 1 , Parameters: torch.Size([398])
Layer 2 , Parameters: torch.Size([48, 398])
Layer 3 , Parameters: torch.Size([48])
Layer 4 , Parameters: torch.Size([10, 48])
Layer 5 , Parameters: torch.Size([10])
softmax



Test set: Avg. loss: 0.0017, Accuracy: 7623/10000 (76%)



KeyboardInterrupt: 