In [1]:
import sys
sys.path.append('../framework')
from NetworkClass import Network
import torch.nn.functional as F
from torch import nn

In [2]:
model = Network({
        "network":{
            'input_layer': {
                "units": 784,
                
                },
            'hidden_layer': [{
                    "units": 400, 
                    "type": "Linear"
                }, 
                {
                    "units": 50, 
                    "activation": "relu",
                    "type": "Linear"

                }],
            'output_layer': {
                "units": 10,
                "activation": "softmax",
                "type": "Linear"
                }
        }
    })

softmax


In [3]:
for (layer, param) in enumerate(model.parameters()):
    print("Layer {} , Parameters: {}".format(layer, param.shape))

Layer 0 , Parameters: torch.Size([400, 784])
Layer 1 , Parameters: torch.Size([400])
Layer 2 , Parameters: torch.Size([50, 400])
Layer 3 , Parameters: torch.Size([50])
Layer 4 , Parameters: torch.Size([10, 50])
Layer 5 , Parameters: torch.Size([10])


In [4]:
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torchvision
import torch
import torch.nn as nn


In [5]:
batch_size_train = 64
batch_size_test = 1000
n_epochs = 3
learning_rate = 0.01
momentum = 0.5
log_interval = 10
random_seed = 1

In [6]:
class ReshapeTransform:
    def __init__(self, new_size):
        self.new_size = new_size

    def __call__(self, img):
        return torch.reshape(img, self.new_size)

In [7]:
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('../data/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,)), ReshapeTransform((-1,))
                             ])),
  batch_size=batch_size_train, shuffle=True)


test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('../data/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,)), ReshapeTransform((-1,))
                             ])),
  batch_size=batch_size_test, shuffle=True)

In [8]:
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)

In [9]:
example_data.shape

torch.Size([1000, 784])

In [10]:
# import matplotlib.pyplot as plt

# fig = plt.figure()
# for i in range(6):
#   plt.subplot(2,3,i+1)
#   plt.tight_layout()
#   plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
#   plt.title("Ground Truth: {}".format(example_targets[i]))
#   plt.xticks([])
#   plt.yticks([])

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [12]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

In [13]:
def train(mod, optim, epoch):
  mod.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    optim.zero_grad()
    output = mod(data)
    loss = criterion(output, target)
    loss.backward()
    optim.step()
    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, batch_idx * len(data), len(train_loader.dataset),
        100. * batch_idx / len(train_loader), loss.item()))
      train_losses.append(loss.item())
      train_counter.append(
        (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))

In [14]:
def test(mod):
  mod.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      output = mod(data)
      test_loss += criterion(output, target).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [15]:
test(model)

# Pruning every 3 epochs
for epoch in range(1, n_epochs + 1):
  train(model, optimizer, epoch)
  test(model)
    


  x = self.activation_scheme[-1](self.output_layer(x))



Test set: Avg. loss: 0.0023, Accuracy: 1050/10000 (10%)


Test set: Avg. loss: 0.0021, Accuracy: 4473/10000 (45%)




Test set: Avg. loss: 0.0019, Accuracy: 6220/10000 (62%)


Test set: Avg. loss: 0.0017, Accuracy: 7492/10000 (75%)



Layer 0 , Parameters: torch.Size([400, 784])
Layer 1 , Parameters: torch.Size([400])
Layer 2 , Parameters: torch.Size([50, 400])
Layer 3 , Parameters: torch.Size([50])
Layer 4 , Parameters: torch.Size([10, 50])
Layer 5 , Parameters: torch.Size([10])


In [17]:
def prune_neurons(nn_model):
    print("Pre-Pruning/n")
    for (layer, param) in enumerate(nn_model.parameters()):
        print("Layer {} , Parameters: {}".format(layer, param.shape))    
    neurons_to_prune = []
    for p in nn_model.parameters():
            if len(p.data.size()) != 1:
                normed_weights = p.data.abs()
                l1_norm_layer = []
                for neuron_idx in range(normed_weights.shape[0]):
                    l1_norm_layer.append(torch.sum(normed_weights[neuron_idx, :]).item())
                neurons_to_prune.append(torch.argmin(torch.FloatTensor(l1_norm_layer)))
                neurons_to_prune.append(torch.argmin(torch.FloatTensor(l1_norm_layer)))


    param_list = list(nn_model.parameters())
    for i, neuron_idx in enumerate(neurons_to_prune):
        idx_weights = param_list[i]
        if i < len(param_list) - 2:
            y = torch.cat((idx_weights[0:neuron_idx], idx_weights[neuron_idx+1:]))
            if i > 1 and len(idx_weights.shape) > 1:

                y = torch.cat((y[:, 0:neuron_idx-1], y[:, neuron_idx:]), axis=1)
        elif i > 1 and len(idx_weights.shape) > 1:
            y = torch.cat((idx_weights[:, 0:neuron_idx-1], idx_weights[:, neuron_idx:]), axis=1)
        else:
            y = idx_weights
        idx_weights.data = y  
       
    print("Post Pruning /n")
    for (layer, param) in enumerate(nn_model.parameters()):
        print("Layer {} , Parameters: {}".format(layer, param.shape))   
        
    return nn_model

Layer 0 , Parameters: torch.Size([399, 784])
Layer 1 , Parameters: torch.Size([399])
Layer 2 , Parameters: torch.Size([49, 399])
Layer 3 , Parameters: torch.Size([49])
Layer 4 , Parameters: torch.Size([10, 49])
Layer 5 , Parameters: torch.Size([10])


In [36]:
model.state_dict()['input_layer.weight']

tensor([[ 0.0166, -0.0226, -0.0272,  ...,  0.0007,  0.0326,  0.0082],
        [-0.0242, -0.0241, -0.0240,  ..., -0.0253, -0.0246,  0.0143],
        [-0.0166, -0.0213,  0.0067,  ...,  0.0309, -0.0081, -0.0047],
        ...,
        [-0.0211, -0.0347, -0.0168,  ..., -0.0312, -0.0197, -0.0021],
        [ 0.0035, -0.0318,  0.0043,  ...,  0.0283, -0.0242,  0.0234],
        [ 0.0329,  0.0300,  0.0072,  ..., -0.0275, -0.0193, -0.0343]])

In [37]:
updated_model =  Network({
        "network":{
            'input_layer': {
                "units": 784,
                
                },
            'hidden_layer': [{
                    "units": 399, 
                    "type": "Linear"
                }, 
                {
                    "units": 49, 
                    "activation": "relu",
                    "type": "Linear"

                }],
            'output_layer': {
                "units": 10,
                "activation": "softmax",
                "type": "Linear"
                }
        }
    })

softmax


In [38]:
updated_model.load_state_dict(model.state_dict())


<All keys matched successfully>

In [41]:
list(updated_model.parameters())

[Parameter containing:
 tensor([[ 0.0166, -0.0226, -0.0272,  ...,  0.0007,  0.0326,  0.0082],
         [-0.0242, -0.0241, -0.0240,  ..., -0.0253, -0.0246,  0.0143],
         [-0.0166, -0.0213,  0.0067,  ...,  0.0309, -0.0081, -0.0047],
         ...,
         [-0.0211, -0.0347, -0.0168,  ..., -0.0312, -0.0197, -0.0021],
         [ 0.0035, -0.0318,  0.0043,  ...,  0.0283, -0.0242,  0.0234],
         [ 0.0329,  0.0300,  0.0072,  ..., -0.0275, -0.0193, -0.0343]],
        requires_grad=True), Parameter containing:
 tensor([ 0.0280,  0.0296,  0.0370, -0.0279,  0.0030,  0.0288,  0.0099, -0.0033,
          0.0372,  0.0282, -0.0069, -0.0379, -0.0207,  0.0043, -0.0186, -0.0329,
         -0.0096, -0.0285,  0.0250,  0.0132,  0.0359, -0.0307, -0.0269,  0.0046,
         -0.0181,  0.0319, -0.0270,  0.0178, -0.0002,  0.0338,  0.0021,  0.0095,
          0.0130,  0.0079, -0.0198,  0.0141,  0.0251, -0.0061,  0.0082, -0.0124,
          0.0227, -0.0033,  0.0189,  0.0100, -0.0321,  0.0261,  0.0133,  0.0337,

In [42]:
criterion = nn.CrossEntropyLoss()
updated_optimizer = torch.optim.SGD(updated_model.parameters(), lr=learning_rate)



In [43]:
updated_optimizer.load_state_dict(optimizer.state_dict())


In [44]:
optimizer.state_dict()

{'state': {},
 'param_groups': [{'lr': 0.01,
   'momentum': 0,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'params': [140229386518248,
    140229386518320,
    140229386518392,
    140229386518464,
    140229386584136,
    140229386584208]}]}

In [46]:
test(updated_model)


Test set: Avg. loss: 0.0022, Accuracy: 1449/10000 (14%)



In [47]:

# Pruning every 3 epochs
for epoch in range(1, n_epochs + 1):
  train(updated_model, updated_optimizer, epoch)
  test(updated_model)
    


Test set: Avg. loss: 0.0017, Accuracy: 7546/10000 (75%)




Test set: Avg. loss: 0.0017, Accuracy: 7588/10000 (76%)


Test set: Avg. loss: 0.0017, Accuracy: 7630/10000 (76%)

