In [54]:
import sys
sys.path.append('../framework')
from NetworkClass import Network
import torch.nn.functional as F
from torch import nn

In [55]:
model = Network({
        "network":{
            'input_layer': {
                "units": 784,
                
                },
            'hidden_layer': [{
                    "units": 400, 
                    "type": "Linear"
                }, 
                {
                    "units": 50, 
                    "activation": "relu",
                    "type": "Linear"

                }],
            'output_layer': {
                "units": 10,
                "activation": "softmax",
                "type": "Linear"
                }
        }
    })

softmax


In [56]:
for (layer, param) in enumerate(model.parameters()):
    print("Layer {} , Parameters: {}".format(layer, param.shape))

Layer 0 , Parameters: torch.Size([400, 784])
Layer 1 , Parameters: torch.Size([400])
Layer 2 , Parameters: torch.Size([50, 400])
Layer 3 , Parameters: torch.Size([50])
Layer 4 , Parameters: torch.Size([10, 50])
Layer 5 , Parameters: torch.Size([10])


In [57]:
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torchvision
import torch
import torch.nn as nn


In [58]:
batch_size_train = 64
batch_size_test = 1000
n_epochs = 3
learning_rate = 0.01
momentum = 0.5
log_interval = 10
random_seed = 1

In [59]:
class ReshapeTransform:
    def __init__(self, new_size):
        self.new_size = new_size

    def __call__(self, img):
        return torch.reshape(img, self.new_size)

In [60]:
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('../data/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,)), ReshapeTransform((-1,))
                             ])),
  batch_size=batch_size_train, shuffle=True)


test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('../data/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,)), ReshapeTransform((-1,))
                             ])),
  batch_size=batch_size_test, shuffle=True)

In [61]:
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)

In [62]:
example_data.shape

torch.Size([1000, 784])

In [63]:
# import matplotlib.pyplot as plt

# fig = plt.figure()
# for i in range(6):
#   plt.subplot(2,3,i+1)
#   plt.tight_layout()
#   plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
#   plt.title("Ground Truth: {}".format(example_targets[i]))
#   plt.xticks([])
#   plt.yticks([])

In [64]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
                      momentum=momentum)

In [65]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

In [66]:
def train(epoch):
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, batch_idx * len(data), len(train_loader.dataset),
        100. * batch_idx / len(train_loader), loss.item()))
      train_losses.append(loss.item())
      train_counter.append(
        (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))

In [67]:
def test():
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      output = model(data)
      test_loss += criterion(output, target).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [68]:
test()

# Pruning every 3 epochs
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()
    



Test set: Avg. loss: 0.0023, Accuracy: 851/10000 (9%)


Test set: Avg. loss: 0.0018, Accuracy: 7413/10000 (74%)




Test set: Avg. loss: 0.0016, Accuracy: 8990/10000 (90%)


Test set: Avg. loss: 0.0016, Accuracy: 9124/10000 (91%)



In [69]:
for (layer, param) in enumerate(model.parameters()):
    print("Layer {} , Parameters: {}".format(layer, param.shape))

Layer 0 , Parameters: torch.Size([400, 784])
Layer 1 , Parameters: torch.Size([400])
Layer 2 , Parameters: torch.Size([50, 400])
Layer 3 , Parameters: torch.Size([50])
Layer 4 , Parameters: torch.Size([10, 50])
Layer 5 , Parameters: torch.Size([10])


In [70]:
neurons_to_prune = []
for p in model.parameters():
        if len(p.data.size()) != 1:
            normed_weights = p.data.abs()
            l1_norm_layer = []
            for neuron_idx in range(normed_weights.shape[0]):
                l1_norm_layer.append(torch.sum(normed_weights[neuron_idx, :]).item())
            neurons_to_prune.append(torch.argmin(torch.FloatTensor(l1_norm_layer)))
            neurons_to_prune.append(torch.argmin(torch.FloatTensor(l1_norm_layer)))
                

In [71]:
neurons_to_prune = neurons_to_prune[:-2]

In [72]:
list(model.parameters())[0]

Parameter containing:
tensor([[-0.0353,  0.0011,  0.0056,  ..., -0.0005,  0.0297, -0.0037],
        [ 0.0192,  0.0163,  0.0266,  ..., -0.0112, -0.0005,  0.0282],
        [ 0.0209, -0.0029, -0.0197,  ..., -0.0268, -0.0195,  0.0038],
        ...,
        [-0.0351, -0.0332, -0.0285,  ...,  0.0203,  0.0110,  0.0114],
        [ 0.0190, -0.0052,  0.0093,  ...,  0.0325,  0.0255,  0.0067],
        [ 0.0065,  0.0127, -0.0162,  ..., -0.0003, -0.0091, -0.0072]],
       requires_grad=True)

In [73]:
param_list = list(model.parameters())
for i, neuron_idx in enumerate(neurons_to_prune):
    idx_weights = param_list[i]
    y = torch.cat((idx_weights[0:neuron_idx], idx_weights[neuron_idx+1:]))
    if i > 1 and len(idx_weights.shape) > 1:
        y = torch.cat((idx_weights[:, 0:neuron_idx-1], idx_weights[:, neuron_idx:]), axis=1)
    print(y.shape)

    idx_weights.data = y

torch.Size([399, 784])
torch.Size([399])
torch.Size([50, 42])
torch.Size([50, 399])
torch.Size([49])


In [74]:
for (layer, param) in enumerate(model.parameters()):
    print("Layer {} , Parameters: {}".format(layer, param.shape))

Layer 0 , Parameters: torch.Size([399, 784])
Layer 1 , Parameters: torch.Size([399])
Layer 2 , Parameters: torch.Size([50, 399])
Layer 3 , Parameters: torch.Size([49])
Layer 4 , Parameters: torch.Size([10, 50])
Layer 5 , Parameters: torch.Size([10])


In [60]:
model.state_dict()['input_layer.weight']

tensor([[-0.0241, -0.0201,  0.0112,  ...,  0.0152, -0.0048, -0.0302],
        [ 0.0199,  0.0312,  0.0181,  ..., -0.0262, -0.0269, -0.0204],
        [ 0.0144, -0.0292,  0.0177,  ..., -0.0168, -0.0165, -0.0345],
        ...,
        [-0.0134,  0.0108, -0.0238,  ..., -0.0300,  0.0301, -0.0255],
        [ 0.0274, -0.0345, -0.0261,  ..., -0.0155, -0.0024, -0.0271],
        [ 0.0157, -0.0022, -0.0255,  ...,  0.0204,  0.0325, -0.0283]])

In [59]:
updated_model =  Network({
        "network":{
            'input_layer': {
                "units": 784,
                
                },
            'hidden_layer': [{
                    "units": 399, 
                    "type": "Linear"
                }, 
                {
                    "units": 49, 
                    "activation": "relu",
                    "type": "Linear"

                }],
            'output_layer': {
                "units": 10,
                "activation": "softmax",
                "type": "Linear"
                }
        }
    })

softmax


In [62]:
updated_model.load_state_dict(model.state_dict())

RuntimeError: Error(s) in loading state_dict for Network:
	size mismatch for hidden_layers.0.weight: copying a param with shape torch.Size([49, 400]) from checkpoint, the shape in current model is torch.Size([49, 399]).
	size mismatch for output_layer.weight: copying a param with shape torch.Size([10, 50]) from checkpoint, the shape in current model is torch.Size([10, 49]).