In this notebook, we would stash some of our nodes, and see the performance of the code.
The number of epochs taken are 3, and batch size = 64

In [1]:
# Improting all the libraries
import torch
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
import torch.nn as nn
from torch import optim
import cv2
import numpy as np
import torchvision
import copy

In [2]:
# Defining Hyper-parameters
epochs = 5
batch_size = 64
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
train_set = torchvision.datasets.MNIST(root='data',train=True, transform=transform, download=True)
test_set = torchvision.datasets.MNIST(root='data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_set,shuffle=True,batch_size = batch_size)
test_loader = DataLoader(test_set,shuffle=False, batch_size=batch_size)

In [4]:
# Here we are creating a single hidden layer NN, with 150 nodes
class trial_2(nn.Module):

    def __init__(self, num_hidden_layer=150):
        super(trial_2,self).__init__()

        self.input_layer = nn.ModuleList([nn.Linear(in_features=784, out_features=1) for i in range(num_hidden_layer)])
        self.activation = nn.ReLU()
        self.output_layer = nn.Linear(in_features=num_hidden_layer, out_features=10)
    
    def forward(self,x):
        x_input = [self.input_layer[i](x) for i in range(num_hidden_layer)]
        x_hidden = torch.hstack(x_input)
        x_f = self.activation(x_hidden)
        outp = self.output_layer(x_f)
        return(outp)

In [5]:
# Number of hidden Layer
num_hidden_layer = 150
model = trial_2(num_hidden_layer=num_hidden_layer).to(device)
next(model.parameters()).is_cuda

True

In [6]:
criteration = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr= 0.001)

In [7]:
# Function to get accuracy on testing data
def accuracy():
    with torch.no_grad():
        n_correct = 0
        n_samples = 0
        for images, labels in test_loader:
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            n_samples += labels.size(0)
            n_correct += (predicted == labels).sum().item()
            return(n_correct/n_samples * 100)

def reset_grad():
    for params in model.parameters():
        params.require_grad = True

In [8]:
# Generating non duplicate random nodes to be stashed out of hidden layer per epoch 
import random
stashed = random.sample(range(num_hidden_layer), 15)
stashed

[91, 124, 90, 121, 34, 108, 10, 116, 3, 44, 74, 128, 61, 57, 1]

In [9]:
# Start training
n_total_steps = len(train_loader)
j=0
for epoch_iter in range(epochs):
    for i,(data, label) in enumerate(train_loader):
        x = data.reshape(-1,28*28).to(device)
        y = label.to(device)
        optimizer.zero_grad()
        pred = model(x)
        loss = criteration(pred,y)
        loss.backward()
        optimizer.step()
        if (i+1) % 500 == 0:
             print (f'Epoch [{epoch_iter+1}/{epochs}], Step[{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
             print("-------------------------")
        
        if (i+1) % 300 == 0:
            print("Accuracy is", accuracy())
            for params in model.input_layer[stashed[j]].parameters():
                params.requires_grad = False
            print("Node stashed", stashed[j])
            print("-------------------------")
            j+=1

Accuracy is 93.75
Node stashed 91
-------------------------
Epoch [1/5], Step[500/938], Loss: 0.2075
-------------------------
Accuracy is 95.3125
Node stashed 124
-------------------------
Accuracy is 96.875
Node stashed 90
-------------------------
Accuracy is 98.4375
Node stashed 121
-------------------------
Epoch [2/5], Step[500/938], Loss: 0.1102
-------------------------
Accuracy is 96.875
Node stashed 34
-------------------------
Accuracy is 98.4375
Node stashed 108
-------------------------
Accuracy is 98.4375
Node stashed 10
-------------------------
Epoch [3/5], Step[500/938], Loss: 0.0395
-------------------------
Accuracy is 100.0
Node stashed 116
-------------------------
Accuracy is 98.4375
Node stashed 3
-------------------------
Accuracy is 100.0
Node stashed 44
-------------------------
Epoch [4/5], Step[500/938], Loss: 0.0775
-------------------------
Accuracy is 98.4375
Node stashed 74
-------------------------
Accuracy is 100.0
Node stashed 128
--------------------

In [10]:
accuracy()

98.4375

In [11]:
# Assertion to see if our nodes have really been stashed even after the training is over
univ = [i for i in range(num_hidden_layer)]
not_stashed = [i for i in univ if i not in stashed]

for i in stashed:
    for j in model.input_layer[i].parameters():
        assert j.requires_grad == False

for i in not_stashed:
    for j in model.input_layer[i].parameters():
        assert j.requires_grad == True


In [12]:
# Creating computational graph to see, whether all the neurons are contributing or not
# batch = next(iter(train_loader))
# yhat = model(batch[0].reshape(-1,28*28).to(device)) # Give dummy batch to forward().

# from torchviz import make_dot

#make_dot(yhat, params=dict(list(model.named_parameters()))).render("software_stash_graph", format="png")