# SLP in Pytorch



## Import Necessary modules

In [13]:
import os

import torch
import torch.nn as nn                              # neural network module
import torch.nn.functional as F
import torch.optim as optim                        # optimization module
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter  # logging module

import torchvision                                 # datasets and transformations modules
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision.utils import make_grid, save_image

## 1.Define the Network

In [14]:
# All networks derive from the base class nn.Module
class Perceptron(nn.Module):
    """Single- or multi-layer perceptron producing raw class scores (logits).

    With ``H=None`` the model is one fully connected layer from ``d`` inputs
    to ``K`` outputs (SLP). Otherwise it is a one-hidden-layer MLP:
    ``Linear(d, H) -> Sigmoid -> Linear(H, K)``.

    Args:
        d: number of input features per sample after flattening.
        K: number of output features.
        H: hidden layer width; ``None`` selects the single-layer model.
    """

    def __init__(self, d, K, H=None):
        # all derived classes must call __init__ method of super class
        super().__init__()
        if H is None:
            # single fully connected layer from input to output
            self.model = nn.Linear(d, K)
        else:
            self.model = nn.Sequential(
                nn.Linear(d, H),
                nn.Sigmoid(),
                nn.Linear(H, K),
            )

    def forward(self, x):
        """Flatten each sample of ``x`` and return the model output.

        Args:
            x: batch tensor whose first dimension is the batch size; all
                remaining dimensions are flattened into one feature axis.

        Returns:
            Tensor of shape ``(batch_size, K)`` holding un-normalized scores.
        """
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed or sliced tensors, copying only when it has to
        x = x.reshape(x.shape[0], -1)
        # no softmax here: it is computed internally by the cross entropy loss
        return self.model(x)
        

## 2.Define parameters and hyper-parameters

In [15]:
# torch parameters
SEED = 60            # random seed passed to torch.manual_seed for reproducibility
# NN Parameters
EPOCHS = 20          # number of training epochs
LR = 0.01            # initial learning rate of the SGD optimizer
MOMENTUM = 0.9       # momentum for the optimizer
WEIGHT_DECAY = 1e-5  # weight decay for the optimizer
GAMMA = 0.1          # gamma (decay factor) given to the learning rate scheduler
BATCH_SIZE = 64      # number of images to load per iteration
d = 784              # number of input features (size of one flattened image)
K = 10               # number of output features
H = None             # hidden layer size; None builds the SLP, any other value the MLP


## 3. Ensure reproducibility

In [16]:
# seed PyTorch's random number generator with SEED so that repeated runs
# reproduce the same results
torch.manual_seed(SEED)


<torch._C.Generator at 0x7fb8fc031370>

## 4. Define datasets and dataloaders

In [17]:
# MNIST: the files live under this folder and are fetched on first use
mnist_folder = './data'

# training split, converted to tensors, served in shuffled mini-batches
tr_dataset = datasets.MNIST(
    mnist_folder,
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
train_loader = torch.utils.data.DataLoader(
    tr_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
)

# test split, converted to tensors, served in a fixed order
test_dataset = datasets.MNIST(
    mnist_folder,
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=8,
)


## 5.Create a network instance and move it to the device you want to run computations on

In [18]:
# pick the compute device: the first GPU when CUDA is available, else the CPU
cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda else "cpu")

# build the model from the configured layer sizes
net = Perceptron(d, K, H)

# list every learnable parameter together with its shape
for name, param in net.named_parameters():
  print(name, param.size())

# place the model on the chosen device; the returned module is the cell output
net.to(device)


model.weight torch.Size([10, 784])
model.bias torch.Size([10])


Perceptron(
  (model): Linear(in_features=784, out_features=10, bias=True)
)

## 6.Specify the loss function and the optimizer

In [19]:
# classification loss; it applies softmax internally, so the network
# outputs raw scores
loss_fn = nn.CrossEntropyLoss()

# SGD with momentum and weight decay performs the weight updates
optimizer = optim.SGD(
    net.parameters(),
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)

# step schedule: scale the learning rate by GAMMA every 10 scheduler steps
scheduler = lr_scheduler.StepLR(
    optimizer,
    step_size=10,
    gamma=GAMMA,
)


## 7. Define training function

In [20]:
def train_net():
  """Run one pass over ``train_loader``, updating ``net`` after every batch.

  Uses the notebook-level ``net``, ``train_loader``, ``device``,
  ``optimizer`` and ``loss_fn``.

  Returns:
    Sum of the per-batch loss values seen during the pass.
  """
  # switch the network to training mode
  net.train()
  running_loss = 0.0
  for inputs, targets in train_loader:
    # batches must live on the same device as the network
    inputs = inputs.to(device)
    targets = targets.to(device)
    # gradients accumulate across backward calls, so reset them first
    optimizer.zero_grad()
    # forward pass and loss for this batch
    batch_loss = loss_fn(net(inputs), targets)
    # backward pass computes the gradients, then the optimizer applies them
    batch_loss.backward()
    optimizer.step()
    running_loss += batch_loss.item()
  return running_loss
  

## 8. Define test function

In [21]:
def eval_net(loader):
  """Evaluate the notebook-level ``net`` on every batch produced by ``loader``.

  Uses the notebook-level ``net``, ``device`` and ``loss_fn``.

  Args:
    loader: iterable yielding ``(inputs, labels)`` batches.

  Returns:
    Tuple ``(accuracy, total_loss)``. ``accuracy`` is the fraction of
    correctly classified samples among those actually iterated (0.0 when the
    loader yields nothing). ``total_loss`` is the sum of the per-batch
    ``loss_fn`` values; it is not divided by the number of batches.
  """
  # put the network in evaluation mode
  net.eval()
  total_loss = 0.0
  correct = 0
  # samples actually evaluated; this stays correct even when the loader does
  # not visit the whole dataset (custom sampler, drop_last=True)
  seen = 0
  # disable gradient tracking
  with torch.no_grad():
    for xt, rt in loader:
      # move instances and corresponding labels to the network's device
      xt, rt = xt.to(device), rt.to(device)
      # forward the network
      yt = net(xt)
      # accumulate loss
      total_loss += loss_fn(yt, rt).item()
      # predicted class = index of the largest score
      pred = yt.argmax(dim=1)
      # accumulate correctly classified counts
      correct += (pred == rt).sum().item()
      seen += rt.shape[0]
  # guard against an empty loader instead of dividing by zero
  accuracy = correct / seen if seen else 0.0
  return accuracy, total_loss
  

## 9.Train the network

In [22]:
# initialize the logger instance
# by default creates run directory inside current folder
writer = SummaryWriter()
try:
  # train the network
  for epoch in range(1, EPOCHS+1):
    # train network for one epoch
    train_net()
    # advance the learning rate schedule once per epoch, after the optimizer
    # updates; without this call StepLR never changes the learning rate
    scheduler.step()
    # get accuracy and loss on the training dataset
    tr_ac, tr_loss = eval_net(train_loader)
    # get accuracy and loss on the test dataset
    tt_ac, tt_loss = eval_net(test_loader)
    # eval_net returns the loss summed over batches; the two loaders have
    # different batch counts, so log the mean per batch to keep the train
    # and test curves on the same scale
    tr_loss /= max(len(train_loader), 1)
    tt_loss /= max(len(test_loader), 1)
    # save training stats
    writer.add_scalar("Loss/train", tr_loss, epoch)
    writer.add_scalar("Accuracy/train", tr_ac, epoch)
    # save test stats
    writer.add_scalar("Loss/test", tt_loss, epoch)
    writer.add_scalar("Accuracy/test", tt_ac, epoch)
    # run only if SLP: each output unit's weight row is shown as an image
    if H is None:
      # detach so the visualization is not tracked by autograd
      weights = net.model.weight.detach().cpu()   # Kx784
      weights = weights.view(-1, 1, 28, 28)       # Kx1x28x28
      mean_images = make_grid(weights, normalize=True)
      writer.add_image("Images/mean_images", mean_images, epoch)
finally:
  # make sure pending events reach disk even if training is interrupted
  writer.flush()
  writer.close()


## 10. Save the model

In [23]:
# save the network model
output_dir = 'perceptron_outputs'
# torch.save does not create missing folders, so make sure the target exists
os.makedirs(output_dir, exist_ok=True)
# file name tells the single-layer and multi-layer variants apart
model_file = 'slp.pth' if H is None else 'mlp.pth'
torch.save(net.state_dict(), os.path.join(output_dir, model_file))


## 11. Visualize results on tensorboard

In [24]:
# load the TensorBoard notebook extension, then start TensorBoard on the
# `runs` log directory
%load_ext tensorboard
%tensorboard --logdir runs
# open http://localhost:6006/ to view the results

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 527), started 0:04:17 ago. (Use '!kill 527' to kill it.)