# Introduction to Feed-Forward Neural Networks with PyTorch

In [2]:
import torch
import torch.nn as nn # Module for neural networks
from torch.autograd import Variable # Autograd (save history of operations)

import torchvision.datasets as dset # Load datasets
import torchvision.transforms as transforms # Preprocessing

import torch.nn.functional as F 
import torch.optim as optim
from torchsummary import summary

In [3]:
import os

# Directory the MNIST files are downloaded into.
root = './data'
# Create the directory (and any missing parents) in one call; exist_ok avoids
# the check-then-create race and makes re-running this cell harmless.
os.makedirs(root, exist_ok=True)

## Load datasets

In [4]:
# Preprocessing applied to every image: convert it to a tensor, then normalise
# the single channel with mean 0.5 and std 1.0 (i.e. subtract 0.5, no rescale).
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(1.0,)),
])

In [5]:
# Build the training split first, then the test split; both share the same
# root directory and preprocessing, and are downloaded if not already present.
train_set, test_set = (
    dset.MNIST(root=root, train=is_train, transform=trans, download=True)
    for is_train in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [7]:
batch_size = 100 # Samples per mini-batch; passed to both the training and the test DataLoader

In [8]:
# Training batches are drawn in shuffled order.
train_loader = torch.utils.data.DataLoader(
                dataset=train_set,
                batch_size=batch_size,
                shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps any
# order-dependent statistic (such as a running/smoothed loss) reproducible.
test_loader = torch.utils.data.DataLoader(
                dataset=test_set,
                batch_size=batch_size,
                shuffle=False)
# len(loader) is the number of batches, not the number of samples.
print('Total training batch number: {}'.format(len(train_loader)))
print('Total testing batch number: {}'.format(len(test_loader)))

Total training batch number: 600
Total testing batch number: 100


## Define Architecture

In [9]:
class MLPNet(nn.Module):
    """Fully connected classifier: 784 -> 500 -> 256 -> 10, ReLU between layers.

    ``forward`` reshapes its input into rows of 28*28 values, so the input must
    contain a multiple of 784 elements. The output has 10 raw scores per row
    (no softmax is applied here).
    """

    def __init__(self):
        super(MLPNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 500, bias=True) # AX + b
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        """Return the 10 class scores for each flattened 28*28 input row."""
        flat = x.view(-1, 28*28)
        hidden1 = F.relu(self.fc1(flat)) # Max(0, x)
        hidden2 = F.relu(self.fc2(hidden1))
        return self.fc3(hidden2)

    def name(self):
        """Return the model's display name (same accessor as ``LeNet.name``)."""
        return "MLPNet"

class LeNet(nn.Module):
    """Small LeNet-style CNN: two conv + max-pool stages, then two linear layers.

    For a 1x28x28 input the first stage yields 20x26x26 feature maps (13x13
    after pooling) and the second yields 50x9x9 (4x4 after pooling), which is
    where the 4*4*50 = 800 flattened features fed to ``fc1`` come from.
    The output has 10 raw scores per sample (no softmax is applied here).
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5, padding=1)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(800, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Run both conv/pool stages, flatten to 800 features, then classify."""
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2, stride=2)
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), kernel_size=2, stride=2)
        flat = stage2.view(-1, 800)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

    def name(self):
        """Return the model's display name."""
        return "LeNet"

In [10]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The model's weights and its input batches must be on the same device, so the
# model is moved to `device` right after construction.
model = LeNet().to(device)

# summary() prints the per-layer table itself and returns None, so it is not
# wrapped in print().
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 20, 26, 26]             520
            Conv2d-2             [-1, 50, 9, 9]          25,050
            Linear-3                  [-1, 500]         400,500
            Linear-4                   [-1, 10]           5,010
Total params: 431,080
Trainable params: 431,080
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.14
Params size (MB): 1.64
Estimated Total Size (MB): 1.79
----------------------------------------------------------------
None


In [11]:
# Print the module's repr: each registered layer together with its settings.
print(model)

LeNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)


## Define Optimizer and loss function

In [13]:
# Loss: cross-entropy between the network's raw class scores and the integer labels.
criterion = nn.CrossEntropyLoss()

# Optimiser: SGD with momentum over every parameter of the current model.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
)

## Training and Evaluation on the Test Set

In [14]:
# Batches have to be on the same device as the model's weights. Reading the
# device from the model itself (instead of testing torch.cuda.is_available())
# keeps the two in sync even when CUDA exists but the model was left on the CPU.
model_device = next(model.parameters()).device

for epoch in range(10):
    # Training
    model.train()
    ave_loss = 0
    for batch_idx, (x, target) in enumerate(train_loader):
        x, target = x.to(model_device), target.to(model_device)

        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, target)

        # Exponentially smoothed loss (not an arithmetic mean); logging only.
        ave_loss = ave_loss * 0.9 + loss.item() * 0.1

        loss.backward()
        optimizer.step()

        # Report every 100 batches and on the final batch of the epoch.
        if (batch_idx+1) % 100 == 0 or (batch_idx+1) == len(train_loader):
            print('epoch: {}, batch index: {}, train loss: {:.6f}'.format(
                epoch, batch_idx+1, ave_loss))

    # testing
    model.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        correct_cnt, ave_loss = 0, 0
        total_cnt = 0
        for batch_idx, (x, target) in enumerate(test_loader):
            x, target = x.to(model_device), target.to(model_device)
            out = model(x)
            loss = criterion(out, target)

            # Predicted class = index of the largest score in each row.
            _, pred_label = torch.max(out, 1)
            total_cnt += x.size(0)
            correct_cnt += (pred_label == target).sum().item()

            # smooth average
            ave_loss = ave_loss * 0.9 + loss.item() * 0.1

            if (batch_idx+1) % 100 == 0 or (batch_idx+1) == len(test_loader):
                print('epoch: {}, batch index: {}, test loss:{:.6f}, acc: {:.3f}'.format(
                    epoch, batch_idx+1, ave_loss, float(correct_cnt) / total_cnt))

epoch: 0, batch index: 100, train loss: 0.775963
epoch: 0, batch index: 200, train loss: 0.276948
epoch: 0, batch index: 300, train loss: 0.154273
epoch: 0, batch index: 400, train loss: 0.131971
epoch: 0, batch index: 500, train loss: 0.097019
epoch: 0, batch index: 600, train loss: 0.076483
epoch: 0, batch index: 100, test loss:0.065151, acc: 0.977
epoch: 1, batch index: 100, train loss: 0.076308
epoch: 1, batch index: 200, train loss: 0.077981
epoch: 1, batch index: 300, train loss: 0.089371
epoch: 1, batch index: 400, train loss: 0.067467
epoch: 1, batch index: 500, train loss: 0.059038
epoch: 1, batch index: 600, train loss: 0.073413
epoch: 1, batch index: 100, test loss:0.050418, acc: 0.985
epoch: 2, batch index: 100, train loss: 0.061608
epoch: 2, batch index: 200, train loss: 0.057691
epoch: 2, batch index: 300, train loss: 0.063467
epoch: 2, batch index: 400, train loss: 0.055256
epoch: 2, batch index: 500, train loss: 0.042882
epoch: 2, batch index: 600, train loss: 0.048909
