# Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
# Use the GPU when CUDA is available; otherwise fall back to the CPU so the
# later `.to(device)` calls still work on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Develop code in MATLAB (or Python) to design a neural network that classifies the 10 digits (0-9).  
## Make 3 different networks

In [2]:
class Net1(nn.Module):
    """Fully connected classifier with layer widths 784 -> 256 -> 64 -> 10.

    The two hidden layers use ReLU; the output is a log-softmax over the
    10 output units.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return log-probabilities of shape (rows, 10).

        The input is cast to float and viewed as rows of 784 values before
        it enters the first linear layer.
        """
        flat = x.float().view(-1, 784)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1)

In [18]:
class Net2(nn.Module):
    """Fully connected classifier with layer widths 784 -> 512 -> 64 -> 32 -> 10.

    The three hidden layers use ReLU; the output is a log-softmax over the
    10 output units.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 10)

    def forward(self, x):
        """Return log-probabilities of shape (rows, 10).

        The input is cast to float and viewed as rows of 784 values before
        it enters the first linear layer.
        """
        activation = x.float().view(-1, 784)
        # Every layer except the last is followed by a ReLU.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            activation = F.relu(hidden_layer(activation))
        logits = self.fc4(activation)
        return F.log_softmax(logits, dim=1)

In [15]:
class Net3(nn.Module):
    """Fully connected classifier, 784 -> 512 -> 256 -> 128 -> 64 -> 32 -> 10.

    The five hidden layers use ReLU; the output is a log-softmax over the
    10 output units.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 32)
        self.fc6 = nn.Linear(32, 10)

    def forward(self, x):
        """Return log-probabilities of shape (rows, 10).

        The input is cast to float and viewed as rows of 784 values before
        it enters the first linear layer.
        """
        hidden_stack = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5)
        signal = x.float().view(-1, 784)
        # Every layer except the last is followed by a ReLU.
        for fc in hidden_stack:
            signal = F.relu(fc(signal))
        return F.log_softmax(self.fc6(signal), dim=1)

# Set hyperparameters

In [5]:
# Data loading: mini-batch sizes for the training and test loaders.
batch_size, test_batch_size = 64, 1000

# Optimisation: SGD learning rate and momentum coefficient.
lr, momentum = 0.01, 0.5

# Run control.
epochs = 10         # intended number of training epochs
log_interval = 200  # batches between training progress printouts
seed = 1            # NOTE(review): never passed to an RNG in the visible code
no_cuda = True      # NOTE(review): not consulted anywhere in the visible code

# Load MNIST dataset

In [6]:
# Convert each image to a tensor, then standardise it with a fixed mean / std
# (presumably the MNIST training-set statistics -- TODO confirm).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True, transform=transform),
    batch_size=batch_size,
    shuffle=True,
)

# Evaluation only sums loss and correct predictions over the whole set, so the
# order of samples does not affect the result; keep it fixed (shuffle=False)
# so evaluation is deterministic and skips the needless permutation.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True, transform=transform),
    batch_size=test_batch_size,
    shuffle=False,
)

# Instantiate a model and an optimizer for each network defined above

In [20]:

# Build the three networks in order (Net1, Net2, Net3) and move each one to
# the selected device.
model1, model2, model3 = [net().to(device) for net in (Net1, Net2, Net3)]

# One SGD optimizer per model, all sharing the same learning rate and momentum.
optimizer1, optimizer2, optimizer3 = [
    optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    for net in (model1, model2, model3)
]


# Define the training and evaluation functions

In [8]:
def train(log_interval, model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Each batch is moved to ``device``, the negative log-likelihood loss of the
    model output is back-propagated, and the optimizer takes one step. Every
    ``log_interval`` batches (starting with batch 0) a progress line with the
    current batch loss is printed. ``epoch`` is only used in that printout.
    Nothing is returned.
    """
    model.train()
    progress = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        if step % log_interval != 0:
            continue
        # Samples seen is estimated as batch index times current batch size.
        seen = step * len(inputs)
        percent = 100. * step / len(train_loader)
        print(progress.format(
            epoch, seen, len(train_loader.dataset), percent, batch_loss.item()))

def test(log_interval, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() 
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format
          (test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Compare the three models

In [9]:
# Train and evaluate model1 once per epoch. The epoch count comes from the
# `epochs` hyperparameter instead of a hard-coded literal; epoch numbers are
# 1-based because they are only used in the progress printout.
for epoch in range(1, epochs + 1):
    train(log_interval, model1, device, train_loader, optimizer1, epoch)
    test(log_interval, model1, device, test_loader)



Test set: Average loss: 0.2600, Accuracy: 9247/10000 (92%)


Test set: Average loss: 0.1907, Accuracy: 9435/10000 (94%)


Test set: Average loss: 0.1545, Accuracy: 9546/10000 (95%)


Test set: Average loss: 0.1363, Accuracy: 9590/10000 (96%)


Test set: Average loss: 0.1096, Accuracy: 9660/10000 (97%)


Test set: Average loss: 0.1001, Accuracy: 9698/10000 (97%)


Test set: Average loss: 0.0915, Accuracy: 9716/10000 (97%)


Test set: Average loss: 0.0883, Accuracy: 9733/10000 (97%)


Test set: Average loss: 0.0794, Accuracy: 9755/10000 (98%)


Test set: Average loss: 0.0758, Accuracy: 9766/10000 (98%)



In [21]:
# Train and evaluate model2 once per epoch. The epoch count comes from the
# `epochs` hyperparameter instead of a hard-coded literal; epoch numbers are
# 1-based because they are only used in the progress printout.
for epoch in range(1, epochs + 1):
    train(log_interval, model2, device, train_loader, optimizer2, epoch)
    test(log_interval, model2, device, test_loader)



Test set: Average loss: 0.2809, Accuracy: 9168/10000 (92%)


Test set: Average loss: 0.1846, Accuracy: 9445/10000 (94%)


Test set: Average loss: 0.1376, Accuracy: 9576/10000 (96%)


Test set: Average loss: 0.1204, Accuracy: 9630/10000 (96%)


Test set: Average loss: 0.0972, Accuracy: 9706/10000 (97%)


Test set: Average loss: 0.0921, Accuracy: 9722/10000 (97%)


Test set: Average loss: 0.0831, Accuracy: 9739/10000 (97%)


Test set: Average loss: 0.0853, Accuracy: 9740/10000 (97%)


Test set: Average loss: 0.0822, Accuracy: 9758/10000 (98%)


Test set: Average loss: 0.0764, Accuracy: 9755/10000 (98%)



In [13]:
# Train and evaluate model3 once per epoch. The epoch count comes from the
# `epochs` hyperparameter instead of a hard-coded literal; epoch numbers are
# 1-based because they are only used in the progress printout.
for epoch in range(1, epochs + 1):
    train(log_interval, model3, device, train_loader, optimizer3, epoch)
    test(log_interval, model3, device, test_loader)


Test set: Average loss: 0.5952, Accuracy: 8164/10000 (82%)


Test set: Average loss: 0.2294, Accuracy: 9307/10000 (93%)


Test set: Average loss: 0.1634, Accuracy: 9486/10000 (95%)


Test set: Average loss: 0.1195, Accuracy: 9657/10000 (97%)


Test set: Average loss: 0.1394, Accuracy: 9565/10000 (96%)


Test set: Average loss: 0.1024, Accuracy: 9705/10000 (97%)


Test set: Average loss: 0.0880, Accuracy: 9733/10000 (97%)


Test set: Average loss: 0.1113, Accuracy: 9671/10000 (97%)


Test set: Average loss: 0.0820, Accuracy: 9759/10000 (98%)


Test set: Average loss: 0.0874, Accuracy: 9743/10000 (97%)



# Discussion  
Because all three networks are fairly large for the MNIST dataset, the final results of the models are quite similar. However, you may notice that the smallest network converges faster than the others, since it has fewer parameters to train.