# MNIST_classifier_with_linear_network_

#### 1. Build a feed-forward neural network with PyTorch for the MNIST dataset.
(Validation accuracy should be greater than 97%.)

ReLU, CrossEntropyLoss

Learning rate = 1e-3

Adam

the number of network layers = 3

Val accuracy = 98%

In [0]:
!pip3 install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl
!pip3 install torchvision

In [0]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import torch
import torch.nn as nn
import torchvision.datasets
import torchvision.transforms as transforms
import sklearn
import torch.utils
import torch.optim
import torchvision

In [0]:
# Per-sample preprocessing: turn the image into a tensor, then flatten it
# into a 1-D vector so it can be fed straight into a Linear layer.
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda im: im.reshape(-1)),
])

# MNIST train / test splits, downloaded into ./data when not already present.
mnist_train = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=data_transform)
mnist_test = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=data_transform)

batch_size = 4

# Shuffled mini-batch loaders for the training and test splits.
data_loader_train = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True)
data_loader_test = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=True)
# Second loader over the training split, created with drop_last=True.
data_loader = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, drop_last=True)

In [0]:
def myNetwork(in_features, n_targget):
    """Build a three-layer fully connected network.

    Layout: Linear(in_features, 256) -> ReLU -> Linear(256, 256) -> ReLU
    -> Linear(256, n_targget). The last layer's output is returned as-is
    (no activation is applied after it).

    Args:
        in_features: Size of each (flattened) input sample.
        n_targget: Number of output units of the final layer. The misspelled
            name is kept so that existing keyword callers keep working.

    Returns:
        An ``nn.Sequential`` holding the five modules listed above.
    """
    hidden = 256
    linear1 = torch.nn.Linear(in_features, hidden, bias=True)
    linear2 = torch.nn.Linear(hidden, hidden, bias=True)
    # Size the output layer from the argument; the previous version read a
    # module-level ``n_target`` instead and silently ignored the parameter.
    linear3 = torch.nn.Linear(hidden, n_targget, bias=True)
    relu = torch.nn.ReLU()

    return nn.Sequential(linear1, relu, linear2, relu, linear3)

In [0]:
def performance(model, loss_fn, dataloader):
    """Evaluate ``model`` on every batch of ``dataloader``.

    The model is switched to eval mode and gradients are disabled for the
    duration of the call; the training/eval mode the model had on entry is
    restored before returning, so calling this between training epochs does
    not leave the model stuck in eval mode.

    Args:
        model: Module called as ``model(x)``; its output is arg-maxed over
            ``dim=1`` to obtain predicted labels.
        loss_fn: Callable ``loss_fn(ypred, y)`` returning a scalar tensor.
        dataloader: Iterable yielding ``(x, y)`` batches.

    Returns:
        ``(loss, acc)``: the batch losses weighted by batch size and divided
        by the number of samples, and the fraction of correct predictions.
        The loss is a per-sample mean provided ``loss_fn`` averages over the
        batch (assumed here; confirm for custom losses).

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    was_training = model.training
    model.eval()
    loss, acc, n = 0., 0., 0.
    try:
        with torch.no_grad():
            for x, y in dataloader:
                ypred = model(x)
                n_batch = len(y)
                # Undo the per-batch averaging so unequal batches weigh fairly.
                loss += loss_fn(ypred, y).item() * n_batch
                p = torch.argmax(ypred, dim=1)
                acc += (p == y).sum().item()
                n += n_batch
    finally:
        # Hand the model back in whatever mode the caller had it in.
        model.train(was_training)
    loss /= n
    acc /= n
    return loss, acc
  
def training_loop(n_epochs, optim, model, loss_fn, dl_train, dl_val, hist=None):
    """Train ``model`` for ``n_epochs`` epochs, validating after each one.

    Args:
        n_epochs: Number of passes over ``dl_train``.
        optim: Optimizer exposing ``zero_grad()`` and ``step()``.
        model: Module called as ``model(batch)``; predictions are the arg-max
            of its output over ``dim=1``.
        loss_fn: Callable ``loss_fn(ypred, labels)`` returning a scalar tensor.
        dl_train: Iterable of ``(images, labels)`` training batches.
        dl_val: Iterable of batches passed to ``performance`` for validation.
        hist: Optional history dict with list-valued keys ``'tloss'``,
            ``'tacc'``, ``'vloss'`` and ``'vacc'``; it is appended to in
            place. A fresh dict is created when omitted.

    Returns:
        The history dict, with one entry per key appended for every epoch.

    Raises:
        ZeroDivisionError: If ``dl_train`` yields no samples.
    """
    if hist is None:
        hist = {'tloss': [], 'tacc': [], 'vloss': [], 'vacc': []}

    for epoch in range(1, n_epochs+1):
        # Validation puts the model in eval mode, so make sure every epoch
        # (not just the first one) actually runs in training mode.
        model.train()

        tr_loss, tr_acc = 0., 0.
        n_data = 0
        for im_batch, label_batch in dl_train:  # minibatch
            ypred = model(im_batch)
            loss_train = loss_fn(ypred, label_batch)

            optim.zero_grad()
            loss_train.backward()
            optim.step()

            n_batch = im_batch.shape[0]
            # number of correct predictions in this batch
            tr_acc += (torch.argmax(ypred.detach(), dim=1) == label_batch).sum().item()
            # weight the batch loss by batch size before averaging over samples
            tr_loss += loss_train.item() * n_batch
            n_data += n_batch

        # per-sample training statistics for this epoch
        tr_loss /= n_data
        tr_acc /= n_data

        val_loss, val_acc = performance(model, loss_fn, dl_val)

        # Report the first five epochs, every 1000th epoch and the last one.
        if epoch <= 5 or epoch % 1000 == 0 or epoch == n_epochs:
            print(f'Epoch {epoch}, tloss {tr_loss:.2f} t_acc: {tr_acc:.2f}  vloss {val_loss:.2f}  v_acc: {val_acc:.2f}')

        # record for history return
        hist['tloss'].append(tr_loss)
        hist['vloss'].append(val_loss)
        hist['tacc'].append(tr_acc)
        hist['vacc'].append(val_acc)

    print ('finished training_loop().')
    return hist

In [0]:
# Input size is 28*28 values per sample; the network has 10 outputs.
in_features, n_target = 28 * 28, 10

net = myNetwork(in_features, n_target)
# Show the module layout together with the first parameter tensor.
net, next(net.parameters())

(Sequential(
   (0): Linear(in_features=784, out_features=256, bias=True)
   (1): ReLU()
   (2): Linear(in_features=256, out_features=256, bias=True)
   (3): ReLU()
   (4): Linear(in_features=256, out_features=10, bias=True)
 ), Parameter containing:
 tensor([[-0.0077, -0.0213,  0.0346,  ...,  0.0118,  0.0356, -0.0304],
         [ 0.0237, -0.0006, -0.0015,  ..., -0.0290, -0.0213, -0.0260],
         [ 0.0042,  0.0187, -0.0089,  ...,  0.0215, -0.0115,  0.0296],
         ...,
         [ 0.0346,  0.0221,  0.0210,  ...,  0.0038, -0.0202,  0.0052],
         [-0.0093,  0.0320, -0.0206,  ..., -0.0070, -0.0265, -0.0071],
         [-0.0052, -0.0275, -0.0073,  ...,  0.0036,  0.0325, -0.0008]],
        requires_grad=True))

In [0]:
# Loss function, and an Adam optimizer bound to the parameters of `net`.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
# NOTE(review): this rebinds `batch_size` to len(data_loader), which is
# presumably the number of batches rather than a batch size - confirm intent.
batch_size = len(data_loader)

# NOTE(review): `model` is a second, separately initialised network; the
# optimizer above was built from `net`, not from `model`.
model = myNetwork(in_features, n_target)

In [0]:
# Train `net` for 10 epochs on the training loader, validating on the test
# loader after every epoch; `history` receives the per-epoch metrics.
history = training_loop(
    10,
    optimizer,
    net,
    loss_fn,
    data_loader_train,
    data_loader_test,
)

Epoch 1, tloss 0.21 t_acc: 0.94  vloss 0.12  v_acc: 0.96
Epoch 2, tloss 0.11 t_acc: 0.97  vloss 0.09  v_acc: 0.97
Epoch 3, tloss 0.08 t_acc: 0.98  vloss 0.15  v_acc: 0.96
Epoch 4, tloss 0.07 t_acc: 0.98  vloss 0.14  v_acc: 0.97
Epoch 5, tloss 0.07 t_acc: 0.98  vloss 0.14  v_acc: 0.97
Epoch 10, tloss 0.05 t_acc: 0.99  vloss 0.19  v_acc: 0.98
finished training_loop().
