# COMP 4331 Tutorial 8 Neural Networks

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

In [2]:
def set_random_seed(seed):
    """
    Seed the global random number generators of PyTorch and NumPy.

    :param seed: integer seed applied to both libraries
    :return: None
    """
    # Same order as before: PyTorch first, then NumPy.
    for seed_fn in (torch.manual_seed, np.random.seed):
        seed_fn(seed)


set_random_seed(1)

def count_acc(logits, label):
    """
    Fraction of samples whose highest-scoring class equals the label.

    :param logits: n * c; n=num of samples, c=number of classes
    :param label: n; the true class index of every sample
    :return: accuracy in [0, 1] as a Python float
    """
    predicted_class = logits.argmax(dim=1)
    is_correct = (predicted_class == label).float()
    return is_correct.mean().item()

## Loading MNIST dataset (train + test)
* Note: downloading the data takes about 5–10 seconds.

In [3]:
# Prepare dataset: the MNIST train and test splits are stored under data/
# (download=True fetches them when needed) and every image is passed through
# ToTensor() when it is read.
mnist_training_set = torchvision.datasets.MNIST(
    root="data/",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
mnist_test_set = torchvision.datasets.MNIST(
    root="data/",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Small batches for the SGD updates, large batches for evaluation.
# No shuffle argument is passed, so the DataLoader default applies.
mnist_train_loader = torch.utils.data.DataLoader(dataset=mnist_training_set, batch_size=32)
mnist_test_loader = torch.utils.data.DataLoader(dataset=mnist_test_set, batch_size=1024)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting data/MNIST\raw\train-images-idx3-ubyte.gz to data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST\raw\train-labels-idx1-ubyte.gz


102.8%


Extracting data/MNIST\raw\train-labels-idx1-ubyte.gz to data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting data/MNIST\raw\t10k-images-idx3-ubyte.gz to data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST\raw\t10k-labels-idx1-ubyte.gz


112.7%

Extracting data/MNIST\raw\t10k-labels-idx1-ubyte.gz to data/MNIST\raw






## Train a model on the training set

In [4]:
def train_a_model(model, num_epoch=3, train_loader=None, lr=0.01, momentum=0.9,
                  log_interval=200):
    """
    Train a model with SGD (with momentum) on a cross-entropy loss.

    The average loss and accuracy of the most recent `log_interval` iterations
    are printed every `log_interval` iterations. The statistics are reset after
    every print and at the start of every epoch, so iterations after the last
    multiple of `log_interval` in an epoch are never reported.

    :param model: nn.Module; called on a batch x, must return logits (n * c)
    :param num_epoch: number of passes over the training data
    :param train_loader: iterable of (x, y_true) batches that supports len();
        defaults to the notebook-level `mnist_train_loader`
    :param lr: learning rate passed to optim.SGD
    :param momentum: momentum passed to optim.SGD
    :param log_interval: number of iterations between two progress messages
    :return: None; the model parameters are updated in place
    :raises ValueError: if log_interval is smaller than 1
    """
    if log_interval < 1:
        raise ValueError("log_interval must be >= 1, got {}".format(log_interval))
    if train_loader is None:
        # Backward-compatible default: the loader built earlier in this notebook.
        train_loader = mnist_train_loader

    # define the optimizer
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for i_epoch in range(num_epoch):
        # (re-)enable training mode at the start of every epoch
        model.train(True)
        loss_list = []
        acc_list = []
        for i_iter, (x, y_true) in enumerate(train_loader, start=1):

            # make prediction; call the module itself rather than .forward()
            # so that hooks registered on the model are executed
            y_pred = model(x)

            # compute the loss
            loss = F.cross_entropy(y_pred, y_true)

            # set gradient = 0 before applying back-prop
            optimizer.zero_grad()

            # apply back-prop, compute the gradient
            loss.backward()

            # update the network weights
            optimizer.step()

            # record the statistics of this batch (computed from the
            # prediction made before the weight update)
            loss_list.append(loss.item())
            acc_list.append(count_acc(y_pred, y_true))

            # print msg during training
            if i_iter % log_interval == 0:
                # note: the epoch index in the message is 0-based,
                # the iteration index is 1-based
                msg = "TRAIN epoch: {}/{} iter: {}/{} \t loss: {:.4f}, acc: {:.2f}%".format(
                    i_epoch, num_epoch, i_iter, len(train_loader),
                    np.mean(loss_list), np.mean(acc_list) * 100)
                print(msg)
                loss_list = []
                acc_list = []

## Evaluate a model on the test set

In [7]:
def evaluate_a_model(model, test_loader=None):
    """
    Evaluate a model and print its average cross-entropy loss and accuracy.

    Both metrics are averaged over samples, not over batches: the loss is
    summed per batch and correct predictions are counted, then both are
    divided by the total number of samples. This keeps the result exact when
    the last batch is smaller than the others.

    :param model: nn.Module; called on a batch x, must return logits (n * c).
        It is switched to evaluation mode and left in that mode.
    :param test_loader: iterable of (x, y_true) batches; defaults to the
        notebook-level `mnist_test_loader`
    :return: None; the result is printed
    """
    if test_loader is None:
        # Backward-compatible default: the loader built earlier in this notebook.
        test_loader = mnist_test_loader

    model.train(False)
    loss_sum = 0.0
    num_correct = 0
    num_samples = 0
    with torch.no_grad():
        for x, y_true in test_loader:
            # make prediction; call the module itself rather than .forward()
            # so that hooks registered on the model are executed
            y_pred = model(x)

            # accumulate the summed (not averaged) loss of this batch
            loss_sum += F.cross_entropy(y_pred, y_true, reduction="sum").item()

            # accumulate the number of correctly classified samples
            num_correct += (torch.argmax(y_pred, dim=1) == y_true).sum().item()
            num_samples += y_true.size(0)

    if num_samples > 0:
        loss_avg = loss_sum / num_samples
        acc_avg = num_correct / num_samples
    else:
        # nothing was evaluated: report nan instead of dividing by zero
        loss_avg = acc_avg = float("nan")

    msg = "TEST loss : {:.4f}, acc: {:.2f}%".format(loss_avg, acc_avg*100)
    print(msg)

## Use MLP
![MLP](https://guillaumebrg.files.wordpress.com/2016/01/mnist_and_mlp1.png)

In [8]:
class MLP(nn.Module):
    """
    Multi-layer perceptron with one hidden layer for 1 x 28 x 28 inputs:
    flatten (784) -> linear (300) -> ReLU -> linear (10).
    """

    def __init__(self):
        super().__init__()
        n_pixels, n_hidden, n_classes = 1 * 28 * 28, 300, 10  # 784 input features
        self.fc1 = nn.Linear(n_pixels, n_hidden)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """
        x: batch x 1 x 28 x 28
        return: batch x 10, one score per class
        """
        flat = x.flatten(start_dim=1)        # batch x 784
        hidden = self.relu(self.fc1(flat))   # batch x 300
        return self.fc2(hidden)              # batch x 10


mlp_model = MLP()
print(mlp_model)

MLP(
  (fc1): Linear(in_features=784, out_features=300, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=300, out_features=10, bias=True)
)


In [9]:
# Train the MLP for three epochs; train_a_model prints the running loss/accuracy.
train_a_model(mlp_model, num_epoch=3)


TRAIN epoch: 0/3 iter: 200/1875 	 loss: 0.9336, acc: 76.62%
TRAIN epoch: 0/3 iter: 400/1875 	 loss: 0.4157, acc: 87.95%
TRAIN epoch: 0/3 iter: 600/1875 	 loss: 0.3649, acc: 89.22%
TRAIN epoch: 0/3 iter: 800/1875 	 loss: 0.3032, acc: 91.25%
TRAIN epoch: 0/3 iter: 1000/1875 	 loss: 0.3023, acc: 91.33%
TRAIN epoch: 0/3 iter: 1200/1875 	 loss: 0.2534, acc: 92.47%
TRAIN epoch: 0/3 iter: 1400/1875 	 loss: 0.2457, acc: 92.89%
TRAIN epoch: 0/3 iter: 1600/1875 	 loss: 0.2469, acc: 92.81%
TRAIN epoch: 0/3 iter: 1800/1875 	 loss: 0.1957, acc: 94.20%
TRAIN epoch: 1/3 iter: 200/1875 	 loss: 0.1687, acc: 95.11%
TRAIN epoch: 1/3 iter: 400/1875 	 loss: 0.1898, acc: 94.31%
TRAIN epoch: 1/3 iter: 600/1875 	 loss: 0.1666, acc: 95.19%
TRAIN epoch: 1/3 iter: 800/1875 	 loss: 0.1549, acc: 95.58%
TRAIN epoch: 1/3 iter: 1000/1875 	 loss: 0.1609, acc: 95.06%
TRAIN epoch: 1/3 iter: 1200/1875 	 loss: 0.1460, acc: 95.72%
TRAIN epoch: 1/3 iter: 1400/1875 	 loss: 0.1434, acc: 95.78%
TRAIN epoch: 1/3 iter: 1600/1875

In [10]:
# Print the trained MLP's loss and accuracy on the MNIST test set.
evaluate_a_model(mlp_model)

TEST loss : 0.1076, acc: 96.64%


## Use CNN
![CNN](https://www.linkpicture.com/q/cnn_revised.png)

In [11]:
class CNN(nn.Module):
    """
    Small convolutional network for 1 x 28 x 28 inputs:
    5x5 conv (16 channels) -> ReLU -> 2x2 max-pool -> flatten -> linear (10).
    """

    def __init__(self):
        super().__init__()
        # No padding: the 5x5 kernel turns 28 x 28 into 24 x 24.
        self.conv = nn.Conv2d(1, 16, kernel_size=5, padding=0)
        # Halves both spatial sides: 24 x 24 -> 12 x 12.
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Input size is the flattened pooled feature map: 16 x 12 x 12 = 2304.
        self.fc = nn.Linear(16 * 12 * 12, 10)

    def forward(self, x):
        """
        x: batch x 1 x 28 x 28
        return: batch x 10, one score per class
        """
        feature_maps = F.relu(self.conv(x))          # C1: batch x 16 x 24 x 24
        pooled = self.max_pool(feature_maps)         # P2: batch x 16 x 12 x 12
        flat = pooled.flatten(start_dim=1)           # flattened P2: batch x 2304
        return self.fc(flat)                         # F3: batch x 10


cnn_model = CNN()
print(cnn_model)

CNN(
  (conv): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=2304, out_features=10, bias=True)
)


In [12]:
# Train the CNN for three epochs; train_a_model prints the running loss/accuracy.
train_a_model(cnn_model, num_epoch=3)

TRAIN epoch: 0/3 iter: 200/1875 	 loss: 0.5914, acc: 82.62%
TRAIN epoch: 0/3 iter: 400/1875 	 loss: 0.3268, acc: 90.20%
TRAIN epoch: 0/3 iter: 600/1875 	 loss: 0.2192, acc: 93.81%
TRAIN epoch: 0/3 iter: 800/1875 	 loss: 0.1561, acc: 95.42%
TRAIN epoch: 0/3 iter: 1000/1875 	 loss: 0.1475, acc: 95.75%
TRAIN epoch: 0/3 iter: 1200/1875 	 loss: 0.1253, acc: 96.53%
TRAIN epoch: 0/3 iter: 1400/1875 	 loss: 0.1057, acc: 96.89%
TRAIN epoch: 0/3 iter: 1600/1875 	 loss: 0.1200, acc: 96.45%
TRAIN epoch: 0/3 iter: 1800/1875 	 loss: 0.0880, acc: 97.45%
TRAIN epoch: 1/3 iter: 200/1875 	 loss: 0.0831, acc: 97.59%
TRAIN epoch: 1/3 iter: 400/1875 	 loss: 0.0849, acc: 97.70%
TRAIN epoch: 1/3 iter: 600/1875 	 loss: 0.0747, acc: 97.66%
TRAIN epoch: 1/3 iter: 800/1875 	 loss: 0.0645, acc: 98.20%
TRAIN epoch: 1/3 iter: 1000/1875 	 loss: 0.0770, acc: 97.81%
TRAIN epoch: 1/3 iter: 1200/1875 	 loss: 0.0715, acc: 97.78%
TRAIN epoch: 1/3 iter: 1400/1875 	 loss: 0.0654, acc: 98.14%
TRAIN epoch: 1/3 iter: 1600/1875

In [13]:
# Print the trained CNN's loss and accuracy on the MNIST test set.
evaluate_a_model(cnn_model)

TEST loss : 0.0536, acc: 98.08%
