<a href="https://colab.research.google.com/github/jwoonge/ML-projects/blob/master/11/assignment_11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Convolutional Neural Network for the classification task on MNIST
20141261 송제웅
<hr>
  
## 0. Import library
<hr>

In [2]:
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import matplotlib.pyplot as plt
import math

## 1. Download and Normalize dataset (MNIST)
---


In [None]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3081,)),
])

data_path = './MNIST'
data_train = datasets.MNIST(root = data_path, train = False, download = True, transform = transform)
data_test = datasets.MNIST(root = data_path, train = True, download = True, transform = transform)

## 2. Design Model
---

In [45]:
class CNN_module(nn.Module):
    def __init__(self, num_class=10, size_kernel=5):
        super(CNN_module, self).__init__()
        self.num_class = num_class
        self.size_kernel = size_kernel

        #### Feature Layer ####
        self.conv1 = nn.Conv2d(1, 20, kernel_size=self.size_kernel, stride=1, padding=int((size_kernel-1)/2), bias=True)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=self.size_kernel, stride=1, padding=int((size_kernel-1)/2), bias=True)

        self.conv_layer1 = nn.Sequential(self.conv1, nn.MaxPool2d(kernel_size=2), nn.ReLU(True))
        self.conv_layer2 = nn.Sequential(self.conv2, nn.MaxPool2d(kernel_size=2), nn.ReLU(True))

        self.feature = nn.Sequential(self.conv_layer1, self.conv_layer2)
    
        #### Classifier Layer ####
        self.fc1        = nn.Linear(50*7*7, 50, bias=True)
        self.fc2        = nn.Linear(50, num_class, bias=True)

        self.fc_layer1  = nn.Sequential(self.fc1, nn.ReLU(True))
        self.fc_layer2  = nn.Sequential(self.fc2, nn.Sigmoid())

        self.classifier = nn.Sequential(self.fc_layer1, self.fc_layer2)
        self._initialize_weight() 

    def _initialize_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight, gain=math.sqrt(2))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight, gain=math.sqrt(2))
                if m.bias is not None:
                    m.bias.data.zero_()
        
    def forward(self, x):
        x = self.feature(x)
        x = x.view(-1, 50*7*7)
        x = self.classifier(x)
        return x


## 3. Define functions for Learning
---

In [49]:
def train(model, data_train, data_train_batch, optimizer, criterion, device='cuda'):
    model.train()
    n_batch = 0
    avg_loss = 0
    avg_acc = 0
    for batch_idx, (x, y) in enumerate(data_train_batch):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model.forward(x)
        loss = criterion(pred, y)
        avg_loss += loss.item()
        avg_acc += accuracy(pred, y)
        n_batch += 1

        loss.backward()
        optimizer.step()
    avg_loss /= n_batch
    avg_acc /= n_batch
    return avg_loss, avg_acc

def test(model, x, y, criterion):
    model.eval()
    with torch.no_grad():
        pred = model.forward(x)
        loss = criterion(pred, y).item()
        acc = accuracy(pred, y)
    return loss, acc

def accuracy(pred, y):
    return 100*(torch.sum(torch.argmax(pred, dim=1)==y)).item()/len(y)

In [20]:
def learn(model, data_train, data_test, criterion, optimizer, batch_size, epoch, device='cuda'):
    data_train_batch = torch.utils.data.DataLoader(data_train, batch_size, shuffle=True)
    data_train = data_train.train_data.unsqueeze(1)
    test_x = data_test.test_data.unsqueeze(1)
    test_y = data_test.test_labels
    test_x, test_y = torch.tensor(test_x, dtype=torch.float, device=device), test_y.to(device)
    i = 0
    while i<epoch:
        losstest, acctest = test(model, test_x, test_y, criterion)
        losstrain, acctrain = train(model, data_train, data_train_batch, optimizer, criterion, device)
        
        loss_train.append(losstrain); loss_test.append(losstest)
        acc_train.append(acctrain); acc_test.append(acctest)
        i += 1
        print(len(loss_train),'\t', losstrain,'\t', acctrain,'\t', losstest,'\t', acctest)

In [7]:
is_cuda = torch.cuda.is_available()
device = torch.device('cuda' if is_cuda else 'cpu')

## 4. Learn
---

In [61]:
global loss_train, loss_test, acc_train, acc_test
loss_train = []; loss_test = []; acc_train = []; acc_test = []
model = CNN_module().to(device)
criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss2d()
while True:
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, weight_decay=0.0015)
    learn(model, data_train, data_test, criterion, optimizer, 32, 1, device)
    if acc_test[-1] > 95:
        break

  import sys


1 	 1.6348427469357134 	 83.37659744408946 	 2.4106104373931885 	 9.228333333333333
2 	 1.5131438353572029 	 95.74680511182109 	 1.5647214651107788 	 74.11833333333334
3 	 1.4965402996197295 	 96.96485623003196 	 1.5142992734909058 	 93.44666666666667
4 	 1.4889058221262483 	 97.81349840255591 	 1.5016241073608398 	 93.40166666666667
5 	 1.4837122744264695 	 98.10303514376997 	 1.4959627389907837 	 94.44333333333333
6 	 1.4807250789179207 	 98.3626198083067 	 1.493674635887146 	 94.99
7 	 1.4786829502818684 	 98.59225239616613 	 1.491932988166809 	 95.01333333333334


In [None]:
while True:
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.0015)
    learn(model, data_train, data_test, criterion, optimizer, 64, 1, device)
    if acc_test[-1] > 97:
        break

In [None]:
while True:
    optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01, weight_decay=0.0015)
    learn(model, data_train, data_test, criterion, optimizer, 32, 1, device)
    if loss_test[-1] > loss_test[-2] > loss_test[-3] > loss_test[-4] > loss_test[-5]:
        for i in range(4):
            del loss_test[-1]; del loss_train[-1]; del acc_test[-1]; del acc_train[-1]
        break

In [65]:
print(acc_test[-1])

96.88
