In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

from torch.utils.data import DataLoader, Dataset 
from torchvision import datasets, transforms

In [2]:
batch_size = 32
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

train_dataset = datasets.MNIST(root='./mnist_data/', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root='./mnist_data/', train=False, download=True, transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist_data/MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./mnist_data/MNIST\raw\train-images-idx3-ubyte.gz to ./mnist_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST\raw\train-labels-idx1-ubyte.gz


102.8%


Extracting ./mnist_data/MNIST\raw\train-labels-idx1-ubyte.gz to ./mnist_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST\raw\t10k-images-idx3-ubyte.gz


94.4%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [7]:
print(len(train_dataset))
print(train_dataset.classes)
print(train_dataset.data.shape)

60000
['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
torch.Size([60000, 28, 28])


## 퀴즈 (Easy)  
Multi Layered Perceptrion 을 통해 MNIST 데이터셋을 분류하려면  
1) 모델의 첫 번째 레이어의 입력크기  
2) 모델의 출력크기는 어떻게 되어야할까요?  

## 퀴즈 (Easy)  
간단한 MLP 모델을 구현해봅시다. 
1) 은닉층은 총 4개로 (784, 512, 256, 128) 개의 뉴런이 존재합니다. 
  
  
2) 활성화함수는 relu를 사용합니다.  

3) forward 함수에 x를 처음 입력받을 때 기존에 배운 flatten() 또는 reshape() 또는 view()를 활용해서 일차원 벡터로 변환하세요

In [26]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.l1 = nn.Linear(784, 512)
        self.l2 = nn.Linear(512, 256)
        self.l3 = nn.Linear(256, 128)
        self.l4 = nn.Linear(128, 10)
    
    def forward(self, x):
        # Flatten the data (n, 1, 28, 28)-> (n, 784)
        x = x.view(-1, 784)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        y_pred = F.relu(self.l4(x))

        return y_pred
        

이제 train, test 함수를 작성해보겠습니다.  

In [54]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
model = Model().to(device)
ce_loss = nn.CrossEntropyLoss()
lr = 0.01
optimizer = optim.SGD(model.parameters(), lr=lr)

def train(epoch, model, loss_func, train_loader, optimizer):
    model.train()
    for batch_index, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)
        
        optimizer.zero_grad()
        y_pred = model(x)
        loss = loss_func(y_pred, y)
        loss.backward()
        optimizer.step()
        if batch_index % 100 == 0:
            print(f'Train Epoch: {epoch} | Batch Status: {batch_index*len(x)}/{len(train_loader.dataset)} \
            ({100. * batch_index * batch_size / len(train_loader.dataset):.0f}% | Loss: {loss.item():.6f}')

def test(model, loss_func, test_loader):
    model.eval()
    test_loss = 0
    correct_count = 0
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        #print(y)
        y_pred = model(x)
        #print(f"y_pred shape: {y_pred.shape}")
        #print(f"y_pred : {y_pred}")
        test_loss += loss_func(y_pred, y).item()
        pred = y_pred.data.max(1, keepdim=True)[1]
        # torch.eq : Computes element-wise equality. return counts value
         
        #print(f"pred : {pred}")
        #print(f"view_as : {y.data.view_as(pred)}")
        correct_count += pred.eq(y.data.view_as(pred)).cpu().sum()
    
    test_loss /= len(test_loader.dataset)
    print(f'=======================\n Test set: Average loss: {test_loss:.4f}, Accuracy: {correct_count}/{len(test_loader.dataset)}')
    

cpu


In [55]:
test(model, ce_loss, test_loader)


 Test set: Average loss: 0.0721, Accuracy: 1039/10000


In [41]:
for epoch in range(1,10):
    train(epoch, model, ce_loss, train_loader, optimizer)
    test(model, ce_loss, test_loader)


Train Epoch: 1 | Batch Status: 0/60000             (0% | Loss: 2.298524
Train Epoch: 1 | Batch Status: 3200/60000             (0% | Loss: 2.305743
Train Epoch: 1 | Batch Status: 6400/60000             (0% | Loss: 2.300616
Train Epoch: 1 | Batch Status: 9600/60000             (0% | Loss: 2.299551
Train Epoch: 1 | Batch Status: 12800/60000             (1% | Loss: 2.295114
Train Epoch: 1 | Batch Status: 16000/60000             (1% | Loss: 2.264555
Train Epoch: 1 | Batch Status: 19200/60000             (1% | Loss: 2.265674
Train Epoch: 1 | Batch Status: 22400/60000             (1% | Loss: 2.262136
Train Epoch: 1 | Batch Status: 25600/60000             (1% | Loss: 2.181224
Train Epoch: 1 | Batch Status: 28800/60000             (2% | Loss: 2.192803
Train Epoch: 1 | Batch Status: 32000/60000             (2% | Loss: 2.137085
Train Epoch: 1 | Batch Status: 35200/60000             (2% | Loss: 2.137395
Train Epoch: 1 | Batch Status: 38400/60000             (2% | Loss: 1.955891
Train Epoch: 1 | Ba

Train Epoch: 6 | Batch Status: 28800/60000             (2% | Loss: 0.535518
Train Epoch: 6 | Batch Status: 32000/60000             (2% | Loss: 0.947575
Train Epoch: 6 | Batch Status: 35200/60000             (2% | Loss: 0.835803
Train Epoch: 6 | Batch Status: 38400/60000             (2% | Loss: 1.090175
Train Epoch: 6 | Batch Status: 41600/60000             (2% | Loss: 1.055956
Train Epoch: 6 | Batch Status: 44800/60000             (2% | Loss: 0.842598
Train Epoch: 6 | Batch Status: 48000/60000             (2% | Loss: 0.827892
Train Epoch: 6 | Batch Status: 51200/60000             (3% | Loss: 1.066991
Train Epoch: 6 | Batch Status: 54400/60000             (3% | Loss: 1.072013
Train Epoch: 6 | Batch Status: 57600/60000             (3% | Loss: 0.777363
 Test set: Average loss: 0.0261, Accuracy: 6701/10000
Train Epoch: 7 | Batch Status: 0/60000             (0% | Loss: 0.958400
Train Epoch: 7 | Batch Status: 3200/60000             (0% | Loss: 1.128547
Train Epoch: 7 | Batch Status: 6400/600

# MNIST Classification with CNN

앞서, MLP 모델을 통해 MNIST 데이터셋을 분류해보았지만 성능이 좋지 않았습니다.  
이를 개선하기 위해 CNN을 직접 구현해봅시다.  

In [78]:
class CNN(nn.Module):
    def __init__(self, C, W, H, K, S):
        super(CNN, self).__init__()
        # nn.Module에는 이미 Conv 레이어가 구현되어 있습니다.
        # 마찬가지로 배치정규화 레이어도 구현되어 있습니다.
        self.conv1 = nn.Conv2d(C, 16, kernel_size=K, stride=S)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=K, stride=S)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=K, stride=S)
        self.bn3 = nn.BatchNorm2d(64)
        
        def conv2d_size_out(size, kernel_size=K, stride=S):
            print((size - (kernel_size - 1) - 1) // stride + 1)
            return (size - (kernel_size - 1) - 1) // stride + 1
        
        convw = conv2d_size_out(W, K, S)
        convw = conv2d_size_out(convw, K, S)        
        convw = conv2d_size_out(convw, K, S)        
        
        self.linear_input_size = convw * convw * 64
        self.fc = nn.Linear(self.linear_input_size, 10)
        
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = x.view(x.size(0), -1) # (batch_size, flaaten_size)
        x = F.relu(self.fc(x))
        return F.log_softmax(x)

In [79]:
model = CNN(C=1, W=28, H=28, K=3, S=2) 
model = model.to(device)
ce_loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

13
6
2


In [86]:
def train_cnn(epoch, model, train_loader, optimizer, loss_func):
    model.train()
    for batch_index, samples in enumerate(train_loader):
        data, target = samples
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        pred = model(data)
        loss = loss_func(pred, target)
        loss.backward()
        optimizer.step()
        
        if batch_index % 100 == 0:
            print(f'Train Epoch: {epoch} | Batch Status: {batch_index*len(data)}/{len(train_loader.dataset)} \
            ({100.0 * batch_index * batch_size / len(train_loader.dataset):.0f}% | Loss: {loss.item():.6f}')

            
def test_cnn(model, test_loader, loss_func):
    model.eval()
    test_loss = 0
    correct_counts = 0
    for _, samples in enumerate(test_loader):
        data, target = samples
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = loss_func(output, target)
        test_loss += loss.item()
        pred = output.data.max(1, keepdim=True)[1]
        correct_counts += pred.eq(target.data.view_as(pred)).cpu().sum()
  
    test_loss /= len(test_loader.dataset)
    print(f'=======================\n Test set: Average loss: {test_loss:.4f}, Accuracy: {correct_counts}/{len(test_loader.dataset)}' \
          f'({100.*correct_counts/len(test_loader.dataset):.0f}%)')
        

In [87]:
for epoch in range(1,3):
    train_cnn(epoch, model, train_loader, optimizer, ce_loss)
    test_cnn(model, test_loader, ce_loss)



Train Epoch: 1 | Batch Status: 0/60000             (0% | Loss: 0.360748
Train Epoch: 1 | Batch Status: 3200/60000             (5% | Loss: 0.435026
Train Epoch: 1 | Batch Status: 6400/60000             (11% | Loss: 0.470100
Train Epoch: 1 | Batch Status: 9600/60000             (16% | Loss: 0.381106
Train Epoch: 1 | Batch Status: 12800/60000             (21% | Loss: 0.525765
Train Epoch: 1 | Batch Status: 16000/60000             (27% | Loss: 0.433752
Train Epoch: 1 | Batch Status: 19200/60000             (32% | Loss: 0.363350
Train Epoch: 1 | Batch Status: 22400/60000             (37% | Loss: 0.803605
Train Epoch: 1 | Batch Status: 25600/60000             (43% | Loss: 0.537166
Train Epoch: 1 | Batch Status: 28800/60000             (48% | Loss: 0.478825
Train Epoch: 1 | Batch Status: 32000/60000             (53% | Loss: 0.438385
Train Epoch: 1 | Batch Status: 35200/60000             (59% | Loss: 0.656166
Train Epoch: 1 | Batch Status: 38400/60000             (64% | Loss: 0.216127
Train Ep

In [94]:
class ResNet(nn.Module):
    def __init__(self, in_channels, num_layers, block, num_classes=10):
        super(ResNet, self).__init__()
        self.num_layers = num_layers
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)

        # feature map size = 32x32x16
        self.layers_2n = self.get_layers(block, 16, 16, stride=1)
        # feature map size = 16x16x32
        self.layers_4n = self.get_layers(block, 16, 32, stride=2)
        # feature map size = 8x8x64
        self.layers_6n = self.get_layers(block, 32, 64, stride=2)

        # output layers
        # self.avg_pool = nn.AvgPool2d(8, stride=1)
        self.fc_out = nn.Linear(49 * 64, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def get_layers(self, block, in_channels, out_channels, stride):
        if stride == 2:
            down_sample = True
        else:
            down_sample = False

        layers_list = nn.ModuleList(
            [block(in_channels, out_channels, stride, down_sample)])

        for _ in range(self.num_layers - 1):
            layers_list.append(block(out_channels, out_channels))

        return nn.Sequential(*layers_list)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.layers_2n(x)
        x = self.layers_4n(x)
        x = self.layers_6n(x)

        #x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc_out(x)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, down_sample=False):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.stride = stride

        if down_sample:
            self.down_sample = IdentityPadding(in_channels, out_channels, stride)
        else:
            self.down_sample = None

    def forward(self, x):
        shortcut = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.down_sample is not None:
            shortcut = self.down_sample(x)

        out += shortcut
        out = self.relu(out)
        return out

class IdentityPadding(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(IdentityPadding, self).__init__()

        self.pooling = nn.MaxPool2d(1, stride=stride)
        self.add_channels = out_channels - in_channels

    def forward(self, x):
        out = F.pad(x, (0, 0, 0, 0, 0, self.add_channels))
        out = self.pooling(out)
        return out


def resnet_model():
    block = ResidualBlock
    model = ResNet(1, 5, block)
    return model

model = resnet_model()

In [95]:
for epoch in range(1,3):
    train_cnn(epoch, model, train_loader, optimizer, ce_loss)
    test_cnn(model, test_loader, ce_loss)

Train Epoch: 1 | Batch Status: 0/60000             (0% | Loss: 3.523117


KeyboardInterrupt: 