In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

from torch.utils.data import DataLoader, Dataset 
from torchvision import datasets, transforms

In [2]:
batch_size = 32
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

train_dataset = datasets.MNIST(root='./mnist_data/', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root='./mnist_data/', train=False, download=True, transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist_data/MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./mnist_data/MNIST\raw\train-images-idx3-ubyte.gz to ./mnist_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST\raw\train-labels-idx1-ubyte.gz


102.8%


Extracting ./mnist_data/MNIST\raw\train-labels-idx1-ubyte.gz to ./mnist_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./mnist_data/MNIST\raw\t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST\raw\t10k-labels-idx1-ubyte.gz


112.7%

Extracting ./mnist_data/MNIST\raw\t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST\raw






In [14]:
print(len(train_dataset)) # 첫 번째 데이터의 개수를 파악한다.
print(train_dataset.data.shape)# 두 번째 데이터의 shape를 파악한다.
print(train_dataset.classes) # 세 번째 데이터의 클래스들을 파악한다.
# 네 번째 모델의 입력과 출력 사이즈를 정한다.

60000
torch.Size([60000, 28, 28])
['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']


## 퀴즈 (Easy)  
Multi Layered Perceptrion 을 통해 MNIST 데이터셋을 분류하려면  
1) 모델의 첫 번째 레이어의 shape 784  
2) 모델의 출력크기는 어떻게 되어야할까요?  

## 퀴즈 (Normal)  
간단한 MLP 모델을 구현해봅시다. 
1) 레이어 수는 총 4개로 (784, 512, 256, 128) 개의 뉴런이 존재합니다. 
  
  
2) 활성화함수는 relu를 사용합니다.  

3) forward 함수에 x를 처음 입력받을 때 기존에 배운 flatten() 또는 reshape() 또는 view()를 활용해서 일차원 벡터로 변환하세요

In [None]:
# 활성화함수 사용하는 방법
# x = F.relu(x)

In [19]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.l1 = nn.Linear(784, 512)
        self.l2 = nn.Linear(512, 256)
        self.l3 = nn.Linear(256, 128)
        self.l4 = nn.Linear(128, 10)
        
    def forward(self, x):
        # Flatten the data (batch_size, 1, 28, 28)-> (batch_size, 784)
        x = x.view(-1, 784)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        y_pred = F.softmax(self.l4(x))
        # F.softmax()
        
        return y_pred

이제 train, test 함수를 작성해보겠습니다.  

## train, test 함수 작성

In [20]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
model = MLP().to(device)
ce_loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
total_params = sum(p.numel() for p in model.parameters())
print(total_params)

def train(epoch, model, loss_func, train_loader, optimizer):
    model.train()
    for batch_index, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        y_pred = model(x)
        loss = loss_func(y_pred, y)
        loss.backward()
        optimizer.step()
        if batch_index % 100 == 0:
            print(f'Train Epoch: {epoch+1} | Batch Status: {batch_index*len(x)}/{len(train_loader.dataset)} \
            ({100. * batch_index * batch_size / len(train_loader.dataset):.0f}% | Loss: {loss.item():.6f}')

def test(model, loss_func, test_loader):
    model.eval()
    test_loss = 0
    correct_count = 0
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        y_pred = model(x)
        test_loss += loss_func(y_pred, y).item()
        pred = y_pred.data.max(1, keepdim=True)[1]
        # torch.eq : Computes element-wise equality. return counts value
        correct_count += pred.eq(y.data.view_as(pred)).cpu().sum()
    
    test_loss /= len(test_loader.dataset)
    print(f'=======================\n Test set: Average loss: {test_loss:.4f}, Accuracy: {correct_count/len(test_loader.dataset):.3}')
    

cpu
567434


## train 및 test 실행  

In [17]:
test(model, ce_loss, test_loader)

 Test set: Average loss: 0.0721, Accuracy: 0.0958


In [21]:
for epoch in range(0,1):
    train(epoch, model, ce_loss, train_loader, optimizer)
    test(model, ce_loss, test_loader)


  from ipykernel import kernelapp as app


Train Epoch: 1 | Batch Status: 0/60000             (0% | Loss: 2.301915
Train Epoch: 1 | Batch Status: 3200/60000             (5% | Loss: 1.682014
Train Epoch: 1 | Batch Status: 6400/60000             (11% | Loss: 1.712533
Train Epoch: 1 | Batch Status: 9600/60000             (16% | Loss: 1.589786
Train Epoch: 1 | Batch Status: 12800/60000             (21% | Loss: 1.642243
Train Epoch: 1 | Batch Status: 16000/60000             (27% | Loss: 1.639403
Train Epoch: 1 | Batch Status: 19200/60000             (32% | Loss: 1.572750
Train Epoch: 1 | Batch Status: 22400/60000             (37% | Loss: 1.567739
Train Epoch: 1 | Batch Status: 25600/60000             (43% | Loss: 1.563347
Train Epoch: 1 | Batch Status: 28800/60000             (48% | Loss: 1.686077
Train Epoch: 1 | Batch Status: 32000/60000             (53% | Loss: 1.612079
Train Epoch: 1 | Batch Status: 35200/60000             (59% | Loss: 1.525046
Train Epoch: 1 | Batch Status: 38400/60000             (64% | Loss: 1.514946
Train Ep

# MNIST Classification with CNN

In [22]:
class CNN(nn.Module):
    def __init__(self, C, W, H, K, S): # 채널, 너비, 높이, 커널 사이즈, 스트라이드
        super(CNN, self).__init__()
        # nn.Module에는 이미 conv 레이어가 구현되어 있다. 
        # 배치정규화도 구현되어있고 다 구현되어있습니다. 
        self.conv1 = nn.Conv2d(C, 32, kernel_size=K, stride=S)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=K, stride=S)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=K, stride=S)
        self.bn3 = nn.BatchNorm2d(128)
        
        def conv2d_size_out(size, kernel_size=K, stride=S):
            print((size - (kernel_size - 1) - 1) // stride + 1)
            return (size - (kernel_size - 1) - 1) // stride + 1
        
        convw = conv2d_size_out(W, K, S)
        convw = conv2d_size_out(convw, K, S)
        convw = conv2d_size_out(convw, K, S)
        
        self.linear_input_size = convw * convw * 128
        self.fc = nn.Linear(self.linear_input_size, 10)
        
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = x.view(x.size(0), -1) # (batch_size, flatten_size)
        x = F.relu(self.fc(x))
        return F.log_softmax(x)
        

In [27]:
cnn = CNN(C=1, W=28, H=28, K=3, S=2) 
cnn = cnn.to(device)
ce_loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr=0.001)

13
6
2


In [28]:
total_params = sum(p.numel() for p in cnn.parameters())
print(total_params)

98250


In [29]:
for epoch in range(0,1):
    train(epoch, cnn, ce_loss, train_loader, optimizer)
    test(cnn, ce_loss, test_loader)



Train Epoch: 1 | Batch Status: 0/60000             (0% | Loss: 2.295867
Train Epoch: 1 | Batch Status: 3200/60000             (5% | Loss: 0.512540
Train Epoch: 1 | Batch Status: 6400/60000             (11% | Loss: 0.586473
Train Epoch: 1 | Batch Status: 9600/60000             (16% | Loss: 0.749562
Train Epoch: 1 | Batch Status: 12800/60000             (21% | Loss: 0.246079
Train Epoch: 1 | Batch Status: 16000/60000             (27% | Loss: 0.457897
Train Epoch: 1 | Batch Status: 19200/60000             (32% | Loss: 0.641234
Train Epoch: 1 | Batch Status: 22400/60000             (37% | Loss: 0.601799
Train Epoch: 1 | Batch Status: 25600/60000             (43% | Loss: 0.436592
Train Epoch: 1 | Batch Status: 28800/60000             (48% | Loss: 0.588477
Train Epoch: 1 | Batch Status: 32000/60000             (53% | Loss: 0.455620
Train Epoch: 1 | Batch Status: 35200/60000             (59% | Loss: 0.710014
Train Epoch: 1 | Batch Status: 38400/60000             (64% | Loss: 0.508702
Train Ep

In [30]:
class ResNet(nn.Module):
    def __init__(self, in_channels, num_layers, block, num_classes=10):
        super(ResNet, self).__init__()
        self.num_layers = num_layers
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)

        # feature map size = 32x32x16
        self.layers_2n = self.get_layers(block, 16, 16, stride=1)
        # feature map size = 16x16x32
        self.layers_4n = self.get_layers(block, 16, 32, stride=2)
        # feature map size = 8x8x64
        self.layers_6n = self.get_layers(block, 32, 64, stride=2)

        # output layers
        # self.avg_pool = nn.AvgPool2d(8, stride=1)
        self.fc_out = nn.Linear(49 * 64, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def get_layers(self, block, in_channels, out_channels, stride):
        if stride == 2:
            down_sample = True
        else:
            down_sample = False

        layers_list = nn.ModuleList(
            [block(in_channels, out_channels, stride, down_sample)])

        for _ in range(self.num_layers - 1):
            layers_list.append(block(out_channels, out_channels))

        return nn.Sequential(*layers_list)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.layers_2n(x)
        x = self.layers_4n(x)
        x = self.layers_6n(x)

        #x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc_out(x)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, down_sample=False):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.stride = stride

        if down_sample:
            self.down_sample = IdentityPadding(in_channels, out_channels, stride)
        else:
            self.down_sample = None

    def forward(self, x):
        shortcut = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.down_sample is not None:
            shortcut = self.down_sample(x)

        out += shortcut
        out = self.relu(out)
        return out

class IdentityPadding(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(IdentityPadding, self).__init__()

        self.pooling = nn.MaxPool2d(1, stride=stride)
        self.add_channels = out_channels - in_channels

    def forward(self, x):
        out = F.pad(x, (0, 0, 0, 0, 0, self.add_channels))
        out = self.pooling(out)
        return out


def resnet_model():
    block = ResidualBlock
    model = ResNet(1, 3, block)
    return model



In [31]:
resnet = resnet_model()
ce_loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
total_params = sum(p.numel() for p in resnet.parameters())
print(total_params)

300154


In [None]:
for epoch in range(0,1):
    train(epoch, resnet, ce_loss, train_loader, optimizer)
    test(resnet, ce_loss, test_loader)

Train Epoch: 1 | Batch Status: 0/60000             (0% | Loss: 2.971932
Train Epoch: 1 | Batch Status: 3200/60000             (5% | Loss: 2.700802
Train Epoch: 1 | Batch Status: 6400/60000             (11% | Loss: 3.128146
Train Epoch: 1 | Batch Status: 9600/60000             (16% | Loss: 2.539647
Train Epoch: 1 | Batch Status: 12800/60000             (21% | Loss: 2.866781
Train Epoch: 1 | Batch Status: 16000/60000             (27% | Loss: 2.788840
Train Epoch: 1 | Batch Status: 19200/60000             (32% | Loss: 3.067997
Train Epoch: 1 | Batch Status: 22400/60000             (37% | Loss: 2.649715
Train Epoch: 1 | Batch Status: 25600/60000             (43% | Loss: 2.787101
Train Epoch: 1 | Batch Status: 28800/60000             (48% | Loss: 3.007999
Train Epoch: 1 | Batch Status: 32000/60000             (53% | Loss: 2.727302
Train Epoch: 1 | Batch Status: 35200/60000             (59% | Loss: 2.915809
Train Epoch: 1 | Batch Status: 38400/60000             (64% | Loss: 2.796563
Train Ep

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

## Assignment  

1) cifar10 데이터셋, 데이터로더를 할당하는 코드를 실행해서 데이터셋을 다운받으세요.  
2) 데이터셋의 채널과 모양을 확인해보세요. 그리고 채널과 모양은 CNN을 구현할 때 반영하세요.  
3) conv layer가 5개인 CNN 모델을 구현하세요 모델의 총 파라미터 수는 약 30만개여야 합니다. 배치정규화를 사용하세요.    
4) 구현한 CNN 모델을 학습시키세요. learning_rate=0.001, optimizer=Adam, Epochs=100  
5) 구현되어 있는 Resnet 모델을 불러와서 학습시켜보세요. 옵티마이저와 하이퍼파라미터는 기존과 동일합니다.  
6) 두 모델 중 어느 것이 더 좋은가요?  
7) 필요하다면 구글링을 적극적으로 하세요!!   