In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init

In [7]:
device = 'cpu'

torch.manual_seed(777)

<torch._C.Generator at 0x1d5c2787450>

#### hyperparameters

In [8]:
learning_rate = 0.001
epochs = 15
batch_size = 100

#### datasets

In [17]:
mnist_train = dsets.MNIST(root='MNIST_data/',
                         train=True,
                         transform=transforms.ToTensor(),
                         download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                        train=False,
                        transform=transforms.ToTensor(),
                        download=True)

dataloader = torch.utils.data.DataLoader(dataset=mnist_train,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        drop_last=True)

total_batch = len(dataloader)

#### model

In [30]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        # no.1 layer
        self.layer1 = nn.Sequential(
        nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2,stride=2))
        
        #no.2 layer
        self.layer2 = nn.Sequential(
        nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))
        
        # FC layer
        self.fc = nn.Linear(7 * 7 * 64, 10 ,bias=True)
        
        # FC layer의 가중치 초기화
        torch.nn.init.xavier_uniform_(self.fc.weight)
        
        
    def forward(self, x) :
        out = self.layer1(x)
        out = self.layer2(out)
        
        # FC layer를 위한 Flatten
        # 첫번째 차원인 배치 차원은 그대로 두고 나머지는 펼쳐라
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        
        return out
    

In [22]:
model = CNN()
loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

#### training SGD 사용

In [24]:
for e in range(epochs) :
    avg_cost = 0
    
    for x, y in dataloader:
        
        x = x.to('cpu')
        y = y.to('cpu')
        pred = model(x)
        cost = loss(pred, y)
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print("Epoch : {}/{} Cost : {:.5f}".format(e,epochs,avg_cost))

Epoch : 0/15 Cost : 1.68850
Epoch : 1/15 Cost : 0.98371
Epoch : 2/15 Cost : 0.63826
Epoch : 3/15 Cost : 0.50615
Epoch : 4/15 Cost : 0.43932
Epoch : 5/15 Cost : 0.39801
Epoch : 6/15 Cost : 0.36912
Epoch : 7/15 Cost : 0.34742
Epoch : 8/15 Cost : 0.33004
Epoch : 9/15 Cost : 0.31570
Epoch : 10/15 Cost : 0.30334
Epoch : 11/15 Cost : 0.29263
Epoch : 12/15 Cost : 0.28288
Epoch : 13/15 Cost : 0.27435
Epoch : 14/15 Cost : 0.26594


#### training Adam 사용

In [47]:
import time

c = time.time()
print(type(c))
print("{:d}".format(int(time.time()-c)))

<class 'float'>
0


In [48]:
import time

model = CNN()
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print("Start Training!\n")
for e in range(epochs) :
    avg_cost = 0
    current_t = time.time()
    for x, y in dataloader:
        
        x = x.to('cpu')
        y = y.to('cpu')
        pred = model(x)
        cost = loss(pred, y)
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print("Epoch : {}/{} Cost : {:.5f} >> Time : {:d}s".format(e,epochs,avg_cost,int(time.time()-current_t)))

print("!! Done !!")

Start Training!

Epoch : 0/15 Cost : 0.23606 >> Time : 114s
Epoch : 1/15 Cost : 0.06315 >> Time : 110s
Epoch : 2/15 Cost : 0.04638 >> Time : 103s
Epoch : 3/15 Cost : 0.03815 >> Time : 107s
Epoch : 4/15 Cost : 0.03175 >> Time : 107s
Epoch : 5/15 Cost : 0.02747 >> Time : 102s
Epoch : 6/15 Cost : 0.02282 >> Time : 103s
Epoch : 7/15 Cost : 0.01982 >> Time : 113s
Epoch : 8/15 Cost : 0.01721 >> Time : 105s
Epoch : 9/15 Cost : 0.01512 >> Time : 108s
Epoch : 10/15 Cost : 0.01191 >> Time : 103s
Epoch : 11/15 Cost : 0.01067 >> Time : 105s
Epoch : 12/15 Cost : 0.00930 >> Time : 103s
Epoch : 13/15 Cost : 0.00827 >> Time : 101s
Epoch : 14/15 Cost : 0.00669 >> Time : 111s
!! Done !!


In [49]:
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())



Accuracy: 0.9657999873161316


### super(CNN, self).__init__() 의 의미 알아보기, 왜 CNN까지 포함 시키는지, 앞서 볼땐 그냥 super().__init__() 이였는데..

### torch.nn.init.xavier_uniform(self.fc.weight)의 fc layer의 가중치를 왜 초기화 시키는지?