In [34]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn as nn
import tqdm

In [24]:
if torch.cuda.is_available():
    device = 'cuda' 
else:
    device = 'cpu'
device

'cuda'

In [25]:
learning_rate = 0.001 # 1e-3
training_epochs = 15
batch_size = 128

In [26]:
mnist_train = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                          train=True, # True를 지정하면 훈련 데이터로 다운로드
                          transform=transforms.ToTensor(), # 텐서로 변환
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                         train=False, # False를 지정하면 테스트 데이터로 다운로드
                         transform=transforms.ToTensor(), # 텐서로 변환
                         download=True)


In [27]:
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True,) # 마지막배치 버린다.

In [28]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # conv2d, maxpool2d, relu, linear
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.linear = nn.Linear(7 * 7 * 64, 10)


    def forward(self, x):
        # 첫번째 CNN / Conv block => 14 * 14 * 32
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)

        # 두번째 CNN / Conv block => 7 * 7 * 64
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)

        # Fully Connected Layer (128, 64, 7, 7) -> (128, 3136)
        x = x.view(-1, 7 * 7 * 64)
        x = self.linear(x)
        
        return x

In [36]:
model = CNN()
print(model)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear): Linear(in_features=3136, out_features=10, bias=True)
)


In [37]:
model.to(device)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear): Linear(in_features=3136, out_features=10, bias=True)
)

In [38]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [39]:
len(data_loader)

468

In [40]:
for epoch in range(training_epochs):
    avg_cost = 0 # epoch마다 손실값 잡아주려고

    for x, y in tqdm.tqdm(data_loader):
        x = x.to(device)
        y = y.to(device)

        y_pred = model(x)

        loss = criterion(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        avg_cost += loss / len(data_loader)

    
    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))


100%|██████████| 468/468 [00:06<00:00, 76.19it/s]


[Epoch:    1] cost = 0.24957715


100%|██████████| 468/468 [00:06<00:00, 77.54it/s]


[Epoch:    2] cost = 0.0649716333


100%|██████████| 468/468 [00:06<00:00, 77.78it/s]


[Epoch:    3] cost = 0.0474621281


100%|██████████| 468/468 [00:06<00:00, 76.88it/s]


[Epoch:    4] cost = 0.0383858308


100%|██████████| 468/468 [00:06<00:00, 76.46it/s]


[Epoch:    5] cost = 0.0318559892


100%|██████████| 468/468 [00:06<00:00, 75.47it/s]


[Epoch:    6] cost = 0.0259004142


100%|██████████| 468/468 [00:06<00:00, 75.88it/s]


[Epoch:    7] cost = 0.0237787962


100%|██████████| 468/468 [00:06<00:00, 75.73it/s]


[Epoch:    8] cost = 0.0190994609


100%|██████████| 468/468 [00:06<00:00, 76.13it/s]


[Epoch:    9] cost = 0.0165469144


100%|██████████| 468/468 [00:06<00:00, 76.08it/s]


[Epoch:   10] cost = 0.0146973617


100%|██████████| 468/468 [00:06<00:00, 75.77it/s]


[Epoch:   11] cost = 0.0120879337


100%|██████████| 468/468 [00:06<00:00, 75.91it/s]


[Epoch:   12] cost = 0.0109384246


100%|██████████| 468/468 [00:06<00:00, 75.59it/s]


[Epoch:   13] cost = 0.00868007634


100%|██████████| 468/468 [00:06<00:00, 76.11it/s]


[Epoch:   14] cost = 0.00867997948


100%|██████████| 468/468 [00:06<00:00, 76.23it/s]

[Epoch:   15] cost = 0.00785908196





In [41]:
with torch.no_grad():
    x_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    y_test = mnist_test.test_labels.to(device)

    prediction = model(x_test)

    correct_prediction = torch.argmax(prediction, 1) == y_test

    accuarcy = correct_prediction.float().mean()

    print('Accuracy: ', accuarcy.item())



Accuracy:  0.9853000044822693
