In [1]:
import torch

from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

import torch.nn as nn
import torch.optim as opt
from torch.autograd import Variable

In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Both MNIST splits share the same storage root, download flag and
# per-sample transform (transforms.ToTensor()); only `train` differs.
mnist_kwargs = dict(root='./data', download=True, transform=transforms.ToTensor())
train_dataset = MNIST(train=True, **mnist_kwargs)
test_dataset = MNIST(train=False, **mnist_kwargs)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [4]:
# Hyperparameters: number of samples per batch for each split.
train_batch_size = 100
test_batch_size = 1000
# Backward-compatible alias for the original misspelled name, in case
# another cell still refers to it.
test_batch_szie = test_batch_size

# Training loader: batches of `train_batch_size` samples, shuffling enabled.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
)

# Test loader: batches of `test_batch_size` samples, shuffling disabled.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=test_batch_size,
    shuffle=False,
)

In [5]:
class CNN(nn.Module):
    """Small convolutional classifier: two 3x3 convolutions, one 2x2 max-pool, two linear layers.

    The first linear layer expects 14*14*128 flattened features. With
    padding=1 the 3x3 convolutions keep the spatial size and the stride-2
    pool halves it, so this matches 1-channel 28x28 inputs. The last layer
    emits 10 raw scores per sample; no softmax is applied inside the model.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 64 -> 128 channels, then 2x2 max-pooling.
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(stride=2, kernel_size=2)
        )

        # Classifier head: flattened features -> 1024 hidden units -> 10 scores.
        self.dense = nn.Sequential(
            nn.Linear(in_features=14*14*128, out_features=1024),
            nn.ReLU(),
            nn.Linear(1024, 10)
        )

    def forward(self, x):
        """Return one row of 10 class scores for every sample in ``x``.

        Args:
            x: image batch fed to the first convolution (1 input channel).

        Returns:
            Tensor with one row per input sample and 10 columns.

        Raises:
            RuntimeError: if the flattened feature size per sample is not
                14*14*128 (e.g. the input images are not 28x28).
        """
        features = self.conv_layers(x)
        # Flatten each sample separately. The previous `view(-1, 14*14*128)`
        # let the batch dimension absorb any size mismatch, so wrongly sized
        # inputs silently produced a different number of output rows instead
        # of failing.
        features = torch.flatten(features, start_dim=1)
        return self.dense(features)

In [6]:
# Instantiate the network and move its parameters to the selected device.
model = CNN().to(device)
model  # last expression of the cell: display the layer summary

CNN(
  (conv_layers): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)

In [7]:
# CrossEntropyLoss
class CustomCrossEntropyLoss(nn.Module):
    """Cross-entropy loss built from log-softmax followed by negative log-likelihood.

    The log-probabilities are computed with ``nn.LogSoftmax`` in a single
    step. Computing ``softmax`` first and taking ``torch.log`` afterwards
    underflows to ``log(0) = -inf`` when one logit is much larger than the
    others, which makes the loss infinite.
    """

    def __init__(self):
        super().__init__()
        # Normalise over dim=1, the class dimension of the (sample, class) scores.
        self.log_softmax = nn.LogSoftmax(dim=1)
        self.nll_loss = nn.NLLLoss()

    def forward(self, output, label):
        """Compute the loss for a batch of raw class scores.

        Args:
            output: raw (unnormalised) scores, classes along dim=1.
            label: integer class index for every sample.

        Returns:
            The value produced by ``nn.NLLLoss`` (default settings) on the
            log-probabilities.
        """
        log_probs = self.log_softmax(output)
        return self.nll_loss(log_probs, label)

In [8]:
# Hyperparameter: learning rate passed to the Adam optimizer.
learning_rate = 0.001

# Adam is given every parameter of `model`; the loss is the custom
# cross-entropy module.
optimizer = opt.Adam(params=model.parameters(), lr=learning_rate)
loss_func = CustomCrossEntropyLoss()

In [9]:
# Hyperparameter: number of full passes over the training loader.
num_epochs = 5

# Select training mode explicitly so the cell behaves the same when it is
# re-run after the model has been switched to evaluation mode.
model.train()

for epoch in range(num_epochs):
    for idx, (images, labels) in enumerate(train_loader):
        # Move the batch to the model's device. Plain tensors are used
        # directly; the deprecated Variable wrapper is not needed.
        images = images.to(device)
        labels = labels.to(device)

        # Standard optimisation step: clear old gradients, forward pass,
        # loss, backward pass, parameter update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the loss of the current batch every 100 batches;
        # .item() extracts it as a Python float for formatting.
        if (idx + 1) % 100 == 0:
            print("Epoch: %d, Batch: %d, Loss: %.4f" % (epoch + 1, idx + 1, loss.item()))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch: 1, Batch: 100, Loss: 0.1881
Epoch: 1, Batch: 200, Loss: 0.0336
Epoch: 1, Batch: 300, Loss: 0.0348
Epoch: 1, Batch: 400, Loss: 0.1354
Epoch: 1, Batch: 500, Loss: 0.0357
Epoch: 1, Batch: 600, Loss: 0.0206
Epoch: 2, Batch: 100, Loss: 0.0050
Epoch: 2, Batch: 200, Loss: 0.0434
Epoch: 2, Batch: 300, Loss: 0.0248
Epoch: 2, Batch: 400, Loss: 0.0408
Epoch: 2, Batch: 500, Loss: 0.0335
Epoch: 2, Batch: 600, Loss: 0.0098
Epoch: 3, Batch: 100, Loss: 0.0308
Epoch: 3, Batch: 200, Loss: 0.0469
Epoch: 3, Batch: 300, Loss: 0.0023
Epoch: 3, Batch: 400, Loss: 0.0103
Epoch: 3, Batch: 500, Loss: 0.0202
Epoch: 3, Batch: 600, Loss: 0.0090
Epoch: 4, Batch: 100, Loss: 0.0226
Epoch: 4, Batch: 200, Loss: 0.0062
Epoch: 4, Batch: 300, Loss: 0.0151
Epoch: 4, Batch: 400, Loss: 0.0390
Epoch: 4, Batch: 500, Loss: 0.0586
Epoch: 4, Batch: 600, Loss: 0.0022
Epoch: 5, Batch: 100, Loss: 0.0067
Epoch: 5, Batch: 200, Loss: 0.0071
Epoch: 5, Batch: 300, Loss: 0.0343
Epoch: 5, Batch: 400, Loss: 0.0002
Epoch: 5, Batch: 500

In [10]:
# Count correct top-1 predictions over the whole test loader.
correct = 0
total = 0

# Evaluation mode plus no_grad: no autograd graph is recorded for the
# test batches, which avoids needless memory use during inference.
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Predicted class = index of the largest score along dim 1.
        _, pred = torch.max(outputs, 1)

        # .item() keeps the running count a plain Python int.
        correct += (pred == labels).sum().item()
        total += labels.size(0)

print('Accuracy:%.3f%%' % (100.0 * float(correct) / float(total)))

Accuracy:98.830%
