In [72]:
import torch
from torch import nn, optim
import torch.nn.functional as F
import torch.utils.data
import torchvision
from torchvision import datasets, transforms;
import matplotlib.pyplot as plt
import numpy as np

# Load the MNIST training and test splits from ./datasets (downloading them
# if necessary); each image is converted to a tensor by ToTensor.
train_data = datasets.MNIST(
    root='./datasets',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_data = datasets.MNIST(
    root='./datasets',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
# Serve the data in mini-batches of batch_size samples. Only the training
# loader shuffles its samples; the test loader keeps the dataset order.
batch_size = 12
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=batch_size,
)


# dtype used for the network weights; it is also installed as PyTorch's
# default floating-point dtype.
weight_type = torch.float64
torch.set_default_dtype(weight_type)
# Define MLP neural network model
class MLP(nn.Module):
    """Fully connected classifier for flattened 28x28 images (10 classes).

    Four hidden ``Linear`` layers (512, 256, 128 and 64 units), each followed
    by ReLU, feed a final ``Linear`` layer that returns one raw score (logit)
    per class. No softmax is applied in ``forward``.

    Args:
        dtype: Floating-point dtype of every layer's parameters. When omitted,
            the module-level ``weight_type`` is used.
    """

    def __init__(self, dtype=None):
        super().__init__()
        if dtype is None:
            # Default to the dtype configured at module level.
            dtype = weight_type
        self.in_dim = 28 * 28  # input dimension
        self.out_dim = 10  # output dimension (classes 0-9)
        # Layer definitions
        self.fc1 = nn.Linear(self.in_dim, 512, dtype=dtype)
        self.fc2 = nn.Linear(512, 256, dtype=dtype)
        self.fc3 = nn.Linear(256, 128, dtype=dtype)
        self.fc4 = nn.Linear(128, 64, dtype=dtype)
        self.fc5 = nn.Linear(64, self.out_dim, dtype=dtype)

    def forward(self, x):
        """Return the class logits for a batch of inputs.

        Args:
            x: Tensor whose elements can be regrouped into rows of
                ``self.in_dim`` values; it is flattened to
                ``(-1, self.in_dim)`` before the first layer.

        Returns:
            Tensor with ``self.out_dim`` logits per row.
        """
        # reshape (rather than view) also accepts non-contiguous input.
        x = F.relu(self.fc1(x.reshape(-1, self.in_dim)))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        # The output layer has no activation: it returns raw logits.
        x = self.fc5(x)
        return x

    def accuracy(self, loader=None):
        """Return the fraction of samples this model classifies correctly.

        Args:
            loader: Iterable of ``(inputs, labels)`` batches. When omitted,
                the module-level ``test_loader`` is used.

        Returns:
            ``n_correct / n_total`` as a float, or ``0.0`` when ``loader``
            yields no samples.
        """
        if loader is None:
            loader = test_loader

        n_predict = 0
        n_correct = 0
        # Evaluate in eval mode and restore the previous mode afterwards so
        # that calling this in the middle of training has no lasting effect.
        was_training = self.training
        self.eval()
        try:
            # No gradients are needed for evaluation; skipping graph
            # construction saves time and memory.
            with torch.no_grad():
                for inputs, labels in loader:
                    outputs = self(inputs)
                    _, predicted = torch.max(outputs, 1)
                    n_predict += len(predicted)
                    n_correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        if n_predict == 0:
            return 0.0
        return n_correct / n_predict

    def size(self):
        """Return the memory taken by parameters and buffers, in MiB."""
        param_size = sum(
            param.nelement() * param.element_size()
            for param in self.parameters()
        )
        buffer_size = sum(
            buffer.nelement() * buffer.element_size()
            for buffer in self.buffers()
        )
        size_all_mb = (param_size + buffer_size) / 1024 ** 2
        return size_all_mb
    
model = MLP()

criterion = nn.CrossEntropyLoss()  # loss function
optimizer = optim.SGD(model.parameters(), lr=0.01)  # SGD over all model parameters
for epoch in range(5):
    # Batch iteration: with a batch size of 12 and 60,000 MNIST training
    # samples, each epoch runs 5,000 iterations.
    for i, (inputs, label) in enumerate(train_loader):
        step = i + 1  # 1-based iteration count used for logging
        # Reset gradients so values from the previous batch do not accumulate.
        model.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, label)  # compute the loss
        loss.backward()  # compute gradients
        optimizer.step()  # update the parameters
        # Report the loss of the current batch once every 1000 iterations.
        if step % 1000 != 0:
            continue
        print('[%d, %5d] loss: %.3f' % (epoch + 1, step, loss.item()))

# Final report: test accuracy, then the model's memory footprint.
final_accuracy = model.accuracy()
print("Accuracy: {}".format(final_accuracy))
model_size_mb = model.size()
print('Size: {:.3f} MB'.format(model_size_mb))

[1,  1000] loss: 2.270
[1,  2000] loss: 1.827
[1,  3000] loss: 0.343
[1,  4000] loss: 0.255
[1,  5000] loss: 0.070
[2,  1000] loss: 0.115
[2,  2000] loss: 0.282
[2,  3000] loss: 0.070
[2,  4000] loss: 0.175
[2,  5000] loss: 0.064
[3,  1000] loss: 0.463
[3,  2000] loss: 0.041
[3,  3000] loss: 0.060
[3,  4000] loss: 0.215
[3,  5000] loss: 0.135
[4,  1000] loss: 0.136
[4,  2000] loss: 0.007
[4,  3000] loss: 0.033
[4,  4000] loss: 0.016
[4,  5000] loss: 0.457
[5,  1000] loss: 0.182
[5,  2000] loss: 0.007
[5,  3000] loss: 0.009
[5,  4000] loss: 0.018
[5,  5000] loss: 0.031
Accuracy: 0.9714
Size: 4.387 MB
