In [1]:
from matplotlib import pyplot as plt
import numpy as np
from torchvision import transforms, datasets
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3081,)),  # mean value = 0.1307, standard deviation value = 0.3081
])

In [3]:
data_path = './MNIST'

data_test   = datasets.MNIST(root = data_path, train= True, download=True, transform= transform)
data_train  = datasets.MNIST(root = data_path, train= False, download=True, transform= transform)

In [4]:
print("the number of your training data (must be 10,000) = ", data_train.__len__())
print("hte number of your testing data (must be 60,000) = ", data_test.__len__())

the number of your training data (must be 10,000) =  10000
hte number of your testing data (must be 60,000) =  60000


In [5]:
data_loader = torch.utils.data.DataLoader(data_train, batch_size=20, shuffle=True, drop_last=True)

In [10]:
import torch.nn as nn
import torch.nn.functional as F
import torch

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # layer definition
        self.fc1 = nn.Linear(784, 256, bias = True)
        self.fc2 = nn.Linear(256, 256, bias = True)
        self.fc3 = nn.Linear(256, 10, bias = True)
        self.dropout = nn.Dropout(p=0.2)
        self.RELU = nn.ReLU()
        
        # weight init
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.xavier_uniform_(self.fc3.weight)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.RELU(x)
        x = self.dropout(x)
        
        x = self.fc2(x)
        x = self.RELU(x)
        x = self.dropout(x)
        
        x = self.fc3(x)
        return x
        
net = Net()

In [11]:
net

Net(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (RELU): ReLU()
)

In [12]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=2e-3, weight_decay=1e-3)

In [32]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [33]:
net.to(device)

Net(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=10, bias=True)
  (elu): ELU(alpha=1.0)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [25]:
net.train()
num_epochs = 20
for epoch in range(num_epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    for X, Y in data_loader: 
        X = X.view(-1, 28*28).to(device) 
        Y = Y.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        outputs = net(X)
        loss = criterion(outputs, Y)
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
    print("Epoch : {} loss: {}".format(epoch, running_loss))
print('Finished Training')

Epoch : 0 loss: 48.889449536800385
Epoch : 1 loss: 20.627399496734142
Epoch : 2 loss: 14.851273652166128
Epoch : 3 loss: 11.369889928027987
Epoch : 4 loss: 11.976506352424622
Epoch : 5 loss: 9.167719485238194
Epoch : 6 loss: 7.631057754158974
Epoch : 7 loss: 8.701541702263057
Epoch : 8 loss: 6.997101535089314
Epoch : 9 loss: 7.680372170172632
Epoch : 10 loss: 7.176578030921519
Epoch : 11 loss: 8.509250281378627
Epoch : 12 loss: 6.528269550763071
Epoch : 13 loss: 7.070951913483441
Epoch : 14 loss: 6.180322602391243
Epoch : 15 loss: 6.918470132164657
Epoch : 16 loss: 6.3951578345149755
Epoch : 17 loss: 5.929766229819506
Epoch : 18 loss: 5.616943866014481
Epoch : 19 loss: 6.78544154483825
Finished Training


In [27]:
with torch.no_grad(): 
    net.eval() 
    # dropout을 사용하지 않겠다는 의미. 
    # test dataset의 데이터 형태를 (batch*784)로 바꿔줌. 
    X_test = data_test.data.view(-1, 28*28).float().to(device) 
    Y_test = data_test.targets.to(device) 
    prediction = net(X_test) 
    # 각 배치별로 가장 높은 가능성의 숫자 클래스를 뽑아줌. 
    predicted_classes = torch.argmax(prediction, 1) 
    correct_count = (predicted_classes == Y_test) 
    # 맞는 개수의 평균을 내면 정확도가 나옴. 
    accuracy = correct_count.float().mean() 
    print(accuracy.item())

0.940583348274231
