In [12]:
import torch
import torchvision
import numpy as np
from torch import nn
from torchvision import transforms
import matplotlib.pyplot as plt

In [13]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
# `is_available` has to be *called*; the bare function object is always truthy,
# which would select 'cuda' even on a machine without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [14]:
# Network dimensions: each image is flattened to 28 * 28 = 784 inputs
# before it is fed to the model.
input_size = 28 * 28
hidden_size = 500
num_classes = 10

# Training schedule and optimizer settings.
num_epochs = 5
batch_size = 100
learning_rate = 1e-3

In [15]:
# Load MNIST in two stages.

# Stage 1: build the dataset objects. Only the training split passes
# download=True; the test split is created without a download flag.
train_dataset = torchvision.datasets.MNIST(
    root='../../data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='../../data',
    train=False,  # test split
    transform=transforms.ToTensor(),
)

# Stage 2: wrap each dataset in a DataLoader that yields mini-batches of
# `batch_size` samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,  # no need to shuffle the test data
)

### Model 생성하기

In [16]:
class NeuralNet(nn.Module):
  """Fully connected network with one hidden layer: Linear -> ReLU -> Linear.

  Args:
    input_size: number of features in each input row.
    hidden_size: number of units in the hidden layer.
    num_classes: number of output scores produced per input row.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Run the forward pass and return the output of the last linear layer.

    No softmax is applied here; the result is the raw output of `fc2`.
    """
    hidden = self.relu(self.fc1(x))
    return self.fc2(hidden)

# Instantiate the class defined above, then move the model to `device`
# (the GPU when one was selected).
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)

# Define the loss and the optimizer up front.
# CrossEntropyLoss already includes the softmax step, so the model output
# is passed to it directly.
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Number of mini-batches per epoch (60,000 / 100 = 600).
total_step = len(train_loader)

# Train for `num_epochs` passes over the training loader.
# Put the model in training mode explicitly so a re-run of this cell after an
# evaluation step does not depend on whatever mode was left behind.
model.train()

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # Flatten every image to one row of `input_size` values before feeding it
    # to the network. Using the config constant (instead of a literal 28*28)
    # keeps this in step with the size the model was built with.
    images = images.reshape(-1, input_size).to(device)
    labels = labels.to(device)

    # Forward pass
    pred = model(images)
    loss = loss_function(pred, labels)

    # Backward pass: clear old gradients, backpropagate, update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the current batch loss every 100 steps.
    if (i + 1) % 100 == 0:
      print(f"Epoch : [{epoch + 1}/{num_epochs}], Step : [{i + 1}/{total_step}], Loss : {loss.item():.4f}")

Epoch : [1/5], Step : [100/600], Loss : 0.2681
Epoch : [1/5], Step : [200/600], Loss : 0.1923
Epoch : [1/5], Step : [300/600], Loss : 0.1593
Epoch : [1/5], Step : [400/600], Loss : 0.1670
Epoch : [1/5], Step : [500/600], Loss : 0.0962
Epoch : [1/5], Step : [600/600], Loss : 0.1329
Epoch : [2/5], Step : [100/600], Loss : 0.1436
Epoch : [2/5], Step : [200/600], Loss : 0.1181
Epoch : [2/5], Step : [300/600], Loss : 0.0733
Epoch : [2/5], Step : [400/600], Loss : 0.0833
Epoch : [2/5], Step : [500/600], Loss : 0.1018
Epoch : [2/5], Step : [600/600], Loss : 0.0524
Epoch : [3/5], Step : [100/600], Loss : 0.1573
Epoch : [3/5], Step : [200/600], Loss : 0.0653
Epoch : [3/5], Step : [300/600], Loss : 0.0658
Epoch : [3/5], Step : [400/600], Loss : 0.0350
Epoch : [3/5], Step : [500/600], Loss : 0.0365
Epoch : [3/5], Step : [600/600], Loss : 0.1246
Epoch : [4/5], Step : [100/600], Loss : 0.0462
Epoch : [4/5], Step : [200/600], Loss : 0.0424
Epoch : [4/5], Step : [300/600], Loss : 0.0790
Epoch : [4/5]

In [17]:
# Test
#
# Gradient tracking is switched off for evaluation. Two equivalent ways:
#   1) with torch.no_grad():
#        some code
#      --> no backward pass will be run.
#   2) with torch.set_grad_enabled(False):
#
# Without this, memory needed for backpropagation is set aside by default.

# Switch to evaluation mode. NOTE: this leaves the model in eval mode, so call
# model.train() again before any further training.
model.eval()

with torch.no_grad():  # no gradients are needed when we are not training
  correct = 0
  total = 0

  for images, labels in test_loader:
    # Flatten each image to a row of `input_size` values, as in training.
    images = images.reshape(-1, input_size).to(device)
    labels = labels.to(device)

    outputs = model(images)
    # Predicted class = index of the largest score in each row. `.data` is
    # not needed here because autograd is already disabled by no_grad().
    predicted = outputs.argmax(dim=1)

    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  print(f"Accuracy of the Network on the Test Images : {100*correct/total}%")

# Persist the learned parameters to a checkpoint file.
torch.save(model.state_dict(), 'model.ckpt')

Accuracy of the Network on the Test Images : 97.97%
