<a href="https://colab.research.google.com/github/EG-Kim/pytorch_example_01/blob/master/4_Feedforward_Neural_Network_with_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Feedforward Neural Network

![대체 텍스트](https://i.imgur.com/wJbR7EW.png)

모듈 불러오기

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

#### Device configuration

In [None]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

#### Hyper-parameters 

In [None]:
# Hyper-parameters
input_size = 28 * 28     # features per sample once a 28x28 image is flattened
hidden_size = 500        # units in the single hidden layer
num_classes = 10         # number of output scores per sample
num_epochs = 5           # full passes over the training loader
batch_size = 100         # samples per mini-batch
learning_rate = 1e-3     # step size handed to the optimizer

#### MNIST dataset 

In [None]:
# MNIST dataset
# Training split; download=True asks torchvision to fetch the files into `root`.
train_dataset = torchvision.datasets.MNIST(
    root='../../data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Test split, read from the same root directory (no download requested here).
test_dataset = torchvision.datasets.MNIST(
    root='../../data',
    transform=transforms.ToTensor(),
    train=False,
)

# Data loaders
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
)

# Test batches keep the dataset order.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=batch_size,
)

#### Fully connected neural network with one hidden layer

In [None]:
# Fully connected classifier defined by subclassing torch.nn.Module.
class NeuralNet(nn.Module):
    """Feedforward network with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each input sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        # Initialise nn.Module first so the layers assigned below are
        # registered as submodules of this network.
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)    # input -> hidden
        self.relu = nn.ReLU()                            # activation function
        self.fc2 = nn.Linear(hidden_size, num_classes)   # hidden -> class scores

    def forward(self, x):
        """Run the forward pass and return the raw class scores for `x`."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Build the network from the hyper-parameters above, then move its
# parameters onto the selected device with .to(device).
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)

#### Loss and optimizer


In [None]:
# Cross-entropy loss between the model's class scores and the integer labels.
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent over every parameter of the model.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

#### Train the model

In [None]:
# Number of mini-batches the loader yields per epoch
# (the original note gives 600 = 60000 training images / batch size 100).
total_step = len(train_loader)
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_loader, start=1):
        # Move the batch to the configured device and flatten each image to 784 values
        images = images.to(device).reshape(-1, 28 * 28)
        labels = labels.to(device)

        # Forward pass: calling the model object runs NeuralNet.forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()   # reset gradients left over from the previous step
        loss.backward()         # compute gradients of the loss
        optimizer.step()        # update the weights

        # Report progress only on every 100th step
        if step % 100 != 0:
            continue
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
              .format(epoch + 1, num_epochs, step, total_step, loss.item()))

#### Test the model

In [None]:
# Evaluation needs no gradients, so the loop runs under torch.no_grad();
# skipping gradient tracking reduces the memory used by the computation.
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten each image to 784 values and send the batch to the device the model lives on
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)

        # Class scores for the batch
        outputs = model(images)
        # Index of the highest score along dim 1 is the predicted label
        predicted = torch.max(outputs, dim=1).indices

        total += labels.size(0)
        correct += (predicted == labels).sum().item()   # number of correct predictions

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))


In [None]:
# Helper to display an image tensor
def imshow(img):
    """Display an image tensor with matplotlib.

    Args:
        img: tensor in channels-first layout (C, H, W), such as the grid
            returned by torchvision.utils.make_grid.
    """
    # Tensor.numpy() only works for CPU tensors that do not require grad,
    # so detach and copy to host memory before converting.
    npimg = img.detach().cpu().numpy()
    # Reorder the axes to channels-last (H, W, C) before handing the array to plt.imshow
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


with torch.no_grad():  # inference only, no gradients needed
    # Take the first batch from the test loader (the loader is reused as
    # built earlier; with shuffle disabled this is not a random sample).
    dataiter = iter(test_loader)
    # DataLoader iterators no longer expose a .next() method; use the built-in next()
    images, labels = next(dataiter)

    # show images (the batch is still on the CPU here)
    imshow(torchvision.utils.make_grid(images))

    # Flatten each image and move the batch to the model's device;
    # feeding CPU tensors to a model on the GPU raises a device-mismatch error
    images = images.reshape(-1, 28*28).to(device)
    outputs = model(images)  # class scores predicted by the model

    # Index of the highest score per sample is the predicted digit
    _, predicted = torch.max(outputs, 1)
    # Bring predictions back to the CPU so they print alongside the CPU labels
    predicted = predicted.cpu()

    # print labels
    print("ground truth: ", labels)   # true labels
    print("predicted: ", predicted)   # model predictions

## 실습


http://yann.lecun.com/exdb/mnist/

하이퍼파라미터(hyper-parameter)를 조절하여 위 사이트에 제시된 2-layer NN의 성능에 근접하도록 만드세요.

In [None]:
# Hyper-parameters for the exercise run
# NOTE: the code below reuses train_loader / test_loader as already built,
# so changing batch_size here has no effect unless the loaders are rebuilt.
input_size = 28 * 28     # features per flattened image
hidden_size = 500        # units in the hidden layer
num_classes = 10         # number of output scores per sample
num_epochs = 10          # full passes over the training loader
batch_size = 100         # samples per mini-batch
learning_rate = 1e-3     # step size handed to the optimizer


class NeuralNet(nn.Module):
    """Feedforward classifier with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each input sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        # nn.Module must be initialised before submodules are assigned.
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)    # input -> hidden
        self.relu = nn.ReLU()                            # activation function
        self.fc2 = nn.Linear(hidden_size, num_classes)   # hidden -> class scores

    def forward(self, x):
        """Return the raw class scores for the batch `x`."""
        activated = self.relu(self.fc1(x))
        return self.fc2(activated)

# Fresh model for the exercise, moved onto the selected device.
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)

# Cross-entropy loss and stochastic gradient descent over the model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# ---- Training ----
total_step = len(train_loader)   # mini-batches per epoch
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_loader, start=1):
        # Move the batch to the configured device and flatten each image
        images = images.to(device).reshape(-1, 28 * 28)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress only on every 100th step
        if step % 100 != 0:
            continue
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
              .format(epoch + 1, num_epochs, step, total_step, loss.item()))


# ---- Evaluation ----
# Gradients are not needed at test time, so the loop runs under torch.no_grad().
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the highest score along dim 1 is the predicted label
        predicted = torch.max(outputs, dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
