## 1. Loader

In [1]:
import torch
import torchvision.datasets as datasets
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=None)
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=None)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [18]:
len(mnist_trainset)  #데이터 확인 

60000

In [27]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

batch_size = 100
train_loader = torch.utils.data.DataLoader( 
    datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
    batch_size=1000)

In [28]:
train_iter = iter(train_loader) #한 batch만 뽑아서 확인해보기 
images, labels = train_iter.next() 

print(len(train_loader))
print(images.shape)
print(labels) #1차원 tensor인데 length 가 4인! 

600
torch.Size([100, 1, 28, 28])
tensor([9, 3, 8, 0, 2, 6, 9, 8, 7, 3, 7, 1, 7, 3, 9, 0, 4, 9, 3, 1, 3, 4, 1, 0,
        1, 2, 6, 3, 7, 9, 5, 0, 6, 4, 3, 0, 7, 3, 1, 7, 2, 3, 8, 8, 3, 0, 4, 8,
        7, 4, 7, 2, 4, 2, 3, 7, 7, 6, 4, 3, 2, 6, 5, 2, 9, 1, 0, 9, 7, 6, 2, 0,
        7, 4, 7, 3, 4, 6, 0, 3, 6, 4, 2, 3, 1, 4, 7, 0, 5, 2, 1, 5, 1, 5, 9, 0,
        5, 2, 3, 4])


## 2. Model

In [29]:
src = {'input_size': 1*28*28,   #config #환경설정 #들어가야하는 input들 한번에 묶어놓은 것 
       'hidden_size1':512,
       'hidden_size2':256,
       'hidden_size3':128,
       'output_size':10,
       'init_weight_range':0.5,
       'num_epochs':5,
       'batch_size':100,
       'learning_rate':1e-3}

In [30]:
import torch.nn as nn
import torch.nn.functional as F

class MnistModel_Yum(nn.Module):
    def __init__(self, src):
        super(MnistModel_Yum, self).__init__()
        self.fc1 = nn.Linear(src['input_size'], src['hidden_size1']) #총 4개 layer!
        self.fc2 = nn.Linear(src['hidden_size1'], src['hidden_size2'])
        self.fc3 = nn.Linear(src['hidden_size2'], src['hidden_size3'])
        self.fc4 = nn.Linear(src['hidden_size3'], src['output_size']) #최종 length가 10인 tensor가 나옴
                             
        ## sequential layer
        self.seq_fc = nn.Sequential(
                            nn.Linear(src['input_size'], src['hidden_size1']),
                            nn.Linear(src['hidden_size1'], src['hidden_size2']),
                            nn.Linear(src['hidden_size2'], src['hidden_size3']),
                            nn.Linear(src['hidden_size3'], src['output_size'])
                            )   #하나의 layer로 묶음  #위랑 결과 같음!
        
        self.init_range = src['init_weight_range']
        
    def init_weight(self):
        self.fc1.weight.data.uniform_(-self.init_range, self.init_range) #가중치 초기화 
        self.fc2.weight.data.uniform_(-self.init_range, self.init_range)
        self.fc3.weight.data.uniform_(-self.init_range, self.init_range)
        self.fc4.weight.data.uniform_(-self.init_range, self.init_range)
        
        for fc in self.seq_fc:
            fc.weight.data.uniform_(-self.init_range, self.init_range)
        
    def forward(self, img): #layer가 4개 
        x = img.view(img.shape[0], -1) #image shape 100, 1, 28, 28 중 100차원만 유지하고 나머지는 한 차원으로 묶어버리겠다
        #--------------------
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        y = self.fc4(x)
        #--------------------
        
        ## 4 lines above are identical to #sequencial로 써도 위와 같은 모델 
        #  y = self.seq_fc(x)

        return y #마지막 y는 length10짜리 tensor

In [31]:
model = MnistModel_Yum(src) #init하는 부분 
y = model(images) #forward부분 

In [32]:
print(y.shape) 

torch.Size([100, 10])


## 3. Train / Eval

### 손실 함수 및 optimizer 정의

In [33]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()  #loss
optimizer = optim.SGD(model.parameters(), #gradient를 어떤 방식으로 할건지 
                      src['learning_rate'], 
                      momentum=0.9)

### 네트워크 학습
단순히 데이터를 반복시켜 네트워크와 optimizer의 입력으로 넘겨준다.

In [36]:
from tqdm import tqdm  #빨간줄 

model.init_weight()

for epoch in tqdm(range(src['num_epochs'])):   #epoch 5번 
    current_loss = 0.0
#     model.train(True)
    
    for i, data in enumerate(train_loader): #4개씩 묶여있는 사진들이 루프 한번만에!
        # get the inputs
        inputs, labels = data  #image에 사진이랑 클래스 있음 
        
        # zero the parameter gradients
        optimizer.zero_grad()  #0으로 초기화 
        
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels) #실제 사진과 확률의 차이
        loss.backward()  #편미분 값들 계산
        optimizer.step()  #계산한 값들로 update
        
        # print statistics
        step = i + 1
        current_loss += loss.item()
        
        if step % 100 == 0 and step != 0:     # print every 1000 mini-batches
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                  (epoch + 1, src['num_epochs'], step, len(train_loader)//100 * 100, current_loss / 100)) #누적합의 평균 
            current_loss = 0.0  #12500개의 데이터 --> 1000번씩 찍으면 500이 남으므로 다음 epoch로 넘어가 loss가 







  0%|                                                    | 0/5 [00:00<?, ?it/s]

Epoch [1/5], Step [100/600], Loss: 53.6500
Epoch [1/5], Step [200/600], Loss: 14.6841
Epoch [1/5], Step [300/600], Loss: 10.2796
Epoch [1/5], Step [400/600], Loss: 7.4278
Epoch [1/5], Step [500/600], Loss: 6.1657
Epoch [1/5], Step [600/600], Loss: 5.5989








 20%|████████▌                                  | 1/5 [02:15<09:02, 135.60s/it]

Epoch [2/5], Step [100/600], Loss: 4.6715
Epoch [2/5], Step [200/600], Loss: 4.3907
Epoch [2/5], Step [300/600], Loss: 3.8163
Epoch [2/5], Step [400/600], Loss: 3.6536
Epoch [2/5], Step [500/600], Loss: 3.3704
Epoch [2/5], Step [600/600], Loss: 3.0018








 40%|█████████████████▏                         | 2/5 [04:45<06:59, 139.96s/it]

Epoch [3/5], Step [100/600], Loss: 2.9900
Epoch [3/5], Step [200/600], Loss: 2.8956
Epoch [3/5], Step [300/600], Loss: 3.0615
Epoch [3/5], Step [400/600], Loss: 2.5494
Epoch [3/5], Step [500/600], Loss: 2.3866
Epoch [3/5], Step [600/600], Loss: 2.6555








 60%|█████████████████████████▊                 | 3/5 [06:54<04:33, 136.73s/it]

Epoch [4/5], Step [100/600], Loss: 2.3115
Epoch [4/5], Step [200/600], Loss: 2.3547
Epoch [4/5], Step [300/600], Loss: 2.2709
Epoch [4/5], Step [400/600], Loss: 2.2540
Epoch [4/5], Step [500/600], Loss: 2.0345
Epoch [4/5], Step [600/600], Loss: 2.0752








 80%|██████████████████████████████████▍        | 4/5 [08:40<02:07, 127.44s/it]

Epoch [5/5], Step [100/600], Loss: 1.9571
Epoch [5/5], Step [200/600], Loss: 1.9825
Epoch [5/5], Step [300/600], Loss: 1.7831
Epoch [5/5], Step [400/600], Loss: 1.8317
Epoch [5/5], Step [500/600], Loss: 1.8118
Epoch [5/5], Step [600/600], Loss: 1.7474








100%|███████████████████████████████████████████| 5/5 [10:24<00:00, 120.28s/it]

### 평가 데이터를 이용한 네트워크 평가
학습 데이터셋을 이용해 총 5번 반복하면서 학습을 시켰다. 그러나 실제로 네트워크가 무엇인가를 배웠는지에 대하여 테스트를 해야한다.

뉴럴 네트워크의 출력인 클래스 label을 예측하고 실제 데이터와 비교함으로써 테스트를 수행했다.

In [37]:
# Test the Model
correct = 0
total = 0
for i, data in enumerate(test_loader):
    inputs, labels = data
#     images = images.view(-1, 28*28)
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.shape[0]
    correct += (predicted == labels).sum()

print('Accuracy of the network on the 6000 test images: %d %%' % (100 * correct / total))  #오~~~~ 이 정도면 만족쓰 

Accuracy of the network on the 6000 test images: 86 %
