In [1]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import torch.optim as  optim

In [2]:
# MNIST dataset
mnist_train = datasets.MNIST(root='data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = datasets.MNIST(root='data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [3]:
from torch.utils.data import DataLoader
trainloader = DataLoader(mnist_train, 
                         batch_size=4,
                         shuffle=True, 
                         drop_last=False)

testloader = DataLoader(mnist_test, 
                        batch_size=4,
                        shuffle=False,
                        drop_last=False)

In [4]:
#iterator, 데이터를 하나씩 돌면서 탐색한다.
trainiter = iter(trainloader)
images, labels = trainiter.next()

print(len(trainloader))
print(images.shape)
print(labels)

15000
torch.Size([4, 1, 28, 28])
tensor([9, 1, 5, 5])


In [5]:
print(len(testloader))

2500


In [6]:
# layer 
src = {'input_size':28*28,
       'hidden_size1':256,
       'hidden_size2':158,
       'output_size':10,
       'num_epochs':2,
       'batch_size':100,
       'learning_rate':0.01}

In [7]:
import torch.nn as nn
# nn에 필요한 다양한 함수들을 제공한다.
import torch.nn.functional as F

In [8]:
class TobigsNet(nn.Module):
    def __init__(self, src):
        super(TobigsNet, self).__init__()   
        # two layer
        self.fc1 = nn.Linear(src['input_size'], src['hidden_size1'])
        self.fc2 = nn.Linear(src['hidden_size1'], src['hidden_size2'])
        self.fc3 = nn.Linear(src['hidden_size2'], src['output_size'])
    
    def forward(self, img):
        # 차원을 바꾼다.
        x = img.view(img.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        y = F.softmax(x)

        return y

In [9]:
# init
model = TobigsNet(src)
# oupput 
y = model(images)



In [10]:

# loss corssentropy
criterion = nn.CrossEntropyLoss()
# descent gradient 
# 미니배치 하강법.

optimizer = optim.SGD(model.parameters(), 
                      src['learning_rate'])



In [11]:
#진행 표시바
from tqdm import tqdm

for epoch in tqdm(range(src['num_epochs'])):
    #epoch 마다 loss 갱신해야 함으로, current loss =0으로 지정!
    current_loss = 0.0
#     model.train(True)
    
    for i, data in enumerate(trainloader):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # loss를 최소화 하는 방향으로, backward gradient descent weight 업데이트!
        
        loss.backward()
        optimizer.step()
        
        # print statistics
        step = i + 1
        current_loss += loss.item()
        
        if step % 1000 == 0 and step != 0:     # print every 1000 mini-batches
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                  #  평균 loss를 사용해야 한다.
                  (epoch + 1, src['num_epochs'], step, len(trainloader)//1000 * 1000, current_loss / 1000))
            current_loss = 0.0
        



Epoch [1/2], Step [1000/15000], Loss: 2.3016
Epoch [1/2], Step [2000/15000], Loss: 2.2982
Epoch [1/2], Step [3000/15000], Loss: 2.2910
Epoch [1/2], Step [4000/15000], Loss: 2.2638
Epoch [1/2], Step [5000/15000], Loss: 2.1344
Epoch [1/2], Step [6000/15000], Loss: 1.9804
Epoch [1/2], Step [7000/15000], Loss: 1.8708
Epoch [1/2], Step [8000/15000], Loss: 1.8260
Epoch [1/2], Step [9000/15000], Loss: 1.7854
Epoch [1/2], Step [10000/15000], Loss: 1.7639
Epoch [1/2], Step [11000/15000], Loss: 1.7447
Epoch [1/2], Step [12000/15000], Loss: 1.7419
Epoch [1/2], Step [13000/15000], Loss: 1.7275
Epoch [1/2], Step [14000/15000], Loss: 1.7200
Epoch [1/2], Step [15000/15000], Loss: 1.7383


 50%|██████████████████████████████████████████                                          | 1/2 [00:46<00:46, 46.13s/it]

Epoch [2/2], Step [1000/15000], Loss: 1.7149
Epoch [2/2], Step [2000/15000], Loss: 1.7095
Epoch [2/2], Step [3000/15000], Loss: 1.7195
Epoch [2/2], Step [4000/15000], Loss: 1.7213
Epoch [2/2], Step [5000/15000], Loss: 1.7073
Epoch [2/2], Step [6000/15000], Loss: 1.7120
Epoch [2/2], Step [7000/15000], Loss: 1.7174
Epoch [2/2], Step [8000/15000], Loss: 1.7227
Epoch [2/2], Step [9000/15000], Loss: 1.7038
Epoch [2/2], Step [10000/15000], Loss: 1.6909
Epoch [2/2], Step [11000/15000], Loss: 1.7090
Epoch [2/2], Step [12000/15000], Loss: 1.7089
Epoch [2/2], Step [13000/15000], Loss: 1.7031
Epoch [2/2], Step [14000/15000], Loss: 1.7047
Epoch [2/2], Step [15000/15000], Loss: 1.7056


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [01:33<00:00, 46.48s/it]


In [14]:
# Test the Model
correct = 0
total = 0
# test set을 돈다!
for i, data in enumerate(testloader):
    inputs, labels = data
#     images = images.view(-1, 28*28)
    outputs = model(inputs)
    # outputs 확률에서 가장 크게 나온 값을 가져온다!
    _, predicted = torch.max(outputs.data, 1)
    total += labels.shape[0]
    # 만약 두 값이 같으면 sum에 +1 ~!
    
    correct += (predicted == labels).sum()

# 몇 %를 맞췄는지.
print('Accuracy of the network on the 2500 test images: %d %%' % (100 * correct / total))



Accuracy of the network on the 2500 test images: 75 %


## Adam 을 이용하여 optimalization

# batch norm & dropout

In [15]:
class TobigsNet_nb(nn.Module):
    def __init__(self, src):
        super(TobigsNet_nb, self).__init__()   
        
        self.linear1 = nn.Linear(src['input_size'], src['hidden_size1'])
        self.linear2 = nn.Linear(src['hidden_size1'], src['hidden_size2'])
        self.linear3 = nn.Linear(src['hidden_size2'], src['output_size'])
        self.bn1 = nn.BatchNorm1d(src['hidden_size1'])
        self.bn2 = nn.BatchNorm1d(src['hidden_size2'])

    
    def forward(self, img):
        # 차원을 바꾼다.
        x = img.view(img.shape[0], -1)
        x = self.bn1(F.relu(self.linear1(x)))
        x = self.bn2(F.relu(self.linear2(x)))
        x = F.dropout(x)
        x = self.linear3(x)
        y = F.softmax(x)

        return y

In [16]:
# init
model2 = TobigsNet_nb(src)
# oupput 
y = model2(images)
print(y)



tensor([[0.1372, 0.1803, 0.0746, 0.0485, 0.0639, 0.0815, 0.0639, 0.1416, 0.1423,
         0.0661],
        [0.1269, 0.0602, 0.0694, 0.1179, 0.1002, 0.0717, 0.1432, 0.1606, 0.0619,
         0.0880],
        [0.0770, 0.0635, 0.1556, 0.0581, 0.2759, 0.0949, 0.0798, 0.0943, 0.0580,
         0.0430],
        [0.0990, 0.2020, 0.0572, 0.0643, 0.0677, 0.0883, 0.0708, 0.0705, 0.1846,
         0.0954]], grad_fn=<SoftmaxBackward>)


In [17]:

#adam 적용 
optimizer_a = optim.Adam(model2.parameters(), 
                      src['learning_rate'])

In [18]:
#진행 표시바
from tqdm import tqdm

for epoch in tqdm(range(src['num_epochs'])):
    current_loss = 0.0
    
    for i, data in enumerate(trainloader):
        inputs, labels = data
        optimizer_a.zero_grad()
        
        outputs = model2(inputs)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer_a.step()

        step = i + 1
        current_loss += loss.item()
        
        if step % 1000 == 0 and step != 0:     # print every 1000 mini-batches
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                  #  평균 loss를 사용해야 한다.
                  (epoch + 1, src['num_epochs'], step, len(trainloader)//1000 * 1000, current_loss / 1000))
            current_loss = 0.0
        



Epoch [1/2], Step [1000/15000], Loss: 1.9406
Epoch [1/2], Step [2000/15000], Loss: 1.8624
Epoch [1/2], Step [3000/15000], Loss: 1.8663
Epoch [1/2], Step [4000/15000], Loss: 1.8566
Epoch [1/2], Step [5000/15000], Loss: 1.8447
Epoch [1/2], Step [6000/15000], Loss: 1.8519
Epoch [1/2], Step [7000/15000], Loss: 1.8401
Epoch [1/2], Step [8000/15000], Loss: 1.8299
Epoch [1/2], Step [9000/15000], Loss: 1.8334
Epoch [1/2], Step [10000/15000], Loss: 1.8285
Epoch [1/2], Step [11000/15000], Loss: 1.8270
Epoch [1/2], Step [12000/15000], Loss: 1.8149
Epoch [1/2], Step [13000/15000], Loss: 1.8077
Epoch [1/2], Step [14000/15000], Loss: 1.8115
Epoch [1/2], Step [15000/15000], Loss: 1.8214


 50%|█████████████████████████████████████████▌                                         | 1/2 [04:26<04:26, 266.76s/it]

Epoch [2/2], Step [1000/15000], Loss: 1.8150
Epoch [2/2], Step [2000/15000], Loss: 1.8185
Epoch [2/2], Step [3000/15000], Loss: 1.8282
Epoch [2/2], Step [4000/15000], Loss: 1.8181
Epoch [2/2], Step [5000/15000], Loss: 1.8227
Epoch [2/2], Step [6000/15000], Loss: 1.8313
Epoch [2/2], Step [7000/15000], Loss: 1.8160
Epoch [2/2], Step [8000/15000], Loss: 1.7964
Epoch [2/2], Step [9000/15000], Loss: 1.8177
Epoch [2/2], Step [10000/15000], Loss: 1.8173
Epoch [2/2], Step [11000/15000], Loss: 1.8119
Epoch [2/2], Step [12000/15000], Loss: 1.8184
Epoch [2/2], Step [13000/15000], Loss: 1.8146
Epoch [2/2], Step [14000/15000], Loss: 1.8198
Epoch [2/2], Step [15000/15000], Loss: 1.8316


100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [09:36<00:00, 279.56s/it]


In [19]:
# Test the Model
correct = 0
total = 0
# test set을 돈다!
for i, data in enumerate(testloader):
    inputs, labels = data
    outputs = model2(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.shape[0]
    
    correct += (predicted == labels).sum()

print('Accuracy of the network on the 2500 test images: %d %%' % (100 * correct / total))



Accuracy of the network on the 2500 test images: 66 %


In [None]:
# 흠..왜 정확도가 더 낮아졌을까...
