In [2]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
# parameter (learning rate, training epochs, batch_size)

learning_rate = 0.1
training_epochs = 15
batch_size = 100

In [6]:
# train과 test set으로 나누어 MNIST data 불러오기
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [7]:
# assign train & test(batch size, shuffle, drop_last)

train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          drop_last=True)

In [8]:
# Layer (use 3 Layers, DropOut (p=0.3), ReLU fn, Batch normalization)
# Hidden node # : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)

linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

relu = torch.nn.ReLU()

dropout = torch.nn.Dropout(p=0.3)

bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

In [9]:
# initialize weight of each layer through xavier initialization

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 1.3072e-01,  1.9814e-01,  1.8934e-01,  3.0248e-02,  2.3331e-01,
         -6.1776e-02,  1.7788e-01,  1.8208e-01, -3.1497e-02, -1.1475e-01,
          2.1595e-01, -1.8682e-01,  2.3012e-01,  4.5476e-02,  1.1029e-01,
         -2.7012e-02,  1.3552e-01, -6.4889e-02,  3.6765e-02,  1.0757e-02,
         -8.4150e-04, -3.2775e-02,  1.3041e-02,  1.2794e-02, -1.0681e-01,
         -2.3288e-01,  7.6600e-02, -1.2600e-01,  8.2899e-02,  1.9033e-01,
         -1.5869e-01,  1.6418e-01, -6.7378e-02,  8.5599e-02, -1.3145e-01,
         -1.9620e-01, -1.9974e-02,  1.3684e-01, -1.7222e-01,  6.6143e-02,
         -4.6003e-02, -2.2426e-01,  8.4634e-02, -2.0383e-01,  1.3463e-01,
          1.3541e-01,  1.9029e-01,  3.6678e-02,  2.0246e-01, -1.0174e-01,
         -1.4966e-02, -4.8589e-02,  2.0126e-01,  1.2877e-01,  2.1136e-01,
          3.1746e-02, -1.9490e-01, -2.0658e-01, -4.2215e-02,  2.2841e-01,
          8.7683e-03,  2.8277e-02, -1.7528e-01,  2.2467e-01, -1.6006e-01,
         -2.0618

In [10]:
# use torch.nn.Sequential to define model(order:linear-Batch Normalization Layer - ReLU- DropOut)

model = torch.nn.Sequential(linear1, bn1, relu, dropout, 
                            linear2, bn2, relu, dropout, 
                            linear3).to(device)

# Loss Function (CrossEntropy)

criterion = torch.nn.CrossEntropyLoss().to(device)

# optimizer (Adam optimizer)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# cost calculation variable

train_total_batch = len(train_loader)

In [11]:
# Training epoch 

for epoch in range(training_epochs):
    model.train()
    avg_cost = 0  # cost 초기값 설정
    
    #use back propagation & optimizer to optimize loss
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X) 
        cost = criterion(hypothesis, Y) 
        cost.backward()
        optimizer.step() 
        
        avg_cost += cost / train_total_batch
        
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    
print('Learning finished')

Epoch: 0001 cost = 0.516279161
Epoch: 0002 cost = 0.380112588
Epoch: 0003 cost = 0.333815247
Epoch: 0004 cost = 0.314076543
Epoch: 0005 cost = 0.294467062
Epoch: 0006 cost = 0.293753892
Epoch: 0007 cost = 0.276014209
Epoch: 0008 cost = 0.283835918
Epoch: 0009 cost = 0.262976766
Epoch: 0010 cost = 0.261742920
Epoch: 0011 cost = 0.263456464
Epoch: 0012 cost = 0.242976040
Epoch: 0013 cost = 0.247123137
Epoch: 0014 cost = 0.239156827
Epoch: 0015 cost = 0.237752944
Learning finished


In [12]:
# check test data model accuracy 


with torch.no_grad():
    model.eval() #evaluation mode (no dropout)
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    
    correct_prediction = torch.argmax(prediction, 1) == Y_test # prediction 값과 실제 test data 값이 같은가 (correct = 1)
    accuracy = correct_prediction.float().mean() # 0 or 1 값들의 평균 >>> 정확도
    print('Accuracy:', accuracy.item())
    
    ##compare label & prediction in test set (random) 
    r = random.randint(0, len(mnist_test) - 1) # randint: 범위 내 임의의 정수(난수) 추출 
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    
    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9264000058174133
Label:  1
Prediction:  1




##Hidden Node

1. increase the nodes (784,400,200,10)

In [14]:
linear4 = torch.nn.Linear(784, 400, bias=True)
linear5 = torch.nn.Linear(400, 200, bias=True)
linear6 = torch.nn.Linear(200, 10, bias=True)

bn3 = torch.nn.BatchNorm1d(400)
bn4 = torch.nn.BatchNorm1d(200)

In [15]:
torch.nn.init.xavier_uniform_(linear4.weight)
torch.nn.init.xavier_uniform_(linear5.weight)
torch.nn.init.xavier_uniform_(linear6.weight)

Parameter containing:
tensor([[ 0.0158,  0.1454,  0.1115,  ...,  0.0397,  0.0642, -0.1611],
        [-0.0089,  0.0764, -0.1661,  ...,  0.0543,  0.0031, -0.1407],
        [-0.1163, -0.1425, -0.1655,  ..., -0.1092, -0.0141, -0.0067],
        ...,
        [ 0.1253,  0.1041,  0.1023,  ...,  0.1061,  0.1265, -0.0555],
        [-0.0189,  0.1562, -0.1390,  ..., -0.0685,  0.1369, -0.0029],
        [-0.1500,  0.0160, -0.1401,  ..., -0.0850, -0.0647,  0.1299]],
       requires_grad=True)

In [16]:
#new model
model2 = torch.nn.Sequential(linear4, bn3, relu, dropout, 
                            linear5, bn4, relu, dropout, 
                            linear6).to(device)

In [17]:

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model2.parameters(), lr=learning_rate)

In [18]:
# Training epoch 

for epoch in range(training_epochs):
    model2.train()
    avg_cost = 0  # cost 초기값 설정
    
    #train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드 
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model2(X) 
        cost = criterion(hypothesis, Y) 
        cost.backward()
        optimizer.step() 
        
        avg_cost += cost / train_total_batch
        
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    
print('Learning finished')

Epoch: 0001 cost = 0.434152335
Epoch: 0002 cost = 0.303556889
Epoch: 0003 cost = 0.265937448
Epoch: 0004 cost = 0.244121760
Epoch: 0005 cost = 0.233371511
Epoch: 0006 cost = 0.218170539
Epoch: 0007 cost = 0.205545351
Epoch: 0008 cost = 0.206433401
Epoch: 0009 cost = 0.194864497
Epoch: 0010 cost = 0.187096596
Epoch: 0011 cost = 0.191462398
Epoch: 0012 cost = 0.180904567
Epoch: 0013 cost = 0.181844175
Epoch: 0014 cost = 0.179348946
Epoch: 0015 cost = 0.169732019
Learning finished


In [19]:

with torch.no_grad():
    model2.eval() #evaluation mode (no dropout)
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model2(X_test)
    
    correct_prediction = torch.argmax(prediction, 1) == Y_test 
    accuracy = correct_prediction.float().mean() 
    print('Accuracy:', accuracy.item())
    
    r = random.randint(0, len(mnist_test) - 1) 
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    
    print('Label: ', Y_single_data.item())
    single_prediction = model2(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.8615000247955322
Label:  1
Prediction:  1




accuracy가 감소...?

2. decrease the node #

In [21]:
linear7 = torch.nn.Linear(784, 200, bias=True)
linear8 = torch.nn.Linear(200, 50, bias=True)
linear9 = torch.nn.Linear(50, 10, bias=True)

bn5 = torch.nn.BatchNorm1d(200)
bn6 = torch.nn.BatchNorm1d(50)

In [22]:
torch.nn.init.xavier_uniform_(linear7.weight)
torch.nn.init.xavier_uniform_(linear8.weight)
torch.nn.init.xavier_uniform_(linear9.weight)

Parameter containing:
tensor([[ 5.5382e-02,  2.0352e-02,  1.4677e-01, -1.8797e-01,  1.1940e-01,
          1.7699e-02, -1.1697e-01, -2.6360e-01,  1.5180e-01, -1.8004e-01,
         -1.6300e-01,  1.7950e-01,  1.1045e-01,  1.1680e-01, -9.8153e-02,
          2.8324e-01, -1.0570e-01,  1.9990e-02, -2.3745e-01, -9.8609e-02,
          4.1862e-02, -1.9429e-01,  2.8855e-01, -1.3421e-01, -2.8547e-01,
          3.1183e-01, -1.7423e-02,  9.3613e-02,  1.5501e-01, -3.0459e-01,
          2.9480e-01, -8.4449e-02,  1.9942e-01, -8.7541e-02,  4.7364e-02,
          7.7128e-02, -2.6937e-01,  6.0224e-03,  6.1231e-02, -1.0890e-01,
         -2.7680e-01,  5.7228e-04,  2.8817e-01, -1.3726e-01, -2.6335e-01,
         -1.9567e-01,  2.9440e-01, -4.0337e-02, -2.0285e-02,  3.3141e-02],
        [-2.2963e-01, -1.2430e-01,  4.1255e-02,  2.7440e-01, -1.1553e-01,
         -1.7844e-01,  1.8403e-01, -2.2876e-01, -5.1220e-02,  2.4598e-01,
          1.6256e-01, -2.5505e-01, -3.1220e-01, -1.6838e-01, -2.8551e-01,
          2.188

In [23]:
# new model
model3 = torch.nn.Sequential(linear7, bn5, relu, dropout, 
                            linear8, bn6, relu, dropout, 
                            linear9).to(device)

In [24]:
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model3.parameters(), lr=learning_rate)

In [25]:
model3.train()

for epoch in range(training_epochs):
    avg_cost = 0

    for X,Y in train_loader:
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model3(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost += cost/ train_total_batch
    
    print('Epoch:','%04d' % (epoch+1),'cost=', '{:.9f}'.format(avg_cost))

print('Learning finished.')

Epoch: 0001 cost= 0.464254886
Epoch: 0002 cost= 0.325357229
Epoch: 0003 cost= 0.278919876
Epoch: 0004 cost= 0.267685682
Epoch: 0005 cost= 0.257565498
Epoch: 0006 cost= 0.245031893
Epoch: 0007 cost= 0.228003755
Epoch: 0008 cost= 0.230987519
Epoch: 0009 cost= 0.215713888
Epoch: 0010 cost= 0.217587203
Epoch: 0011 cost= 0.214308083
Epoch: 0012 cost= 0.203691110
Epoch: 0013 cost= 0.202096626
Epoch: 0014 cost= 0.201364219
Epoch: 0015 cost= 0.192190230
Learning finished.


In [26]:
    model3.eval() #evaluation mode (no dropout)
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model3(X_test)
    
    correct_prediction = torch.argmax(prediction, 1) == Y_test 
    accuracy = correct_prediction.float().mean() 
    print('Accuracy:', accuracy.item())
    
    r = random.randint(0, len(mnist_test) - 1) 
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    
    print('Label: ', Y_single_data.item())
    single_prediction = model3(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9004999995231628
Label:  2
Prediction:  2




accuracy가 증가..?
node # optimize 방법: 수연 링크 참조