## Q1-1

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [2]:
# 파라미터 설정 (learning rate, training epochs, batch_size)
learning_rate = 0.1
training_epochs = 15
batch_size = 100

random.seed(123)

In [3]:
#train과 test set으로 나누어 MNIST data 불러오기
mnist_train = dsets.MNIST(root='MNIST_data/',
                         train=True,
                         transform = transforms.ToTensor(),
                         download =True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                        train=False, # trian=False
                        transform = transforms.ToTensor(),
                        download=True)

In [4]:
#dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)
train_loader = torch.utils.data.DataLoader(dataset=mnist_train, 
                                          batch_size = batch_size,
                                          shuffle=True,
                                          drop_last = True)

test_loader = torch.utils.data.DataLoader(dataset=mnist_test, 
                                          batch_size = batch_size,
                                          shuffle=False,
                                          drop_last = True)

In [5]:
# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)
linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

dropout = torch.nn.Dropout(p=0.3)

relu = torch.nn.ReLU()

bn1 = torch.nn.BatchNorm1d(100) #number of size
bn2 = torch.nn.BatchNorm1d(100)

In [6]:
#xavier initialization을 이용하여 각 layer의 weight 초기화
torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)

  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


Parameter containing:
tensor([[-1.0981e-01, -1.0721e-01,  8.6952e-02,  1.5056e-02,  1.5518e-01,
          1.9080e-01, -2.1751e-01, -1.0514e-01,  4.2662e-02,  1.0691e-01,
         -9.3403e-02,  5.1347e-02,  3.9552e-02,  1.7345e-01, -1.9088e-01,
         -7.8941e-02, -3.9995e-02,  1.7880e-01, -8.6480e-02, -2.1657e-01,
          1.0383e-02,  1.4692e-01,  1.0108e-01, -1.8897e-01,  2.1710e-01,
         -1.0112e-01, -1.2709e-01, -1.3951e-01,  4.4903e-02, -9.1287e-02,
         -1.6695e-01, -8.1733e-02, -1.4031e-01,  1.4424e-01,  1.5719e-01,
         -7.0156e-02,  3.2802e-02, -1.4733e-01,  7.6466e-02,  1.2095e-02,
          1.1030e-01,  6.6536e-02,  1.1679e-01,  1.7579e-01,  1.9097e-01,
          4.8054e-02,  1.0372e-01, -1.6012e-01,  5.2269e-03,  2.9185e-02,
         -1.8551e-02, -1.6568e-01,  4.2194e-02,  2.0233e-01, -5.6011e-02,
         -1.4117e-01,  1.7412e-01, -6.3179e-02, -1.2242e-02, -2.1941e-01,
         -9.1435e-02, -1.3028e-01,  2.1283e-01, -2.5206e-02, -6.4272e-02,
          2.4650

In [7]:
# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)
model = torch.nn.Sequential(linear1,  bn1, relu, dropout, 
                           linear2, bn2, relu,  dropout,
                            linear3)

In [8]:
# Loss Function 정의하기 (CrossEntropy를 사용할 것!)
#optimizer 정의하기 (Adam optimizer를 사용할 것!)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
#cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)
model.train()

#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    avg_cost = 0 
    
    # train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드
    for X,Y in train_loader:
        
        # data
        X= X.view(-1,28*28) # reshaping  X data
        Y= Y
        
        # optimizer
        optimizer.zero_grad()
        
        #H(X) 
        hypothesis = model(X)
        
        #Cost
        cost = criterion(hypothesis, Y)
        
        # H(X) 개선; backpropagation
        cost.backward()
        optimizer.step()
        
        avg_cost +=cost/ train_total_batch
    
    print('Epoch:','%04d' % (epoch+1),'cost=', '{:.9f}'.format(avg_cost))
    
print('Learning finished.')

Epoch: 0001 cost= 0.488649964
Epoch: 0002 cost= 0.362622976
Epoch: 0003 cost= 0.321941078
Epoch: 0004 cost= 0.308767676
Epoch: 0005 cost= 0.302153140
Epoch: 0006 cost= 0.280230403
Epoch: 0007 cost= 0.279722035
Epoch: 0008 cost= 0.269009650
Epoch: 0009 cost= 0.263899088
Epoch: 0010 cost= 0.254575878
Epoch: 0011 cost= 0.257850736
Epoch: 0012 cost= 0.252794951
Epoch: 0013 cost= 0.244105220
Epoch: 0014 cost= 0.241841078
Epoch: 0015 cost= 0.238994747
Learning finished.


In [10]:
## test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
## X_test 불러올 때 view를 사용하여 차원 변환할 것 / Y_test를 불러올때 labels 사용
## accuracy의 초기 값 설정(0으로) 꼭 할 것

with torch.no_grad():
    model.eval()
    
    X_test = mnist_test.test_data.view(-1,28*28).float()
    Y_test = mnist_test.test_labels
    
    prediction = model(X_test)
    
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())
    
    # test set에서 random으로 data를 뽑아 label과 prediction 비교
    r = random.randint(0,len(mnist_test)-1)
    X_single_data = mnist_test.test_data[r:r+1].view(-1,28*28).float()
    Y_single_data = mnist_test.test_labels[r:r+1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())



Accuracy: 0.9373000264167786
Label:  5
Prediction:  3


## Q1-2

지금까지는 Layer의 수를 바꾸거나, Batch Normalization Layer를 추가하는 등 Layer에만 변화를 주며 모델의 성능을 향상 시켰습니다.  
이번 문제에서는 위에서 만든 모델에서 있던 Layer 들의 Hidden node 수를 증가 또는 감소 (ex: 200, 300, 50...) 시켰을 때, train set에서의 cost와 test set에서 Accuracy가 기존 결과와 비교하였을 때 어떻게 달라졌는지 비교해주시면 됩니다.

### 1) 200 -> 100

In [11]:
linear1 = torch.nn.Linear(784, 200, bias=True)
linear2 = torch.nn.Linear(200, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.3)

bn1 = torch.nn.BatchNorm1d(200)
bn2 = torch.nn.BatchNorm1d(100)

torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)

  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':
  del sys.path[0]


Parameter containing:
tensor([[ 0.1951, -0.2133, -0.2134, -0.1415, -0.1620, -0.1431, -0.1249, -0.1338,
          0.0800,  0.1021, -0.0372, -0.2148, -0.1905,  0.1356, -0.2102, -0.0140,
         -0.1480,  0.1875,  0.1343, -0.1517, -0.0526, -0.1073,  0.1789,  0.1999,
         -0.1290, -0.1908, -0.2164,  0.2228, -0.2028,  0.2091,  0.1757,  0.1191,
          0.0966,  0.0155, -0.0659,  0.2021, -0.0796, -0.1018,  0.2250, -0.0780,
         -0.1173,  0.1345,  0.1729, -0.2312, -0.0326, -0.1480,  0.1174, -0.0660,
          0.1260,  0.1695,  0.0371,  0.1525, -0.2014, -0.1760, -0.1217,  0.1358,
         -0.1405, -0.0584, -0.0756,  0.0834, -0.1673,  0.0778,  0.0291, -0.0301,
          0.1514, -0.1559, -0.1588,  0.0123, -0.2090, -0.1544, -0.1687, -0.2231,
          0.0927, -0.0825, -0.1326, -0.0513, -0.1794, -0.0619, -0.0980, -0.1099,
         -0.1337, -0.1644, -0.0753, -0.1961, -0.0322,  0.0352,  0.0598, -0.1656,
         -0.0871, -0.0680,  0.1298,  0.1650,  0.1186,  0.1699, -0.1877, -0.0783,
      

In [12]:
model2 = torch.nn.Sequential(linear1,  bn1, relu, dropout, 
                           linear2, bn2, relu,  dropout,
                            linear3)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=learning_rate)

In [13]:
train_total_batch = len(train_loader)
model2.train()

for epoch in range(training_epochs):
    avg_cost = 0 
    
    for X,Y in train_loader:
        
        X= X.view(-1,28*28) # reshaping  X data
        Y= Y
        
        optimizer.zero_grad()
        hypothesis = model2(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost +=cost/ train_total_batch
    
    print('Epoch:','%04d' % (epoch+1),'cost=', '{:.9f}'.format(avg_cost))
    
print('Learning finished.')

Epoch: 0001 cost= 0.474311352
Epoch: 0002 cost= 0.334704727
Epoch: 0003 cost= 0.293660253
Epoch: 0004 cost= 0.266254723
Epoch: 0005 cost= 0.246628985
Epoch: 0006 cost= 0.242055714
Epoch: 0007 cost= 0.233642489
Epoch: 0008 cost= 0.231927484
Epoch: 0009 cost= 0.220189765
Epoch: 0010 cost= 0.215765893
Epoch: 0011 cost= 0.212811112
Epoch: 0012 cost= 0.209594056
Epoch: 0013 cost= 0.202941656
Epoch: 0014 cost= 0.200198144
Epoch: 0015 cost= 0.196202755
Learning finished.


In [14]:
with torch.no_grad():
    model2.eval()
    
    X_test = mnist_test.test_data.view(-1,28*28).float()
    Y_test = mnist_test.test_labels
    
    prediction = model2(X_test)
    
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy2 = correct_prediction.float().mean()
    print('Accuracy:', accuracy2.item())
    
    # test set에서 random으로 data를 뽑아 label과 prediction 비교
    r = random.randint(0,len(mnist_test)-1)
    X_single_data = mnist_test.test_data[r:r+1].view(-1,28*28).float()
    Y_single_data = mnist_test.test_labels[r:r+1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model2(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9151999950408936
Label:  2
Prediction:  2


### 2) 300 -> 150

In [15]:
linear1 = torch.nn.Linear(784, 300, bias=True)
linear2 = torch.nn.Linear(300, 150, bias=True)
linear3 = torch.nn.Linear(150, 10, bias=True)

relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.3)

bn1 = torch.nn.BatchNorm1d(300)
bn2 = torch.nn.BatchNorm1d(150)

torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)

  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':
  del sys.path[0]


Parameter containing:
tensor([[-4.9427e-02,  4.2683e-02, -4.8571e-02,  ..., -1.6365e-01,
          1.1969e-01, -1.2394e-01],
        [ 9.4951e-02, -1.0727e-04,  3.8698e-02,  ...,  5.4700e-02,
         -3.3416e-02, -1.2180e-01],
        [ 8.9520e-03, -8.9457e-02, -1.9016e-01,  ..., -1.5570e-01,
          1.7070e-01,  1.5789e-01],
        ...,
        [ 4.9225e-02, -4.4570e-02, -9.2898e-02,  ...,  3.3420e-03,
         -1.6494e-01,  7.3739e-02],
        [-1.6762e-01, -1.7451e-01,  1.2406e-02,  ..., -1.7601e-01,
         -1.1124e-01,  4.0459e-02],
        [-1.3576e-01, -2.6659e-02, -8.2791e-02,  ...,  5.2860e-02,
         -1.0762e-02,  1.1018e-01]], requires_grad=True)

In [16]:
model3 = torch.nn.Sequential(linear1,  bn1, relu, dropout, 
                           linear2, bn2, relu,  dropout,
                            linear3)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model3.parameters(), lr=learning_rate)

In [17]:
train_total_batch = len(train_loader)
model3.train()

for epoch in range(training_epochs):
    avg_cost = 0 
    
    for X,Y in train_loader:
        
        X= X.view(-1,28*28) # reshaping  X data
        Y= Y
        
        optimizer.zero_grad()
        hypothesis = model3(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost +=cost/ train_total_batch
    
    print('Epoch:','%04d' % (epoch+1),'cost=', '{:.9f}'.format(avg_cost))
    
print('Learning finished.')

Epoch: 0001 cost= 0.439125389
Epoch: 0002 cost= 0.323592544
Epoch: 0003 cost= 0.278251022
Epoch: 0004 cost= 0.251083732
Epoch: 0005 cost= 0.233822539
Epoch: 0006 cost= 0.224421576
Epoch: 0007 cost= 0.218976259
Epoch: 0008 cost= 0.218363106
Epoch: 0009 cost= 0.206216887
Epoch: 0010 cost= 0.200487524
Epoch: 0011 cost= 0.200869381
Epoch: 0012 cost= 0.189756408
Epoch: 0013 cost= 0.184653997
Epoch: 0014 cost= 0.178289533
Epoch: 0015 cost= 0.180036753
Learning finished.


In [18]:
with torch.no_grad():
    model3.eval()
    
    X_test = mnist_test.test_data.view(-1,28*28).float()
    Y_test = mnist_test.test_labels
    
    prediction = model3(X_test)
    
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy3 = correct_prediction.float().mean()
    print('Accuracy:', accuracy3.item())
    
    # test set에서 random으로 data를 뽑아 label과 prediction 비교
    r = random.randint(0,len(mnist_test)-1)
    X_single_data = mnist_test.test_data[r:r+1].view(-1,28*28).float()
    Y_single_data = mnist_test.test_labels[r:r+1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model3(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9247000217437744
Label:  9
Prediction:  9


### 3) 50 -> 50

In [19]:
linear1 = torch.nn.Linear(784, 50, bias=True)
linear2 = torch.nn.Linear(50, 50, bias=True)
linear3 = torch.nn.Linear(50, 10, bias=True)

relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.3)

bn1 = torch.nn.BatchNorm1d(50)
bn2 = torch.nn.BatchNorm1d(50)

torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)

  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':
  del sys.path[0]


Parameter containing:
tensor([[-0.0859,  0.2939,  0.0792, -0.0595,  0.2942, -0.0709, -0.3069,  0.0708,
         -0.2937, -0.2490,  0.1329, -0.2324,  0.0138,  0.1527, -0.2444, -0.1057,
         -0.0701,  0.1633, -0.1426,  0.3095,  0.1796, -0.2392,  0.2409, -0.2607,
         -0.1302, -0.2881, -0.3119, -0.1508,  0.3113,  0.2817, -0.1143, -0.1484,
          0.0127, -0.0560, -0.0056, -0.0517, -0.1593,  0.2496,  0.2327,  0.1428,
         -0.1397,  0.2655, -0.1027, -0.2741, -0.0955,  0.0878,  0.0401, -0.0931,
         -0.1616, -0.2649],
        [-0.1367, -0.2729,  0.2768,  0.2472,  0.0869, -0.0482,  0.0973,  0.2266,
         -0.0029,  0.1113,  0.2314, -0.3105, -0.2541,  0.0431,  0.2128,  0.0305,
          0.2885, -0.1136,  0.1639, -0.0057, -0.2585, -0.2884,  0.1348,  0.3016,
         -0.3062, -0.2462, -0.2749, -0.0954, -0.2362,  0.2584,  0.2907, -0.0131,
          0.2138,  0.2711, -0.3119,  0.2027, -0.0644,  0.2886,  0.2370, -0.0249,
         -0.0667, -0.2123,  0.0812,  0.2988,  0.0283,  0.02

In [20]:
model4 = torch.nn.Sequential(linear1,  bn1, relu, dropout, 
                           linear2, bn2, relu,  dropout,
                            linear3)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model4.parameters(), lr=learning_rate)

In [21]:
train_total_batch = len(train_loader)
model4.train()

for epoch in range(training_epochs):
    avg_cost = 0 
    
    for X,Y in train_loader:
        
        X= X.view(-1,28*28) # reshaping  X data
        Y= Y
        
        optimizer.zero_grad()
        hypothesis = model4(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost +=cost/ train_total_batch
    
    print('Epoch:','%04d' % (epoch+1),'cost=', '{:.9f}'.format(avg_cost))
    
print('Learning finished.')

Epoch: 0001 cost= 0.601092041
Epoch: 0002 cost= 0.442086041
Epoch: 0003 cost= 0.405375123
Epoch: 0004 cost= 0.387025237
Epoch: 0005 cost= 0.379317105
Epoch: 0006 cost= 0.364590406
Epoch: 0007 cost= 0.364192456
Epoch: 0008 cost= 0.354141831
Epoch: 0009 cost= 0.345203608
Epoch: 0010 cost= 0.342546344
Epoch: 0011 cost= 0.338277072
Epoch: 0012 cost= 0.331260473
Epoch: 0013 cost= 0.333992779
Epoch: 0014 cost= 0.328149199
Epoch: 0015 cost= 0.325725675
Learning finished.


In [22]:
with torch.no_grad():
    model4.eval()
    
    X_test = mnist_test.test_data.view(-1,28*28).float()
    Y_test = mnist_test.test_labels
    
    prediction = model4(X_test)
    
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy4 = correct_prediction.float().mean()
    print('Accuracy:', accuracy4.item())
    
    # test set에서 random으로 data를 뽑아 label과 prediction 비교
    r = random.randint(0,len(mnist_test)-1)
    X_single_data = mnist_test.test_data[r:r+1].view(-1,28*28).float()
    Y_single_data = mnist_test.test_labels[r:r+1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model4(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9459999799728394
Label:  0
Prediction:  0


In [23]:
print(accuracy.item()) # 100,100
print(accuracy2.item()) # 200, 100
print(accuracy3.item()) # 300, 150
print(accuracy4.item()) # 50, 50

0.9373000264167786
0.9151999950408936
0.9247000217437744
0.9459999799728394
