## Q1-1) 아래에 주어진 주석을 기반으로 하여 코딩을 해주세요.

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [2]:
# 파라미터 설정 (learning rate, training epochs, batch_size)
learning_rate = 0.1
training_epochs = 15
batch_size = 100

In [3]:
#train과 test set으로 나누어 MNIST data 불러오기


# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)
##train=True/False로 train과 test set 나눠주기

In [4]:
#dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)

train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          drop_last=True)
#shuffle=True: 무작위 순서로 batch를 불러올지.
#drop_last: batch_size로 자를 때 맨 마지막에 남는 데이터를 사용할 것인가 버릴 것인가, True면 버리는 것!

In [5]:
#Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)

##각 layer 앞 뒤 node 숫자 일치시키기##
linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

p=0.3 #p=사용하지 않을 비율 설정
relu=torch.nn.ReLU()

##batch normalization layer의 node숫자와 일치시키기##
bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

dropout= torch.nn.Dropout(p)

In [6]:
#xavier initialization을 이용하여 각 layer의 weight 초기화
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.1205,  0.0780,  0.2083, -0.0511, -0.1907, -0.0021,  0.2305,  0.0628,
         -0.2080, -0.1073, -0.0640,  0.0439,  0.0447, -0.0028,  0.2298, -0.1694,
         -0.0139,  0.0272, -0.1436, -0.0879, -0.1024, -0.0190, -0.0448,  0.0658,
          0.0409,  0.2276, -0.0474,  0.0322, -0.1632, -0.2285,  0.1079,  0.1244,
         -0.0589,  0.1471,  0.1774,  0.1100, -0.1320,  0.0690,  0.1815, -0.0626,
          0.1877, -0.1733, -0.0384, -0.0366,  0.0745,  0.0683,  0.0731, -0.2073,
         -0.1977, -0.2081,  0.2183,  0.0862,  0.0402, -0.0257,  0.0546, -0.0773,
         -0.1108,  0.1662, -0.1015, -0.0791, -0.2149, -0.1971, -0.0928, -0.0671,
         -0.0284,  0.0535, -0.1230, -0.0234, -0.0683, -0.1999,  0.1092,  0.2007,
         -0.0566, -0.0696, -0.1398, -0.0425,  0.1945,  0.0866, -0.0272, -0.0198,
         -0.1480, -0.0042, -0.0540,  0.0091, -0.2271,  0.1585, -0.1174,  0.0069,
         -0.1820,  0.1152,  0.2179, -0.1312,  0.0164, -0.1986,  0.2067,  0.1842,
      

In [7]:
# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

model= torch.nn.Sequential(linear1, bn1, relu, dropout,
                          linear2, bn2, relu, dropout,
                          linear3)
#.to(device)? : CUDA로 GPU를 사용하지 않는 경우 끝에 .to(device)쓰지 않으면 됨

In [8]:
# Loss Function 정의하기 (CrossEntropy를 사용할 것!)
criterion = torch.nn.CrossEntropyLoss()

In [9]:
#optimizer 정의하기 (Adam optimizer를 사용할 것!)
optimizer= torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
#cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)

In [13]:
#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    avg_cost=0
    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드
    
    #reshape imput image into [batch_size by 784]
    for X, Y in train_loader:
        X=X.view(-1,28*28) #view를 사용하여 차원 변환
        Y=Y
        #train data set의 X,Y값 불러옴
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        #backpropagtion과 optimizer로 loss를 최적화함
        
        avg_cost += cost/train_total_batch
    
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')


Epoch: 0001 cost = 0.505973935
Epoch: 0002 cost = 0.374820977
Epoch: 0003 cost = 0.330094486
Epoch: 0004 cost = 0.305057883
Epoch: 0005 cost = 0.294240087
Epoch: 0006 cost = 0.284462243
Epoch: 0007 cost = 0.271324962
Epoch: 0008 cost = 0.275348186
Epoch: 0009 cost = 0.255264670
Epoch: 0010 cost = 0.255746990
Epoch: 0011 cost = 0.254456669
Epoch: 0012 cost = 0.255053639
Epoch: 0013 cost = 0.253777713
Epoch: 0014 cost = 0.242705539
Epoch: 0015 cost = 0.243390620
Learning finished


In [17]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

model.eval() #set the model to evaluation mode

with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28 * 28).float()
    Y_test = mnist_test.test_labels

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    bn_acc = correct_prediction.float().mean()
    print("Accuracy: ", bn_acc.item())
    
    
    
    ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드 
    r = random.randint(0, len(mnist_test)-1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 *28).float()
    Y_single_data = mnist_test.test_labels[r:r + 1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy:  0.9121000170707703
Label:  7
Prediction:  7


## Q1-2) 지금까지는 Layer의 수를 바꾸거나, Batch Normalization Layer를 추가하는 등 Layer에만 변화를 주며 모델의 성능을 향상 시켰습니다.

이번 문제에서는 위에서 만든 모델에서 있던 Layer 들의 Hidden node 수를 증가 또는 감소 (ex: 200, 300, 50...) 시켰을 때, train set에서의 cost와 test set에서 Accuracy가 기존 결과와 비교하였을 때 어떻게 달라졌는지 비교해주시면 됩니다.

### 1) 784 -> 100 -> 10

In [18]:
linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

p=0.3 #p=사용하지 않을 비율 설정
relu=torch.nn.ReLU()

bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

dropout= torch.nn.Dropout(p)

In [19]:
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.0141,  0.0575,  0.1267,  0.1323, -0.0520, -0.0983, -0.0328,  0.0146,
         -0.0935, -0.1003, -0.1462, -0.1260,  0.1620, -0.1910, -0.1845, -0.0143,
          0.0726, -0.0219,  0.0643, -0.0700,  0.0056,  0.1205,  0.1401, -0.1086,
          0.2312,  0.0255,  0.0363,  0.2021, -0.0030,  0.0865, -0.0153,  0.2048,
         -0.1891, -0.1599, -0.2078, -0.0810, -0.1818, -0.0546,  0.2137,  0.1072,
         -0.2031, -0.0940, -0.2078, -0.2330,  0.0370,  0.2050, -0.2225, -0.2101,
         -0.1164,  0.0313, -0.1084, -0.1578, -0.0453,  0.1554, -0.0503,  0.1085,
          0.1284, -0.0671,  0.1871, -0.0860,  0.1700, -0.1653, -0.2013,  0.1971,
          0.0529,  0.2136,  0.0149, -0.1469,  0.1622, -0.2162, -0.2207,  0.0086,
         -0.1243, -0.1667, -0.1443, -0.0995,  0.0138, -0.0087,  0.0174,  0.2297,
          0.1413,  0.0397,  0.1469,  0.1085, -0.1170, -0.2129, -0.0195, -0.0308,
         -0.0232, -0.0171, -0.0926, -0.1880, -0.0160, -0.0438,  0.1291, -0.0794,
      

In [20]:
model1= torch.nn.Sequential(linear1, bn1, relu, dropout,
                          linear2, bn2, relu, dropout,
                          linear3)

In [21]:
criterion = torch.nn.CrossEntropyLoss()
optimizer= torch.optim.Adam(model1.parameters(), lr=learning_rate)

In [22]:
train_total_batch = len(train_loader)

In [23]:
#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    avg_cost=0
    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드
    
    #reshape imput image into [batch_size by 784]
    for X, Y in train_loader:
        X=X.view(-1,28*28) #view를 사용하여 차원 변환
        Y=Y
        #train data set의 X,Y값 불러옴
        
        optimizer.zero_grad()
        hypothesis = model1(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        #backpropagtion과 optimizer로 loss를 최적화함
        
        avg_cost += cost/train_total_batch
    
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.509752035
Epoch: 0002 cost = 0.376984417
Epoch: 0003 cost = 0.325008869
Epoch: 0004 cost = 0.304919869
Epoch: 0005 cost = 0.303272873
Epoch: 0006 cost = 0.295696229
Epoch: 0007 cost = 0.277839959
Epoch: 0008 cost = 0.277612209
Epoch: 0009 cost = 0.260496169
Epoch: 0010 cost = 0.253079027
Epoch: 0011 cost = 0.257494658
Epoch: 0012 cost = 0.246476203
Epoch: 0013 cost = 0.244516090
Epoch: 0014 cost = 0.232028931
Epoch: 0015 cost = 0.234589934
Learning finished


In [36]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

model1.eval() #set the model to evaluation mode

with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28 * 28).float()
    Y_test = mnist_test.test_labels

    prediction = model1(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    bn_acc = correct_prediction.float().mean()
    print("Accuracy: ", bn_acc.item())
    
    
    
    ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드 
    r = random.randint(0, len(mnist_test)-1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 *28).float()
    Y_single_data = mnist_test.test_labels[r:r + 1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model1(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy:  0.9451000094413757
Label:  0
Prediction:  0


### 2) 784 -> 1000 -> 1500

In [24]:
linear4 = torch.nn.Linear(784, 1000, bias=True)
linear5 = torch.nn.Linear(1000, 1500, bias=True)
linear6 = torch.nn.Linear(1500, 10, bias=True)

p=0.3 #p=사용하지 않을 비율 설정
relu=torch.nn.ReLU()

bn3 = torch.nn.BatchNorm1d(1000)
bn4 = torch.nn.BatchNorm1d(1500)

dropout= torch.nn.Dropout(p)

In [25]:
torch.nn.init.xavier_uniform_(linear4.weight)
torch.nn.init.xavier_uniform_(linear5.weight)
torch.nn.init.xavier_uniform_(linear6.weight)

Parameter containing:
tensor([[ 0.0407, -0.0047, -0.0232,  ..., -0.0086, -0.0559,  0.0302],
        [-0.0364, -0.0028, -0.0491,  ...,  0.0306, -0.0187, -0.0070],
        [-0.0484, -0.0125,  0.0539,  ..., -0.0450,  0.0080, -0.0296],
        ...,
        [-0.0627, -0.0428,  0.0387,  ..., -0.0351, -0.0130, -0.0013],
        [-0.0512,  0.0069,  0.0154,  ...,  0.0064,  0.0580,  0.0128],
        [ 0.0284,  0.0277,  0.0373,  ...,  0.0465,  0.0297,  0.0496]],
       requires_grad=True)

In [26]:
model2= torch.nn.Sequential(linear4, bn3, relu, dropout,
                          linear5, bn4, relu, dropout,
                          linear6)

In [27]:
criterion = torch.nn.CrossEntropyLoss()
optimizer= torch.optim.Adam(model2.parameters(), lr=learning_rate)

In [28]:
train_total_batch = len(train_loader)

In [29]:
#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    avg_cost=0
    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드
    
    #reshape imput image into [batch_size by 784]
    for X, Y in train_loader:
        X=X.view(-1,28*28) #view를 사용하여 차원 변환
        Y=Y
        #train data set의 X,Y값 불러옴
        
        optimizer.zero_grad()
        hypothesis = model2(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        #backpropagtion과 optimizer로 loss를 최적화함
        
        avg_cost += cost/train_total_batch
    
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.595859170
Epoch: 0002 cost = 0.299860537
Epoch: 0003 cost = 0.246342838
Epoch: 0004 cost = 0.237264767
Epoch: 0005 cost = 0.214813247
Epoch: 0006 cost = 0.202371538
Epoch: 0007 cost = 0.195428580
Epoch: 0008 cost = 0.198250443
Epoch: 0009 cost = 0.179849774
Epoch: 0010 cost = 0.186480060
Epoch: 0011 cost = 0.177620798
Epoch: 0012 cost = 0.171020985
Epoch: 0013 cost = 0.158515245
Epoch: 0014 cost = 0.155551821
Epoch: 0015 cost = 0.156162798
Learning finished


In [37]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

model2.eval() #set the model to evaluation mode

with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28 * 28).float()
    Y_test = mnist_test.test_labels

    prediction = model2(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    bn_acc = correct_prediction.float().mean()
    print("Accuracy: ", bn_acc.item())
    
    
    
    ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드 
    r = random.randint(0, len(mnist_test)-1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 *28).float()
    Y_single_data = mnist_test.test_labels[r:r + 1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model2(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy:  0.6467999815940857
Label:  7
Prediction:  2


### 3) 784 ->100 -> 50

In [30]:
linear7 = torch.nn.Linear(784, 100, bias=True)
linear8 = torch.nn.Linear(100, 50, bias=True)
linear9 = torch.nn.Linear(50, 10, bias=True)

p=0.3 #p=사용하지 않을 비율 설정
relu=torch.nn.ReLU()

bn5 = torch.nn.BatchNorm1d(100)
bn6 = torch.nn.BatchNorm1d(50)

dropout= torch.nn.Dropout(p)

In [31]:
torch.nn.init.xavier_uniform_(linear7.weight)
torch.nn.init.xavier_uniform_(linear8.weight)
torch.nn.init.xavier_uniform_(linear9.weight)

Parameter containing:
tensor([[-0.2558,  0.0322, -0.0335,  0.2357, -0.0295,  0.1702, -0.2362, -0.2294,
          0.2278, -0.1203,  0.0405, -0.0365,  0.0576,  0.1175, -0.1440,  0.1539,
         -0.2912,  0.1186, -0.2204, -0.1538,  0.2017, -0.0124, -0.2390,  0.1142,
          0.2699, -0.1331, -0.1918, -0.2755, -0.2551, -0.1655, -0.0820,  0.1560,
          0.0983,  0.2108, -0.1410, -0.1500, -0.1539, -0.3015, -0.1923,  0.0974,
          0.0144, -0.1713, -0.2626, -0.3053,  0.0906, -0.0865,  0.0568,  0.1816,
         -0.3102, -0.0425],
        [-0.0609, -0.2609, -0.1620,  0.2394, -0.0776, -0.1102, -0.2819, -0.2259,
         -0.1561,  0.2034,  0.3002, -0.2456,  0.2383,  0.0705,  0.3127, -0.0086,
         -0.0297,  0.2253, -0.0819, -0.1649,  0.2037, -0.0192,  0.2881,  0.0704,
         -0.1196, -0.2933,  0.2009, -0.1122, -0.3105, -0.2292, -0.1595, -0.1218,
         -0.1431, -0.0089, -0.2752, -0.1150,  0.1539, -0.1333,  0.2920, -0.0164,
          0.0296,  0.2287,  0.0672, -0.2799, -0.2850, -0.29

In [32]:
model3= torch.nn.Sequential(linear7, bn5, relu, dropout,
                          linear8, bn6, relu, dropout,
                          linear9)

In [33]:
criterion = torch.nn.CrossEntropyLoss()
optimizer= torch.optim.Adam(model3.parameters(), lr=learning_rate)

In [34]:
train_total_batch = len(train_loader)

In [35]:
#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    avg_cost=0
    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드
    
    #reshape imput image into [batch_size by 784]
    for X, Y in train_loader:
        X=X.view(-1,28*28) #view를 사용하여 차원 변환
        Y=Y
        #train data set의 X,Y값 불러옴
        
        optimizer.zero_grad()
        hypothesis = model3(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        #backpropagtion과 optimizer로 loss를 최적화함
        
        avg_cost += cost/train_total_batch
    
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.501817763
Epoch: 0002 cost = 0.364305645
Epoch: 0003 cost = 0.326035678
Epoch: 0004 cost = 0.309241623
Epoch: 0005 cost = 0.295210183
Epoch: 0006 cost = 0.286034703
Epoch: 0007 cost = 0.276048630
Epoch: 0008 cost = 0.273326099
Epoch: 0009 cost = 0.264486879
Epoch: 0010 cost = 0.260078013
Epoch: 0011 cost = 0.251592517
Epoch: 0012 cost = 0.250828534
Epoch: 0013 cost = 0.243016526
Epoch: 0014 cost = 0.240743205
Epoch: 0015 cost = 0.242477134
Learning finished


In [38]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

model3.eval() #set the model to evaluation mode

with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28 * 28).float()
    Y_test = mnist_test.test_labels

    prediction = model3(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    bn_acc = correct_prediction.float().mean()
    print("Accuracy: ", bn_acc.item())
    
    
    
    ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드 
    r = random.randint(0, len(mnist_test)-1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 *28).float()
    Y_single_data = mnist_test.test_labels[r:r + 1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model3(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy:  0.944599986076355
Label:  4
Prediction:  4
