# Week2: HW 
### 김민회

# 1. 주석 기반 코딩

[ReLU + BatchNorm](https://github.com/deeplearningzerotoall/PyTorch/blob/master/lab-09_6_mnist_batchnorm.ipynb)  
[ReLU + Dropout](https://github.com/deeplearningzerotoall/PyTorch/blob/master/lab-09_5_mnist_nn_dropout.ipynb)

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# 파라미터 설정 (learning rate, training epochs, batch_size)

learning_rate = 0.1
training_epochs = 15
batch_size = 100

In [4]:
# train과 test set으로 나누어 MNIST data 불러오기

mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [5]:
# dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)

train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          drop_last=True)

In [6]:
# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)

linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

relu = torch.nn.ReLU()

dropout = torch.nn.Dropout(p=0.3)

bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

In [7]:
# xavier initialization을 이용하여 각 layer의 weight 초기화 

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.0982, -0.0550,  0.2209,  0.2286,  0.0469, -0.0362,  0.1229,  0.0523,
         -0.0064,  0.0592,  0.1908,  0.0773,  0.1596, -0.0948,  0.1055, -0.0632,
         -0.0124, -0.0390, -0.0823,  0.0599,  0.0940,  0.0718, -0.1384,  0.1566,
         -0.0402,  0.1256,  0.0708,  0.0563,  0.2251,  0.0763, -0.0974,  0.1039,
          0.1944, -0.0152,  0.1894, -0.0522, -0.2156,  0.0644, -0.1813, -0.0065,
         -0.0703, -0.2119,  0.1261, -0.0947,  0.0989, -0.1729,  0.0034, -0.1530,
          0.1973, -0.1719, -0.2085,  0.0689,  0.0940,  0.0318, -0.2329, -0.0049,
         -0.0749, -0.0542, -0.1351, -0.1640,  0.1477,  0.0976, -0.1833,  0.0049,
         -0.0185, -0.1022, -0.1370, -0.0821,  0.1838,  0.2245, -0.2113, -0.0492,
         -0.0897, -0.0105,  0.0377, -0.0337, -0.2274,  0.1602, -0.0410,  0.1693,
         -0.0423,  0.0036,  0.0811, -0.0132,  0.0051,  0.2129, -0.1136,  0.0369,
         -0.1363,  0.0058, -0.0194,  0.1273,  0.2252, -0.1302,  0.2307,  0.1700,
      

In [8]:
# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

model = torch.nn.Sequential(linear1, bn1, relu, dropout, 
                            linear2, bn2, relu, dropout, 
                            linear3).to(device)

In [9]:
# Loss Function 정의하기 (CrossEntropy를 사용할 것!)

criterion = torch.nn.CrossEntropyLoss().to(device)

In [10]:
# optimizer 정의하기 (Adam optimizer를 사용할 것!)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
# cost 계산을 위한 변수 설정 

train_total_batch = len(train_loader)

In [14]:
# Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 

for epoch in range(training_epochs):
    model.train()
    avg_cost = 0  # cost 초기값 설정
    
    #train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드 
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X) 
        cost = criterion(hypothesis, Y) 
        cost.backward()
        optimizer.step() 
        
        avg_cost += cost / train_total_batch
        
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    
print('Learning finished')

Epoch: 0001 cost = 0.502910972
Epoch: 0002 cost = 0.372203529
Epoch: 0003 cost = 0.340591162
Epoch: 0004 cost = 0.311733752
Epoch: 0005 cost = 0.301339507
Epoch: 0006 cost = 0.284228802
Epoch: 0007 cost = 0.281810135
Epoch: 0008 cost = 0.273160994
Epoch: 0009 cost = 0.266850591
Epoch: 0010 cost = 0.266489565
Epoch: 0011 cost = 0.254399896
Epoch: 0012 cost = 0.243856922
Epoch: 0013 cost = 0.247039258
Epoch: 0014 cost = 0.254141092
Epoch: 0015 cost = 0.239667773
Learning finished


In [17]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것 

with torch.no_grad():
    model.eval() #evaluation mode (검증하는 단계이므로 dropout 제외)
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    
    correct_prediction = torch.argmax(prediction, 1) == Y_test # prediction 값과 실제 test data 값이 같은가 (correct = 1)
    accuracy = correct_prediction.float().mean() # 0 or 1 값들의 평균 >>> 정확도
    print('Accuracy:', accuracy.item())
    
    ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드 
    r = random.randint(0, len(mnist_test) - 1) # randint: 범위 내 임의의 정수(난수) 추출 
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    
    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9103999733924866
Label:  6
Prediction:  6


---

# 2. Hidden Node 조작 시 차이점

### 1) 전체적으로 node 수를 늘렸을 경우
#### 784 > 300 > 200 > 10

In [12]:
# node 수 변경

linear4 = torch.nn.Linear(784, 300, bias=True)
linear5 = torch.nn.Linear(300, 200, bias=True)
linear6 = torch.nn.Linear(200, 10, bias=True)

bn3 = torch.nn.BatchNorm1d(300)
bn4 = torch.nn.BatchNorm1d(200)

In [13]:
torch.nn.init.xavier_uniform_(linear4.weight)
torch.nn.init.xavier_uniform_(linear5.weight)
torch.nn.init.xavier_uniform_(linear6.weight)

Parameter containing:
tensor([[ 0.0624, -0.1323,  0.0979,  ...,  0.0795,  0.1644,  0.0238],
        [-0.1012, -0.1020, -0.0767,  ..., -0.0967,  0.0805, -0.1462],
        [-0.0476,  0.1664,  0.0205,  ..., -0.1234,  0.0241, -0.0244],
        ...,
        [-0.0374,  0.1590,  0.0063,  ...,  0.1373, -0.1311, -0.0103],
        [-0.1139, -0.1332,  0.1153,  ..., -0.1413, -0.0044, -0.0073],
        [-0.0411,  0.1690, -0.1019,  ..., -0.0657, -0.0127,  0.1021]],
       requires_grad=True)

In [14]:
# 새 모델 설정

model2 = torch.nn.Sequential(linear4, bn3, relu, dropout, 
                            linear5, bn4, relu, dropout, 
                            linear6).to(device)

In [15]:
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model2.parameters(), lr=learning_rate)

In [16]:
# Training epoch 

for epoch in range(training_epochs):
    model2.train()
    avg_cost = 0  # cost 초기값 설정
    
    #train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드 
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model2(X) 
        cost = criterion(hypothesis, Y) 
        cost.backward()
        optimizer.step() 
        
        avg_cost += cost / train_total_batch
        
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    
print('Learning finished')

Epoch: 0001 cost = 0.447802544
Epoch: 0002 cost = 0.316706449
Epoch: 0003 cost = 0.267640710
Epoch: 0004 cost = 0.249820262
Epoch: 0005 cost = 0.233272001
Epoch: 0006 cost = 0.219855964
Epoch: 0007 cost = 0.210375264
Epoch: 0008 cost = 0.227158725
Epoch: 0009 cost = 0.199710339
Epoch: 0010 cost = 0.196030900
Epoch: 0011 cost = 0.194281578
Epoch: 0012 cost = 0.192709640
Epoch: 0013 cost = 0.182906941
Epoch: 0014 cost = 0.173025414
Epoch: 0015 cost = 0.176281631
Learning finished


In [18]:
with torch.no_grad():
    model2.eval() #evaluation mode (검증하는 단계이므로 dropout 제외)
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model2(X_test)
    
    correct_prediction = torch.argmax(prediction, 1) == Y_test 
    accuracy = correct_prediction.float().mean() 
    print('Accuracy:', accuracy.item())
    
    r = random.randint(0, len(mnist_test) - 1) 
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    
    print('Label: ', Y_single_data.item())
    single_prediction = model2(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9172000288963318
Label:  0
Prediction:  0


### 2) 전체적으로 node 수를 줄였을 경우
#### 784 > 80 > 50 > 10

In [19]:
# node 수 변경

linear7 = torch.nn.Linear(784, 80, bias=True)
linear8 = torch.nn.Linear(80, 50, bias=True)
linear9 = torch.nn.Linear(50, 10, bias=True)

bn5 = torch.nn.BatchNorm1d(80)
bn6 = torch.nn.BatchNorm1d(50)

In [20]:
torch.nn.init.xavier_uniform_(linear7.weight)
torch.nn.init.xavier_uniform_(linear8.weight)
torch.nn.init.xavier_uniform_(linear9.weight)

Parameter containing:
tensor([[ 5.8076e-02,  5.8679e-02, -3.1408e-01, -2.2941e-01, -2.7232e-02,
         -2.0816e-01, -7.6057e-02,  2.7781e-01, -2.1255e-01, -1.5896e-01,
         -5.7349e-02,  3.0359e-01, -1.6189e-01, -1.5784e-01,  2.0372e-01,
          2.1505e-01,  1.0756e-01,  4.2314e-02,  1.0020e-01,  1.8523e-01,
         -2.1148e-01,  1.0073e-01,  1.7656e-01,  2.1576e-02, -1.7762e-02,
         -6.4110e-02, -1.3598e-01, -1.0970e-01, -1.0404e-01, -1.4859e-01,
          4.0875e-02,  2.0404e-01,  3.0612e-01,  3.0520e-01, -2.9589e-01,
         -1.9178e-01,  6.7925e-02, -1.0832e-01, -9.1400e-02,  1.2293e-01,
          3.0871e-02, -1.3555e-01, -1.7181e-01,  2.4449e-01, -2.3757e-01,
          1.6370e-01, -2.8055e-01,  2.7008e-01,  1.9871e-01, -2.0131e-04],
        [ 2.3480e-03, -1.1830e-01,  2.6135e-01,  5.7918e-03, -2.3352e-01,
          4.7584e-03,  2.6468e-01, -1.4411e-01,  2.6322e-01,  1.0663e-01,
         -1.7985e-01, -3.9026e-02,  1.8420e-01, -1.8220e-01,  1.4159e-01,
         -1.271

In [21]:
# 새 모델 설정
model3 = torch.nn.Sequential(linear7, bn5, relu, dropout, 
                            linear8, bn6, relu, dropout, 
                            linear9).to(device)

In [22]:
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model3.parameters(), lr=learning_rate)

In [23]:
model3.train()

for epoch in range(training_epochs):
    avg_cost = 0

    for X,Y in train_loader:
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model3(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost += cost/ train_total_batch
    
    print('Epoch:','%04d' % (epoch+1),'cost=', '{:.9f}'.format(avg_cost))

print('Learning finished.')

Epoch: 0001 cost= 0.539309978
Epoch: 0002 cost= 0.403191447
Epoch: 0003 cost= 0.356301278
Epoch: 0004 cost= 0.332073182
Epoch: 0005 cost= 0.315065086
Epoch: 0006 cost= 0.307772726
Epoch: 0007 cost= 0.303723037
Epoch: 0008 cost= 0.294286698
Epoch: 0009 cost= 0.296302527
Epoch: 0010 cost= 0.295208335
Epoch: 0011 cost= 0.286631435
Epoch: 0012 cost= 0.275844872
Epoch: 0013 cost= 0.277649581
Epoch: 0014 cost= 0.271076798
Epoch: 0015 cost= 0.265455544
Learning finished.


In [24]:
with torch.no_grad():
    model3.eval() #evaluation mode (검증하는 단계이므로 dropout 제외)
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model3(X_test)
    
    correct_prediction = torch.argmax(prediction, 1) == Y_test 
    accuracy = correct_prediction.float().mean() 
    print('Accuracy:', accuracy.item())
    
    r = random.randint(0, len(mnist_test) - 1) 
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    
    print('Label: ', Y_single_data.item())
    single_prediction = model3(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9495000243186951
Label:  6
Prediction:  6
