# Week2: HW 
### 손지우

# Question 1. 주석 기반 코딩

[ReLU + BatchNorm](https://github.com/deeplearningzerotoall/PyTorch/blob/master/lab-09_6_mnist_batchnorm.ipynb)  
[ReLU + Dropout](https://github.com/deeplearningzerotoall/PyTorch/blob/master/lab-09_5_mnist_nn_dropout.ipynb)

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [2]:
# 파라미터 설정 (learning rate, training epochs, batch_size)
learning_rate = 0.1
training_epochs = 15
batch_size = 100

In [3]:
# train과 test set으로 나누어 MNIST data 불러오기

mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [4]:
# dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)

train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          drop_last=True)

In [5]:
# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)

linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

relu = torch.nn.ReLU()

dropout = torch.nn.Dropout(p=0.3)

bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

In [6]:
# xavier initialization을 이용하여 각 layer의 weight 초기화 
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[-1.0629e-02,  8.4158e-02, -1.3903e-01,  3.5614e-02, -1.3131e-01,
         -2.0189e-01,  1.4222e-01,  2.3109e-01, -1.1594e-02, -9.8138e-02,
          1.4809e-01, -7.9048e-03,  1.1158e-02,  1.6674e-01, -1.9916e-01,
         -1.9218e-01,  8.4182e-02,  8.2943e-02,  1.5958e-02, -3.1829e-02,
          1.4884e-01, -4.6380e-03,  2.0886e-01, -1.8597e-01, -2.6672e-02,
          5.8527e-02,  1.4898e-01,  1.0450e-01,  1.8793e-01,  6.6393e-03,
          2.1926e-02,  2.1737e-01,  9.1966e-02,  1.1258e-01, -2.1619e-01,
          2.5430e-02,  1.8140e-01, -2.0899e-01, -1.0319e-01,  3.9974e-02,
          2.3275e-01,  3.7698e-02, -1.3693e-01, -4.9869e-02,  2.3313e-01,
         -1.0087e-01, -2.2375e-01,  3.7332e-02,  2.2167e-01, -2.0185e-01,
         -7.2648e-02, -1.8003e-01,  2.3124e-01, -7.1334e-02,  1.6326e-01,
          6.9623e-02,  1.9456e-01,  1.3823e-01, -1.6573e-01,  1.0222e-02,
          2.2103e-01, -6.4158e-02, -7.9413e-02,  2.1403e-01, -1.3068e-01,
          9.1833

In [7]:
# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

model = torch.nn.Sequential(linear1, bn1, relu, dropout, 
                            linear2, bn2, relu, dropout, 
                            linear3)

In [8]:
# Loss Function 정의하기 (CrossEntropy를 사용할 것!)
criterion = torch.nn.CrossEntropyLoss()

In [9]:
# optimizer 정의하기 (Adam optimizer를 사용할 것!)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
# cost 계산을 위한 변수 설정 
train_total_batch = len(train_loader)

In [11]:
# Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    model.train()
    avg_cost = 0  # cost 초기값 설정
    
    #train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드 
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28)
        Y = Y
        
        optimizer.zero_grad()
        hypothesis = model(X) 
        cost = criterion(hypothesis, Y) 
        cost.backward()
        optimizer.step() 
        
        avg_cost += cost / train_total_batch
        
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    
print('Learning finished')

Epoch: 0001 cost = 0.492597938
Epoch: 0002 cost = 0.369442731
Epoch: 0003 cost = 0.327968687
Epoch: 0004 cost = 0.309517503
Epoch: 0005 cost = 0.295981616
Epoch: 0006 cost = 0.286550671
Epoch: 0007 cost = 0.277565539
Epoch: 0008 cost = 0.265243977
Epoch: 0009 cost = 0.273790479
Epoch: 0010 cost = 0.253159583
Epoch: 0011 cost = 0.261577159
Epoch: 0012 cost = 0.248263180
Epoch: 0013 cost = 0.250646114
Epoch: 0014 cost = 0.243508860
Epoch: 0015 cost = 0.242987826
Learning finished


In [12]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것 

with torch.no_grad():
    model.eval() #evaluation mode (검증하는 단계이므로 dropout 제외)
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float()
    Y_test = mnist_test.test_labels

    prediction = model(X_test)
    
    correct_prediction = torch.argmax(prediction, 1) == Y_test # prediction 값과 실제 test data 값이 같은가 (correct = 1)
    accuracy = correct_prediction.float().mean() # 0 or 1 값들의 평균 >>> 정확도
    print('Accuracy:', accuracy.item())
    
    ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드 
    r = random.randint(0, len(mnist_test) - 1) # randint: 범위 내 임의의 정수(난수) 추출 
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float()
    Y_single_data = mnist_test.test_labels[r:r + 1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())



Accuracy: 0.9381999969482422
Label:  0
Prediction:  0


---

# 2. Hidden Node 조작 시 차이점

### 1) 전체적으로 node 수를 늘렸을 경우
#### 784 > 1000 > 1000 > 10

In [13]:
# node 수 변경

linear4 = torch.nn.Linear(784, 1000, bias=True)
linear5 = torch.nn.Linear(1000, 1000, bias=True)
linear6 = torch.nn.Linear(1000, 10, bias=True)

bn3 = torch.nn.BatchNorm1d(1000)
bn4 = torch.nn.BatchNorm1d(1000)

In [14]:
torch.nn.init.xavier_uniform_(linear4.weight)
torch.nn.init.xavier_uniform_(linear5.weight)
torch.nn.init.xavier_uniform_(linear6.weight)

Parameter containing:
tensor([[-0.0173, -0.0509,  0.0105,  ...,  0.0746, -0.0470,  0.0411],
        [ 0.0601, -0.0477,  0.0480,  ...,  0.0228, -0.0266,  0.0731],
        [ 0.0743,  0.0598,  0.0692,  ...,  0.0770,  0.0665, -0.0248],
        ...,
        [-0.0480, -0.0630, -0.0059,  ..., -0.0096,  0.0758, -0.0673],
        [-0.0201, -0.0186, -0.0167,  ...,  0.0221, -0.0032,  0.0057],
        [ 0.0279,  0.0049, -0.0100,  ...,  0.0506,  0.0142,  0.0616]],
       requires_grad=True)

In [15]:
# 새 모델 설정
model2 = torch.nn.Sequential(linear4, bn3, relu, dropout, 
                            linear5, bn4, relu, dropout, 
                            linear6)

In [16]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=learning_rate)

In [17]:
# Training epoch 
for epoch in range(training_epochs):
    model2.train()
    avg_cost = 0  # cost 초기값 설정
    
    #train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드 
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28)
        Y = Y
        
        optimizer.zero_grad()
        hypothesis = model2(X) 
        cost = criterion(hypothesis, Y) 
        cost.backward()
        optimizer.step() 
        
        avg_cost += cost / train_total_batch
        
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    
print('Learning finished')

Epoch: 0001 cost = 0.518427730
Epoch: 0002 cost = 0.305316567
Epoch: 0003 cost = 0.253975689
Epoch: 0004 cost = 0.236607134
Epoch: 0005 cost = 0.217563853
Epoch: 0006 cost = 0.214124203
Epoch: 0007 cost = 0.200371519
Epoch: 0008 cost = 0.191649124
Epoch: 0009 cost = 0.189661220
Epoch: 0010 cost = 0.177701846
Epoch: 0011 cost = 0.178726092
Epoch: 0012 cost = 0.168122888
Epoch: 0013 cost = 0.166439384
Epoch: 0014 cost = 0.159359455
Epoch: 0015 cost = 0.161055058
Learning finished


In [22]:
with torch.no_grad():
    model2.eval() #evaluation mode (검증하는 단계이므로 dropout 제외)
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float()
    Y_test = mnist_test.test_labels

    prediction = model2(X_test)
    
    correct_prediction = torch.argmax(prediction, 1) == Y_test 
    accuracy = correct_prediction.float().mean() 
    print('Accuracy:', accuracy.item())
    
    r = random.randint(0, len(mnist_test) - 1) 
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float()
    Y_single_data = mnist_test.test_labels[r:r + 1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model2(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.7526999711990356
Label:  6
Prediction:  6


### 2) 전체적으로 node 수를 줄였을 경우
#### 784 > 50 > 50 > 10

In [30]:
# node 수 변경

linear7 = torch.nn.Linear(784, 50, bias=True)
linear8 = torch.nn.Linear(50, 50, bias=True)
linear9 = torch.nn.Linear(50, 10, bias=True)

bn5 = torch.nn.BatchNorm1d(50)
bn6 = torch.nn.BatchNorm1d(50)

In [31]:
torch.nn.init.xavier_uniform_(linear7.weight)
torch.nn.init.xavier_uniform_(linear8.weight)
torch.nn.init.xavier_uniform_(linear9.weight)

Parameter containing:
tensor([[ 0.2768, -0.0066,  0.2594,  0.0037,  0.0748, -0.0950,  0.2350, -0.0216,
         -0.1150,  0.1747,  0.2733,  0.2373, -0.2357,  0.0113,  0.0312, -0.0127,
          0.1781, -0.0041,  0.1235, -0.1992, -0.1354,  0.2730, -0.1826, -0.1496,
          0.1826, -0.1633, -0.1312, -0.1416,  0.2052,  0.1681, -0.0636, -0.0413,
         -0.1979,  0.1553, -0.1949, -0.0291,  0.2636,  0.2663,  0.1208,  0.1718,
         -0.2483, -0.2745, -0.1082, -0.0256,  0.1525, -0.0417, -0.1570,  0.2243,
          0.0455,  0.0081],
        [ 0.0974, -0.2759,  0.3013,  0.2777,  0.2216, -0.2469,  0.2586,  0.1442,
          0.0942,  0.2417,  0.2862, -0.0251, -0.1470,  0.0844, -0.2684, -0.2361,
          0.1356, -0.0309,  0.2563, -0.1077, -0.1500,  0.1550,  0.0836, -0.1327,
          0.1939, -0.2548,  0.0937,  0.0287,  0.1231, -0.0540,  0.1403,  0.0395,
          0.3039, -0.1964,  0.2183,  0.0231, -0.1095, -0.0296, -0.0055, -0.1248,
         -0.1033, -0.1588, -0.2953,  0.0908,  0.1779, -0.30

In [32]:
# 새 모델 설정
model3 = torch.nn.Sequential(linear7, bn5, relu, dropout, 
                            linear8, bn6, relu, dropout, 
                            linear9)

In [33]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model3.parameters(), lr=learning_rate)

In [34]:
model3.train()

for epoch in range(training_epochs):
    avg_cost = 0

    for X,Y in train_loader:
        X = X.view(-1, 28*28)
        Y = Y
        
        optimizer.zero_grad()
        hypothesis = model3(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost += cost/ train_total_batch
    
    print('Epoch:','%04d' % (epoch+1),'cost=', '{:.9f}'.format(avg_cost))

print('Learning finished.')

Epoch: 0001 cost= 0.580256581
Epoch: 0002 cost= 0.435751230
Epoch: 0003 cost= 0.401764810
Epoch: 0004 cost= 0.386084884
Epoch: 0005 cost= 0.374100596
Epoch: 0006 cost= 0.363532782
Epoch: 0007 cost= 0.342887193
Epoch: 0008 cost= 0.346728772
Epoch: 0009 cost= 0.331339180
Epoch: 0010 cost= 0.355457664
Epoch: 0011 cost= 0.348862529
Epoch: 0012 cost= 0.331119746
Epoch: 0013 cost= 0.329321146
Epoch: 0014 cost= 0.324384987
Epoch: 0015 cost= 0.321096927
Learning finished.


In [36]:
with torch.no_grad():
    model3.eval() #evaluation mode (검증하는 단계이므로 dropout 제외)
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float()
    Y_test = mnist_test.test_labels

    prediction = model3(X_test)
    
    correct_prediction = torch.argmax(prediction, 1) == Y_test 
    accuracy = correct_prediction.float().mean() 
    print('Accuracy:', accuracy.item())
    
    r = random.randint(0, len(mnist_test) - 1) 
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float()
    Y_single_data = mnist_test.test_labels[r:r + 1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model3(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9430000185966492
Label:  2
Prediction:  2
