Q 1-1)

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# 파라미터 설정
learning_rate = 0.1
training_epochs = 15
batch_size = 100

In [4]:
# train과 test set 나누어 MNIST data 불러오기
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True, transform=transforms.ToTensor(),
                          download=False)
mnist_test = dsets.MNIST(root='MNIST_data/',
                          train=False, transform=transforms.ToTensor(),
                          download=False)

In [5]:
# dataset loader에 train, test 할당하기
train_loader = torch.utils.data.DataLoader(dataset=mnist_train,batch_size=batch_size,
                                         shuffle=True, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,batch_size=batch_size,
                                         shuffle=False, drop_last=True)

In [6]:
# layer 쌓기
linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

relu = torch.nn.ReLU()

bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

dropout = torch.nn.Dropout(p=0.3)

In [7]:
# xavier initialization을 이용하여 각 layer의 weight 초기화
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[-0.1479,  0.0584, -0.0677, -0.1924,  0.2141, -0.1536,  0.0003,  0.1571,
         -0.1759, -0.1425, -0.1968,  0.0435, -0.0940,  0.1455, -0.0497,  0.1693,
         -0.2082, -0.0038,  0.0512,  0.0077,  0.0101,  0.0898,  0.1196, -0.0272,
          0.1343,  0.0084, -0.0544,  0.1222,  0.1573, -0.2330,  0.2058, -0.2101,
          0.0464, -0.2219,  0.1613,  0.0013, -0.0433, -0.2094,  0.0847, -0.1192,
          0.1494,  0.1668, -0.1581,  0.0658, -0.1719, -0.1338,  0.1115, -0.0143,
          0.0178, -0.0565,  0.0130,  0.1343, -0.0008, -0.0731,  0.2197, -0.1627,
          0.1970,  0.1480,  0.1809, -0.2208,  0.1775,  0.1300, -0.0036,  0.1076,
         -0.0837, -0.0259,  0.0949, -0.1149, -0.1612, -0.0453,  0.0822, -0.0819,
         -0.0386,  0.1041,  0.0669,  0.2001, -0.0025, -0.1266,  0.0711, -0.2116,
         -0.1043, -0.1606,  0.1253,  0.0368,  0.0990,  0.0702, -0.2066, -0.1482,
         -0.0578,  0.1987, -0.1100,  0.0316,  0.1025, -0.1254,  0.1480,  0.1081,
      

In [8]:
# torch.nn.Sequential을 이용하여 model 정의하기
model = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2, relu, dropout,
                            linear3).to(device)

In [11]:
# Loss Function 정의하기
# optimizer 정의하기

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [12]:
# cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)

In [13]:
#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것)
model.train()

for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.506110966
Epoch: 0002 cost = 0.385601014
Epoch: 0003 cost = 0.337459683
Epoch: 0004 cost = 0.309609890
Epoch: 0005 cost = 0.298802972
Epoch: 0006 cost = 0.289947689
Epoch: 0007 cost = 0.267638892
Epoch: 0008 cost = 0.265033185
Epoch: 0009 cost = 0.258958697
Epoch: 0010 cost = 0.260056317
Epoch: 0011 cost = 0.253153592
Epoch: 0012 cost = 0.249791428
Epoch: 0013 cost = 0.241146311
Epoch: 0014 cost = 0.249875352
Epoch: 0015 cost = 0.238394678
Learning finished


In [15]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것
accuracy = 0
with torch.no_grad():
    model.eval()
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())
  
  ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 *28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9283000230789185
Label:  5
Prediction:  5


Q 1-2) (1) hidden node 수 : 200 => 150

In [16]:
linear4 = torch.nn.Linear(784, 200, bias=True)
linear5 = torch.nn.Linear(200, 150, bias=True)
linear6 = torch.nn.Linear(150, 10, bias=True)

relu = torch.nn.ReLU()
bn3 = torch.nn.BatchNorm1d(200)
bn4 = torch.nn.BatchNorm1d(150)
dropout = torch.nn.Dropout(p=0.3)

In [17]:
torch.nn.init.xavier_uniform_(linear4.weight)
torch.nn.init.xavier_uniform_(linear5.weight)
torch.nn.init.xavier_uniform_(linear6.weight)

Parameter containing:
tensor([[-1.9068e-01, -6.2199e-02, -1.4252e-01,  ...,  4.5238e-02,
         -1.2357e-01, -8.0516e-02],
        [ 6.5690e-02,  2.4245e-02,  7.0571e-02,  ...,  1.9027e-03,
         -9.8914e-05, -9.1301e-02],
        [-5.2039e-02, -1.6119e-01,  1.4351e-01,  ..., -1.4897e-01,
          3.8543e-02,  5.3871e-04],
        ...,
        [-1.0427e-01, -1.7911e-01, -1.7444e-01,  ..., -1.5858e-01,
          1.8313e-01,  6.3561e-02],
        [-1.3753e-01, -5.3235e-02,  2.6823e-02,  ...,  8.2039e-02,
          5.4786e-02,  1.9252e-01],
        [ 9.8617e-02,  3.9332e-03, -1.0863e-01,  ...,  1.2813e-03,
         -4.3141e-02,  1.2183e-01]], requires_grad=True)

In [18]:
model2 = torch.nn.Sequential(linear4, bn3, relu, dropout,
                             linear5, bn4, relu, dropout,
                             linear6).to(device)

In [19]:
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model2.parameters(), lr=learning_rate)

In [20]:
train_total_batch = len(train_loader)

In [21]:
#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것)
model2.train()

for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model2(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.468592763
Epoch: 0002 cost = 0.329932898
Epoch: 0003 cost = 0.285081834
Epoch: 0004 cost = 0.279221296
Epoch: 0005 cost = 0.254149199
Epoch: 0006 cost = 0.257239342
Epoch: 0007 cost = 0.246597201
Epoch: 0008 cost = 0.218884617
Epoch: 0009 cost = 0.229808465
Epoch: 0010 cost = 0.220741987
Epoch: 0011 cost = 0.214828297
Epoch: 0012 cost = 0.213137433
Epoch: 0013 cost = 0.192335472
Epoch: 0014 cost = 0.199395984
Epoch: 0015 cost = 0.198151141
Learning finished


In [22]:
accuracy = 0
with torch.no_grad():
    model2.eval()
    
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model2(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())
  
  ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드
    r = random.randint(0, len(mnist_test) - 1)
    
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 *28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    
    print('Label: ', Y_single_data.item())
    single_prediction = model2(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.927299976348877
Label:  1
Prediction:  1


Q 1-2) (2) 

In [23]:
linear7 = torch.nn.Linear(784, 80, bias=True)
linear8 = torch.nn.Linear(80, 40, bias=True)
linear9 = torch.nn.Linear(40, 10, bias=True)

relu = torch.nn.ReLU()
bn5 = torch.nn.BatchNorm1d(80)
bn6 = torch.nn.BatchNorm1d(40)
dropout = torch.nn.Dropout(p=0.3)

In [24]:
torch.nn.init.xavier_uniform_(linear7.weight)
torch.nn.init.xavier_uniform_(linear8.weight)
torch.nn.init.xavier_uniform_(linear9.weight)

Parameter containing:
tensor([[-1.1439e-01, -3.4353e-01,  4.6309e-02, -3.0787e-01, -2.8631e-01,
          3.1436e-01, -1.2091e-02,  2.4332e-01,  2.0685e-01,  2.5263e-01,
         -1.2524e-01,  2.2020e-01,  1.3973e-01, -8.2670e-02, -2.7067e-01,
          2.1708e-01, -2.3986e-01, -1.0857e-01, -3.6727e-02,  1.1108e-01,
          2.7367e-01,  3.1921e-02, -6.7093e-02,  1.9017e-01,  9.5596e-02,
         -2.9438e-01,  9.2366e-02,  1.4966e-01,  1.2890e-01,  1.4049e-01,
          2.1794e-01, -2.9305e-04, -1.5955e-02,  3.0355e-01,  3.3802e-01,
         -2.9909e-01, -2.7273e-01,  9.2785e-02,  2.5354e-01, -2.0231e-01],
        [ 1.8191e-01,  1.1805e-01,  1.6448e-01,  2.8627e-01, -5.5894e-02,
         -7.9493e-02, -3.4101e-01, -1.8534e-01, -1.2129e-02,  8.2012e-02,
          1.4794e-02,  1.6948e-02,  1.3592e-01, -1.3372e-01, -1.7592e-02,
         -3.3916e-01,  2.3865e-01,  3.1275e-01, -2.9194e-01, -1.1431e-01,
          3.1404e-01,  2.0820e-01, -1.1172e-01, -3.2147e-01,  2.8090e-01,
          1.043

In [25]:
model3 = torch.nn.Sequential(linear7, bn5, relu, dropout, 
                                linear8, bn6, relu, dropout,
                                linear9).to(device)

In [26]:
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model3.parameters(), lr=learning_rate)

In [27]:
train_total_batch = len(train_loader)

In [28]:
#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것)
model3.train()

for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model3(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.529559612
Epoch: 0002 cost = 0.382952601
Epoch: 0003 cost = 0.342321306
Epoch: 0004 cost = 0.319407374
Epoch: 0005 cost = 0.310401887
Epoch: 0006 cost = 0.300189525
Epoch: 0007 cost = 0.300844997
Epoch: 0008 cost = 0.289053589
Epoch: 0009 cost = 0.278807044
Epoch: 0010 cost = 0.281914711
Epoch: 0011 cost = 0.266391963
Epoch: 0012 cost = 0.263719916
Epoch: 0013 cost = 0.270280004
Epoch: 0014 cost = 0.259037703
Epoch: 0015 cost = 0.259047896
Learning finished


In [29]:
accuracy = 0
with torch.no_grad():
    model3.eval()
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model3(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())
  
  ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 *28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
    print('Label: ', Y_single_data.item())
    single_prediction = model3(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9455999732017517
Label:  1
Prediction:  1


#### hidden node 수 증가 했을 경우 cost가 더 빠르게 감소했으나, accuracy에는 큰 차이가 없었다.