# Q1)

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
# 파라미터 설정 (learning rate, training epochs, batch_size)
learning_rate = 0.1
training_epochs = 15
batch_size = 100

In [3]:
#train과 test set으로 나누어 MNIST data 불러오기
train_dataset = dsets.MNIST(root='MNIST_data', train=True,
                      transform=transforms.ToTensor())
test_dataset = dsets.MNIST(root='MNIST_data', train=False,
                      transform=transforms.ToTensor())

In [4]:
#dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)
from torch.utils.data.dataloader import DataLoader

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                          shuffle=True, drop_last=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size,
                         shuffle=False, drop_last=True)

In [5]:
# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)
linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

dropout = torch.nn.Dropout(p=0.3)
relu = torch.nn.ReLU()

bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

In [6]:
#xavier initialization을 이용하여 각 layer의 weight 초기화
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[-1.8456e-01, -1.0288e-01, -1.8460e-01,  9.8734e-02,  1.9576e-01,
         -1.1690e-01, -7.3035e-02,  1.7796e-01,  8.3608e-02,  2.9346e-02,
          7.5124e-04,  3.6764e-02,  4.8139e-02,  1.7494e-01,  3.5080e-02,
         -7.1267e-02,  9.2584e-02,  1.4563e-01, -2.2576e-01, -9.6950e-02,
          7.8035e-02, -1.4205e-01,  1.0978e-01,  1.3822e-01,  9.8931e-02,
          9.0927e-02, -1.2801e-01, -9.7189e-02, -1.6117e-01, -1.7466e-02,
         -8.4207e-02, -1.8178e-01,  2.2356e-01, -7.3179e-02,  1.4821e-01,
         -5.1725e-02,  2.1346e-02,  7.7231e-02,  6.5650e-03, -1.5362e-01,
         -2.2148e-01, -1.3293e-01,  1.8341e-01, -5.8854e-02,  1.6075e-01,
         -1.0289e-01,  2.1212e-01, -8.3755e-02,  2.1887e-01, -1.8897e-01,
         -2.1393e-01,  4.3875e-02, -4.2989e-02,  1.9082e-01,  1.8661e-02,
         -2.1566e-01, -1.3035e-01, -9.5964e-02,  1.0613e-01, -8.4691e-02,
         -2.0427e-01, -1.8781e-01, -2.0166e-01, -4.8597e-02, -1.4990e-01,
         -1.0347

In [7]:
# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)
model = torch.nn.Sequential(linear1, bn1, relu, dropout, linear2, bn2, relu, dropout, linear3).to(device)

In [8]:
# Loss Function 정의하기 (CrossEntropy를 사용할 것!)
criterion = torch.nn.CrossEntropyLoss().to(device)

In [9]:
#optimizer 정의하기 (Adam optimizer를 사용할 것!)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
#cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)

In [11]:
#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
model.train()
for epoch in range(training_epochs):
    avg_cost = 0
    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드    
    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / train_total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    
print('Learning finished')

Epoch: 0001 cost = 0.503435910
Epoch: 0002 cost = 0.365048051
Epoch: 0003 cost = 0.334249139
Epoch: 0004 cost = 0.302047849
Epoch: 0005 cost = 0.303128302
Epoch: 0006 cost = 0.281297147
Epoch: 0007 cost = 0.275884509
Epoch: 0008 cost = 0.269686431
Epoch: 0009 cost = 0.259295195
Epoch: 0010 cost = 0.261926115
Epoch: 0011 cost = 0.255251110
Epoch: 0012 cost = 0.248109043
Epoch: 0013 cost = 0.254848748
Epoch: 0014 cost = 0.242148101
Epoch: 0015 cost = 0.235981181
Learning finished


In [12]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것
with torch.no_grad():
    model.eval()
    X_test = test_dataset.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = test_dataset.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드
    r = random.randint(0, len(test_dataset) - 1)
    X_single_data = test_dataset.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = test_dataset.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())



Accuracy: 0.9366000294685364
Label:  6
Prediction:  6


# Q2)

In [13]:
linear1 = torch.nn.Linear(784, 200, bias=True)
linear2 = torch.nn.Linear(200, 150, bias=True)
linear3 = torch.nn.Linear(150, 10, bias=True)

relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.3)

bn1 = torch.nn.BatchNorm1d(200)
bn2 = torch.nn.BatchNorm1d(150)

In [14]:
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.1609, -0.1106, -0.0153,  ..., -0.0826, -0.1499,  0.1354],
        [-0.1761,  0.1137,  0.0819,  ...,  0.0894, -0.1467, -0.0995],
        [-0.0313,  0.0049, -0.1319,  ...,  0.0149, -0.1753,  0.1601],
        ...,
        [-0.1923, -0.0557,  0.0464,  ...,  0.0191, -0.0793,  0.1561],
        [-0.0153, -0.1482, -0.0116,  ..., -0.1368, -0.1072,  0.1538],
        [-0.1457, -0.0847,  0.0137,  ..., -0.1606, -0.0393,  0.1047]],
       requires_grad=True)

In [15]:
model = torch.nn.Sequential(linear1, bn1, relu, dropout, linear2, bn2, relu, dropout, linear3).to(device)

In [16]:
criterion = torch.nn.CrossEntropyLoss().to(device)

In [17]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [18]:
train_total_batch = len(train_loader)

In [19]:
model.train()
for epoch in range(training_epochs):
    avg_cost = 0
    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드    
    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / train_total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    
print('Learning finished')

Epoch: 0001 cost = 0.472110003
Epoch: 0002 cost = 0.332314372
Epoch: 0003 cost = 0.288377047
Epoch: 0004 cost = 0.267691672
Epoch: 0005 cost = 0.257264644
Epoch: 0006 cost = 0.238321006
Epoch: 0007 cost = 0.232339770
Epoch: 0008 cost = 0.229101345
Epoch: 0009 cost = 0.226597697
Epoch: 0010 cost = 0.211771518
Epoch: 0011 cost = 0.209984258
Epoch: 0012 cost = 0.204505101
Epoch: 0013 cost = 0.208193734
Epoch: 0014 cost = 0.184867427
Epoch: 0015 cost = 0.196432039
Learning finished


In [20]:
with torch.no_grad():
    model.eval()
    X_test = test_dataset.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = test_dataset.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드
    r = random.randint(0, len(test_dataset) - 1)
    X_single_data = test_dataset.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = test_dataset.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())



Accuracy: 0.9157000184059143
Label:  6
Prediction:  6


큰 차이는 없어 보인다