# Q 1-1) 아래에 주어진 주석을 기반으로 코딩을 해주세요

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [2]:
# 파라미터 설정
learning_rate = 0.1
training_epochs = 15
batch_size = 100

In [3]:
# train과 test set으로 나누어 MNIST data 불러오기
train = dsets.MNIST(root='MNIST_data/',
                    train = True,
                    transform = transforms.ToTensor(),
                    download = True)
test = dsets.MNIST(root='MNIST_data/',
                   train = False,
                   transform = transforms.ToTensor(),
                   download = True)

In [4]:
#dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)
train_loader = torch.utils.data.DataLoader(dataset=train,
                                          batch_size = batch_size,
                                          shuffle = True,
                                          drop_last = True)

test_loader = torch.utils.data.DataLoader(dataset=test,
                                          batch_size = batch_size,
                                          shuffle = True,
                                          drop_last = True)

In [5]:
# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)
linear1 = torch.nn.Linear(784, 100, bias = True)
linear2 = torch.nn.Linear(100, 100, bias = True)
linear3 = torch.nn.Linear(100, 10, bias = True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p = 0.3)
bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

In [6]:
#xavier initialization을 이용하여 각 layer의 weight 초기화
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.1061,  0.1171,  0.1897, -0.2068, -0.1429, -0.1203, -0.0339, -0.1187,
          0.0867, -0.1745, -0.0858, -0.1713,  0.1788,  0.1784,  0.0838, -0.0200,
         -0.1273,  0.2078,  0.2287, -0.0328, -0.0244, -0.1916, -0.1104,  0.1045,
          0.2265,  0.0765, -0.1907, -0.1358,  0.0962,  0.1731, -0.1784,  0.1765,
          0.1268, -0.2318, -0.0584, -0.1518,  0.0475,  0.0087,  0.0236, -0.1465,
          0.2123, -0.0823,  0.0968,  0.1805,  0.0704,  0.2256,  0.0521,  0.0855,
         -0.0572,  0.1395, -0.1459, -0.0405, -0.0033, -0.0890,  0.0056, -0.0305,
         -0.0803,  0.0035, -0.0334, -0.0532,  0.0193,  0.0151, -0.2333, -0.0639,
          0.2015, -0.0596, -0.1510,  0.1850,  0.1448,  0.2179, -0.0722,  0.1960,
          0.2247,  0.0017, -0.0077,  0.0363,  0.1166,  0.1629, -0.0850,  0.0184,
          0.1257,  0.2102,  0.1442,  0.0036, -0.0635, -0.0218, -0.2082,  0.0890,
         -0.0393, -0.0478, -0.1239,  0.2262,  0.1905, -0.2294,  0.2024,  0.1591,
      

In [7]:
# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)
model = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2, relu, dropout,
                            linear3)

In [8]:
# Loss Function 정의하기 (CrossEntropy를 사용할 것!)
criterion = torch.nn.CrossEntropyLoss() 

In [9]:
#optimizer 정의하기 (Adam optimizer를 사용할 것!)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
#cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)

#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    model.train()
    avg_cost = 0
    
    #train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28)
        Y_hat = model(X)
        cost = criterion(Y_hat, Y)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / train_total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.496009558
Epoch: 0002 cost = 0.365117162
Epoch: 0003 cost = 0.321423441
Epoch: 0004 cost = 0.304662853
Epoch: 0005 cost = 0.292454153
Epoch: 0006 cost = 0.289761156
Epoch: 0007 cost = 0.275276959
Epoch: 0008 cost = 0.268245369
Epoch: 0009 cost = 0.274177849
Epoch: 0010 cost = 0.257648170
Epoch: 0011 cost = 0.241259992
Epoch: 0012 cost = 0.241195634
Epoch: 0013 cost = 0.239378348
Epoch: 0014 cost = 0.236419350
Epoch: 0015 cost = 0.227829695
Learning finished


In [11]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것
with torch.no_grad():
    model.eval()
    accuracy = 0
    
    for i, (X, Y) in enumerate(test_loader):
        X_test = X.view(-1, 28 * 28)
        Y_test = Y
        prediction = model(X_test)
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        accuracy = correct_prediction.float().mean()
       
    print("Accuracy: ", accuracy.item())
    
    ## Test set에서 random으로 data를 뽑아 Label과 Preditcion을 비교하는 코드
    r = random.randint(0, len(test_loader)-1)
    X_single_data = X[r:r+1].view(-1, 28*28)
    Y_single_data = Y[r:r+1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())
    

Accuracy:  0.9700000286102295
Label:  3
Prediction:  3


# Q 1-2) Layer들의 Hidden node 수를 증가 또는 감소시켜보기

In [18]:
# 증가
linear1 = torch.nn.Linear(784, 200, bias = True)
linear2 = torch.nn.Linear(200, 150, bias = True)
linear3 = torch.nn.Linear(150, 10, bias = True)

bn1 = torch.nn.BatchNorm1d(200)
bn2 = torch.nn.BatchNorm1d(150)

In [19]:
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.1409, -0.1490, -0.0132,  ..., -0.1501, -0.0596, -0.1913],
        [-0.1624,  0.0009,  0.1408,  ..., -0.1339, -0.0253,  0.1415],
        [ 0.1044, -0.1092,  0.1175,  ...,  0.0904,  0.0186, -0.0249],
        ...,
        [-0.1252, -0.0718, -0.0555,  ...,  0.1434,  0.1532,  0.0019],
        [-0.0872, -0.1050, -0.0568,  ..., -0.1874,  0.0539, -0.0934],
        [ 0.1372, -0.0544,  0.0367,  ..., -0.0490, -0.0068,  0.1394]],
       requires_grad=True)

In [20]:
model2 = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2, relu, dropout,
                            linear3)

In [21]:
# Loss Function 정의하기 (CrossEntropy를 사용할 것!)
criterion = torch.nn.CrossEntropyLoss() 

#optimizer 정의하기 (Adam optimizer를 사용할 것!)
optimizer = torch.optim.Adam(model2.parameters(), lr=learning_rate)

In [22]:
#cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)

#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    model2.train()
    avg_cost = 0
    
    #train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28)
        Y_hat = model2(X)
        cost = criterion(Y_hat, Y)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / train_total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.459155381
Epoch: 0002 cost = 0.327410132
Epoch: 0003 cost = 0.289819002
Epoch: 0004 cost = 0.273510128
Epoch: 0005 cost = 0.250397474
Epoch: 0006 cost = 0.256609350
Epoch: 0007 cost = 0.238115966
Epoch: 0008 cost = 0.233371884
Epoch: 0009 cost = 0.225198969
Epoch: 0010 cost = 0.215849593
Epoch: 0011 cost = 0.220183015
Epoch: 0012 cost = 0.207451299
Epoch: 0013 cost = 0.206049830
Epoch: 0014 cost = 0.204814732
Epoch: 0015 cost = 0.190526366
Learning finished


In [23]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것
with torch.no_grad():
    model2.eval()
    accuracy = 0
    
    for i, (X, Y) in enumerate(test_loader):
        X_test = X.view(-1, 28 * 28)
        Y_test = Y
        prediction = model2(X_test)
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        accuracy = correct_prediction.float().mean()
       
    print("Accuracy: ", accuracy.item())
    
    ## Test set에서 random으로 data를 뽑아 Label과 Preditcion을 비교하는 코드
    r = random.randint(0, len(test_loader)-1)
    X_single_data = X[r:r+1].view(-1, 28*28)
    Y_single_data = Y[r:r+1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model2(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy:  1.0
Label:  6
Prediction:  6


Loss도 많이 줄어들고, Accuracy가 1이 나옴

In [24]:
# 감소
linear1 = torch.nn.Linear(784, 70, bias = True)
linear2 = torch.nn.Linear(70, 50, bias = True)
linear3 = torch.nn.Linear(50, 10, bias = True)
bn1 = torch.nn.BatchNorm1d(70)
bn2 = torch.nn.BatchNorm1d(50)

In [25]:
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.0466, -0.0164, -0.1147,  0.3002,  0.0831, -0.2412, -0.0331,  0.0613,
         -0.2212,  0.2542,  0.0849, -0.2804,  0.1685, -0.0052, -0.0893,  0.0243,
         -0.1935, -0.0522, -0.1118,  0.1373,  0.2536, -0.0136, -0.2451,  0.1960,
         -0.0241,  0.2051,  0.0365, -0.1055,  0.2960,  0.1667,  0.1083,  0.2020,
          0.1318, -0.2663,  0.3076, -0.2342,  0.2477,  0.0196, -0.2697, -0.1849,
         -0.0641, -0.1532,  0.3008, -0.0925,  0.2623, -0.3114,  0.1231, -0.2500,
         -0.1580,  0.1566],
        [-0.0502, -0.3016, -0.1494,  0.0086, -0.2670,  0.0922,  0.0430, -0.0768,
         -0.0763, -0.1499, -0.3005,  0.2044, -0.3063, -0.1917,  0.2693,  0.0877,
          0.1267,  0.1547,  0.1827, -0.1984, -0.2897,  0.1113, -0.1584, -0.2167,
         -0.2574,  0.2825, -0.2097, -0.2305, -0.0680,  0.2803, -0.0802, -0.0875,
          0.2149, -0.3101, -0.2694,  0.1776, -0.0675,  0.2912,  0.1428,  0.2651,
          0.1582,  0.1508, -0.0995, -0.2166, -0.2885,  0.03

In [26]:
model3 = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2, relu, dropout,
                            linear3)

In [27]:
# Loss Function 정의하기 (CrossEntropy를 사용할 것!)
criterion = torch.nn.CrossEntropyLoss() 

#optimizer 정의하기 (Adam optimizer를 사용할 것!)
optimizer = torch.optim.Adam(model3.parameters(), lr=learning_rate)

In [28]:
#cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)

#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    model3.train()
    avg_cost = 0
    
    for X,Y in train_loader:
        X = X.view(-1, 28*28)
        Y = Y
        Y_hat = model3(X)
        cost = criterion(Y_hat, Y)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / train_total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.544624627
Epoch: 0002 cost = 0.409764826
Epoch: 0003 cost = 0.375281453
Epoch: 0004 cost = 0.341665894
Epoch: 0005 cost = 0.325729996
Epoch: 0006 cost = 0.319383621
Epoch: 0007 cost = 0.320487916
Epoch: 0008 cost = 0.304233700
Epoch: 0009 cost = 0.304083586
Epoch: 0010 cost = 0.298640341
Epoch: 0011 cost = 0.285165370
Epoch: 0012 cost = 0.292602718
Epoch: 0013 cost = 0.280387670
Epoch: 0014 cost = 0.281007975
Epoch: 0015 cost = 0.286925703
Learning finished


In [30]:
with torch.no_grad():
    model3.eval()
    accuracy = 0
    
    for i, (X,Y) in enumerate(test_loader):
        X_test = X.view(-1, 28 * 28)
        Y_test = Y
        prediction = model3(X_test)
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        accuracy = correct_prediction.float().mean()
       
    print("Accuracy: ", accuracy.item())
    
    ## Test set에서 random으로 data를 뽑아 Label과 Preditcion을 비교하는 코드
    r = random.randint(0, len(test_loader)-1)
    X_single_data = X[r:r+1].view(-1, 28*28)
    Y_single_data = Y[r:r+1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = model3(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy:  0.9700000286102295
Label:  0
Prediction:  0


이것 역시 loss와 accuracy가 좋았음