Q1-1.

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [2]:
learning_rate = 0.1
training_epochs = 15
batch_size = 100

In [3]:
# Train과 test set으로 나누어 MNIST data 불러오기

train_dataset = dsets.MNIST(root='MNIST_data', train=True, download = True,
                      transform=transforms.ToTensor())
test_dataset = dsets.MNIST(root='MNIST_data', train=False, download = True,
                      transform=transforms.ToTensor())

In [4]:
# Dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)

from torch.utils.data.dataloader import DataLoader

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                          shuffle=True, drop_last=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size,
                         shuffle=False, drop_last=True)

In [5]:
# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)

linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)

dropout = torch.nn.Dropout(p=0.3)
relu = torch.nn.ReLU()

bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

In [6]:
# Xavier initialization을 이용하여 각 layer의 weight 초기화

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 0.0126, -0.1293, -0.0315, -0.0310, -0.0951,  0.2176,  0.1688, -0.2306,
          0.0361, -0.2157, -0.2206, -0.2074, -0.0371, -0.0545,  0.0856, -0.1273,
          0.0408, -0.0457,  0.0361, -0.2145,  0.0731,  0.0580, -0.1159, -0.2087,
         -0.0515,  0.1005,  0.0267, -0.0963,  0.2126, -0.0618,  0.0241,  0.1729,
          0.1903,  0.1138,  0.1376, -0.0008, -0.1123, -0.1937,  0.1390,  0.0327,
          0.1554,  0.1404, -0.0818, -0.2174,  0.2211,  0.0924, -0.2182,  0.1230,
          0.2216,  0.1602,  0.1113,  0.1103,  0.0634, -0.1021, -0.1447, -0.1013,
          0.0942,  0.0921,  0.0309, -0.1407,  0.1626, -0.2149,  0.0861,  0.1962,
          0.0577, -0.1054, -0.1821,  0.1212,  0.1301,  0.1409,  0.2263, -0.1725,
         -0.1907,  0.1189,  0.1414, -0.1760, -0.2238,  0.1133, -0.1035,  0.0544,
          0.0689, -0.1950, -0.1508, -0.1563, -0.0184,  0.1570,  0.0518, -0.1999,
          0.2039, -0.1264,  0.0629, -0.0543, -0.0214, -0.0658, -0.1383,  0.0314,
      

In [7]:
# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = torch.nn.Sequential(linear1, bn1, relu, dropout, 
                            linear2, bn2, relu, dropout, 
                            linear3).to(device)

In [8]:
# Loss Function 정의하기 (CrossEntropy를 사용할 것!)

criterion = torch.nn.CrossEntropyLoss().to(device)

In [9]:
# optimizer 정의하기 (Adam optimizer를 사용할 것!)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
# Cost 계산을 위한 변수 설정

train_total_batch = len(train_loader)

In [11]:
# Training epoch (cost 값 초기설정(0으로)과 model의 train 설정 꼭 할 것)
model.train()
for epoch in range(training_epochs):
    avg_cost = 0
    
    # Train dataset을 불러오고 (X, Y 불러오기), back propagation 과 optimizer를 사용하여 loss 최적화
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.508053780
Epoch: 0002 cost = 0.375132650
Epoch: 0003 cost = 0.332811415
Epoch: 0004 cost = 0.313179821
Epoch: 0005 cost = 0.298753560
Epoch: 0006 cost = 0.290933579
Epoch: 0007 cost = 0.280284733
Epoch: 0008 cost = 0.274961054
Epoch: 0009 cost = 0.270615608
Epoch: 0010 cost = 0.259013474
Epoch: 0011 cost = 0.260724068
Epoch: 0012 cost = 0.249167472
Epoch: 0013 cost = 0.247019738
Epoch: 0014 cost = 0.247677296
Epoch: 0015 cost = 0.240713581
Learning finished


In [12]:
# Test set으로 모델의 정확도를 검증하는 코드(model의 evaluation mode 설정 꼭 할 것)
# X_test 불러올 때 view 를 사용하여 차원 변환할 것/ Y_test를 불러올 때 labels 사용
# Accuracy의 초기값 설정(0으로) 꼭 할 것
with torch.no_grad():
    model.eval()
    X_test = test_dataset.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = test_dataset.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드
    r = random.randint(0, len(test_dataset) - 1)
    X_single_data = test_dataset.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = test_dataset.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.8834999799728394
Label:  2
Prediction:  2




Q1-2.

In [13]:
linear1 = torch.nn.Linear(784, 200, bias=True)
linear2 = torch.nn.Linear(200, 150, bias=True)
linear3 = torch.nn.Linear(150, 10, bias=True)

dropout = torch.nn.Dropout(p=0.3)
relu = torch.nn.ReLU()

bn1 = torch.nn.BatchNorm1d(200)
bn2 = torch.nn.BatchNorm1d(150)

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

model = torch.nn.Sequential(linear1, bn1, relu, dropout, 
                            linear2, bn2, relu, dropout, 
                            linear3).to(device)

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_total_batch = len(train_loader)

model.train()
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

with torch.no_grad():
    model.eval()
    X_test = test_dataset.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = test_dataset.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    r = random.randint(0, len(test_dataset) - 1)
    X_single_data = test_dataset.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = test_dataset.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Epoch: 0001 cost = 0.468723029
Epoch: 0002 cost = 0.326041341
Epoch: 0003 cost = 0.291418254
Epoch: 0004 cost = 0.269737363
Epoch: 0005 cost = 0.264427006
Epoch: 0006 cost = 0.234071359
Epoch: 0007 cost = 0.233897805
Epoch: 0008 cost = 0.234648705
Epoch: 0009 cost = 0.225692794
Epoch: 0010 cost = 0.220184028
Epoch: 0011 cost = 0.218313485
Epoch: 0012 cost = 0.207565337
Epoch: 0013 cost = 0.207952231
Epoch: 0014 cost = 0.210073262
Epoch: 0015 cost = 0.191415802
Learning finished




Accuracy: 0.8849999904632568
Label:  6
Prediction:  6


In [14]:
linear1 = torch.nn.Linear(784, 75, bias=True)
linear2 = torch.nn.Linear(75, 50, bias=True)
linear3 = torch.nn.Linear(50, 10, bias=True)

dropout = torch.nn.Dropout(p=0.3)
relu = torch.nn.ReLU()

bn1 = torch.nn.BatchNorm1d(75)
bn2 = torch.nn.BatchNorm1d(50)

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

model = torch.nn.Sequential(linear1, bn1, relu, dropout, 
                            linear2, bn2, relu, dropout, 
                            linear3).to(device)

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_total_batch = len(train_loader)

model.train()
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

with torch.no_grad():
    model.eval()
    X_test = test_dataset.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = test_dataset.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    r = random.randint(0, len(test_dataset) - 1)
    X_single_data = test_dataset.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = test_dataset.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Epoch: 0001 cost = 0.540303290
Epoch: 0002 cost = 0.404820502
Epoch: 0003 cost = 0.365351439
Epoch: 0004 cost = 0.338542670
Epoch: 0005 cost = 0.327690154
Epoch: 0006 cost = 0.325120628
Epoch: 0007 cost = 0.309963375
Epoch: 0008 cost = 0.302655667
Epoch: 0009 cost = 0.290860742
Epoch: 0010 cost = 0.298382550
Epoch: 0011 cost = 0.282337159
Epoch: 0012 cost = 0.290740967
Epoch: 0013 cost = 0.282151937
Epoch: 0014 cost = 0.274823010
Epoch: 0015 cost = 0.271114439
Learning finished
Accuracy: 0.913100004196167
Label:  9
Prediction:  9


