## Q1

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [2]:
# 파라미터 설정 
learning_rate=0.1
training_epochs=15
batch_size=100

In [3]:
#MNIST data 불러오기 & train-test split
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST\raw\train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST\raw\train-images-idx3-ubyte.gz to MNIST_data/MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw\train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST\raw\train-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST\raw\t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST\raw\t10k-images-idx3-ubyte.gz to MNIST_data/MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw\t10k-labels-idx1-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST\raw\t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw
Processing...




Done!


In [4]:
#dataset loader에 train_test assign (*batch size, shuffle, drop_last*)
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [5]:
#Layering(조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), relu 사용, batch normalization)
#각 layer의 hidden node 수: (784,100), (100,100),(100,10)
linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)
relu=torch.nn.ReLU()

bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)
dropout = torch.nn.Dropout(p=0.3)

In [6]:
#xavier initialization 
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[ 1.4783e-01, -1.3753e-01,  1.7869e-01, -1.6787e-01, -7.0605e-02,
          2.3122e-01,  6.1109e-02, -1.6011e-01, -1.3962e-01,  8.5352e-02,
          5.1042e-02, -1.6607e-01,  1.8580e-01,  5.3116e-02,  1.9414e-01,
         -1.7716e-01,  2.3164e-01,  2.6270e-02, -1.4422e-01, -3.4854e-02,
         -7.6886e-02,  1.8597e-01,  9.2216e-03, -1.8491e-01, -1.9959e-01,
          2.5342e-03,  3.1959e-02, -1.9302e-01, -1.0948e-01, -1.8535e-01,
          1.2680e-01, -1.3685e-02, -1.3191e-04, -1.6625e-01,  1.1506e-01,
         -1.9070e-01,  7.0061e-02, -1.0974e-01, -1.9623e-01, -1.8040e-01,
         -1.2158e-01, -2.2055e-01, -7.3799e-02, -2.1069e-01,  3.0007e-02,
         -2.1656e-01,  1.5055e-01, -1.1717e-01,  1.9116e-01,  8.1463e-02,
          9.0330e-02,  1.1348e-01,  4.2693e-02,  1.2470e-01,  3.2065e-02,
         -1.0036e-01,  1.7607e-01,  6.9370e-02,  1.1235e-01, -7.7071e-02,
          1.7992e-01, -6.2141e-02, -2.2293e-01,  7.0403e-02,  2.0749e-01,
          1.5280

In [9]:
#torch.nn.Sequential을 이용하여 model 정의하기(linear->batch norm layer -> relu -> dropout)
bn_model = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2, relu, dropout,
                            linear3).to(device)

In [11]:
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(bn_model.parameters(), lr=learning_rate)

In [13]:
#cost 계산을 위한 변수설정
train_total_batch=len(data_loader)

In [17]:
#Training epoch (cost 값 초기설정(0으로)과 model의 train 설정 꼭 할 것)
bn_model.train()
for epoch in range(training_epochs):
    avg_cost = 0
    
    #train dataset을 불러오고 (X, Y 불러오기), back propagation 과 optimizer를 사용하여 loss 최적화
    for X, Y in data_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = bn_model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.501851320
Epoch: 0002 cost = 0.371026605
Epoch: 0003 cost = 0.332277954
Epoch: 0004 cost = 0.311494023
Epoch: 0005 cost = 0.292015493
Epoch: 0006 cost = 0.284265935
Epoch: 0007 cost = 0.281747252
Epoch: 0008 cost = 0.273527175
Epoch: 0009 cost = 0.269279897
Epoch: 0010 cost = 0.260911793
Epoch: 0011 cost = 0.257887065
Epoch: 0012 cost = 0.260118753
Epoch: 0013 cost = 0.245293409
Epoch: 0014 cost = 0.242846414
Epoch: 0015 cost = 0.226370394
Learning finished


In [18]:
#test set으로 모델의 정확도를 검증하는 코드(model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view 를 사용하여 차원 변환할 것/ Y_test를 불러올 때 labels 사용
#accuracy의 초기값 설정(0으로) 꼭 할 것
# Test model and check accuracy
with torch.no_grad():
    bn_model.eval()    # set the model to evaluation mode (dropout=False)

    # Test the model using test sets
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = bn_model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    #test set에서 random으로 data를 뽑아, label과 prediction을 비교하는 코드
    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = bn_model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.8831999897956848
Label:  8
Prediction:  3




## Q2

In [25]:
#Layering(조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), relu 사용, batch normalization)
#각 layer의 hidden node 수: (784,200), (200,150),(150,10)
linear1 = torch.nn.Linear(784, 200, bias=True)
linear2 = torch.nn.Linear(200, 150, bias=True)
linear3 = torch.nn.Linear(150, 10, bias=True)
relu=torch.nn.ReLU()

bn1 = torch.nn.BatchNorm1d(200)
bn2 = torch.nn.BatchNorm1d(150)
dropout = torch.nn.Dropout(p=0.3)

In [26]:
#xavier initialization 
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[-0.0117, -0.0844, -0.1313,  ..., -0.0317, -0.1657, -0.1573],
        [ 0.0203,  0.1654,  0.1569,  ...,  0.0952,  0.0191, -0.0985],
        [-0.1679, -0.1461, -0.0127,  ...,  0.0205,  0.1855, -0.0867],
        ...,
        [-0.1440, -0.1677,  0.1842,  ..., -0.1669,  0.1412, -0.1020],
        [-0.0626,  0.1092, -0.1658,  ..., -0.0144,  0.1504, -0.0100],
        [ 0.0201, -0.0080, -0.0462,  ..., -0.1851,  0.1637, -0.0795]],
       requires_grad=True)

In [27]:
#torch.nn.Sequential을 이용하여 model 정의하기(linear->batch norm layer -> relu -> dropout)
bn_model = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2, relu, dropout,
                            linear3).to(device)

In [28]:
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(bn_model.parameters(), lr=learning_rate)

In [29]:
#cost 계산을 위한 변수설정
train_total_batch=len(data_loader)

In [None]:
#Training epoch (cost 값 초기설정(0으로)과 model의 train 설정 꼭 할 것)
bn_model.train()
for epoch in range(training_epochs):
    avg_cost = 0
    
    #train dataset을 불러오고 (X, Y 불러오기), back propagation 과 optimizer를 사용하여 loss 최적화
    for X, Y in data_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = bn_model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.463630527
Epoch: 0002 cost = 0.326863289
Epoch: 0003 cost = 0.290368140
Epoch: 0004 cost = 0.275735795
Epoch: 0005 cost = 0.268540889
Epoch: 0006 cost = 0.250680059
Epoch: 0007 cost = 0.234645382
Epoch: 0008 cost = 0.232912704
Epoch: 0009 cost = 0.224910527
Epoch: 0010 cost = 0.220604822
Epoch: 0011 cost = 0.216830239


In [None]:
#test set으로 모델의 정확도를 검증하는 코드(model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view 를 사용하여 차원 변환할 것/ Y_test를 불러올 때 labels 사용
#accuracy의 초기값 설정(0으로) 꼭 할 것
# Test model and check accuracy
with torch.no_grad():
    bn_model.eval()    # set the model to evaluation mode (dropout=False)

    # Test the model using test sets
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = bn_model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    #test set에서 random으로 data를 뽑아, label과 prediction을 비교하는 코드
    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = bn_model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())