### Q1

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random


In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(1)
if device == 'cuda':
    torch.cuda.manual_seed_all(1)

In [3]:
# 파라미터 설정 (learning rate, training epochs, batch_size)
learning_rate = 0.1
training_epochs = 15
batch_size = 100
drop_prob=0.3

In [4]:
# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [5]:
# dataset loader
train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [6]:
# nn layers
linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)
relu = torch.nn.ReLU()
bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=drop_prob)

In [7]:
# xavier initialization
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[-1.5363e-01,  3.7224e-02, -1.5443e-01, -2.1791e-01,  1.7185e-01,
         -2.0676e-01, -1.1819e-01, -1.6484e-01,  1.3565e-01,  2.4250e-02,
          1.6574e-01,  1.4360e-01,  1.2099e-01,  1.0337e-01,  1.7177e-01,
         -1.9295e-01, -1.3142e-01, -1.0130e-01, -1.7494e-01, -1.6558e-01,
         -2.0379e-01, -7.8588e-02,  9.9025e-02, -8.9940e-02,  1.9087e-01,
         -1.1381e-01, -2.3247e-01,  2.5474e-02, -5.6100e-03, -1.9829e-01,
          1.4070e-01, -1.3431e-01, -2.0873e-01, -5.9762e-02,  5.9842e-02,
         -1.5402e-01, -6.3408e-02, -1.8561e-01, -6.3792e-02, -9.9554e-02,
         -4.6280e-02, -2.2085e-02, -6.4181e-02, -8.8129e-02, -1.1779e-01,
         -9.2208e-02,  1.9029e-01, -5.3670e-02,  6.7792e-02,  1.1459e-01,
         -2.1908e-01, -1.7011e-01,  1.5883e-01, -7.4235e-02,  5.7834e-02,
          1.5231e-01,  7.3779e-03,  1.6984e-01,  1.6407e-01, -2.1601e-01,
          7.8833e-02, -7.1080e-02,  1.5240e-01, -5.5697e-02, -1.5577e-01,
         -1.1549

In [8]:
# model
model = torch.nn.Sequential(linear1,bn1, relu, dropout,
                            linear2,bn2, relu, dropout,
                            linear3).to(device)

In [9]:
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
total_batch = len(train_loader)
model.train()    # set the model to train mode (dropout=True)
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 784).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.505852759
Epoch: 0002 cost = 0.355073422
Epoch: 0003 cost = 0.321320951
Epoch: 0004 cost = 0.303041995
Epoch: 0005 cost = 0.302864492
Epoch: 0006 cost = 0.282366365
Epoch: 0007 cost = 0.265034020
Epoch: 0008 cost = 0.269699484
Epoch: 0009 cost = 0.260018706
Epoch: 0010 cost = 0.261773169
Epoch: 0011 cost = 0.257436723
Epoch: 0012 cost = 0.252168328
Epoch: 0013 cost = 0.229561925
Epoch: 0014 cost = 0.242550507
Epoch: 0015 cost = 0.250869453
Learning finished


In [11]:
with torch.no_grad():
    model.eval()    # set the model to evaluation mode (dropout=False)

    # Test the model using test sets
    X_test = mnist_test.test_data.view(-1, 28*28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = 0
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())


Accuracy: 0.9157999753952026




In [12]:

    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Label:  6
Prediction:  6


### Q2-1 Hidden node 수를 증가

In [13]:
# nn layers
linear1 = torch.nn.Linear(784, 200, bias=True)
linear2 = torch.nn.Linear(200, 150, bias=True)
linear3 = torch.nn.Linear(150, 10, bias=True)
relu = torch.nn.ReLU()
bn1 = torch.nn.BatchNorm1d(200)
bn2 = torch.nn.BatchNorm1d(150)

relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=drop_prob)

In [14]:
total_batch = len(train_loader)
model.train()    # set the model to train mode (dropout=True)
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 784).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')
        

In [16]:
# Calculate accuracy for train sets
X_test = mnist_train.train_data.view(-1, 28*28).float().to(device)
Y_test = mnist_train.train_labels.to(device)

prediction = model(X_test)
correct_prediction = torch.argmax(prediction, 1) == Y_test
accuracy = 0
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy.item())




Accuracy: 0.9484500288963318


In [17]:
# Calculate accuracy for test sets
with torch.no_grad():
    model.eval()    # set the model to evaluation mode (dropout=False)

    # Test the model using test sets
    X_test = mnist_test.test_data.view(-1, 28*28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = 0
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

Accuracy: 0.9688000082969666


### Q2-2 Hidden node 수를 감소

In [18]:
# nn layers
linear1 = torch.nn.Linear(784, 50, bias=True)
linear2 = torch.nn.Linear(50, 50, bias=True)
linear3 = torch.nn.Linear(50, 10, bias=True)
relu = torch.nn.ReLU()
bn1 = torch.nn.BatchNorm1d(50)
bn2 = torch.nn.BatchNorm1d(50)

relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=drop_prob)

In [20]:
total_batch = len(train_loader)
model.train()    # set the model to train mode (dropout=True)
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 784).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.213821724
Epoch: 0002 cost = 0.203128070
Epoch: 0003 cost = 0.201647580
Epoch: 0004 cost = 0.201174617
Epoch: 0005 cost = 0.193975255
Epoch: 0006 cost = 0.202385426
Epoch: 0007 cost = 0.193503931
Epoch: 0008 cost = 0.188855439
Epoch: 0009 cost = 0.196626797
Epoch: 0010 cost = 0.196404412
Epoch: 0011 cost = 0.188807771
Epoch: 0012 cost = 0.193488047
Epoch: 0013 cost = 0.183583781
Epoch: 0014 cost = 0.185089126
Epoch: 0015 cost = 0.188467070
Learning finished


In [22]:
# Calculate accuracy for train sets
X_test = mnist_train.train_data.view(-1, 28*28).float().to(device)
Y_test = mnist_train.train_labels.to(device)

prediction = model(X_test)
correct_prediction = torch.argmax(prediction, 1) == Y_test
accuracy = 0
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy.item())


Accuracy: 0.958899974822998


In [23]:
# Calculate accuracy for test sets
with torch.no_grad():
    model.eval()    # set the model to evaluation mode (dropout=False)

    # Test the model using test sets
    X_test = mnist_test.test_data.view(-1, 28*28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = 0
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

Accuracy: 0.9650999903678894
