# Week 2 HW 오태환

# Q1

## 0) Importing Packages

In [3]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

## 1) Parameter Setting

In [4]:
learning_rate = 0.1
training_epochs = 15
batch_size = 100

## 2) Importing Dataset

In [7]:
# Train과 test set으로 나눠서 MNIST data 불러오기
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

In [15]:
# dataset loader에 train과 test 할당하기
train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          drop_last=True)

In [18]:
# layer 쌓기(3 layer, dropout(p=0.3), ReLU, Batchnormalization)
class CNN(torch.nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()
        self.keep_prob = 0.5
        
        # 1st layer (784, 100)
        self.layer1 = torch.nn.Sequential(
        torch.nn.Linear(784, 100, bias = True),
        torch.nn.BatchNorm1d(100),
        torch.nn.ReLU(),
        torch.nn.Dropout(p = 0.3))
        
        # 2nd layer (100, 100)
        self.layer2 = torch.nn.Sequential(
        torch.nn.Linear(100, 100, bias = True),
        torch.nn.BatchNorm1d(100), 
        torch.nn.ReLU(),
        torch.nn.Dropout(p = 0.3))
        
        # 3rd layer
        self.layer3 = torch.nn.Linear(100, 10, bias = True)
        torch.nn.init.xavier_uniform_(self.layer3.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        return out

In [19]:
model = CNN().to(device)

In [20]:
# Loss Function & Optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [21]:
# cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)

In [23]:
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in train_loader:
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))

print('Learning Finished!')

[Epoch:    1] cost = 0.54530555
[Epoch:    2] cost = 0.386751205
[Epoch:    3] cost = 0.343295336
[Epoch:    4] cost = 0.325687706
[Epoch:    5] cost = 0.311418861
[Epoch:    6] cost = 0.296133846
[Epoch:    7] cost = 0.286807299
[Epoch:    8] cost = 0.289699137
[Epoch:    9] cost = 0.277597904
[Epoch:   10] cost = 0.276959747
[Epoch:   11] cost = 0.263065428
[Epoch:   12] cost = 0.262284577
[Epoch:   13] cost = 0.260551333
[Epoch:   14] cost = 0.256304532
[Epoch:   15] cost = 0.248574197
Learning Finished!


In [25]:
with torch.no_grad():
    model.eval()    # set the model to evaluation mode (dropout=False)

    # Test the model using test sets
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.8847999572753906
Label:  5
Prediction:  5


# Q2

## 2-1) node 의 수를 증가시켰을 때

In [26]:
class CNN(torch.nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()
        self.keep_prob = 0.5
        
        # 1st layer (784, 200)
        self.layer1 = torch.nn.Sequential(
        torch.nn.Linear(784, 200, bias = True),
        torch.nn.BatchNorm1d(200),
        torch.nn.ReLU(),
        torch.nn.Dropout(p = 0.3))
        
        # 2nd layer (200, 150)
        self.layer2 = torch.nn.Sequential(
        torch.nn.Linear(200, 150, bias = True),
        torch.nn.BatchNorm1d(150), 
        torch.nn.ReLU(),
        torch.nn.Dropout(p = 0.3))
        
        # 3rd layer (150, 10)
        self.layer3 = torch.nn.Linear(150, 10, bias = True)
        torch.nn.init.xavier_uniform_(self.layer3.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        return out

In [27]:
model = CNN().to(device)

In [28]:
# Loss Function & Optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [29]:
# cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)

In [30]:
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in train_loader:
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))

print('Learning Finished!')

[Epoch:    1] cost = 0.471379757
[Epoch:    2] cost = 0.348017275
[Epoch:    3] cost = 0.30213964
[Epoch:    4] cost = 0.271071821
[Epoch:    5] cost = 0.25173378
[Epoch:    6] cost = 0.240206286
[Epoch:    7] cost = 0.234153166
[Epoch:    8] cost = 0.234570041
[Epoch:    9] cost = 0.22688444
[Epoch:   10] cost = 0.218240887
[Epoch:   11] cost = 0.216608718
[Epoch:   12] cost = 0.219060943
[Epoch:   13] cost = 0.203816757
[Epoch:   14] cost = 0.195141777
[Epoch:   15] cost = 0.196302593
Learning Finished!


In [31]:
with torch.no_grad():
    model.eval()    # set the model to evaluation mode (dropout=False)

    # Test the model using test sets
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9218999743461609
Label:  0
Prediction:  0


cost와 Accuracy가 더 좋아졌다!

## 2-2) node의 수를 줄였을 때

In [32]:
class CNN(torch.nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()
        self.keep_prob = 0.5
        
        # 1st layer (784, 50)
        self.layer1 = torch.nn.Sequential(
        torch.nn.Linear(784, 50, bias = True),
        torch.nn.BatchNorm1d(50),
        torch.nn.ReLU(),
        torch.nn.Dropout(p = 0.3))
        
        # 2nd layer (50, 50)
        self.layer2 = torch.nn.Sequential(
        torch.nn.Linear(50, 50, bias = True),
        torch.nn.BatchNorm1d(50), 
        torch.nn.ReLU(),
        torch.nn.Dropout(p = 0.3))
        
        # 3rd layer (50, 10)
        self.layer3 = torch.nn.Linear(50, 10, bias = True)
        torch.nn.init.xavier_uniform_(self.layer3.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        return out

In [33]:
model = CNN().to(device)

In [34]:
# Loss Function & Optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)    
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [35]:
# cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)

In [36]:
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in train_loader:
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / train_total_batch

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))

print('Learning Finished!')

[Epoch:    1] cost = 0.621596813
[Epoch:    2] cost = 0.465849847
[Epoch:    3] cost = 0.41808632
[Epoch:    4] cost = 0.38952449
[Epoch:    5] cost = 0.391602367
[Epoch:    6] cost = 0.381991744
[Epoch:    7] cost = 0.367907941
[Epoch:    8] cost = 0.347219795
[Epoch:    9] cost = 0.355542362
[Epoch:   10] cost = 0.356186479
[Epoch:   11] cost = 0.338912457
[Epoch:   12] cost = 0.345072389
[Epoch:   13] cost = 0.335639328
[Epoch:   14] cost = 0.325445533
[Epoch:   15] cost = 0.330336928
Learning Finished!


In [37]:
with torch.no_grad():
    model.eval()    # set the model to evaluation mode (dropout=False)

    # Test the model using test sets
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.9273999929428101
Label:  4
Prediction:  4


cost는 안좋아졌는데 Accuracy는 더 올랐다!?!