# Lecture 18. PyTorch Classification

> Eunmi Kim    
 계산과학 프로그래밍 및 실습


---


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from torchvision import datasets, transforms

In [None]:
# Select the compute device: use the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print('Current device is %s.' % device)

## 1. Data

MNIST

In [None]:
# MNIST dataset
# Training split (train=True) stored under ./data; download=True requests a
# download, and ToTensor() is applied to every image as the transform.
mnist_train = datasets.MNIST(root='./data', train=True, download=True,
                             transform=transforms.ToTensor())

# Test split (train=False), same storage location and transform.
mnist_test = datasets.MNIST(root='./data', train=False, download=True,
                            transform=transforms.ToTensor())

In [None]:
# Report how many samples each split contains
print('number of training data: ', len(mnist_train))
print('number of test data: ', len(mnist_test))

In [None]:
# Bare expression: as the last line of a notebook cell this displays the dataset object's repr
mnist_test

In [None]:
# Inspect the data: indexing the dataset yields an (image, label) pair
image, label = mnist_train[0]

# Shape of a single image tensor
print(image.shape)

In [None]:
# Dump the raw tensor values of the sample image
print(image)

In [None]:
# Reshape the sample image tensor to 28x28 and render it with a grayscale colormap
plt.imshow(image.view(28, 28), cmap='gray')
plt.show()

In [None]:
# Class label paired with the sample image
print(label)

In [None]:
# Training hyperparameters shared by the cells below
batch_size = 100        # samples per mini-batch handed to the DataLoaders
epochs = 10             # number of passes over the training loader
learning_rate = 1e-3    # step size given to the optimizer

In [None]:
# Mini-batch loaders: the training loader reshuffles its data, the test loader
# keeps the default (unshuffled) order.
train_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    mnist_test,
    batch_size=batch_size,
)

## 2. Model

In [None]:
# Fully connected layers: 784 -> 512 -> 256 -> 10 (bias is enabled by default)
linear1 = nn.Linear(784, 512)
linear2 = nn.Linear(512, 256)
linear3 = nn.Linear(256, 10)
relu = nn.ReLU()

# Stack the layers into an MLP, then move its parameters to the selected device
model = nn.Sequential(
    linear1, relu,
    linear2, relu,
    linear3,
)
model.to(device)

# Loss and optimizer. The model outputs raw scores:
# Softmax is internally computed by CrossEntropyLoss.
criterion = nn.CrossEntropyLoss()
criterion.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Show the layer structure of the baseline model
print(model)

## 3. Training

In [None]:
# Number of mini-batches the training loader yields per epoch;
# the training loops divide each batch loss by this to form an epoch average.
total_batch = len(train_loader)
print(total_batch)

In [None]:
# Peek at the first batch of the test loader: print the input and label
# tensor shapes on separate lines, then stop after that one batch.
for X, Y in test_loader:
    print(X.shape, Y.shape, sep='\n')
    break

In [None]:
# Per-epoch history of the baseline model (no BatchNorm / Dropout)
train_loss_list = []
train_acc_list = []

for epoch in range(epochs):
    model.train()  # explicit train mode, matching the BatchNorm/Dropout loops
    avg_loss = correct = 0

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        Y_pred = model(X)  # call the module rather than .forward() so hooks are honored
        loss = criterion(Y_pred, Y)
        loss.backward()
        optimizer.step()

        # Detach before accumulating: adding the graph-attached loss would keep
        # every batch's autograd history alive until the end of the epoch.
        avg_loss += loss.detach() / total_batch

        # number of samples in this batch whose arg-max class equals the label
        correct_prediction = torch.argmax(Y_pred, 1) == Y
        correct += correct_prediction.sum()

    # accuracy in percent over the whole training set
    acc = (100*correct/len(train_loader.dataset))
    train_acc_list.append(acc.item())
    train_loss_list.append(avg_loss.item())
    print("Epoch %2d: loss %.4f   accuracy %.2f" % (epoch+1, avg_loss, acc))

print('Learning finished')

In [None]:
# Training curves of the baseline model: loss (left panel) and accuracy (right panel)
epoch_axis = range(1, epochs + 1)  # 1-based epoch numbers for the x-axis

plt.figure(figsize=(14, 5))

plt.subplot(1, 2, 1)
plt.title('Training Loss')
plt.plot(epoch_axis, train_loss_list)

plt.subplot(1, 2, 2)
plt.title('Training Accuracy')
plt.plot(epoch_axis, train_acc_list)

plt.show()

## Model 평가

In [None]:
# Test the baseline model and report its accuracy on the test set
model.eval()  # evaluation mode, consistent with the BatchNorm/Dropout evaluation cells
with torch.no_grad():  # no gradients are needed for evaluation
    correct = 0
    # Test the model using test sets
    for X, Y in test_loader:
        # flatten each image to a 784-vector and move the batch to the device
        X_test = X.view(-1, 28 * 28).to(device)
        Y_test = Y.to(device)

        prediction = model(X_test)  # call the module rather than .forward()
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        correct += correct_prediction.sum()

    # accuracy in percent over the whole test set
    accuracy = 100*correct/len(test_loader.dataset)
    print('Test set Accuracy: %.4f' % (accuracy))

In [None]:
# Get ten consecutive test samples at a random offset and predict them
with torch.no_grad():
    # randint's upper bound is exclusive, so r <= len(mnist_test) - 10 and
    # the window r .. r + 9 always holds ten valid samples.
    r = np.random.randint(0, len(mnist_test) - 9)
    # mnist_test.data bypasses the ToTensor() transform and holds raw pixel
    # intensities (0-255); rescale to [0, 1] to match what the model was trained on.
    X_10_data = (mnist_test.data[r:r + 10].view(-1, 28 * 28).float() / 255).to(device)
    Y_10_data = mnist_test.targets[r:r + 10].to(device)

    print('Label:      ', Y_10_data)
    prediction = model(X_10_data)
    print('Prediction: ', torch.argmax(prediction, 1))

In [None]:
# Show the test image at offset r + 2 (the third sample of the ten-sample window)
# NOTE(review): relies on `r` from the previous cell; r + 2 must be < len(mnist_test)
plt.imshow(mnist_test.data[r + 2].view(28, 28), cmap='gray')
plt.show()

## With Batch Normalization

In [None]:
# Hyperparameters for the BatchNorm experiment.
# The training loops iterate over `epochs`, so that is the name bound here;
# `training_epochs` is kept as an alias for anything that still reads it.
# Keep `epochs` equal to the baseline run so the comparison plots line up.
learning_rate = 0.001   # step size given to the Adam optimizer
epochs = 10             # number of passes over the training loader
training_epochs = epochs
batch_size = 100        # NOTE: the existing DataLoaders were built with the earlier batch_size

In [None]:
# Fully connected layers: 784 -> 512 -> 256 -> 10 (bias is enabled by default)
linear1 = nn.Linear(784, 512)
linear2 = nn.Linear(512, 256)
linear3 = nn.Linear(256, 10)
# One BatchNorm layer per hidden width, placed between the linear layer and its ReLU
bn1 = nn.BatchNorm1d(512)
bn2 = nn.BatchNorm1d(256)
relu = nn.ReLU()

# Stack the layers, then move the parameters to the selected device
bn_model = nn.Sequential(
    linear1, bn1, relu,
    linear2, bn2, relu,
    linear3,
)
bn_model.to(device)

# Loss and optimizer. The model outputs raw scores:
# Softmax is internally computed by CrossEntropyLoss.
criterion = nn.CrossEntropyLoss()
criterion.to(device)
bn_optimizer = torch.optim.Adam(params=bn_model.parameters(), lr=learning_rate)

In [None]:
# Per-epoch history of the BatchNorm model
bn_train_loss_list = []
bn_train_acc_list = []

for epoch in range(epochs):
    bn_model.train() # set the model to train mode (BatchNorm)
    avg_loss = correct = 0

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        bn_optimizer.zero_grad()
        Y_pred = bn_model(X)  # call the module rather than .forward() so hooks are honored
        loss = criterion(Y_pred, Y)
        loss.backward()
        bn_optimizer.step()

        # Detach before accumulating: adding the graph-attached loss would keep
        # every batch's autograd history alive until the end of the epoch.
        avg_loss += loss.detach() / total_batch

        # number of samples in this batch whose arg-max class equals the label
        correct_prediction = torch.argmax(Y_pred, 1) == Y
        correct += correct_prediction.sum()

    # accuracy in percent over the whole training set
    acc = (100*correct/len(train_loader.dataset))
    bn_train_acc_list.append(acc.item())
    bn_train_loss_list.append(avg_loss.item())
    print("Epoch %2d: loss %.4f   accuracy %.2f" % (epoch+1, avg_loss, acc))

print('Learning finished')

In [None]:
# Compare the training curves of the baseline and the BatchNorm model
epoch_axis = range(1, epochs + 1)  # 1-based epoch numbers for the x-axis

plt.figure(figsize=(14, 5))

# left panel: loss
plt.subplot(1, 2, 1)
plt.plot(epoch_axis, train_loss_list, label='Without BN')
plt.plot(epoch_axis, bn_train_loss_list, label='With BN')
plt.legend()
plt.title('Training Loss')

# right panel: accuracy
plt.subplot(1, 2, 2)
plt.plot(epoch_axis, train_acc_list, label='Without BN')
plt.plot(epoch_axis, bn_train_acc_list, label='With BN')
plt.legend()
plt.title('Training Accuracy')

plt.show()

In [None]:
# Test the BatchNorm model and report its accuracy on the test set
with torch.no_grad():  # no gradients are needed for evaluation
    bn_model.eval()   # set the model to evaluation mode (BatchNorm)
    correct = 0
    # Test the model using test sets
    for X, Y in test_loader:
        # flatten each image to a 784-vector and move the batch to the device
        X_test = X.view(-1, 28 * 28).to(device)
        Y_test = Y.to(device)

        prediction = bn_model(X_test)  # call the module rather than .forward()
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        correct += correct_prediction.sum()

    # accuracy in percent over the whole test set
    accuracy = 100*correct/len(test_loader.dataset)
    print('Test set Accuracy: %.4f' % (accuracy))

## With Dropout

In [None]:
# Hyperparameters for the Dropout experiment.
# The training loops iterate over `epochs`, so that is the name bound here;
# `training_epochs` is kept as an alias for anything that still reads it.
# Keep `epochs` equal to the baseline run so the comparison plots line up.
learning_rate = 0.001   # step size given to the Adam optimizer
epochs = 10             # number of passes over the training loader
training_epochs = epochs
batch_size = 100        # NOTE: the existing DataLoaders were built with the earlier batch_size
drop_prob = 0.15        # probability passed to nn.Dropout

In [None]:
# Fully connected layers: 784 -> 512 -> 256 -> 10 (bias is enabled by default)
linear1 = nn.Linear(784, 512)
linear2 = nn.Linear(512, 256)
linear3 = nn.Linear(256, 10)

relu = nn.ReLU()
dropout = nn.Dropout(drop_prob)  # applied after each hidden ReLU

# Optional He initialization (left disabled)
#nn.init.kaiming_normal_(linear1.weight, nonlinearity='relu')
#nn.init.kaiming_normal_(linear2.weight, nonlinearity='relu')
#nn.init.kaiming_normal_(linear3.weight, nonlinearity='relu')

# Stack the layers, then move the parameters to the selected device
do_model = nn.Sequential(
    linear1, relu, dropout,
    linear2, relu, dropout,
    linear3,
)
do_model.to(device)

# Loss and optimizer. The model outputs raw scores:
# Softmax is internally computed by CrossEntropyLoss.
criterion = nn.CrossEntropyLoss()
criterion.to(device)
do_optimizer = torch.optim.Adam(params=do_model.parameters(), lr=learning_rate)

In [None]:
# Per-epoch history of the Dropout model
do_train_loss_list = []
do_train_acc_list = []

for epoch in range(epochs):
    do_model.train() # set the model to train mode (dropout=True)
    avg_loss = correct = 0

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        do_optimizer.zero_grad()
        Y_pred = do_model(X)  # call the module rather than .forward() so hooks are honored
        loss = criterion(Y_pred, Y)
        loss.backward()
        do_optimizer.step()

        # Detach before accumulating: adding the graph-attached loss would keep
        # every batch's autograd history alive until the end of the epoch.
        avg_loss += loss.detach() / total_batch

        # number of samples in this batch whose arg-max class equals the label
        # (measured with dropout active, since the model is in train mode)
        correct_prediction = torch.argmax(Y_pred, 1) == Y
        correct += correct_prediction.sum()

    # accuracy in percent over the whole training set
    acc = (100*correct/len(train_loader.dataset))
    do_train_acc_list.append(acc.item())
    do_train_loss_list.append(avg_loss.item())
    print("Epoch %2d: loss %.4f   accuracy %.2f" % (epoch+1, avg_loss, acc))

print('Learning finished')

In [None]:
# Compare the training curves of the baseline and the Dropout model
epoch_axis = range(1, epochs + 1)  # 1-based epoch numbers for the x-axis

plt.figure(figsize=(14, 5))

# left panel: loss
plt.subplot(1, 2, 1)
plt.plot(epoch_axis, train_loss_list, label='Without Dropout')
plt.plot(epoch_axis, do_train_loss_list, label='With Dropout')
plt.legend()
plt.title('Training Loss')

# right panel: accuracy
plt.subplot(1, 2, 2)
plt.plot(epoch_axis, train_acc_list, label='Without Dropout')
plt.plot(epoch_axis, do_train_acc_list, label='With Dropout')
plt.legend()
plt.title('Training Accuracy')

plt.show()

In [None]:
# Test the Dropout model and report its accuracy on the test set
with torch.no_grad():  # no gradients are needed for evaluation
    do_model.eval()   # set the model to evaluation mode (dropout=False)
    correct = 0
    # Test the model using test sets
    for X, Y in test_loader:
        # flatten each image to a 784-vector and move the batch to the device
        X_test = X.view(-1, 28 * 28).to(device)
        Y_test = Y.to(device)

        prediction = do_model(X_test)  # call the module rather than .forward()
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        correct += correct_prediction.sum()

    # accuracy in percent over the whole test set
    accuracy = 100*correct/len(test_loader.dataset)
    print('Test set Accuracy: %.4f' % (accuracy))