In [1]:
# Library
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dataset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from matplotlib import pyplot as plt

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device: gpu") if torch.cuda.is_available() else print("device: cpu")

device: gpu


In [2]:
# hypter parameter 설정
learning_rate = 1e-3
epochs = 30
display_step = 10
batch_size = 32
dropout_rate = .2

activation = nn.ReLU()
max_pool = nn.MaxPool2d(2,2) # kerel size, stride size, padding size 
drop_out = nn.Dropout2d(dropout_rate)

In [3]:
# load data
train_data = dataset.MNIST("./", train = True, transform = transforms.ToTensor(), target_transform = None, download = True)
test_data = dataset.MNIST("./", train = False, transform = transforms.ToTensor(), target_transform = None, download = True)

# pre-process (batch, shuffle)
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True, num_workers = 1, drop_last = True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = batch_size, shuffle = True, num_workers = 1, drop_last = True)

In [4]:
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform(m.weight)
        m.bias.data.fill_(0.01)

In [5]:
# model 작성
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__() 
        self.feature_extraction = nn.Sequential(
            nn.Conv2d(1,32,3,padding=1), # 32 28 28
            activation,
            max_pool, # 32 14 14
            nn.Conv2d(32,64,3,padding=1), # 64 14 14
            drop_out,
            nn.Conv2d(64,64,3,padding=1), # 64 14 14
            nn.BatchNorm2d(64),
            activation, 
            max_pool # 64 7 7
        )
        self.classifier = nn.Sequential(
            nn.Linear(64 * 7 * 7, 50),
            nn.Linear(50, 10)
        )
        
    def forward(self, x):
        extracted_feature = self.feature_extraction(x) # [32, 64, 3, 3]
        flatten = extracted_feature.view(batch_size, -1) # [32, 576 (64 * 3 * 3)]
        result = self.classifier(flatten)
        return result

model = CNN()
model.apply(init_weights)
model=model.to(device)
model.train()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 1, gamma = 0.99) ###  

  torch.nn.init.xavier_uniform(m.weight)


In [6]:
loss_array = []

# train the model
for i in range(epochs):
    scheduler.step() ###
    for index, [data, label] in enumerate(train_loader):
        data = data.to(device)
        label = label.to(device)
                
        optimizer.zero_grad()
        output = model.forward(data)
        loss = loss_function(output, label)
        loss.backward()
        optimizer.step()
        
    if i % display_step == 0:
        print('{} epoch lr: {}'.format(i,scheduler.get_lr())) ###
        print('{} epoch loss: {}'.format(i,loss))
        loss_array.append(loss.cpu().detach().numpy())



0 epoch lr: [0.0009801]
0 epoch loss: 0.0037397986743599176




10 epoch lr: [0.0008863848717161291]
10 epoch loss: 0.0012666613329201937
20 epoch lr: [0.0008016305895390457]
20 epoch loss: 4.35749716416467e-05


In [7]:
#test the model
model.eval()
correct = 0
total = 0

prediction_list = []
label_list = []

with torch.no_grad():
    for index, [data, label] in enumerate(test_loader):
        data = data.to(device)
        label = label.to(device)
        
        output = model.forward(data)
        _, prediction_index = torch.max(output, 1)
        
        prediction_list.append(prediction_index)
        label_list.append(label)
        
        total += label.size(0)
        correct += (prediction_index == label).sum().float()

    print("Accuracy of the model: {}".format(correct/total))

Accuracy of the model: 0.9926882982254028


In [8]:
# weight initialization
# batch size 32 -> 0.97996...
# batch size 64 -> 0.97015... -> 32로 설정
# nn.BatchNorm2d(64) -> 0.98717....
# nn.BatchNorm2d(32) -> 0.98637... -> delete
# optim.SGD -> optim.Adam -> 0.989...
# linear fuction 추가 -> 0.9905.. -> delete
# lr scheduler 추가 -> 0.9926...