In [8]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torchvision import datasets, transforms

In [9]:
# Pick the compute device once: use CUDA when PyTorch reports it as
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
DEVICE

'cuda'

In [30]:
# --- Experiment configuration -------------------------------------------
# Seed PyTorch's global RNG so weight initialisation and DataLoader
# shuffling repeat from run to run (GPU kernels may still add small
# nondeterminism).
SEED = 0
torch.manual_seed(SEED)

BATCH_SIZE = 32   # samples per mini-batch for both loaders
LR = 1e-3         # optimizer learning rate
EPOCH = 5         # number of passes over the training set
criterion = nn.CrossEntropyLoss()  # loss applied to the model outputs

# When True the training cell fits a new model and overwrites the checkpoint.
new_model_train = True
model_type = 'MLP'
dataset = 'MNIST'
# Checkpoint location, derived from the model / dataset tags above.
save_model_path = f'./results/{model_type}_{dataset}.pt'

In [31]:
# Every image is converted to a tensor on access.
transform = transforms.ToTensor()

# MNIST train / test splits, stored under ./data (downloaded if missing).
train_DS, test_DS = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batch iterators over both splits; both are shuffled.
train_DL, test_DL = (
    torch.utils.data.DataLoader(split, batch_size=BATCH_SIZE, shuffle=True)
    for split in (train_DS, test_DS)
)

In [32]:
class MLP(nn.Module):
    """Small fully connected network: 784 -> 30 (BatchNorm, ReLU) -> 10."""

    def __init__(self):
        super().__init__()
        # Layers are listed in execution order and wrapped in one Sequential.
        layers = [
            nn.Linear(28*28, 30),
            nn.BatchNorm1d(30),
            nn.ReLU(),
            nn.Linear(30, 10),
        ]
        self.fcs = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten all dimensions after the first and apply ``self.fcs``."""
        flat = x.flatten(start_dim=1)
        return self.fcs(flat)

In [33]:
# Instantiate the network on the selected device and show its layers.
model = MLP().to(DEVICE)
print(model)

# Smoke test: push one training batch through the model and print the
# input shape followed by the output shape.
x_batch, _ = next(iter(train_DL))
print(x_batch.shape, model(x_batch.to(DEVICE)).shape, sep='\n')

MLP(
  (fcs): Sequential(
    (0): Linear(in_features=784, out_features=30, bias=True)
    (1): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=30, out_features=10, bias=True)
  )
)
torch.Size([32, 1, 28, 28])
torch.Size([32, 10])


In [41]:
def Train(model, train_DL, criterion, optimizer, EPOCH):
    """Train ``model`` on ``train_DL`` and return the loss of every epoch.

    Args:
        model: network to optimise; batches are moved to the module-level
            ``DEVICE`` before being fed to it.
        train_DL: iterable of ``(x_batch, y_batch)`` pairs exposing a
            ``dataset`` attribute with a length.
        criterion: callable ``criterion(y_hat, y_batch)`` returning a scalar
            loss tensor (assumed to be the batch mean -- the per-sample
            weighting below relies on that).
        optimizer: optimizer already bound to ``model``'s parameters.
        EPOCH: number of full passes over ``train_DL``.

    Returns:
        list[float]: one entry per epoch, the accumulated batch losses
        divided by ``len(train_DL.dataset)``.
    """
    loss_history = []
    NoT = len(train_DL.dataset)  # number of training samples
    for ep in range(EPOCH):
        # Make sure layers such as BatchNorm are in training mode, even if
        # the model was previously switched to eval mode (e.g. by Test()).
        model.train()
        rloss = 0
        for x_batch, y_batch in train_DL:
            x_batch = x_batch.to(DEVICE)
            y_batch = y_batch.to(DEVICE)

            # inference
            y_hat = model(x_batch)

            # loss
            loss = criterion(y_hat, y_batch)

            # update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss accumulation: scale the batch loss by the batch size so
            # that a smaller final batch is weighted correctly
            loss_b = loss.item() * x_batch.shape[0]
            rloss += loss_b

        # print loss
        loss_e = rloss/NoT # epoch loss
        loss_history.append(loss_e)
        print(f'Epoch: {ep+1}, train loss: {loss_e:.3f}')
        print('-' * 20)

    return loss_history

In [None]:
if new_model_train:
    # torch.save does not create missing parent folders; make sure the
    # checkpoint directory exists *before* spending time on training.
    os.makedirs(os.path.dirname(save_model_path) or '.', exist_ok=True)

    # Fit the model with Adam and keep the per-epoch training loss.
    optimizer = optim.Adam(model.parameters(), lr=LR)
    loss_history = Train(model, train_DL, criterion, optimizer, EPOCH)

    # Persist the whole module object (not just its state_dict).
    torch.save(model, save_model_path)

    # Training-loss curve, one point per epoch.
    plt.plot(range(1, EPOCH+1), loss_history)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Train Loss')
    plt.grid()
    plt.show()

Epoch: 1, train loss: 0.401
--------------------
Epoch: 2, train loss: 0.206
--------------------
Epoch: 3, train loss: 0.166
--------------------
Epoch: 4, train loss: 0.144
--------------------


In [None]:
# Reload the checkpoint written by the training cell.
# - map_location=DEVICE lets a model saved on one device load on another.
# - weights_only=False is needed because the file holds a whole pickled
#   nn.Module rather than a state_dict. Unpickling can execute arbitrary
#   code, so only load checkpoints you created yourself.
load_model = torch.load(save_model_path, map_location=DEVICE, weights_only=False)
load_mode = load_model  # keep the original (misspelled) name available

In [None]:
def Test(model, test_DL):
    """Evaluate ``model`` on ``test_DL`` and return its accuracy in percent.

    The model is switched to eval mode and gradients are disabled while the
    batches are processed. A prediction is the argmax over dimension 1 of the
    model output. The result is printed and returned rounded to one decimal.
    """
    model.eval()  # switch to test (evaluation) mode
    with torch.no_grad():
        rcorrect = 0
        for inputs, targets in test_DL:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)

            # inference
            scores = model(inputs)

            # count the predictions that match the labels in this batch
            hits = torch.argmax(scores, dim=1) == targets
            rcorrect += hits.sum().item()
        n_total = len(test_DL.dataset)
        accuracy_e = rcorrect / n_total * 100
    print(f'Test accuracy: {rcorrect}/{len(test_DL.dataset)} ({accuracy_e:.1f})')
    return round(accuracy_e, 1)