In [None]:
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision import datasets, transforms

In [None]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch,
    and writes the seed to the ``PYTHONHASHSEED`` environment variable.
    When CUDA is available, all CUDA devices are seeded as well and cuDNN
    is switched to deterministic mode.

    Args:
        seed: Value handed unchanged to each seeding function.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


# Fix the seed once for the whole notebook.
set_seed(1234)

In [None]:
# Device configuration: use CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Device:', device)

In [None]:
# MNIST Dataset
# Both splits are read from the same root directory and converted to
# tensors; only the training split requests a download.
root = './mnist_data'
train_dataset = datasets.MNIST(
    root=root,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root=root,
    train=False,
    transform=transforms.ToTensor(),
    download=False,
)

# Data Loader
# Same batch size for both splits; only the training loader shuffles.
bs = 100
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=bs, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=bs, shuffle=False)

In [None]:
def LinearModel(num_in, num_out):
    """Build a linear classifier: flatten the input, then one linear layer.

    Args:
        num_in: Number of input features after flattening.
        num_out: Number of output features.

    Returns:
        An ``nn.Sequential`` holding ``nn.Flatten`` followed by
        ``nn.Linear(num_in, num_out)``.
    """
    layers = [nn.Flatten(), nn.Linear(num_in, num_out)]
    return nn.Sequential(*layers)

def SingleLayer(num_in, num_features, num_out):
    """Build a network with one hidden layer and a GELU activation.

    Args:
        num_in: Number of input features after flattening.
        num_features: Width of the hidden layer.
        num_out: Number of output features.

    Returns:
        An ``nn.Sequential`` of ``nn.Flatten``, ``nn.Linear(num_in,
        num_features)``, ``nn.GELU`` and ``nn.Linear(num_features, num_out)``.
    """
    # Layers are created in forward order so parameter initialisation
    # draws from the random generator in the same sequence as the stack.
    hidden = nn.Linear(num_in, num_features)
    activation = nn.GELU()
    head = nn.Linear(num_features, num_out)
    return nn.Sequential(nn.Flatten(), hidden, activation, head)

In [None]:
# Experiment switches: whether to train, and the run name used in paths.
do_train = True
name = 'SingleLayer_F128'

# Build the network, show its structure, then move it to the chosen device.
net = SingleLayer(num_in=784, num_features=128, num_out=10)
print(net)
net = net.to(device)

# Cross-entropy loss, Adam optimizer over all network parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=3e-4)

# Directory that will hold this run's checkpoints.
model_root = './model/{}'.format(name)
print('model_root:', model_root)
os.makedirs(model_root, exist_ok=True)

In [None]:
# Train for a fixed number of epochs and write one checkpoint per epoch.
if do_train:
    num_epochs = 100
    # The model stays on `device` for the whole run. Checkpoints are written
    # from a CPU copy of the weights, so the live model is never moved off
    # the device and back again just to be saved.
    net = net.to(device)
    for epoch in range(1, num_epochs+1):
        net = net.train()
        losses = []
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            out = net(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        # Mean of the per-batch losses for this epoch.
        loss_mean = np.mean(losses)

        # state_dict() builds a new mapping on every call, so replacing its
        # values with CPU tensors does not touch the model's own parameters.
        checkpoint = net.state_dict()
        for key in list(checkpoint):
            checkpoint[key] = checkpoint[key].cpu()
        model_path = os.path.join(model_root, '%s_epoch%03d.pth'%(name, epoch))
        torch.save(checkpoint, model_path)
        print('epoch %4d  |  loss %6.3f  |   model_path -> %s'%(epoch, loss_mean, model_path))

In [None]:
@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Compute the loss and accuracy of `model` over every batch of `loader`.

    Args:
        model: Network to evaluate; called on each input batch.
        loader: Iterable of ``(x, y)`` batches exposing a ``dataset`` attribute
            whose length is the number of samples.
        criterion: Loss function called as ``criterion(out, y)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy)``: the mean of the per-batch losses, and the
        number of correct argmax predictions divided by ``len(loader.dataset)``.
    """
    batch_losses = []
    num_correct = 0
    for x, y in loader:
        x = x.to(device)
        y = y.to(device)
        out = model(x)
        batch_losses.append(criterion(out, y).item())
        pred = torch.argmax(out, dim=1)
        num_correct += torch.sum(pred == y).item()
    return np.mean(batch_losses), num_correct / len(loader.dataset)


net = net.to(device)
net = net.eval()
# Keep only checkpoint files so stray entries in the directory (for example
# editor or notebook metadata) are not passed to torch.load. Sorting by name
# assumes the zero-padded epoch suffix used when the checkpoints were saved.
models = sorted(f for f in os.listdir(model_root) if f.endswith('.pth'))

# One entry per checkpoint, in the order of `models`.
losses_train = []
losses_test = []
accuracy_train = []
accuracy_test = []
for model in models:
    model_path = os.path.join(model_root, model)
    # map_location makes loading independent of the device the file was saved from.
    net.load_state_dict(torch.load(model_path, map_location=device))
    print('model_path: ', model_path)

    l_train, a_train = evaluate(net, train_loader, criterion, device)
    l_test, a_test = evaluate(net, test_loader, criterion, device)

    print('Loss = [%f | %f] Accuracy = [%f | %f]'%(l_train, l_test, a_train, a_test))

    losses_train.append(l_train)
    losses_test.append(l_test)
    accuracy_train.append(a_train)
    accuracy_test.append(a_test)


In [None]:
# Save the evaluation curves as a single array whose rows are
# [train loss, test loss, train accuracy, test accuracy].
result_root = './result'
result_path = os.path.join(result_root, '%s.npy'%name)
os.makedirs(result_root, exist_ok=True)

result = np.array([
    losses_train,
    losses_test,
    accuracy_train,
    accuracy_test,
])
np.save(result_path, result)
print('result_path:', result_path)

In [None]:
# Plot loss (left) and accuracy (right) per evaluated checkpoint.
# The x axis is derived from the number of columns in `result`, so the plot
# still works when the number of evaluated checkpoints is not 100.
epochs = np.arange(1, result.shape[1] + 1)
fig, axes = plt.subplots(1, 2, figsize=(16, 4))

# (y label, train curve, test curve) for each panel, left to right.
panels = (
    ('loss', result[0], result[1]),
    ('accuracy', result[2], result[3]),
)
for ax, (ylabel, train_curve, test_curve) in zip(axes, panels):
    ax.plot(epochs, train_curve, label='train')
    ax.plot(epochs, test_curve, label='test')
    ax.set_xlabel('epoch')
    ax.set_ylabel(ylabel)
    ax.set_title(name)
    ax.legend()
    ax.grid()

# Write the figure next to the other run artefacts.
image_root = './image'
os.makedirs(image_root, exist_ok=True)
image_path = os.path.join(image_root, '%s.png'%name)
fig.savefig(image_path)
print('image_path:', image_path)