In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import torchvision
import sys

print('Pytorch version: ', torch.__version__)
print('GPU availability: ', torch.cuda.is_available())


Pytorch version:  2.0.1+cu118
GPU availability:  True


In [8]:
data_dir='./data'
BATCH_SIZE = 64

train_dataset = torchvision.datasets.MNIST(
    root=data_dir,
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True
)
test_dataset = torchvision.datasets.MNIST(
    root=data_dir,
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True
)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False)

In [9]:
class Single_ANN(nn.Module):
    def __init__(self):
        super(Single_ANN, self).__init__()
        self.fc1 = nn.Linear(28*28,10)
    def forward(self, x):
        x = x.view(-1, 28*28) # Flatten every image into a single vector
        x = self.fc1(x) #do not use activation function on last layer
        return x

    def name(self):
        return "MLP"

In [15]:
LR = 0.001
EPOCHS = 50
model = Single_ANN().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.MSELoss()

In [16]:
best_loss = 1000000
best_path = './Models/Single_MLP.pt'

for epoch in range(EPOCHS):
    # trainning
    total_loss = 0
    for batch_idx, (x, target) in enumerate(train_loader):
        optimizer.zero_grad()
        x, target = x.cuda(), target.cuda()
        target_onehot = F.one_hot(target, 10).float()
        out = model(x)
        loss = criterion(out, target_onehot)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    avg_loss = total_loss / len(train_dataset)
    print(f'==>>> epoch: {epoch}, train loss: {avg_loss:.6f}')
    # TODO3: Based on average accuracy on validation set, save the model weights into a file
    if best_loss > avg_loss:
      best_loss = avg_loss
      torch.save(model.state_dict(), best_path)

==>>> epoch: 0, train loss: 0.000348
==>>> epoch: 1, train loss: 0.000314
==>>> epoch: 2, train loss: 0.000311
==>>> epoch: 3, train loss: 0.000310
==>>> epoch: 4, train loss: 0.000309
==>>> epoch: 5, train loss: 0.000309
==>>> epoch: 6, train loss: 0.000310
==>>> epoch: 7, train loss: 0.000309
==>>> epoch: 8, train loss: 0.000309
==>>> epoch: 9, train loss: 0.000309
==>>> epoch: 10, train loss: 0.000309
==>>> epoch: 11, train loss: 0.000309
==>>> epoch: 12, train loss: 0.000309
==>>> epoch: 13, train loss: 0.000309
==>>> epoch: 14, train loss: 0.000309
==>>> epoch: 15, train loss: 0.000309
==>>> epoch: 16, train loss: 0.000308
==>>> epoch: 17, train loss: 0.000309
==>>> epoch: 18, train loss: 0.000309
==>>> epoch: 19, train loss: 0.000309
==>>> epoch: 20, train loss: 0.000309
==>>> epoch: 21, train loss: 0.000309
==>>> epoch: 22, train loss: 0.000309
==>>> epoch: 23, train loss: 0.000309
==>>> epoch: 24, train loss: 0.000309
==>>> epoch: 25, train loss: 0.000309
==>>> epoch: 26, train

In [19]:
best_path = './Models/Single_MLP.pt'
checkpoint = torch.load(f=best_path)
model.load_state_dict(checkpoint)
total_loss = 0
correct_cnt = 0
model.eval()
for batch_idx, (x, target) in enumerate(test_loader):
    x, target = x.cuda(), target.cuda()
    out = model(x)
    target_onehot = F.one_hot(target, 10).float()
    loss = criterion(out, target_onehot)

    _, pred_label = torch.max(out, 1)
    correct_cnt += (pred_label == target).sum()
    # smooth average
    total_loss += loss.item()
avg_loss = total_loss / len(test_dataset)
avg_acc = correct_cnt / len(test_dataset)
print(f'test loss: {avg_loss:.6f}, test accuracy: {avg_acc:.6f}')

test loss: 0.000312, test accuracy: 0.852800
