In [1]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import datasets
from torchvision.transforms import ToTensor
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the MNIST training and test splits from ./data (download=True is
# passed so the files are fetched when needed); every image goes through
# the ToTensor transform.
mnist_options = {"root": "data", "download": True, "transform": ToTensor()}
training_data = datasets.MNIST(train=True, **mnist_options)
test_data = datasets.MNIST(train=False, **mnist_options)

In [3]:
# Split the official training data 80/20 into a training part and a
# validation part. The split is driven by an explicitly seeded generator so
# that re-running the notebook on a fresh kernel yields the same partition.
SPLIT_SEED = 42

len_data = len(training_data)
train_size = int(len_data*0.8)
# NOTE: despite its name this is the size of the *validation* hold-out; the
# name is kept unchanged in case other cells reference it.
test_size = len_data-train_size

training_set, validation_set = random_split(
    training_data,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [4]:
class MLP(torch.nn.Module):
    """Fully connected feed-forward network built from a list of layer widths.

    The input is flattened, then passed through one ``Linear`` layer per
    consecutive pair in ``units``. Every layer except the last is followed by
    an activation; the last layer's raw outputs are returned.

    Args:
        units: Sequence of layer widths ``[n_in, hidden_1, ..., n_out]``.
            Must contain at least two entries.
        activation_class: Zero-argument callable (typically an ``nn.Module``
            subclass) instantiated once per hidden layer. ``None`` selects
            ``torch.nn.ReLU``.

    Raises:
        ValueError: If ``units`` has fewer than two entries.
    """

    def __init__(self, units, activation_class = None):
        super().__init__()
        units = list(units)
        if len(units) < 2:
            # Without this guard, negative indexing below would silently
            # build a network with unintended dimensions.
            raise ValueError(
                "units must contain at least an input and an output size, "
                "got %d entries" % len(units)
            )
        if activation_class is None:
            activation_class = torch.nn.ReLU

        stack = []
        # Hidden layers: Linear followed by a fresh activation instance.
        for n_in, n_out in zip(units[:-2], units[1:-1]):
            stack.append(torch.nn.Linear(n_in, n_out))
            stack.append(activation_class())
        # Output layer: no activation.
        stack.append(torch.nn.Linear(units[-2], units[-1]))
        print(stack)

        # Flatten stays at index 0 so parameter names in state_dict()
        # ("layers.1.weight", ...) remain stable.
        self.layers = torch.nn.Sequential(
            torch.nn.Flatten(),
            *stack
        )

    def forward(self, x):
        """Flatten ``x`` and return the output of the final linear layer."""
        return self.layers(x)

In [8]:
def train_eval(model, lr, nepochs, nbatch, training_set, validation_set):
    """Train ``model`` with plain SGD and evaluate it after every epoch.

    Args:
        model: ``torch.nn.Module`` whose output is fed to
            ``torch.nn.CrossEntropyLoss`` together with the targets.
        lr: Learning rate handed to ``torch.optim.SGD``.
        nepochs: Number of passes over ``training_set``.
        nbatch: Batch size used for both data loaders.
        training_set: Dataset yielding ``(inputs, targets)`` pairs used for
            the parameter updates.
        validation_set: Dataset yielding ``(inputs, targets)`` pairs used
            only for evaluation.

    Returns:
        Tuple ``(cost_hist, cost_hist_valid, acc_hist, acc_hist_valid)``;
        each is a list with one Python float per epoch holding the
        per-sample average loss / the fraction of correct predictions on
        the training and validation set respectively.
    """
    cost_hist = []
    cost_hist_valid = []
    acc_hist = []
    acc_hist_valid = []

    cost_ce = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Batches are drawn in dataset order (no shuffling).
    training_loader = DataLoader(training_set, batch_size=nbatch)
    validation_loader = DataLoader(validation_set, batch_size=nbatch)

    # Remember the caller's train/eval mode so it can be restored afterwards.
    was_training = model.training

    for epoch in range(nepochs):

        # ---- training pass -------------------------------------------------
        model.train()
        training_cost = 0.0
        training_correct = 0
        for inputs, targets in training_loader:
            optimizer.zero_grad()
            predictions = model(inputs)
            batch_cost = cost_ce(predictions, targets)
            batch_cost.backward()
            optimizer.step()
            # The criterion returns the batch mean; weight it by the batch
            # size so that dividing by the dataset size below gives a true
            # per-sample average even when the last batch is smaller.
            training_cost += batch_cost.item() * targets.size(0)
            training_correct += (torch.argmax(predictions, dim=1) == targets).sum().item()

        cost = training_cost / len(training_set)
        acc = training_correct / len(training_set)

        # ---- validation pass (no parameter updates, no autograd graph) -----
        model.eval()
        validation_cost = 0.0
        validation_correct = 0
        with torch.no_grad():
            for inputs, targets in validation_loader:
                predictions = model(inputs)
                # Distinct name: must not clobber the epoch's training cost.
                batch_cost = cost_ce(predictions, targets)
                validation_cost += batch_cost.item() * targets.size(0)
                validation_correct += (torch.argmax(predictions, dim=1) == targets).sum().item()

        cost_valid = validation_cost / len(validation_set)
        acc_valid = validation_correct / len(validation_set)

        print("Epoch %i: %f, %f, %f, %f"%(epoch, cost, acc, cost_valid, acc_valid))

        cost_hist.append(cost)
        cost_hist_valid.append(cost_valid)
        acc_hist.append(acc)
        acc_hist_valid.append(acc_valid)

    model.train(was_training)
    return cost_hist, cost_hist_valid, acc_hist, acc_hist_valid

In [5]:
# Train a 784-100-500-10 MLP on the training split and report the accuracy
# reached on the training and validation splits.
model = MLP([28*28, 100, 500, 10])
epochs = 10
lr = 0.05
batch_size = 64

# Named placeholders instead of `_`, which IPython uses for the last output.
_train_cost, _val_cost, train_acc, val_acc = train_eval(
    model, lr, epochs, batch_size, training_set, validation_set
)
print(f'train_accuracy {train_acc[-1]}, val_accuracy {val_acc[-1]}')

# The plotted curves are accuracies, so legend and title say so.
fig, ax = plt.subplots()
ax.plot(train_acc, 'r', label='Training accuracy')
ax.plot(val_acc, 'b', label='Validation accuracy')
ax.set_title('Training and Validation accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

[Linear(in_features=784, out_features=100, bias=True), ReLU(), Linear(in_features=100, out_features=500, bias=True), ReLU(), Linear(in_features=500, out_features=10, bias=True)]


NameError: name 'train_acc' is not defined

In [11]:
# Write the model's state_dict (not the module object itself) to disk.
model_weights = model.state_dict()
torch.save(model_weights, "mnist-classifier.pt")