In [1]:
# imports for data handling, network definition, and training
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# imports for illustrations
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# read MNIST training data
train_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# read MNIST test data
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 42343440.16it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 70264323.56it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 13893023.24it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 6403539.08it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [3]:
# define network model class
class MLP(nn.Module):
    def __init__(self, n_input, n_output, hidden_layers):
        super(MLP, self).__init__()
        self.n_input = n_input
        self.n_output = n_output
        self.hidden_layers = hidden_layers
        self.layers = nn.ModuleList()
        self.layers.append(nn.Linear(self.n_input, self.hidden_layers[0]))
        for i in range(len(self.hidden_layers)-1):
            self.layers.append(nn.Linear(self.hidden_layers[i], self.hidden_layers[i+1]))
        self.layers.append(nn.Linear(self.hidden_layers[-1], self.n_output))
        
        
    def forward(self, x):
        x = x.view(-1, self.n_input)
        for layer in self.layers[:-1]:
            x = torch.relu(layer(x))
        x = torch.softmax(self.layers[-1](x))
        return x


In [4]:
# define training loop
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    train_loss, correct = 0, 0
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)
        train_loss += loss.item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    av_train_loss = train_loss / size
    av_accuracy = correct / size
    return av_train_loss, av_accuracy

In [5]:
# define testing loop
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    av_test_loss = test_loss / size
    av_accuracy = correct / size


    return av_test_loss, av_accuracy

In [6]:
# define plot function
def plot_accuracy(train_accuracy_log, test_accuracy_log):
    plt.figure(figsize=(12,8))
    plt.plot(train_accuracy_log, label='train')
    plt.plot(test_accuracy_log, label='test')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

In [7]:
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [8]:
# network architecture parameter specifications
# number input nodes
n_input = 2
# number output nodes
n_output = 2
# list with hidden layer specifications: [number hidden nodes, activation function] per layer
hidden_layers = 2

# model definition and initialization
# without explicit weight/bias initialization the PyTorch default is used (Kaiming)
model = MLP(n_input, n_output, hidden_layers)    

# network training parameter specifications
batch_size = 124
learning_rate = 0.1
momentum = 0.9
epochs =  100

# optimizer specification
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

# loss function specification
loss_fn = nn.CrossEntropyLoss()

# prepare data for training (partion data into minibatches)
train_dataloader = DataLoader(train_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# prepare training / testing accuracy recording
train_accuracy_log = []
train_accuracy_log = []

# training and testing 
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")

    av_train_loss, train_accuracy = train_loop(train_dataloader, model, loss_fn, optimizer)
    av_test_loss, test_accuracy = test_loop(test_dataloader, model, loss_fn)
    train_accuracy_log.append(train_accuracy)
    test_accuracy_log.append(test_accuracy)
    print(f"Training Error: {av_train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}")

TypeError: 'int' object is not subscriptable

In [None]:
# plot training and testing accuracy over the number of epochs
plot_accuracy(train_accuracy_log, test_accuracy_log)