In [7]:
import os

import numpy as np

from nn import losses, networks, optimizers
from utils.dataloader import MnistDataloader, batch_generator

In [8]:
def one_hot_encode(labels, num_classes=10):
    """Convert integer class labels into a one-hot encoded matrix.

    Args:
        labels: 1-D sequence or array of integer class indices, each expected
            to lie in ``[0, num_classes)``.
        num_classes: Number of columns in the encoding.

    Returns:
        A float array of shape ``(len(labels), num_classes)`` filled with
        zeros except for a single 1 per row, placed in the column given by
        that row's label.

    Raises:
        IndexError: If an integer label is negative or not smaller than
            ``num_classes``.
    """
    one_hot_labels = np.zeros((len(labels), num_classes))

    # NumPy indexing wraps negative indices around (-1 would mark the last
    # class), which silently corrupts the targets. Reject out-of-range integer
    # labels up front; IndexError is the type NumPy already raises for labels
    # that are too large.
    label_array = np.asarray(labels)
    if label_array.dtype.kind in "iu" and label_array.size > 0:
        lowest = int(label_array.min())
        highest = int(label_array.max())
        if lowest < 0 or highest >= num_classes:
            raise IndexError(
                f"labels must be in [0, {num_classes}), "
                f"got values in [{lowest}, {highest}]")

    one_hot_labels[np.arange(len(labels)), labels] = 1
    return one_hot_labels

In [9]:
def preprocess(input_path=r"MNIST"):
    """Load the MNIST IDX files and prepare them for the dense network.

    Args:
        input_path: Directory that contains the four MNIST sub-directories.
            Defaults to ``"MNIST"`` relative to the working directory.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where each ``x`` is reshaped
        to ``(n_samples, 784)`` and divided by 255.0, and each ``y`` is the
        one-hot encoding (10 classes) of the loaded labels.
    """
    # Order matters: it is the positional argument order of MnistDataloader
    # (train images, train labels, test images, test labels).
    relative_paths = (
        r"train-images-idx3-ubyte/train-images-idx3-ubyte",
        r"train-labels-idx1-ubyte/train-labels-idx1-ubyte",
        r"t10k-images-idx3-ubyte/t10k-images-idx3-ubyte",
        r"t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte",
    )
    filepaths = [os.path.join(input_path, rel) for rel in relative_paths]
    mnist_dataloader = MnistDataloader(*filepaths)
    (x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

    # Flatten every image to a 784-long row and rescale by 1/255
    # (presumably 8-bit pixel intensities — confirm against the loader).
    x_train = np.array(x_train).reshape((len(x_train), 784)) / 255.0
    x_test = np.array(x_test).reshape((len(x_test), 784)) / 255.0

    # Labels are flattened to 1-D before being one-hot encoded.
    y_train = one_hot_encode(np.array(y_train).reshape(len(y_train)), 10)
    y_test = one_hot_encode(np.array(y_test).reshape(len(y_test)), 10)
    return (x_train, y_train), (x_test, y_test)

In [10]:
# Load the MNIST splits via preprocess(): images come back flattened to 784
# features and divided by 255.0, labels one-hot encoded over 10 classes.
(x_train, y_train), (x_test, y_test) = preprocess()

In [11]:
# Model, loss and optimizer used by the training loop below.
# The first entry of `layers` (784) matches the flattened image width produced
# by preprocess() and the last (10) matches the number of one-hot classes;
# the entries are presumably layer widths — confirm in nn.networks.
net = networks.DNN(layers=[784, 128, 64, 32, 10], initializer="he_uniform")
# "mean" is presumably the loss reduction mode — confirm in nn.losses.
criterion = losses.CrossEntropyLoss("mean")
# The optimizer is handed the network's parameters; `lr` is presumably the
# learning rate — confirm in nn.optimizers.
optimizer = optimizers.SGD(parameters=net.parameters, lr=0.01)

In [12]:
# Hyperparameters of the training run.
NUM_EPOCHS = 100
TRAIN_BATCH_SIZE = 64
TEST_BATCH_SIZE = 32

for epoch in range(1, NUM_EPOCHS + 1):
    # Running totals for this epoch's training pass.
    epoch_loss = 0.0
    total_step = 0
    total = 0
    correct = 0

    for x, y in batch_generator(x_train, y_train,
                                batch_size=TRAIN_BATCH_SIZE, shuffle=True):
        # Transpose each batch so axis 1 indexes the samples; the counts below
        # rely on this (y_pred.shape[1], argmax over axis 0).
        x = x.T.copy()
        y = y.T.copy()

        # One optimisation step: clear gradients, forward, loss, backward,
        # parameter update. The order of these calls must be preserved.
        optimizer.zero_grad()
        y_pred = net(x)
        loss = criterion(y_pred, y)
        net.backward(criterion.backward())
        optimizer.step()

        epoch_loss += loss
        total_step += 1
        step_total = y_pred.shape[1]
        # A sample is correct when the predicted class (argmax over axis 0)
        # equals the position of the 1 in its one-hot target.
        step_correct = np.equal(y_pred.argmax(axis=0),
                                y.argmax(axis=0)).sum()
        total += step_total
        correct += step_correct

    # Evaluate on the test split after every epoch (forward pass only).
    test_total = 0
    test_correct = 0
    for x, y in batch_generator(x_test, y_test,
                                batch_size=TEST_BATCH_SIZE, shuffle=True):
        x = x.T.copy()
        y = y.T.copy()

        y_pred = net(x)

        test_total += y_pred.shape[1]
        test_correct += np.equal(y_pred.argmax(axis=0),
                                 y.argmax(axis=0)).sum()
    test_accuracy = test_correct / test_total

    # Average the per-batch losses and compute the training accuracy.
    epoch_loss /= total_step
    accuracy = correct / total

    print(
        f"epoch: {epoch:>3d}, loss: {epoch_loss:.6f}, accuracy: {accuracy:.6f}, test accuracy: {test_accuracy:.6f}"
    )

epoch:   1, loss: 4.603426, accuracy: 0.790100, test accuracy: 0.903200
epoch:   2, loss: 1.980588, accuracy: 0.911367, test accuracy: 0.923000
epoch:   3, loss: 1.614014, accuracy: 0.927367, test accuracy: 0.934500
epoch:   4, loss: 1.385120, accuracy: 0.937700, test accuracy: 0.941300
