In [80]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pathlib



In [81]:
def get_mnist(path="./data/mnist.npz"):
    """Load the MNIST training split from an ``.npz`` archive.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the archive. It must contain the arrays ``x_train``
        (a 3-D stack of images) and ``y_train`` (integer class ids).
        Defaults to ``./data/mnist.npz``.

    Returns
    -------
    images : numpy.ndarray
        ``float32`` array with one row per sample; every image is flattened
        and its pixel values are divided by 255.
    labels : numpy.ndarray
        One-hot encoded labels with 10 columns, one row per sample.
    """
    with np.load(path) as f:
        images, labels = f["x_train"], f["y_train"]
    # Scale the raw pixel values by 255 before flattening.
    images = images.astype("float32") / 255
    # Collapse the two image axes into a single feature axis.
    images = np.reshape(images, (images.shape[0], images.shape[1] * images.shape[2]))
    # Row i of the identity matrix is the one-hot vector for class i.
    labels = np.eye(10)[labels]
    return images, labels

In [82]:
# Load the flattened, scaled training images and their one-hot labels
# from the default archive location used by get_mnist().
images, labels = get_mnist()

In [83]:
# Network dimensions: 784 input features -> 28 hidden units -> 10 output classes.
INPUT_SIZE = 784
HIDDEN_SIZE = 28
OUTPUT_SIZE = 10

# Fixed seed so Restart & Run All yields the same initial weights every time.
SEED = 42
rng = np.random.default_rng(SEED)

# Weights start uniformly in [-0.5, 0.5); biases start at zero.
# Each weight matrix is (units in this layer, units in previous layer) and
# each bias is a column vector with one entry per unit in its layer.
weight_1 = rng.uniform(-0.5, 0.5, (HIDDEN_SIZE, INPUT_SIZE))
weight_2 = rng.uniform(-0.5, 0.5, (OUTPUT_SIZE, HIDDEN_SIZE))
bias_1 = np.zeros((HIDDEN_SIZE, 1))
bias_2 = np.zeros((OUTPUT_SIZE, 1))

In [84]:
# Sanity check: show the dimensions of the image and label arrays, one per line.
print(images.shape, labels.shape, sep="\n")

(60000, 784)
(60000, 10)


In [85]:
# Training configuration.
epochs = 10        # number of full passes over the training set
learn_rate = 1e-2  # step size applied to every gradient update
nr_correct = 0     # running count of correct predictions in the current epoch

In [86]:
def softmax(x):
    """Return the softmax of ``x`` taken along axis 0.

    The maximum along axis 0 is subtracted before exponentiating; this
    leaves the result unchanged mathematically but keeps the exponentials
    from overflowing.
    """
    peak = np.max(x, axis=0)
    numerators = np.exp(x - peak)
    return numerators / numerators.sum(axis=0)

# Train the two-layer network with per-sample stochastic gradient descent:
# sigmoid hidden layer, softmax output layer, cross-entropy loss.
for epoch in range(epochs):
    # Start every epoch from a clean counter so a stale value left behind by
    # an interrupted run of this cell cannot inflate the reported accuracy.
    nr_correct = 0
    for img, l in zip(images, labels):
        # Turn the flat sample and its one-hot label into column vectors.
        # reshape() returns a new view rather than assigning to .shape in
        # place, which NumPy discourages.
        img = img.reshape(-1, 1)
        l = l.reshape(-1, 1)

        # Forward propagation input -> hidden (sigmoid activation)
        h_pre = bias_1 + np.dot(weight_1, img)
        h = 1 / (1 + np.exp(-h_pre))
        # Forward propagation hidden -> output (softmax activation)
        o_pre = bias_2 + np.dot(weight_2, h)
        o = softmax(o_pre)

        # Cross-entropy loss for this sample (monitoring only; the gradient
        # below does not read it).
        e = -np.sum(l * np.log(o))

        nr_correct += int(np.argmax(o) == np.argmax(l))

        # Backpropagation: compute BOTH layer deltas before touching any
        # weights, so the hidden-layer gradient is taken with respect to the
        # same weight_2 that produced this forward pass.
        # For softmax + cross-entropy the output delta is simply o - l.
        delta_o = o - l
        # Sigmoid derivative is h * (1 - h).
        delta_h = np.dot(np.transpose(weight_2), delta_o) * (h * (1 - h))

        # Gradient-descent updates for the output layer ...
        weight_2 += -learn_rate * np.dot(delta_o, np.transpose(h))
        bias_2 += -learn_rate * delta_o
        # ... and for the hidden layer.
        weight_1 += -learn_rate * np.dot(delta_h, np.transpose(img))
        bias_1 += -learn_rate * delta_h

    # Show accuracy for this epoch
    print(f"Acc: {round((nr_correct / images.shape[0]) * 100, 2)}%")
    nr_correct = 0


Acc: 87.83%
Acc: 93.15%
Acc: 94.3%
Acc: 95.03%
Acc: 95.57%
Acc: 95.95%
Acc: 96.24%
Acc: 96.5%
Acc: 96.69%
Acc: 96.84%


In [87]:
# Persist the trained parameters. np.savez does not create missing parent
# directories, so make sure the target folder exists first.
weights_path = pathlib.Path('./weight/weights_softmax.npz')
weights_path.parent.mkdir(parents=True, exist_ok=True)
np.savez(weights_path, weight_1=weight_1, bias_1=bias_1, weight_2=weight_2, bias_2=bias_2)
