In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pathlib



In [2]:
def get_mnist(path="./data/mnist.npz", num_classes=10):
    """Load the training split from an ``.npz`` archive.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the archive. It must contain the arrays ``x_train`` and
        ``y_train``. Defaults to ``"./data/mnist.npz"``.
    num_classes : int, optional
        Width of the one-hot label encoding. Defaults to 10.

    Returns
    -------
    images : numpy.ndarray
        ``float32`` array of shape ``(n_samples, n_features)``: every sample
        is divided by 255 and flattened to a single row.
    labels : numpy.ndarray
        One-hot encoded labels of shape ``(n_samples, num_classes)``.
    """
    # The context manager closes the archive as soon as both arrays are read.
    with np.load(path) as f:
        images, labels = f["x_train"], f["y_train"]
    images = images.astype("float32") / 255
    # -1 flattens all trailing dimensions, whatever the per-sample shape is.
    images = images.reshape(images.shape[0], -1)
    # Row i of the identity matrix is the one-hot vector for class i.
    labels = np.eye(num_classes)[labels]
    return images, labels

In [3]:
# Load the training set once: flattened images scaled by 1/255 and one-hot labels.
images, labels = get_mnist()

In [4]:
# Network layout: input features -> hidden units -> output classes.
INPUT_SIZE = 784
HIDDEN_SIZE = 28
OUTPUT_SIZE = 10

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same initial
# weights (and therefore the same training run).
SEED = 0
rng = np.random.default_rng(SEED)

# Weights start uniformly in [-0.5, 0.5); biases start at zero.
# Each weight matrix has shape (units in this layer, units in previous layer).
weight_1 = rng.uniform(-0.5, 0.5, (HIDDEN_SIZE, INPUT_SIZE))
weight_2 = rng.uniform(-0.5, 0.5, (OUTPUT_SIZE, HIDDEN_SIZE))
bias_1 = np.zeros((HIDDEN_SIZE, 1))
bias_2 = np.zeros((OUTPUT_SIZE, 1))

In [5]:
# Sanity check: show the shapes of the image matrix and the one-hot label matrix.
print(images.shape, labels.shape, sep="\n")

(60000, 784)
(60000, 10)


In [6]:
# Training hyperparameters.
epochs = 10         # number of full passes over the training set
learn_rate = 0.01   # step size of each gradient-descent update

# Running count of correct predictions, used for the per-epoch accuracy.
nr_correct = 0


In [7]:
# Sigmoid activation function

def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``, overflow-safe.

    The textbook form evaluates ``exp(-x)``, which overflows for large
    negative ``x``. Here only ``exp(-|x|)`` is evaluated (always in (0, 1]),
    and the algebraically equivalent branch is chosen per element:

    * ``x >= 0``: ``1 / (1 + exp(-x))``
    * ``x <  0``: ``exp(x) / (1 + exp(x))``

    Parameters
    ----------
    x : array_like
        Input values.

    Returns
    -------
    numpy.ndarray
        Sigmoid of ``x`` with the same shape as ``x``.
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

# Train the two-layer network with per-sample gradient descent.
for epoch in range(epochs):
    # Reset at the START of the epoch: if a previous run of this cell was
    # interrupted, a stale count must not leak into this epoch's accuracy.
    nr_correct = 0

    for img_row, label_row in zip(images, labels):
        # Turn each sample into a column vector. reshape returns a new view;
        # assigning to .shape in place is discouraged by NumPy.
        img = img_row.reshape(-1, 1)
        l = label_row.reshape(-1, 1)

        # Forward propagation input -> hidden -> output
        h_pre = bias_1 + np.dot(weight_1, img)
        h = sigmoid(h_pre)
        o_pre = bias_2 + np.dot(weight_2, h)
        o = sigmoid(o_pre)

        # Cost / Error calculation (mean squared error of this sample;
        # kept for inspection only, it is not used by the updates below)
        e = 1 / len(o) * np.sum((o - l) ** 2, axis=0)
        nr_correct += int(np.argmax(o) == np.argmax(l))

        # Backpropagation: compute BOTH error terms before touching any
        # weights, so that delta_h is propagated through the same weight_2
        # that produced `o` in the forward pass (not the already-updated one).
        delta_o = o - l
        delta_h = np.dot(np.transpose(weight_2), delta_o) * (h * (1 - h))

        # Gradient step output -> hidden
        weight_2 -= learn_rate * np.dot(delta_o, np.transpose(h))
        bias_2 -= learn_rate * delta_o
        # Gradient step hidden -> input
        weight_1 -= learn_rate * np.dot(delta_h, np.transpose(img))
        bias_1 -= learn_rate * delta_h

    # Show accuracy for this epoch
    print(f"Acc: {round((nr_correct / images.shape[0]) * 100, 2)}%")


KeyboardInterrupt: 

In [None]:
# Persist the trained parameters. Create the target directory first so the
# save does not fail with FileNotFoundError when ./weight does not exist yet.
pathlib.Path('./weight').mkdir(parents=True, exist_ok=True)
np.savez('./weight/weights_sigmoid.npz', weight_1=weight_1, bias_1=bias_1, weight_2=weight_2, bias_2=bias_2)
