In [12]:
import numpy
import requests
import gzip
import os
import hashlib
import math

In [13]:
DEBUG = True

In [14]:
def Fetch(url, path):
    if not os.path.exists(path):
        os.makedirs(path)

    f_path = os.path.join(path, hashlib.md5(url.encode('utf-8')).hexdigest())

    if os.path.isfile(f_path):
        with open(f_path, "rb") as f:
            data = f.read()

    else:
        with open(f_path, "wb") as f:
            data = requests.get(url).content
            f.write(data)

    return numpy.frombuffer(gzip.decompress(data), dtype=numpy.uint8).copy()

def GetNumpy():
    X_train = Fetch("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", "../dataset/mnist/")[0x10:].reshape((-1, 28, 28))
    Y_train = Fetch("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", "../dataset/mnist/")[0x8:]
    X_test  = Fetch("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", "../dataset/mnist/")[0x10:].reshape((-1, 28, 28))
    Y_test  = Fetch("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", "../dataset/mnist/")[0x8:]
    return X_train, Y_train, X_test, Y_test

In [15]:
def Xavier(weights, nodes):
    arg = 1 / math.sqrt(weights * nodes)
    w = numpy.random.uniform(-arg, arg, (weights, nodes))
    b = numpy.zeros((1, nodes))
    return w, b

In [16]:
X_train, Y_train, X_test, Y_test = GetNumpy()

ALPHA = 0.0025
EPOCHS = 10000
BATCH = 20

In [17]:
X_train = X_train.reshape((-1, 28*28))

In [18]:
w1, b1 = Xavier(28*28, 100)
w2, b2 = Xavier(100, 10)

In [19]:
for i in range(EPOCHS):
    sample = numpy.random.randint(0, X_train.shape[0], size = (BATCH))

    Y_batch = numpy.zeros((BATCH, 10), numpy.float64)
    Y_batch[range(Y_batch.shape[0]), Y_train[sample]] = 1

    X_batch = X_train[sample]


    dot1 = X_batch @ w1
    sum1 = dot1 + b1
    act1 = 1 / (numpy.exp(-sum1) + 1)

    dot2 = act1 @ w2
    sum2 = dot2 + b2
    act2 = 1 / (numpy.exp(-sum2) + 1)


    de_dact2 = (2 * (act2 - Y_batch)) / act2.shape[0]

    dact2_dsum2 = numpy.exp(-sum2) / ((numpy.exp(-sum2) + 1) ** 2)
    de_dsum2 = de_dact2 * dact2_dsum2

    dsum2_db2 = numpy.ones(b2.shape)
    de_db2 = de_dsum2 * dsum2_db2

    dsum2_ddot2 = numpy.ones(dot2.shape)
    de_ddot2 = de_dsum2 * dsum2_ddot2

    ddot2_dw2 = act1
    de_dw2 = de_ddot2.T @ ddot2_dw2


    ddot2_dact1 = w2
    de_dact1 = de_ddot2 @ ddot2_dact1.T

    dact1_dsum1 = numpy.exp(-sum1) / ((numpy.exp(-sum1) + 1) ** 2)
    de_dsum1 = de_dact1 * dact1_dsum1

    dsum1_db1 = numpy.ones(b1.shape)
    de_db1 = de_dsum1 * dsum1_db1

    dsum1_ddot1 = numpy.ones(dot1.shape)
    de_ddot1 = de_dsum1 * dsum1_ddot1

    ddot1_dw1 = X_batch
    de_dw1 = de_ddot1.T @ ddot1_dw1


    b2 -= (numpy.sum(de_dsum2) * ALPHA)
    w2 -= (de_dw2.T * ALPHA)

    b1 -= (numpy.sum(de_dsum1) * ALPHA)
    w1 -= (de_dw1.T * ALPHA)

In [20]:
X_test = X_test.reshape((-1, 28*28))

dot1 = X_test @ w1
sum1 = dot1 + b1
act1 = 1 / (numpy.exp(-sum1) + 1)

dot2 = act1 @ w2
sum2 = dot2 + b2
act2 = 1 / (numpy.exp(-sum2) + 1)

test_acc=(numpy.argmax(act2, axis = 1)==Y_test).mean().item()
print(f"Accuracy: {test_acc:.4f}")

Accuracy: 0.9183
