In [29]:
import idx2numpy
import numpy as np

""" LOADING DATASET """
# Locations of the four raw MNIST IDX files (relative to the notebook).
train_images_file = 'assets/MNIST/train-images.idx3-ubyte'
train_labels_file = 'assets/MNIST/train-labels.idx1-ubyte'
test_images_file = 'assets/MNIST/test-images.idx3-ubyte'
test_labels_file = 'assets/MNIST/test-labels.idx1-ubyte'

# Decode each file into a NumPy array and transpose it; for the image
# arrays this moves the example index to the last axis.
train_images_array = idx2numpy.convert_from_file(train_images_file).T
train_labels_array = idx2numpy.convert_from_file(train_labels_file).T
test_images_array = idx2numpy.convert_from_file(test_images_file).T
test_labels_array = idx2numpy.convert_from_file(test_labels_file).T

# Sanity check: leading axes are the image size, trailing axis counts examples.
print(f"Size of an image in each row: {train_images_array.shape[:2]}")
print(f"Number of training examples: {train_images_array.shape[-1]}")

Size of an image in each row: (28, 28)
Number of training examples: 60000


In [33]:
# Flatten every image into one column: all leading axes are collapsed and
# the trailing (example) axis is kept as the column index.
X = train_images_array.reshape(-1, train_images_array.shape[-1])

# Labels as a single row so they line up column-for-column with X.
Y = train_labels_array.reshape((1, -1))

print(f"Training array: {X.shape} \nTraining labels: {Y.shape}")

Training array: (784, 60000) 
Training labels: (1, 60000)


In [None]:
""" UTILS """
def g_h(z):  # for hidden layers
    """
    Hidden-layer activation: element-wise ReLU, i.e. max(0, z).

    Alternatives worth experimenting with here:
    1. sigmoid
    2. tanh
    3. ReLU (current choice)
    4. Leaky ReLU
    """
    rectified = np.maximum(0, z)
    return rectified

def g_y(z):
    """Logistic sigmoid, applied element-wise: 1 / (1 + e^(-z))."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [None]:
def initialise_parameters(n_x, n_h1, n_h2, n_y):
    """
    Build the initial weights and biases of the three-layer network.

    The global NumPy RNG is re-seeded on every call, so the same sizes
    always yield the same weights. Weights are drawn uniformly from
    [-0.01, 0.01); biases start at zero.

    Args:
        n_x:  size of the input layer
        n_h1: size of the first hidden layer
        n_h2: size of the second hidden layer
        n_y:  size of the output layer

    Returns:
        dict with keys "W1", "b1", "W2", "b2", "W3", "b3", where Wk has
        shape (units_out, units_in) and bk has shape (units_out, 1).
    """
    np.random.seed(69)

    # (units_out, units_in) per layer, input side first, so the random
    # draws happen in the order W1, W2, W3.
    layer_shapes = [(n_h1, n_x), (n_h2, n_h1), (n_y, n_h2)]

    parameters = {}
    for index, (units_out, units_in) in enumerate(layer_shapes, start=1):
        parameters[f"W{index}"] = np.random.uniform(-0.01, 0.01, (units_out, units_in))
        parameters[f"b{index}"] = np.zeros((units_out, 1))

    return parameters

In [None]:
def layer_sizes(X, Y):
    """
    Return the layer widths of the network as (n_x, n_h1, n_h2, n_y).

    Args:
        X: input matrix with one example per column, i.e. shape
           (n_features, n_examples).
        Y: labels; currently unused, kept for interface compatibility.

    Returns:
        Tuple (n_x, n_h1, n_h2, n_y): the input size taken from X, two
        hidden layers of 16 units each, and 10 output units.
    """
    # Examples are stored column-wise, so the feature count is the number
    # of rows. (shape[1] would be the number of examples and would make
    # W1 incompatible with np.dot(W1, X).)
    n_x = X.shape[0]
    n_h1 = 16
    n_h2 = 16
    n_y = 10

    return (n_x, n_h1, n_h2, n_y)

In [None]:
def forward_propogation(X, parameters):
    """
    Run one forward pass through the three-layer network.

    Args:
        X: input matrix with one example per column.
        parameters: dict holding "W1", "b1", "W2", "b2", "W3", "b3".

    Returns:
        (A3, cache) where A3 is the output-layer activation and cache is
        a dict of every intermediate Z/A value ("Z1", "A1", ..., "A3")
        for use during backpropagation.
    """
    # Fetch weights and biases
    W1 = parameters['W1']
    b1 = parameters['b1']

    W2 = parameters['W2']
    b2 = parameters['b2']

    W3 = parameters['W3']
    b3 = parameters['b3']

    # Forward propagation: each layer adds its OWN bias vector.
    Z1 = np.dot(W1, X) + b1
    A1 = g_h(Z1)

    Z2 = np.dot(W2, A1) + b2
    A2 = g_h(Z2)

    # Output layer uses the output activation g_y, not the hidden one.
    Z3 = np.dot(W3, A2) + b3
    A3 = g_y(Z3)

    # Caching for backpropagation
    cache = {
        "Z1": Z1,
        "A1": A1,

        "Z2": Z2,
        "A2": A2,

        "Z3": Z3,
        "A3": A3
    }

    return A3, cache

In [None]:
def compute_cost(A3, Y, parameters):
    """
    Binary cross-entropy cost, summed over all entries and divided by the
    number of examples.

    Args:
        A3: output activations; expected to lie in [0, 1].
        Y: target array broadcastable against A3, with examples along
           axis 1 (m is read from Y.shape[1]).
        parameters: unused; kept for interface compatibility.

    Returns:
        The cost as a Python float.
    """
    m = Y.shape[1]

    # Keep activations strictly inside (0, 1): a saturated output would
    # otherwise give log(0) = -inf and 0 * -inf = nan in the sum below.
    # The epsilon is sized for float64 activations.
    eps = 1e-15
    A3_safe = np.clip(A3, eps, 1 - eps)

    logprobs = (Y * np.log(A3_safe)) + ((1 - Y) * np.log(1 - A3_safe))
    cost = (-1/m) * np.sum(logprobs)
    cost = float(np.squeeze(cost))  # collapse to a plain scalar

    return cost  # a scalar