For this example we'll use the [MNIST](https://www.wikiwand.com/en/MNIST_database) dataset, a dataset of handwritten digits.

Each training image is 28x28 pixels, i.e. 784 pixels in total. Each pixel has a value between 0 and 255, with 255 being completely white and 0 being completely black.

The images can be represented as a matrix in which each row corresponds to one image and is 784 columns long, one column per pixel. The matrix is then transposed (i.e. columns become rows and rows become columns), so that each column holds one image.



In [1]:
# Import the essential libraries for numerical functions, dataset and plots, respectively.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
# Load the training set from the CSV file into a pandas DataFrame
data = pd.read_csv("../src/dataset/train.csv")

In [3]:
# Preview the first rows of the training DataFrame
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Turn the DataFrame into a NumPy array and record its dimensions
data = np.array(data)
rows_amount, features_amount_plus_one = data.shape

# Dev split: the first 1000 samples, transposed so every column is one sample.
# Row 0 is taken as the labels and the remaining rows as the features
# (assumes the label is the first CSV column - confirm against the file).
data_dev = data[:1000].T
Y_dev, X_dev = data_dev[0], data_dev[1:]

# Train split: every remaining sample, laid out the same way as the dev split
data_train = data[1000:].T
Y_train, X_train = data_train[0], data_train[1:]

In [10]:
# Shape of column 0 of X_train, i.e. the feature vector of the first training sample
X_train[:, 0].shape

(784,)

In [12]:
# Initiate parameters
def initiate_parameters():
    """Create randomly initialised parameters for the two-layer network.

    Every entry is drawn uniformly from [-0.5, 0.5) (``np.random.rand``
    shifted by 0.5).

    Returns:
        tuple: ``(W1, b1, W2, b2)`` with shapes ``(10, 784)``, ``(10, 1)``,
        ``(10, 10)`` and ``(10, 1)`` respectively.
    """
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    # Hand the parameters back; without this the caller would receive None.
    return W1, b1, W2, b2

def ReLU(Z):
    """Rectified linear unit: the element-wise maximum of ``Z`` and 0."""
    rectified = np.maximum(Z, 0)
    return rectified

def softmax(Z):
    """Apply the softmax function along axis 0 of ``Z``.

    For a 2-D input every column is normalised independently, so each
    column of the result sums to 1. A 1-D input is normalised as a single
    vector.

    Args:
        Z: NumPy array of scores (1-D or 2-D).

    Returns:
        Array with the same shape as ``Z`` holding the softmax values.
    """
    # Subtracting the per-column maximum leaves the result unchanged
    # mathematically but keeps np.exp from overflowing on large scores.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_Z = np.exp(shifted)
    # Normalise per column (axis 0), not over the whole matrix.
    return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

# Forward propagation
def forward_propagation(W1, b1, W2, b2, X):
    """Run one forward pass through the two-layer network.

    The hidden layer applies ``W1``/``b1`` followed by ReLU; the output
    layer applies ``W2``/``b2`` followed by softmax.

    Returns:
        tuple: ``(Z1, A1, Z2, A2)`` - the pre-activation and activation of
        the hidden layer, then those of the output layer.
    """
    hidden_linear = W1.dot(X) + b1
    hidden_activation = ReLU(hidden_linear)

    output_linear = W2.dot(hidden_activation) + b2
    output_activation = softmax(output_linear)

    return hidden_linear, hidden_activation, output_linear, output_activation

def one_hot(Y, num_classes=None):
    """One-hot encode a 1-D array of integer class labels.

    Args:
        Y: 1-D NumPy integer array of labels.
        num_classes: Number of rows (classes) in the result. Defaults to
            ``Y.max() + 1``; pass it explicitly when a batch may not
            contain the highest class.

    Returns:
        Float array of shape ``(num_classes, Y.size)`` in which column ``i``
        has a 1 in row ``Y[i]`` and 0 elsewhere.
    """
    # ndarray.size is an attribute, not a method - calling it raises TypeError.
    sample_count = Y.size
    if num_classes is None:
        num_classes = Y.max() + 1
    one_hot_Y = np.zeros((sample_count, num_classes))
    # Set the entry (i, Y[i]) of every row i in one vectorised assignment.
    one_hot_Y[np.arange(sample_count), Y] = 1
    # Transpose so that each column, rather than each row, is one sample.
    return one_hot_Y.T

def derivative_ReLU(Z):
    """Return the element-wise truth value of ``Z > 0`` (the ReLU slope: True where positive)."""
    is_positive = 0 < Z
    return is_positive

# Backward propagation
def backward_propagation(Z1, A1, Z2, A2, W2, X, Y):
    """Compute the parameter gradients of the two-layer network.

    Args:
        Z1: Hidden-layer pre-activation.
        A1: Hidden-layer activation.
        Z2: Output-layer pre-activation (unused; kept so the signature
            mirrors the values returned by forward propagation).
        A2: Output-layer activation.
        W2: Output-layer weight matrix.
        X: Input matrix with one sample per column.
        Y: 1-D integer array of labels, one per sample.

    Returns:
        tuple: ``(dW1, db1, dW2, db2)``. The bias gradients keep a trailing
        axis of length 1 (``keepdims=True``), i.e. they are column vectors.
    """
    # ndarray.size is an attribute, not a method.
    sample_count = Y.size
    one_hot_Y = one_hot(Y)

    # Output layer: error of the predictions against the one-hot targets.
    dZ2 = A2 - one_hot_Y
    dW2 = dZ2.dot(A1.T) / sample_count
    # Sum over the sample axis (axis 1); axis 2 does not exist on a 2-D array.
    db2 = np.sum(dZ2, axis=1, keepdims=True) / sample_count

    # Hidden layer: push the error back through W2 and the ReLU derivative.
    dZ1 = W2.T.dot(dZ2) * derivative_ReLU(Z1)
    # The first-layer weight gradient must use dZ1, not dZ2.
    dW1 = dZ1.dot(X.T) / sample_count
    db1 = np.sum(dZ1, axis=1, keepdims=True) / sample_count

    return dW1, db1, dW2, db2

