In [None]:
# Building a Neural Network from scratch for the MNIST dataset.
# 
# The MNIST dataset is the dataset of handwritten digits used for digit 
# recognition and classification.
# 
# I'll do 3 steps for building the model.
# 1. Forward propagation. Feed the image into the network and output the
#   prediction.
# 2. Backpropagation. Compare the output with the actual label, work out how
#   the weights and biases of the network contributed to the error, and
#   adjust them with gradient descent.
# 3. Repeat. Run the network again with the adjusted parameters. This is not
#   a single pass: it is a cycle in which we search for the parameters
#   with the minimum error.

In [None]:
# Because I am a noob, I need a function that displays the parameters of the data array I am manipulating as I progress.
# def displayParameters(dataArray, width, height):
#     print(f"""Thdata array {}
#     """)

In [119]:
# We import the libraries for data analysis and plotting.
import numpy as np, pandas as pd
from matplotlib import pyplot as plt
print("Libraries imported!")

Libraries imported!


In [95]:
# Load the training CSV. pd.read_csv returns a pandas DataFrame and
# np.array() converts it into a numpy array; they are different data
# structures, and the rest of the notebook works on the numpy array.
dataFrame = pd.read_csv('train.csv')
dataArray = np.array(dataFrame)

# Taking the shape of the array: m is the number of rows, n the number of columns
m, n = dataArray.shape
# NOTE: the message below reports n as the width and m as the height
print(f"The array of digit recognition is width n {n}, height m {m}\n")

# Shuffle the rows in place before the array is sliced into subsets.
# The global numpy RNG is seeded first so the shuffled order is reproducible.
print(f"Data before shuffling\n{dataArray}")
seedNumber = 42                 # seed for the global numpy random generator
np.random.seed(seedNumber)
np.random.shuffle(dataArray)    # reorders the rows of dataArray in place
print(f"\nData after shuffling with seed number {seedNumber}\n{dataArray}")

The array of digit recognition is width n 785, height m 42000

Data before shuffling
[[1 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [7 0 0 ... 0 0 0]
 [6 0 0 ... 0 0 0]
 [9 0 0 ... 0 0 0]]

Data after shuffling with seed number 42
[[8 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [9 0 0 ... 0 0 0]
 ...
 [2 0 0 ... 0 0 0]
 [6 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [118]:
# Naming convention for both subsets: x_* holds the inputs (rows 1..n-1 of the
# transposed array) and y_* holds the labels (row 0 of the transposed array).

# set development data
# The development data is held out from training and is used to tune the hyperparameters
devData = dataArray[0:1000].T       # first 1000 rows of the data array, transposed so each column is one example
y_devData = devData[0]              # dev labels: the first row of the transposed array
x_devData = devData[1:n]            # dev inputs: the remaining rows of the transposed array

# set training data
# The training data is the data that the parameters are fitted on
trainData = dataArray[1000:m].T     # the rest of the data array rows, transposed the same way
y_trainData = trainData[0]          # train labels: the first row of the transposed array
x_trainData = trainData[1:n]        # train inputs: the remaining rows of the transposed array

# Print the information about subset of training data
print(f"""This is the development data array
{devData}                       
It has a shape of width {devData.shape[1]}, height {devData.shape[0]} 

This is the training data array
{trainData}                      
It has a shape of width {trainData.shape[1]}, height {trainData.shape[0]}""")

This is the development data array
[[8 1 9 ... 1 2 6]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]                      
It has a shape of width 1000, height 785 

This is the training data array
[[2 6 4 ... 2 6 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]                      
It has a shape of width 41000, height 785


In [None]:
# The previous code cells were the treatment of the data to prepare
# for the actual training of the neural network.

# This code is not supposed to be run here, but it can be used to
# automate variable creation, the function locals() does that.
# numberOfLayers = 2
# make n variables, each one with a name that contains a number corresponding 
# to the number in the range. This gives n different variables with ordered names.
# for i in range(numberOfLayers):
#     variable = locals()["w%s" % str(i + 1)] = a

In [None]:
# function that creates the initial parameters (weights and biases) with random values
def init_parameters(n_inputs=784, n_hidden=10, n_outputs=10):
    """Create randomly initialised weights and biases for a two-layer network.

    Every value is drawn from the global numpy RNG with np.random.rand and
    shifted by -0.5, so it lies in [-0.5, 0.5).

    Parameters
    ----------
    n_inputs : int, optional
        Number of input features per example (default 784).
    n_hidden : int, optional
        Number of units in the hidden layer (default 10).
    n_outputs : int, optional
        Number of units in the output layer (default 10).

    Returns
    -------
    tuple
        (w1, b1, w2, b2) with shapes (n_hidden, n_inputs), (n_hidden, 1),
        (n_outputs, n_hidden) and (n_outputs, 1).
    """
    # The draw order (w1, b1, w2, b2) is kept fixed so that a seeded run
    # yields the same values as before for the default sizes.
    w1 = np.random.rand(n_hidden, n_inputs) - 0.5
    b1 = np.random.rand(n_hidden, 1) - 0.5
    w2 = np.random.rand(n_outputs, n_hidden) - 0.5
    b2 = np.random.rand(n_outputs, 1) - 0.5
    return w1, b1, w2, b2

# Rectified linear unit (ReLU) activation.
# Positive values pass through unchanged and negative values become 0;
# the output is not binary, it is the input clipped from below at 0.
def relu(z):
    """Return the element-wise maximum of 0 and `z`."""
    rectified = np.maximum(0, z)
    return rectified

# function that turns raw scores into probabilities, one example (column) at a time
def softmax(z):
    """Apply the softmax function along axis 0 of `z`.

    For a 2-D array every column is normalised independently, so each
    column of the result sums to 1. A 1-D array is treated as one column.

    Parameters
    ----------
    z : array_like
        Raw scores, at least 1-D.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as `z` holding values in [0, 1].
    """
    z = np.asarray(z)
    # Subtracting the per-column maximum leaves the result unchanged
    # mathematically but keeps np.exp from overflowing on large scores.
    shifted = z - np.max(z, axis=0, keepdims=True)
    exp_z = np.exp(shifted)
    # Normalise per column (axis 0), not over the whole array.
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

# function that does the forward propagation
def forward_prop(w1, b1, w2, b2, x):
    """Run the two-layer network forward on the input `x`.

    Layer 1 is a linear map followed by relu; layer 2 is a linear map of the
    layer-1 activations followed by softmax.

    Parameters
    ----------
    w1, b1 : numpy.ndarray
        Weights and bias of the first layer.
    w2, b2 : numpy.ndarray
        Weights and bias of the second layer.
    x : numpy.ndarray
        Network input; presumably one example per column -- confirm against
        the caller.

    Returns
    -------
    tuple
        (z1, a1, z2, a2): the pre-activations and activations of both layers.
    """
    z1 = w1.dot(x) + b1
    a1 = relu(z1)
    # The second layer consumes the hidden activations a1 (not the raw
    # input x) and adds its own bias b2.
    z2 = w2.dot(a1) + b2
    a2 = softmax(z2)
    return z1, a1, z2, a2

# function that converts a vector of integer labels into a one-hot matrix
def one_hot(y, num_classes=None):
    """One-hot encode the integer labels in `y`.

    Parameters
    ----------
    y : array_like of int
        1-D array of non-negative integer class labels.
    num_classes : int, optional
        Number of classes (rows of the result). When omitted it is inferred
        as ``y.max() + 1``; pass it explicitly when a batch may not contain
        the largest label.

    Returns
    -------
    numpy.ndarray
        Array of shape (num_classes, y.size); column i holds a single 1 in
        row ``y[i]`` and 0 everywhere else.
    """
    y = np.asarray(y)
    if num_classes is None:
        # y.max must be called, and the +1 belongs to the class count,
        # not to the shape tuple.
        num_classes = int(y.max()) + 1
    one_hot_y = np.zeros((y.size, num_classes))
    # For every row i, set the column given by label y[i].
    one_hot_y[np.arange(y.size), y] = 1
    # Transpose so that each column corresponds to one example.
    return one_hot_y.T

# TODO: Function that ========================================================================================
def backward_prop(z1, a1, z2, a2, w2, y):
    m = y.size
    one_hot_y = one_hot(y)
    dz2 = a2 - one_hot_y
    dw2 = 1 / m * dz2.dot(a1.T)
    db2 = 1 / m * np.sum(dz1)