# Digit Recognizer - MNIST
##### https://www.kaggle.com/competitions/digit-recognizer

### Imports


In [None]:
import pandas as pd
import numpy as np

### Load Data

In [None]:
# Each row is one example: the first column contains the label (digit) and the
# remaining 784 columns (a 28px x 28px image) contain pixel values of 0-255.
df = pd.read_csv("data/train.csv")
df.head()

### Transform Data

In [None]:
# Swap rows and columns: afterwards the first row holds every label and each
# column is a single example (its label followed by its pixel values).
df_T = df.transpose()
df_T.head()

In [None]:
# Convert the transposed dataframe into a plain NumPy array
arr_T = df_T.to_numpy()

In [None]:
def transform_label(label):
    """Return the one-hot column vector for a digit label.

    The result is a (10, 1) float array of zeros with a single 1.0 in the
    row selected by ``label`` (e.g. 3 -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]).
    """
    one_hot = np.zeros(shape=(10, 1), dtype=float)
    one_hot[label] = 1.0
    return one_hot

In [None]:
# Get the first row, our labels
labels_row = arr_T[0, :]
# One-hot encode every label so that 3 is represented by the column
# [0, 0, 0, 1.0, 0, 0, 0, 0, 0, 0]. Each transform_label() call returns a
# (10, 1) column; stacking the columns side by side yields a
# (10, number_of_examples) matrix without hard-coding the training-set size.
desired_outputs = np.hstack([transform_label(label) for label in labels_row])

In [None]:
# Shuffle the examples. Pixels and labels must be reordered with the SAME
# permutation, otherwise every image ends up paired with another image's label.
example_order = np.random.permutation(arr_T.shape[1])
# Get all the other rows, each column is one example
# Since the pixel values are 0-255 we divide them by 255 to get values between 0 and 1
pixel_rows = arr_T[1:, example_order] / 255.0
# Rebuild the labels and their one-hot encoding in the shuffled order.
# Everything is derived from arr_T again, so re-running this cell is safe.
labels_row = arr_T[0, example_order]
desired_outputs = np.hstack([transform_label(label) for label in labels_row])

In [None]:
# Display the normalized pixel matrix (one column per example)
pixel_rows

In [None]:
# Display the one-hot encoded labels (one column per example)
desired_outputs

### Setup of weights and biases
##### This will depend on the # of layers and the # of neurons in each of them.
##### We will have 1 hidden layer with 30 neurons and 1 output layer with 10 neurons
##### i.e. Network([784, 30, 10])

In [None]:
class Network:
    """Container for the randomly initialised parameters of a dense network.

    ``layers`` lists the number of neurons per layer, input layer first,
    e.g. ``[784, 30, 10]``. After construction:

    * ``weights[i]`` has shape ``(layers[i + 1], layers[i])``
    * ``biases[i]`` has shape ``(layers[i + 1],)``

    All values are drawn from ``np.random.randn`` (standard normal).
    """

    def __init__(self, layers):
        self.layer_sizes = layers
        self.no_of_layers = len(layers)
        # Weights are drawn before biases.
        self.weights = self.__init_weights()
        self.biases = self.__init_biases()

    def __init_weights(self):
        # Pair each layer with the next one: for [784, 30, 10] the pairs are
        # (784, 30) and (30, 10), giving matrices of shape 30x784 and 10x30.
        incoming_sizes = self.layer_sizes[:-1]
        outgoing_sizes = self.layer_sizes[1:]
        matrices = []
        for n_in, n_out in zip(incoming_sizes, outgoing_sizes):
            matrices.append(np.random.randn(n_out, n_in))
        return matrices

    def __init_biases(self):
        # One bias per neuron in every layer except the input layer,
        # e.g. vectors of 30 and 10 random numbers.
        vectors = []
        for n_neurons in self.layer_sizes[1:]:
            vectors.append(np.random.randn(n_neurons))
        return vectors

In [None]:
def calculate_weighted_sums(weights, activations, biases):
    """Return the weighted inputs ``z = W . a + b`` for every example.

    With ``weights`` of shape (30, 784) and ``activations`` of shape
    (784, 42000) the product is (30, 42000). ``biases`` is 1-D with one value
    per neuron, shape (30,), so it is added while the product is transposed
    to (42000, 30) and the sum is then transposed back to (30, 42000).
    """
    per_example = np.dot(weights, activations).T
    shifted = per_example + biases
    return shifted.T

In [None]:
# For activation we will use the sigmoid function
def calculate_activations(weights, activations, biases):
    """Return the sigmoid activations of one layer for every example.

    ``weights``, ``activations`` (the previous layer's output, one column per
    example) and ``biases`` are passed to ``calculate_weighted_sums`` to get
    the weighted inputs ``z``; the result has the same shape as ``z``.
    """
    z = calculate_weighted_sums(weights, activations, biases)
    # Sigmoid: 1 / (1 + e^(-z)). The exponent must be negated; without the
    # minus sign this computes sigmoid(-z), whose slope is -a * (1 - a) and
    # therefore disagrees in sign with sigmoid_derivative().
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(activations):
    """Return the sigmoid's derivative, expressed through its own output.

    sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)), and sigmoid(z) is exactly the
    activation, so the activations are all that is needed.
    """
    complement = 1 - activations
    return activations * complement

In [None]:
def calculate_cost(output, expected):
    """Return the element-wise quadratic cost ``0.5 * (output - expected)**2``.

    The factor 0.5 is applied outside the square so that the derivative with
    respect to ``output`` is exactly ``output - expected``, which is what
    ``cost_derivative`` returns. The result has the broadcast shape of the
    two inputs.
    """
    return 0.5 * np.power(output - expected, 2)

def cost_derivative(output, expected):
    """Return the difference between the network output and the target.

    This is the derivative of the quadratic cost with respect to ``output``.
    """
    residual = output - expected
    return residual

In [None]:
# Build the network: 784 inputs (one per pixel), a hidden layer of 30 neurons
# and an output layer of 10 neurons
network = Network([784, 30, 10])

In [None]:
# Display the weights between the input layer and the hidden layer
network.weights[0]

In [None]:
# Display the weights between the hidden layer and the output layer
network.weights[1]

In [None]:
# Forward pass: calculate activations in the first (hidden) and second (output) layer
# for all examples at once.
# A1 & A2 contain activations in respective layers for each example (one column per example);
# A1 is fed into the output layer as its input.
A1 = calculate_activations(network.weights[0], pixel_rows, network.biases[0])
A2 = calculate_activations(network.weights[1], A1, network.biases[1])

In [None]:
# Display the hidden-layer activations
A1

In [None]:
# Display the output-layer activations
A2

In [None]:
# Backpropagation of the error, computed for every example at once.
# Output layer: derivative of the cost w.r.t. the output, multiplied element-wise
# by the sigmoid derivative at the output activations.
error_in_output_layer = cost_derivative(A2, desired_outputs) * sigmoid_derivative(A2)
# Hidden layer: carry the output error back through the transposed output-layer
# weights, then multiply element-wise by the sigmoid derivative at the hidden activations.
error_in_hidden_layer = np.dot(network.weights[1].T, error_in_output_layer) * sigmoid_derivative(A1)

In [None]:
# Average each output neuron's error along axis 1 (the examples axis)
np.mean(error_in_output_layer, axis=1)

In [None]:
# Hidden-layer activations of the first example (column 0).
# NOTE(review): assuming A1 is 2-D, A1[:, 0] is 1-D and .T leaves it unchanged.
A1[:, 0].T

In [None]:
# Gradient of the cost with respect to the output-layer weights: the dot product
# of the output-layer errors with the transposed hidden-layer activations sums
# the per-example contributions, giving one value per weight in network.weights[1].
# NOTE(review): this only computes (and displays) the gradient; no gradient descent
# step is applied here - the result is not scaled or subtracted from the weights.
np.dot(error_in_output_layer, A1.T)

In [None]:
# Display the output-layer weights again (no cell above assigns new values to them)
network.weights[1]