# Digit Recognizer - MNIST
##### https://www.kaggle.com/competitions/digit-recognizer

### Imports


In [23]:
import pandas as pd
import numpy as np

### Load Data

In [24]:
# Each row of train.csv is one training example: the first column holds the
# label (the digit) and the remaining 784 columns hold the pixel values (0-255)
# of a 28px x 28px image.
df = pd.read_csv("data/train.csv")
# The following cells slice the transposed data by position (row 0 = labels,
# rows 1.. = pixels), so fail early if the file does not have that layout.
assert df.columns[0] == "label", "expected the first column of train.csv to be 'label'"
assert df.shape[1] == 785, "expected 1 label column followed by 784 pixel columns"
df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Transform Data

In [25]:
# Swap rows and columns: afterwards every column is one example, with its label
# in the first row and its pixel values in the rows below.
df_T = df.transpose()
df_T.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41990,41991,41992,41993,41994,41995,41996,41997,41998,41999
label,1,0,1,4,0,0,7,3,5,3,...,3,1,9,6,4,0,1,7,6,9
pixel0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
pixel1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
pixel2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
pixel3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
# Convert the transposed pandas DataFrame into a plain NumPy array
arr_T = df_T.to_numpy()
arr_T

array([[1, 0, 1, ..., 7, 6, 9],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [58]:
# Row 0 of the transposed array carries the label of every example
labels_row = arr_T[0]
# All remaining rows are pixel values; each column is still one example.
# The raw pixel values are 0-255, so dividing by 255 scales them to between 0 and 1.
pixel_rows = arr_T[1:] / 255.0

In [59]:
# Display pixel_rows to inspect the result of the scaling above
pixel_rows

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

### Setup of weights and biases
##### The shapes of the weight matrices and bias vectors depend on the number of layers and the number of neurons in each layer.
##### We will have an input layer with 784 neurons (one per pixel), 1 hidden layer with 30 neurons and 1 output layer with 10 neurons,
##### i.e. Network([784, 30, 10])

In [86]:
class Network:
    """Randomly initialised weights and biases of a fully connected network.

    Parameters
    ----------
    layers : sequence of int
        Number of neurons in each layer, input layer first,
        e.g. ``[784, 30, 10]``.
    seed : int, optional
        When given, weights and biases are drawn from a private
        ``np.random.RandomState(seed)`` so that the initialisation is
        reproducible. When omitted, NumPy's global random state is used.

    Attributes
    ----------
    no_of_layers : number of layers, including the input layer.
    layer_sizes : list with the number of neurons per layer.
    weights : list of arrays; entry ``i`` has shape
        ``(layer_sizes[i + 1], layer_sizes[i])``.
    biases : list of arrays; entry ``i`` has shape ``(layer_sizes[i + 1],)``.
    """

    def __init__(self, layers, seed=None):
        # Keep our own copy of the sizes: if the caller later mutates the list
        # they passed in, it must not get out of sync with the arrays below.
        self.layer_sizes = list(layers)
        self.no_of_layers = len(self.layer_sizes)
        # Without a seed keep using the global np.random functions, so code that
        # calls np.random.seed(...) before constructing a Network still works.
        self._rng = np.random if seed is None else np.random.RandomState(seed)
        self.weights = self.__init_weights()
        self.biases = self.__init_biases()

    def __init_weights(self):
        """Return one standard-normal weight matrix per pair of adjacent layers."""
        # e.g. for [784, 30, 10] the pairs (784, 30) and (30, 10) are zipped,
        # giving two matrices with dimensions 30x784 and 10x30
        return [
            self._rng.randn(y, x)
            for x, y in zip(self.layer_sizes[:-1], self.layer_sizes[1:])
        ]

    def __init_biases(self):
        """Return one standard-normal bias vector per layer after the input layer."""
        # e.g. for [784, 30, 10]: two arrays with 30 and 10 random numbers each
        return [self._rng.randn(x) for x in self.layer_sizes[1:]]

In [112]:
def calculate_z(weights, activations, biases):
    """Compute the weighted input ``z = weights . activations + biases`` of a layer.

    Parameters
    ----------
    weights : array of shape (n_out, n_in)
        e.g. (30, 784) for the hidden layer.
    activations : array of shape (n_in, n_examples) or (n_in,)
        One column per example, e.g. (784, 42000).
    biases : array of shape (n_out,) or (n_out, 1)
        One bias per neuron, as a flat vector or as a column vector.

    Returns
    -------
    Array with the shape of ``np.dot(weights, activations)``, e.g. (30, 42000),
    with ``biases[i]`` added to every entry of row ``i``.
    """
    per_neuron = np.asarray(biases)
    # A column vector (n_out, 1) cannot be added to the transposed product below:
    # it raises a broadcast error, or is silently added along the example axis
    # when n_examples == n_out. Flatten it so both bias layouts behave the same.
    if per_neuron.ndim == 2 and per_neuron.shape[1] == 1:
        per_neuron = per_neuron[:, 0]
    weighted = np.dot(weights, activations)
    # The product is (n_out, n_examples); transposing puts n_out on the last
    # axis, where the (n_out,) biases broadcast, and then we transpose back.
    return (weighted.T + per_neuron).T

In [113]:
# The weights and biases are drawn at random from NumPy's global generator.
# Fix its seed (any constant works) so that they, and every result computed
# from them, are the same on each run of the notebook.
np.random.seed(42)
# 784 input neurons, 30 hidden neurons, 10 output neurons
n = Network([784, 30, 10])

In [114]:
# Weighted input z for the first set of weights and biases of the network,
# computed for all columns (examples) of pixel_rows at once
calculate_z(n.weights[0], pixel_rows, n.biases[0])

array([[  0.50498752,   0.38962856,   0.48891003, ...,   8.40500831,
          3.6949459 ,  -1.84145169],
       [  4.17564264,  16.90994695,   6.78799664, ...,   2.01034819,
         12.96763082,   8.2483994 ],
       [  7.6470797 ,   7.29945385,   7.16418205, ...,   1.24157283,
         -0.05465597,  -8.20615405],
       ...,
       [ -3.57410116,  -2.04638214,   3.34330266, ..., -15.83741241,
          2.77735663,  -0.30444712],
       [ -6.77642944, -24.09795982,  -7.31265955, ..., -14.75659962,
        -18.96786553, -15.8350089 ],
       [ -8.58022876,  -9.81847283,  -0.25034434, ...,  -7.76918342,
          3.3058975 ,   5.94690856]])

ValueError: operands could not be broadcast together with shapes (3,) (3,9) 