In [212]:
import numpy as np
import pandas as pd

In [213]:
# Read the training CSV and copy its values into a standalone NumPy array so
# the rows can be shuffled, sliced and transposed directly.
data = np.array(pd.read_csv('train.csv'))

# Sanity check: the first row is one image (its label, then its pixel values).
print(data[0])

# Shape of the data: 42,000 images, each described by 785 values
# (column 1 is the label, columns 2-785 are the pixel values).
print(data.shape)
m = data.shape[0]
n = data.shape[1]

[  1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0 188 255  94   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 191 250
 253  93   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0 123 248 253 167  10   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  80 247
 253 208  13   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0  29 207 253 235  77   

In [214]:
# Seed NumPy's global random generator so that the shuffles below (and every
# np.random call made afterwards) give the same result on each
# Restart & Run All.
np.random.seed(42)

# Small example of shuffling: np.random.shuffle reorders the rows of a 2-D
# array in place and keeps the contents of each row together.
test_arr = np.array([[1, 2.2, 3.3], [3, 4.4, 5.5], [5, 6.6, 7.7]])
np.random.shuffle(test_arr)
print(test_arr)

# Shuffle all of the images before they go into the model, so the order of the
# rows in the CSV cannot bias training.
np.random.shuffle(data)

[[3.  4.4 5.5]
 [5.  6.6 7.7]
 [1.  2.2 3.3]]


In [215]:
# Transposing swaps rows and columns (example on the small array).
t_test_array = test_arr.transpose()
print(t_test_array)

# Do the same for the real data: take rows 0 to m (all 42,000 images) and
# transpose the result so that every image becomes one column. The input layer
# of the network is a column of values, so this is the layout it expects.
data_train = data[:m].transpose()
print(data_train.shape)

[[3.  5.  1. ]
 [4.4 6.6 2.2]
 [5.5 7.7 3.3]]
(785, 42000)


In [216]:
# After the transpose, row 0 holds the labels (the y values) and the remaining
# rows hold the inputs (the x values).

# Example on the small array.
y_label_test_array = t_test_array[0, :]
x_label_test_array = t_test_array[1:, :]

# Same split on the real data: the label row first, then pixel rows 1 to n-1.
Y_train = data_train[0, :]
X_train = data_train[1:n, :]

print(y_label_test_array)
print(x_label_test_array)
print(X_train.shape)

[3. 5. 1.]
[[4.4 6.6 2.2]
 [5.5 7.7 3.3]]
(784, 42000)


In [217]:
# Scale the pixel values by 1/255 so the inputs fall into the 0 to 1 range.
# The pixel rows are sliced from data_train again (instead of dividing X_train
# by itself) so that re-running this cell cannot normalise the data twice.
# NumPy's "/" is true division, so the result is a float array.
X_train = data_train[1:n] / 255.


In [218]:
# Scratch example of a single weight matrix: 10 rows x 784 columns of uniform
# random values shifted from [0, 1) to [-0.5, 0.5).
W1 = -0.5 + np.random.rand(10, 784)

In [219]:
def init_params(n_input=784, n_hidden=10, n_output=10):
    """Create random weights and biases for a network with one hidden layer.

    Every value is drawn with np.random.rand (uniform on [0, 1)) and shifted by
    -0.5, so the parameters start out zero-centred in [-0.5, 0.5).

    Parameters
    ----------
    n_input : int, default 784
        Number of input values per example (784 = 28 * 28 pixels).
    n_hidden : int, default 10
        Number of neurons in the hidden layer.
    n_output : int, default 10
        Number of neurons in the output layer (one per digit class).

    Returns
    -------
    W1 : ndarray, shape (n_hidden, n_input)
        Weights between the input layer and the hidden layer.
    b1 : ndarray, shape (n_hidden, 1)
        Bias of the hidden layer, one entry per hidden neuron.
    W2 : ndarray, shape (n_output, n_hidden)
        Weights between the hidden layer and the output layer.
    b2 : ndarray, shape (n_output, 1)
        Bias of the output layer, one entry per output neuron.
    """
    # The draw order (W1, b1, W2, b2) is kept fixed so that a seeded run with
    # the default sizes produces the same parameters as before.
    W1 = np.random.rand(n_hidden, n_input) - 0.5
    b1 = np.random.rand(n_hidden, 1) - 0.5
    W2 = np.random.rand(n_output, n_hidden) - 0.5
    b2 = np.random.rand(n_output, 1) - 0.5

    return W1, b1, W2, b2

In [220]:
def forward_prop(W1, b1, W2, b2, X, verbose=True):
    """Compute the hidden layer's linear transformation Z1 = W1 . X + b1.

    Parameters
    ----------
    W1 : ndarray, shape (n_hidden, n_input)
        Weights between the input layer and the hidden layer.
    b1 : ndarray, shape (n_hidden, 1)
        Hidden-layer bias; broadcast across the columns of the product.
    W2, b2 : ndarray
        Output-layer parameters. Accepted so the call matches what
        init_params() returns, but not used by this function yet.
    X : ndarray, shape (n_input, n_examples)
        Input data with one example per column.
    verbose : bool, default True
        When True, print the shapes of X and W1 and the resulting Z1.

    Returns
    -------
    Z1 : ndarray, shape (n_hidden, n_examples)
        The weighted sum of the inputs plus the bias for every hidden neuron.
    """
    if verbose:
        print(X.shape)
        print(W1.shape)

    # Linear transformation: combine the weights and the inputs with a matrix
    # product, then add the bias to every column.
    Z1 = W1.dot(X) + b1

    if verbose:
        print(Z1)

    # Hand the result back so later steps can use it instead of discarding it.
    return Z1


In [221]:
# Create a fresh set of parameters and push the whole training set through the
# forward pass. The trailing semicolon stops the cell from echoing a return value.
(W1, b1, W2, b2) = init_params()
forward_prop(W1, b1, W2, b2, X=X_train);

(784, 42000)
(10, 784)
[[ 4.82240811e+00  1.31175084e+00  4.71412646e+00 ...  9.90380898e-01
   3.66038740e+00  3.66617596e+00]
 [-4.56140711e+00 -2.13028127e-02 -4.00700953e+00 ...  2.78521777e-01
  -1.58566610e+00 -1.04357195e-01]
 [-2.50745772e+00  1.23917386e-02  2.93014353e-01 ... -2.34751310e+00
  -3.35511208e-01  1.51704636e-02]
 ...
 [ 3.90158963e+00  1.91121671e+00  3.09526572e+00 ...  5.41449481e-03
   3.34198307e+00  1.48856654e+00]
 [-5.37211188e+00 -3.01838633e+00  3.05261366e-03 ...  1.44830404e+00
   1.45446942e+00 -2.72772379e+00]
 [-3.22809515e-01  1.86492260e+00  3.13799736e+00 ...  7.16657581e-01
   2.81406029e+00 -1.78467584e+00]]
