In [46]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mnist-in-csv/mnist_test.csv
/kaggle/input/mnist-in-csv/mnist_train.csv


In [49]:
import pandas as ds # Pandas is a libaray that used to work with datasets
import numpy as np # Manipulating arrays
from matplotlib import pyplot as plt #Pyplot exists as a module inside the mathplotlib (not a standalone package)

In [65]:
#using the training data, train our neural network

data = pd.read_csv('/kaggle/input/mnist-in-csv/mnist_train.csv')

data = np.array(data) #covert pandas dataframe into ndarray- now a 2D array (60,000, 784)



#Assign X and Y 
Y = data[1:10000, 0:1]
X = data[1:10000,1:]
print (Y.shape)
print(X.shape)
m, n = X.shape
print(m,n)

#we want to transpose both of them to be able to work in our calculations
Y = Y.T
X = X.T

X = X / 255.0

(9999, 1)
(9999, 784)
9999 784


In [57]:
#initalize our weights, and bias point as matrix
# Input layer: 784, Hidden layer: 10, Output layer: 0-9

'''
What should be the dimentions of the matrix?

1. Weights 
    a. Input -> Hidden layer (784, 10) 
    b. Hidden -> Output (10, 10)

2. Bias
    a. Hidden layer (10, 1)
    b. Output layer (10, 1)
'''
# randn: produce random numbers between [0,1]
#subtract 0.5 to generate negative values as well [-0.5,0.5]
def initalize_parameters():
    W1 = np.random.rand(10,784) - 0.5 # each row corresponds to the node's weight respect to the other 784 input layers
    W2 = np.random.rand (10,10) - 0.5
    B1 = np.random.rand (10,1) - 0.5
    B2 = np.random.rand (10,1) - 0.5
    return W1, W2, B1, B2

#we want to use an activation function so that our output isn't just a linear combination of the input
def activation_ReLU(Z):
    return np.maximum(Z,0)

# Since we are using ReLU activation (can generate numbers above 1), we need output between 0-1
# we also have to ensure that they are the same relative to eachother
def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A


def forward_propgation(W1,W2,B1,B2,X): #where X is the 784 values of pixel image
    Z1 = W1.dot(X) + B1 #let the hidden layer values W1: (10, 784) X: (784, 1) -> dimentions of (10,1)
    A1 = activation_ReLU(Z1)

    Z2 = W2.dot(A1) + B2 #Output later W2: (10,10) A1: (10,1)-> Z2: (10,1)
    A2 = softMax(Z2)
    return Z1, A1, Z2, A2

#one hot encoding: a way to tell your program which output should be right
def one_hot_encode(Y): #Y is your answer vector in dimentions (Y.size, 1): row, coloumn
    #turn your answer vector into a matrix
    matrix = np.zeros((Y.size, Y.max() + 1))
    matrix[np.arange(Y.size), Y] = 1 #arrange creates a array from 0 - Y.size-1, so then u can use to index
    return matrix.T #matrix is now in dimentions (Y.size(), 10): row, coloumn

# we use backward propogation to change the weights and biases, accoriding to input and expected output data
def backward_propogation(Z1, A1, Z2, A2, W2, Y):
    output_encoded = one_hot_encode(Y)
    #keep track of all the small changes
    dZ2 = A2 - output_encoded #the difference between expected and actual values
    dW2 = 1 / m * dZ2.dot(A1.T)
    dB2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * activation_ReLU(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    dB1 = 1 / m * np.sum(dZ1)
    return dW1, dB1, dW2, dB2

def learn(W1, B1, W2, B2, dW1, dB1, dW2, dB2, alpha):
    #we want to update the biases and weights, by the learning rate: alpha
    W1 = W1 - alpha * dW1
    B1 = B1 - alpha * dB1    
    W2 = W2 - alpha * dW2  
    B2 = B2 - alpha * dB2    
    return W1, B1, W2, B2

In [53]:
#this will get the largest value from the output (what the neural network thinks the number is)
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y) #print the expected and actual
    return np.sum(predictions == Y) / Y.size

In [63]:
def gradient_descent(X,Y,alpha, iterations):
    #start with an inital value for weights and biases
    W1, W2, B1, B2 = initalize_parameters()

    for i in range (iterations):
        #we want to first forward propogate
        Z1, A1, Z2, A2= forward_propgation(W1,W2,B1,B2,X) 
        #back propogation to update out weights and biases
        dW1, dB1, dW2, dB2 = backward_propogation(Z1, A1, Z2, A2, W2, Y)
        #depending on the error - change the value of weights and biases
        W1, b1, W2, b2= learn(W1, B1, W2, B2, dW1, dB1, dW2, dB2, alpha)
        #we only want to print every 100 iterations
        if i%100 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A2)
            print(get_accuracy(predictions, Y))
    return W1, B1, W2, B2
    

In [67]:
W1, b1, W2, b2 = gradient_descent(X, Y, 0.10, 10000)

Iteration:  0
[0 0 7 ... 0 7 9] [[0 4 1 ... 6 9 7]]
0.059905990599059905
Iteration:  100
[0 6 2 ... 2 1 3] [[0 4 1 ... 6 9 7]]
0.3694369436943694
Iteration:  200
[0 6 1 ... 6 8 7] [[0 4 1 ... 6 9 7]]
0.5457545754575458
Iteration:  300
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.7541754175417542
Iteration:  400
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.8085808580858086
Iteration:  500
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.8323832383238324
Iteration:  600
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.8435843584358436
Iteration:  700
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.8538853885388539
Iteration:  800
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.8613861386138614
Iteration:  900
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.8665866586658666
Iteration:  1000
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.8698869886988699
Iteration:  1100
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.8722872287228722
Iteration:  1200
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 9 7]]
0.875087508750875
Iteration:  1300
[0 4 1 ... 6 9 7] [[0 4 1 ... 6 