In [504]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt

#Loading in our dataset

In [505]:
data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

In [506]:
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#Operations works on arrays, want to convert our data to an array using numpy

In [507]:
data = np.array(data)

#Randomize our data

In [508]:
np.random.shuffle(data)

#Want to get size of our data, m = number of examples, n = number of pixels per example

In [509]:
m, n = data.shape

#Taking first 1000 examples for testing model, rest is for training
#Transpose the data to get into the correct orientation
#Y is the first row of our transposed matrix, labels

In [510]:
data_test = data[0:1000].T
Y_test = data_test[0]
X_test = data_test[1:n]
X_test = X_test / 255.

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape

#W1 = initalize numbers between 0-1 for every elem. in the array size 784
#b1 = initalize numbers between 0-1 for every elem. in the array size 10


In [511]:
def initial_param():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5 
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5 
    return W1, b1, W2, b2
    

In [512]:
def ReLU_Function(Z):
    return np.maximum(0, Z)

In [513]:
def softmax_func(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A

In [514]:
# def forward_prop(W1, b1, W2, b2, W3, b3, X):
#     Z1 = W1.dot(X.T) + b1
#     A1 = ReLU_Function(Z1)
#     Z2 = W2.dot(A1) + b2
#     A2 = ReLU_Function(Z2)
#     Z3 = W3.dot(A2) + b3
#     A3 = softmax_func(Z3)
#     return Z1, A1, Z2, A2, Z3, A3

In [515]:
def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1  # Ensure X is transposed for shape alignment
    A1 = ReLU_Function(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax_func(Z2)
    return Z1, A1, Z2, A2


In [516]:
def one_hot(Y):
    hot_y = np.zeros((Y.size, Y.max() + 1))
    hot_y[np.arange(Y.size), Y] = 1
    hot_y = hot_y.T
    return hot_y

In [517]:
def derivative_ReLU(Z):
    return Z > 0

In [518]:
def back_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    m = Y.size
    hot_y = one_hot(Y)
    
    dz2 = A2 - hot_y
    dw2 = 1 / m * dz2.dot(A1.T)
    db2 = 1 / m * np.sum(dz2)
    
    dz1 = W2.T.dot(dz2) * derivative_ReLU(Z1)
    dw1 = 1 / m * dz1.dot(X.T)
    db1 = 1 / m * np.sum(dz1)
    
    return dw1, db1, dw2, db2
    

In [519]:
def update_func(W1, b1, W2, b2, dw1, db1, dw2, db2,alpha):
    W1 = W1 - alpha*dw1
    b1 = b1 - alpha*db1
    
    W2 = W2 - alpha*dw2
    b2 = b2 - alpha*db2
        
    return W1, b1, W2, b2
    

In [520]:
def get_pred(A2):
    return np.argmax(A2, 0)

In [521]:
def get_acc(pred, Y):
    print(pred, Y)
    return np.sum(pred == Y) / Y.size

In [522]:
def grad_desc(X, Y, epochs, alpha):
    W1, b1, W2, b2 = initial_param()
    for i in range(epochs):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        dw1, db1, dw2, db2 = back_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_func(W1, b1, W2, b2, dw1, db1, dw2, db2, alpha)
        if i % 10 == 0:
            print('Iteration #: ', i)
            print('Accuracy: ', get_acc(get_pred(A2), Y))
            
    return W1, b1, W2, b2

In [523]:
W1, b1, W2, b2 = grad_desc(X_train, Y_train, 500, 0.10)

Iteration #:  0
[0 0 0 ... 0 0 7] [9 1 6 ... 2 9 4]
Accuracy:  0.09865853658536586
Iteration #:  10
[0 1 5 ... 0 0 7] [9 1 6 ... 2 9 4]
Accuracy:  0.2080731707317073
Iteration #:  20
[0 1 6 ... 2 4 7] [9 1 6 ... 2 9 4]
Accuracy:  0.30890243902439024
Iteration #:  30
[0 1 6 ... 2 4 7] [9 1 6 ... 2 9 4]
Accuracy:  0.39807317073170734
Iteration #:  40
[8 1 6 ... 2 4 7] [9 1 6 ... 2 9 4]
Accuracy:  0.4719512195121951
Iteration #:  50
[8 1 6 ... 2 9 7] [9 1 6 ... 2 9 4]
Accuracy:  0.5261219512195122
Iteration #:  60
[8 1 6 ... 2 9 7] [9 1 6 ... 2 9 4]
Accuracy:  0.5698780487804878
Iteration #:  70
[8 1 6 ... 2 9 7] [9 1 6 ... 2 9 4]
Accuracy:  0.6066341463414634
Iteration #:  80
[8 1 6 ... 2 9 7] [9 1 6 ... 2 9 4]
Accuracy:  0.6355609756097561
Iteration #:  90
[8 1 6 ... 2 9 7] [9 1 6 ... 2 9 4]
Accuracy:  0.6596829268292683
Iteration #:  100
[8 1 6 ... 2 9 7] [9 1 6 ... 2 9 4]
Accuracy:  0.6805121951219513
Iteration #:  110
[8 1 6 ... 2 9 9] [9 1 6 ... 2 9 4]
Accuracy:  0.6980975609756097
