In [1]:
## This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from matplotlib import pyplot as plt


In [2]:
data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

In [3]:
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# **split data into values/pixels and into train/test data**

In [4]:
data = np.array(data)
m, n = data.shape
np.random.shuffle(data)

testData = data[0:1000].T
yTest = testData[0]
xTest = testData[1:n]/255.

trainData = data[1000:m].T
yTrain = trainData[0]
xTrain = trainData[1:n]/255.



In [5]:
def init_parameters():
    W1 = np.random.rand(10,784)-0.5 #generates a 10x784 matrix of random values between 0 and 1 
    b1 = np.random.rand(10,1)-0.5
    W2 = np.random.rand(10,10)-0.5
    b2 = np.random.rand(10,1)-0.5
    return W1, b1, W2, b2

In [6]:
def relu(X):
    return np.maximum(0, X) #returns either X or 0, whichever is higher
                            # filters for x or 0 (like ReLu function)

def derivativeRelu(A):
    return A > 0 
    
def softmax(X):
    #y = np.exp(X-np.max(X))
    #return y/(np.exp(y))
    return np.exp(X) / sum(np.exp(X))
        
def oneHot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y


In [7]:
def forwardP(W1, b1, W2, b2, X):
    #b1 = np.resize(b1, (10,X[0].size))
    #b2 = np.resize(b2, (10,X[0].size))
    Z1 = W1.dot(X) + b1
    A1 = relu(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

def backwardP(Z1, A1, Z2, A2, W2, X,  Y):
    m = Y.size
    oneHotY = oneHot(Y) #correct answers matrix, to be subtracted
    dZ2 = A2 - oneHotY
    dW2 = 1/m * dZ2.dot(A1.T)
    db2 = 1/m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * derivativeRelu(Z1)
    dW1 = 1/m * dZ1.dot(X.T)
    db1 = 1/m * np.sum(dZ1)
    return dW1, db1, dW2, db2
    
    
    

In [8]:
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha*dW1
    b1 = b1 - alpha*db1
    W2 = W2 - alpha*dW2
    b2 = b2 - alpha*db2
    return W1, b1, W2, b2

In [9]:
def get_predictions(A):
    return np.argmax(A, 0) #returns array of indices of max value per column
                            #axis = 0 means per column, axis = 1 => per row
        
def get_accuracy(predictions, A):
    print(predictions, A)
    return np.sum(predictions == A)/A.size #boolean is either 1 or 0, divided by size

def gradient_descent(X, Y, iterations, alpha):
    W1, b1, W2, b2 = init_parameters()
    for i in range(iterations):
        Z1, A1, Z2, A2 = forwardP(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backwardP(Z1, A1, Z2, A2, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        if i % 10 == 0:
            print("Iteration #: ", i)
            print("Accuracy %: ", get_accuracy(get_predictions(A2), Y))
            #print(dW1[0:9,200])
    return W1, b1, W2, b2



In [10]:
W1, b1, W2, b2 = gradient_descent(xTrain, yTrain, 500, .1)

Iteration #:  0
[1 9 1 ... 9 9 9] [1 1 3 ... 1 4 6]
Accuracy %:  0.1296829268292683
Iteration #:  10
[1 1 1 ... 2 9 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.26402439024390245
Iteration #:  20
[1 1 1 ... 2 9 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.33858536585365856
Iteration #:  30
[1 1 1 ... 3 6 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.4039756097560976
Iteration #:  40
[1 1 1 ... 8 6 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.4609268292682927
Iteration #:  50
[8 1 1 ... 8 6 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.5128780487804878
Iteration #:  60
[8 1 1 ... 8 6 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.5542926829268293
Iteration #:  70
[8 1 1 ... 8 6 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.5889024390243902
Iteration #:  80
[8 1 1 ... 8 9 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.6190731707317073
Iteration #:  90
[8 1 1 ... 8 9 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.6425853658536586
Iteration #:  100
[8 1 1 ... 8 9 6] [1 1 3 ... 1 4 6]
Accuracy %:  0.6620243902439025
Iteration #:  110
[8 1 1 ... 8 9 6] [1 1 3 ... 1 4 6]
Accuracy 

In [11]:
def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = forwardP(W1, b1, W2, b2, X)
    return get_predictions(A2)

def test_prediction(i, W1, b1, W2, b2):
    current_image = xTest[:, i, None]
    prediction = make_predictions(xTrain[:, i, None], W1, b1, W2, b2)
    label = yTrain[i]
    print("prediction: ", prediction)
    print("actual: ", label)
    

In [12]:
for i in range(4):
    test_prediction(i, W1, b1, W2, b2)

prediction:  [8]
actual:  1
prediction:  [1]
actual:  1
prediction:  [3]
actual:  3
prediction:  [0]
actual:  0


In [13]:
all_testing_results = make_predictions(xTest, W1, b1, W2, b2)
get_accuracy(all_testing_results, yTest)

[0 2 7 3 5 7 3 5 0 5 0 8 6 8 3 9 5 2 0 5 8 1 5 5 0 9 7 8 3 6 3 7 6 6 2 6 9
 6 0 2 8 4 8 7 5 1 5 6 6 0 1 3 2 5 7 0 4 2 2 8 5 4 3 5 3 7 4 9 0 8 5 3 5 2
 9 8 6 1 5 2 5 7 8 7 0 1 1 1 5 0 9 6 9 9 6 0 8 7 8 6 5 7 1 4 8 3 8 5 0 1 8
 8 6 9 1 9 5 1 2 8 7 7 7 1 7 9 7 1 0 7 6 3 7 2 2 0 1 2 4 1 2 8 6 0 8 4 6 5
 3 4 0 0 1 0 2 3 0 6 0 1 8 1 7 1 5 4 1 6 4 8 4 7 6 7 1 8 2 6 4 5 7 9 3 4 9
 7 6 1 2 4 9 8 9 7 1 5 3 5 5 7 2 5 4 6 6 7 3 3 0 2 2 9 4 8 1 0 1 9 3 6 9 3
 1 3 3 1 9 5 5 9 9 9 2 8 1 7 0 4 5 0 5 1 1 7 2 0 8 7 2 6 0 8 0 7 6 0 4 5 4
 2 8 6 0 1 9 7 1 2 1 2 9 1 7 6 6 2 6 9 1 7 1 1 0 1 6 6 6 6 3 6 7 2 5 7 5 2
 5 3 1 2 7 4 0 9 7 4 8 9 6 1 3 5 7 2 3 8 0 5 8 7 4 2 3 9 0 9 7 8 2 4 5 9 9
 0 7 8 8 0 5 7 3 0 8 1 1 8 5 9 0 2 3 9 6 8 5 0 4 1 0 1 7 7 7 4 3 4 1 8 5 4
 9 1 0 3 5 0 0 5 8 6 2 5 1 0 6 9 9 3 1 0 1 9 4 0 9 7 3 2 3 7 9 0 5 6 0 6 1
 0 6 7 5 4 8 2 8 4 0 6 5 7 4 9 3 2 1 5 3 8 5 6 2 7 0 2 4 7 7 7 7 1 3 0 4 5
 1 3 4 3 0 0 2 5 1 4 8 5 7 7 2 6 3 4 5 8 2 4 8 8 3 8 1 6 0 8 9 7 6 2 9 7 1
 2 1 8 5 7 7 4 0 6 7 0 6 

0.852