In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
# Read the CSV and convert it to a plain array with m rows and n columns.
data = np.array(pd.read_csv('train.csv'))
m, n = data.shape
np.random.shuffle(data)  # in-place shuffle of the rows; no seed is set in this cell

# Hold out the first 2000 shuffled rows. After the transpose every column is
# one CSV row: row 0 is taken as the label, rows 1..n-1 as the features.
test_data = data[:2000].T
Y_test = test_data[0]
X_test = test_data[1:n] / 255.  # presumably 8-bit pixel values scaled to [0, 1] - TODO confirm

# The remaining rows form the training split, laid out the same way.
train_data = data[2000:].T
Y_train = train_data[0]
X_train = train_data[1:n] / 255.
print(X_train)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [3]:
def initialise_parameters(n_inputs=784, n_hidden=128, n_outputs=10):
    """Create random starting weights and biases for a two-layer network.

    Every entry is drawn uniformly from [-0.5, 0.5) using NumPy's global
    random state, so seed it with ``np.random.seed`` for reproducible runs.

    Args:
        n_inputs: Number of input features (columns of ``W1``).
        n_hidden: Number of hidden units.
        n_outputs: Number of output units (rows of ``W2``).

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_inputs)``,
        ``(n_hidden, 1)``, ``(n_outputs, n_hidden)`` and ``(n_outputs, 1)``.
    """
    # The draw order W1, b1, W2, b2 is kept so that a seeded call with the
    # default sizes yields the same values as before.
    W1 = np.random.rand(n_hidden, n_inputs) - 0.5
    b1 = np.random.rand(n_hidden, 1) - 0.5
    W2 = np.random.rand(n_outputs, n_hidden) - 0.5
    b2 = np.random.rand(n_outputs, 1) - 0.5
    return W1, b1, W2, b2

def ReLU(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    rectified = np.maximum(0, Z)
    return rectified

def SoftMax(x):
    """Softmax taken along axis 0, so each column of the result sums to 1.

    The per-column maximum is subtracted before exponentiating, which keeps
    ``np.exp`` from overflowing on large inputs without changing the result.
    """
    column_peak = np.max(x, axis=0, keepdims=True)
    exponentials = np.exp(x - column_peak)
    column_total = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / column_total

def forward_propagation(W1, b1, W2, b2, X):
    """Run one forward pass through the hidden (ReLU) and output (softmax) layers.

    Args:
        W1, b1: Weights and bias of the hidden layer.
        W2, b2: Weights and bias of the output layer.
        X: Input matrix; assumed to hold one sample per column - TODO confirm.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: the pre-activation and activation of the
        hidden layer, followed by those of the output layer.
    """
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = ReLU(hidden_pre)
    output_pre = np.dot(W2, hidden_act) + b2
    return hidden_pre, hidden_act, output_pre, SoftMax(output_pre)

def ReLU_deriv(Z):
    """Derivative of ReLU as a boolean mask: True where ``Z`` is strictly positive."""
    is_positive = Z > 0
    return is_positive

def one_hot_encoding(Y, num_classes=None):
    """Convert integer class labels into a one-hot matrix.

    Args:
        Y: Integer labels, one per sample.
        num_classes: Number of rows in the result. When omitted it is
            inferred as ``Y.max() + 1``. Pass it explicitly when the batch
            may not contain the highest class (for example a single sample),
            otherwise the result has too few rows.

    Returns:
        Float array of shape ``(num_classes, Y.size)`` where column ``i`` has
        a single 1 in row ``Y[i]``.

    Raises:
        ValueError: If ``Y`` is empty and ``num_classes`` is not given.
        IndexError: If a label is not a valid row index for ``num_classes``.
    """
    Y = np.asarray(Y)
    if num_classes is None:
        num_classes = Y.max() + 1
    one_hot_Y = np.zeros((num_classes, Y.size))
    # One vectorised assignment replaces the per-sample Python loop:
    # row = class label, column = sample position.
    one_hot_Y[Y, np.arange(Y.size)] = 1
    return one_hot_Y

def backward_propagation(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute the parameter gradients for one batch.

    Args:
        Z1: Hidden-layer pre-activations from the forward pass.
        A1: Hidden-layer activations from the forward pass.
        Z2: Output-layer pre-activations (unused; kept for the call signature).
        A2: Output-layer activations from the forward pass.
        W1: Hidden-layer weights (unused; kept for the call signature).
        W2: Output-layer weights.
        X: Input matrix that produced the activations.
        Y: Integer class labels, one per sample.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)`` of gradients averaged over the batch.
    """
    # Derive the sample count from the labels actually passed in. The
    # module-level ``m`` used before is the row count of the whole CSV
    # (including the held-out rows), which scaled every gradient wrongly.
    m = Y.size
    Y = one_hot_encoding(Y)
    dZ2 = A2 - Y  # output error: activations minus one-hot targets
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2, axis=1, keepdims=True)
    # Push the error back through W2, zeroing it where the ReLU was inactive.
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1, axis=1, keepdims=True)
    return dW1, db1, dW2, db2

def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Take one gradient-descent step of size ``alpha`` on every parameter.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of updated values; the inputs are not
        modified in place.
    """
    parameters = (W1, b1, W2, b2)
    gradients = (dW1, db1, dW2, db2)
    return tuple(
        value - alpha * slope for value, slope in zip(parameters, gradients)
    )

In [4]:
def get_predictions(A2):
    """Return, for each column of ``A2``, the row index holding the largest value."""
    winning_rows = np.argmax(A2, axis=0)
    return winning_rows

def get_accuracy(predictions, Y):
    """Print both arrays, then return the fraction of predictions equal to ``Y``."""
    print(predictions, Y)
    matches = predictions == Y
    hit_count = np.sum(matches)
    return hit_count / Y.size

def gradient_descent(X, Y, alpha, iterations):
    """Train the network with full-batch gradient descent.

    Args:
        X: Input matrix passed unchanged to the forward and backward passes.
        Y: Integer class labels for ``X``.
        alpha: Learning rate.
        iterations: Number of update steps to run.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of trained parameters.
    """
    W1, b1, W2, b2 = initialise_parameters()
    for step in range(iterations):
        Z1, A1, Z2, A2 = forward_propagation(W1, b1, W2, b2, X)
        grad_W1, grad_b1, grad_W2, grad_b2 = backward_propagation(
            Z1, A1, Z2, A2, W1, W2, X, Y
        )
        W1, b1, W2, b2 = update_parameters(
            W1, b1, W2, b2, grad_W1, grad_b1, grad_W2, grad_b2, alpha
        )
        if step % 100 != 0:
            continue
        # Progress report every 100 steps. A2 comes from the forward pass
        # above, i.e. from the parameters before this step's update.
        print("Iteration: ", step)
        print(get_accuracy(get_predictions(A2), Y))
    return W1, b1, W2, b2

In [5]:
# Train on the training split: learning rate 0.1 for 2000 full-batch steps.
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, alpha=0.1, iterations=2000)

Iteration:  0
[5 2 3 ... 2 2 2] [0 1 2 ... 4 7 1]
0.108325
Iteration:  100
[0 1 8 ... 4 7 1] [0 1 2 ... 4 7 1]
0.824025
Iteration:  200
[0 1 8 ... 4 7 1] [0 1 2 ... 4 7 1]
0.866775
Iteration:  300
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.88415
Iteration:  400
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.89615
Iteration:  500
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.904825
Iteration:  600
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.911275
Iteration:  700
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.91605
Iteration:  800
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.920625
Iteration:  900
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.92465
Iteration:  1000
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.92775
Iteration:  1100
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.930175
Iteration:  1200
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.9329
Iteration:  1300
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.93545
Iteration:  1400
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.937275
Iteration:  1500
[0 1 2 ... 4 7 1] [0 1 2 ... 4 7 1]
0.9394
Iteration:  1600
[0 1 2 ... 4 

In [6]:
def make_predictions(X, W1, b1, W2, b2):
    """Forward ``X`` through the network and return the predicted class per column."""
    forward_outputs = forward_propagation(W1, b1, W2, b2, X)
    output_activations = forward_outputs[3]  # A2, the last of (Z1, A1, Z2, A2)
    return get_predictions(output_activations)

def test_prediction(index, W1, b1, W2, b2):
    """Print the prediction and label for one training sample and show its image.

    Note that the sample is read from the module-level ``X_train`` /
    ``Y_train``, not from the held-out split.
    """
    # Indexing with None keeps the sample as a single-column 2-D array.
    sample = X_train[:, index, None]
    prediction = make_predictions(sample, W1, b1, W2, b2)
    label = Y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)

    # Undo the /255 scaling and lay the values out as a 28x28 image.
    pixels = sample.reshape((28, 28)) * 255
    plt.gray()
    plt.imshow(pixels, interpolation='nearest')
    plt.show()
    
# Dump the trained parameters, one array after another.
print(W1, b1, W2, b2, sep='\n')


[[ 0.37588132  0.13115946  0.38749927 ...  0.49108311  0.40202625
   0.16129671]
 [ 0.40579946  0.36304921 -0.29185671 ...  0.11282162  0.39797769
  -0.42310236]
 [-0.22869294 -0.35207193 -0.48319422 ...  0.06101716 -0.34604576
   0.25474846]
 ...
 [-0.36388456  0.13502101  0.1287832  ... -0.08335446 -0.01770727
  -0.13971217]
 [ 0.14046952  0.0777973  -0.0411963  ... -0.22134573  0.42837989
  -0.42937413]
 [-0.4958482   0.23127896  0.28383976 ...  0.01028253  0.11446039
  -0.47685235]]
[[-0.33598823]
 [ 0.06342529]
 [ 0.34620537]
 [-0.21065724]
 [ 0.17135922]
 [-0.26090279]
 [-0.28755408]
 [ 0.10327768]
 [ 0.47079494]
 [ 0.37775058]
 [ 0.21313678]
 [-0.12416539]
 [-0.12994686]
 [-0.05594615]
 [-0.35939124]
 [ 0.0765567 ]
 [ 0.39106131]
 [ 0.0721638 ]
 [-0.41878913]
 [ 0.38201329]
 [-0.48404429]
 [-0.28044371]
 [-0.37069259]
 [ 0.21078562]
 [ 0.59735689]
 [ 0.41359379]
 [-0.33227345]
 [ 0.3157379 ]
 [ 0.27998518]
 [ 0.24049717]
 [-0.17723016]
 [ 0.22605772]
 [-0.31903401]
 [-0.1181121 

In [7]:
# Score the trained network on the held-out split; the bare last expression
# is what the cell displays as its result.
test_pred = make_predictions(X=X_test, W1=W1, b1=b1, W2=W2, b2=b2)
get_accuracy(predictions=test_pred, Y=Y_test)

[4 1 6 ... 3 2 0] [4 1 6 ... 3 2 0]


0.93