In [12]:
import sys
import numpy as np
from numpy import genfromtxt
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score
np.set_printoptions(threshold=sys.maxsize)

In [2]:
# import Data
mnist_train = genfromtxt("/Users/schand/Documents/mnist_train.csv",delimiter=',')
mnist_test = genfromtxt("/Users/schand/Documents/mnist_test.csv",delimiter=',')

In [3]:
m = mnist_train.shape[0]
m_test = mnist_test.shape[0]

In [17]:
X_train, y_train = mnist_train[:,1:785], mnist_train[:,0]
X_test, y_test = mnist_test[:,1:785], mnist_test[:,0]
X_train, X_test = X_train[:m].T, X_test[:m_test].T
#y_train, y_test = y_train[:m].reshape(1,m), y_test[:m_test].reshape(1,m_test)

In [24]:
# Just building a zero-classifier for now.
y_train_new = np.zeros(y_train.shape)
y_train_new[np.where(y_train == 0.0)[0]] = 1
y_test_new = np.zeros(y_test.shape)
y_test_new[np.where(y_test == 0.0)[0]] = 1

#### Activation Function

In [21]:
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s
def ReLU(z):
    r = np.maximum(0,z)
    return r
def softmax(z):
    expo = np.exp(z)
    expo_sum = np.sum(np.exp(z))
    return expo/expo_sum
    

#### Loss Function

In [22]:
def compute_loss(Y, Y_hat):

    L = -(1./m) * ( np.sum( np.multiply(np.log(Y_hat),Y) ) + np.sum( np.multiply(np.log(1-Y_hat),(1-Y)) ) )

    return L

#### Train Neural Network

In [27]:
# we are building neral network with three hidden layer(256, 64, 16) with input 784 neurons and 1 output neurons

X = X_train * 0.99/255 + 0.01 # normalize X_train in between 0.01 to 1
Y = y_train_new

n_x = X.shape[0]
n_1 = 784
n_2 = 256
n_3 = 64
n_4 = 16
n_5 = 1

learning_rate = 3

W1 = np.random.randn(n_1, n_x)
b1 = np.zeros((n_1, 1))
W2 = np.random.randn(n_2, n_1)
b2 = np.zeros((n_2, 1))
W3 = np.random.randn(n_3,n_2)
b3 = np.zeros((n_3,1))
W4 = np.random.randn(n_4,n_3)
b4 = np.zeros((n_4,1))
W5 = np.random.randn(n_5,n_4)
b5 = np.zeros((n_5,1))

for i in range(100):
    Z1 = np.matmul(W1, X) + b1
    A1 = sigmoid(Z1)
    Z2 = np.matmul(W2, A1) + b2
    A2 = sigmoid(Z2)
    Z3 = np.matmul(W3, A2) + b3
    A3 = sigmoid(Z3)
    Z4 = np.matmul(W4, A3) + b4
    A4 = sigmoid(Z4)
    Z5 = np.matmul(W5, A4) + b5
    A5 = sigmoid(Z5)

    Y_hat = A5
    cost = compute_loss(Y, Y_hat)
    

    dZ5 = Y_hat-Y
    dW5 = (1./m) * np.matmul(dZ5, A4.T)
    db5 = (1./m) * np.sum(dZ5, axis=1, keepdims=True)
        
    dA4 = np.matmul(W5.T, dZ5)
    dZ4 = dA4 * sigmoid(Z4) * (1 - sigmoid(Z4))
    dW4 = (1./m) * np.matmul(dZ4, A3.T)
    db4 = (1./m) * np.sum(dZ4, axis=1, keepdims=True)

    dA3 = np.matmul(W4.T, dZ4)
    dZ3 = dA3 * sigmoid(Z3) * (1 - sigmoid(Z3))
    dW3 = (1./m) * np.matmul(dZ3, A2.T)
    db3 = (1./m) * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.matmul(W3.T, dZ3)
    dZ2 = dA2 * sigmoid(Z2) * (1 - sigmoid(Z2))
    dW2 = (1./m) * np.matmul(dZ2, A1.T)
    db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)
    
    dA1 = np.matmul(W2.T, dZ2)
    dZ1 = dA1 * sigmoid(Z1) * (1 - sigmoid(Z1))
    dW1 = (1./m) * np.matmul(dZ1, X.T)
    db1 = (1./m) * np.sum(dZ1, axis=1, keepdims=True)
    
    W5 = W5 - learning_rate * dW5
    b5 = b5 - learning_rate * db5
    W4 = W4 - learning_rate * dW4
    b4 = b4 - learning_rate * db4
    W3 = W3 - learning_rate * dW3
    b3 = b3 - learning_rate * db3
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1

    if i % 10 == 0:
        print("Epoch", i, "cost: ", cost)
print("Final cost:", cost)

Epoch 0 cost:  0.3576621899656291
Epoch 10 cost:  0.216723618326031
Epoch 20 cost:  0.1526620995591674
Epoch 30 cost:  0.10202617926768559
Epoch 40 cost:  0.08333919296000918
Epoch 50 cost:  0.07377817839997725
Epoch 60 cost:  0.06662565831736365
Epoch 70 cost:  0.060980393080738805
Epoch 80 cost:  0.0563533768454155
Epoch 90 cost:  0.052437476676484754
Final cost: 0.04936507433644391


#### Test Neural Network

In [28]:
Z1 = np.matmul(W1, X) + b1
A1 = sigmoid(Z1)
Z2 = np.matmul(W2, A1) + b2
A2 = sigmoid(Z2)
Z3 = np.matmul(W3, A2) + b3
A3 = sigmoid(Z3)
Z4 = np.matmul(W4, A3) + b4
A4 = sigmoid(Z4)
Z5 = np.matmul(W5, A4) + b5
A5 = sigmoid(Z5)
Y_hat = (A5>0.5)[0,:]

In [29]:
print('Confusion matrix is', confusion_matrix(Y_hat,Y))
print('Accuracy Score is', accuracy_score(Y_hat,Y))

Confusion matrix is [[53762   562]
 [  315  5361]]
Accuracy Score is 0.9853833333333334


##### The final cost is 0.04936507433644391 which is considerably low with accuarcy score 0.9853833333333334. The cost function can be reduced by inceasing the epochs at the same time we should think of overfitting problem. I did only zero-classifier due to limited computaional power.