#### Build a two-layer neural network from scratch In python

Importing Dependencies for Loading MNIST dataset.

In [1]:
from mnist import MNIST
import numpy as np
import pandas as pd

In [2]:
mndata = MNIST('samples')
images, labels = mndata.load_training()
images1, labels1 = mndata.load_testing()

In [3]:
#Converting list into numpy arrays
train_x=np.array(images)
train_y=np.array(labels)
test_x=np.array(images1)
test_y=np.array(labels1)

In [4]:
#Reshaping in Conventional Form
train_x=train_x.T
test_x=test_x.T
train_y=train_y.reshape(1,60000)
test_y=test_y.reshape(1,10000)

In [5]:
#Normalizing
train_x=train_x/255
test_x=test_x/255

In [6]:
X=train_x
y=train_y

OneHot Encoding for Labels

In [8]:
digits = 10 # No. of Classes
m=60000 #Training Instances

Y_new = np.eye(digits)[y.astype('int32')]
Y_new = Y_new.T.reshape(digits,m)
Y_test = np.eye(digits)[test_y.astype('int32')]
Y_test = Y_test.T.reshape(digits,10000)

Sigmoid Functon:

In [9]:
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s

Cost Function for Multiclass:

L(y,ŷ) = −$∑^{n}_{i=0}$$y_{i}$log(ŷi)

In [10]:
def compute_multiclass_loss(Y, Y_hat):

    L_sum = np.sum(np.multiply(Y, np.log(Y_hat)))
    m = Y.shape[1]
    L = -(1/m) * L_sum

    return L

Building NN:

In [11]:
n_x = X.shape[0]
n_h = 64
n_y=10
learning_rate = 0.01

W1 = np.random.randn(n_h, n_x)
b1 = np.zeros((n_h, 1))
W2 = np.random.randn(n_y, n_h)
b2 = np.zeros((n_y, 1))


X = X
Y = Y_new

for i in range(2000):
    ##Forward Prop
    Z1 = np.dot(W1,X) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(W2,A1) + b2
    A2 = np.exp(Z2) / np.sum(np.exp(Z2), axis=0)

    cost = compute_multiclass_loss(Y, A2)
    ##Back Prop
    dZ2 = A2-Y
    dW2 = (1./m) * np.dot(dZ2, A1.T)
    db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * sigmoid(Z1) * (1 - sigmoid(Z1))
    dW1 = (1./m) * np.dot(dZ1, X.T)
    db1 = (1./m) * np.sum(dZ1, axis=1, keepdims=True)
    ##Updating Params
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1

    if (i % 100 == 0):
        print("Epoch", i, "cost: ", cost)

print("Final cost:", cost)

Epoch 0 cost:  11.529319896349692
Epoch 100 cost:  6.265106898260446
Epoch 200 cost:  4.61342442837396
Epoch 300 cost:  4.024388119476614
Epoch 400 cost:  3.67585483414976
Epoch 500 cost:  3.391671570527805
Epoch 600 cost:  3.1460399422009364
Epoch 700 cost:  2.931653377692734
Epoch 800 cost:  2.7438029277874
Epoch 900 cost:  2.578678016261339
Epoch 1000 cost:  2.4330622891611826
Epoch 1100 cost:  2.3042152460984147
Epoch 1200 cost:  2.1897950560758668
Epoch 1300 cost:  2.0877984063994544
Epoch 1400 cost:  1.996513297574221
Epoch 1500 cost:  1.914480076993246
Epoch 1600 cost:  1.8404577526345134
Epoch 1700 cost:  1.7733934958622055
Epoch 1800 cost:  1.7123949212497953
Epoch 1900 cost:  1.6567051254957428
Final cost: 1.606169744221766


Validation:

In [12]:
from sklearn.metrics import classification_report, confusion_matrix

In [16]:
Z1 = np.dot(W1, test_x) + b1
A1 = sigmoid(Z1)
Z2 = np.dot(W2, A1) + b2
A2 = np.exp(Z2) / np.sum(np.exp(Z2), axis=0)

predictions = np.argmax(A2, axis=0)
labels = np.argmax(Y_test, axis=0)

In [17]:
print(confusion_matrix(predictions, labels))

[[702   3  57  48   7  80  58  16  47  23]
 [  5 974  56  30  27  38  21  15  61   6]
 [ 29  20 384  51  57  23  71  96 113  69]
 [ 21  40 152 458  18 175  61  15  68  24]
 [ 11   9  45  10 482  35  63  64  24 230]
 [ 77  29  40 167  39 301  42  27  66  54]
 [ 54   1  94  75  56  67 564  11  74  14]
 [ 29   4  35  78  60  63  24 585  82 147]
 [ 34  48 133  69  36  65  21  33 400  94]
 [ 18   7  36  24 200  45  33 166  39 348]]


In [18]:
print(classification_report(predictions, labels))

              precision    recall  f1-score   support

           0       0.72      0.67      0.69      1041
           1       0.86      0.79      0.82      1233
           2       0.37      0.42      0.39       913
           3       0.45      0.44      0.45      1032
           4       0.49      0.50      0.49       973
           5       0.34      0.36      0.35       842
           6       0.59      0.56      0.57      1010
           7       0.57      0.53      0.55      1107
           8       0.41      0.43      0.42       933
           9       0.34      0.38      0.36       916

   micro avg       0.52      0.52      0.52     10000
   macro avg       0.51      0.51      0.51     10000
weighted avg       0.53      0.52      0.52     10000

