# Building a Neural Network from Scratch

http://jonathanweisberg.org/post/A%20Neural%20Network%20from%20Scratch%20-%20Part%201/


In this post we’re going to build a neural network from scratch. We’ll train it to recognize hand-written digits, using the famous MNIST data set.

We’ll use just basic Python with NumPy to build our network (no high-level stuff like Keras or TensorFlow). We will dip into scikit-learn, but only to get the MNIST data and to assess our model once it’s built.

---

# The data (MNIST)
## Data: 70,000 examples

In [1]:
import time
import numpy as np
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [2]:
# Record the wall-clock start time (seconds since the epoch) so elapsed time
# can be measured later; t0 is not read again in the cells shown here.
t0 = time.time()

In [3]:
from sklearn.datasets import fetch_openml

# fetch_mldata() was removed from scikit-learn and mldata.org is offline, so
# the data set is loaded from OpenML instead ("mnist_784": 70,000 x 784).
# as_frame=False (scikit-learn >= 0.22) keeps data/target as NumPy arrays
# rather than pandas objects, which the slicing in later cells relies on.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# OpenML delivers the labels as strings ('0'..'9'); convert them to numbers so
# y has a numeric dtype, as it did with the old loader.
X, y = mnist["data"], mnist["target"].astype(np.float64)

In [4]:
# Inspect what the loader returned: the container type first, then the
# type and shape of the feature matrix and of the label vector.
print(type(mnist))

print(type(X), X.shape, sep="\n")

print(type(y), y.shape, sep="\n")

<class 'sklearn.utils.Bunch'>
<class 'numpy.ndarray'>
(70000, 784)
<class 'numpy.ndarray'>
(70000,)


In [5]:
# We’ll normalize the data to keep our gradients manageable.
# Dividing by 255 presumably maps 8-bit pixel intensities into [0, 1] -- TODO
# confirm the loader's value range. The division builds a new array and
# rebinds X to it (it is not an in-place update).

X = X / 255

In [6]:
# One-hot encode the labels: every example becomes a column holding a single
# 1 in the row of its digit, giving a (digits x examples) array.

digits = 10
examples = len(y)

print(y.shape)
# Promote the flat label vector to a single-row matrix.
y = y[np.newaxis, :]
print(type(y), y.shape, sep="\n")

# Row k of the identity matrix is the one-hot vector for digit k, so indexing
# it with the integer labels selects one such row per example.
Y_new = np.eye(digits)[y.astype(np.int32)]
print(type(Y_new), Y_new.shape, sep="\n")

# Put the class axis first and fold away the leftover singleton axis.
Y_new = Y_new.T.reshape(digits, examples)
print(type(Y_new), Y_new.shape, sep="\n")

(70000,)
<class 'numpy.ndarray'>
(1, 70000)
<class 'numpy.ndarray'>
(1, 70000, 10)
<class 'numpy.ndarray'>
(10, 70000)


In [7]:
# Compare the reshaped label row with its one-hot encoded counterpart.
print(type(y), y.shape, sep="\n")

print(type(Y_new), Y_new.shape, sep="\n")

<class 'numpy.ndarray'>
(1, 70000)
<class 'numpy.ndarray'>
(10, 70000)


In [8]:
# Split into training and test sets, lay the examples out as columns, and
# shuffle the training columns.

m = 60000
m_test = len(X) - m

# Features: the first m rows are training data, the remainder is test data;
# transposing puts one example per column.
X_train = X[:m].T
X_test = X[m:].T

# Labels are already stored column-wise, so they are cut along axis 1.
Y_train = Y_new[:, :m]
Y_test = Y_new[:, m:]

# Apply one shared random permutation so images and labels stay aligned.
shuffle_index = np.random.permutation(m)
X_train = X_train[:, shuffle_index]
Y_train = Y_train[:, shuffle_index]

In [9]:
# Report the type and shape of each of the four arrays produced by the split.
print(type(X_train), X_train.shape, sep="\n")

print(type(Y_train), Y_train.shape, sep="\n")

print(type(X_test), X_test.shape, sep="\n")

print(type(Y_test), Y_test.shape, sep="\n")

<class 'numpy.ndarray'>
(784, 60000)
<class 'numpy.ndarray'>
(10, 60000)
<class 'numpy.ndarray'>
(784, 10000)
<class 'numpy.ndarray'>
(10, 10000)


In [None]:
# A quick check that things are as they should be:
# draw one training example as an image and look at its label column.

i = 12
# Column i of X_train is reshaped to 28 x 28 so it can be shown as an image.
plt.imshow(X_train[:,i].reshape(28,28), cmap = matplotlib.cm.binary)
plt.axis("off")
plt.show()
# Bare trailing expression: presumably echoed as the cell's output when run in
# a notebook; it has no visible effect when executed as a plain script.
Y_train[:,i]

In [None]:
#So let’s define:

def compute_multiclass_loss(Y, Y_hat):
    """Return the mean cross-entropy between labels and predictions.

    Computes ``-(1/m) * sum(Y * log(Y_hat))`` where ``m = Y.shape[1]``.

    Args:
        Y: label array whose second axis indexes the examples (the caller in
            this file passes one-hot columns).
        Y_hat: predicted values, broadcastable against ``Y``.

    Returns:
        The summed ``Y * log(Y_hat)`` terms, negated and divided by ``m``.
    """
    # Entries whose label weight is zero contribute nothing to the sum.
    # Evaluating log() there anyway would turn an exact zero in Y_hat into
    # 0 * -inf = NaN and poison the whole loss, so those positions are
    # replaced by 1 (log(1) == 0) before taking the logarithm.
    safe_Y_hat = np.where(Y != 0, Y_hat, 1.0)

    L_sum = np.sum(np.multiply(Y, np.log(safe_Y_hat)))
    m = Y.shape[1]
    L = -(1/m) * L_sum

    return L

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Args:
        z: a scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z)
    # exp() is only ever applied to a non-positive argument, so it cannot
    # overflow; the naive form overflows in exp(-z) for large negative z.
    e = np.exp(-np.abs(z))
    # For z >= 0: 1 / (1 + exp(-z)).  For z < 0 the algebraically identical
    # form exp(z) / (1 + exp(z)) is used.
    s = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () unwraps a 0-d result to a scalar and leaves real
    # arrays untouched.
    return s[()]

### one hidden layer

In [None]:
# 4.5 Build & Train
#
# One hidden layer (sigmoid) followed by a softmax output layer, trained with
# full-batch gradient descent on the whole training set.

n_x = X_train.shape[0]
n_h = 64
learning_rate = 1

W1 = np.random.randn(n_h, n_x)
b1 = np.zeros((n_h, 1))
W2 = np.random.randn(digits, n_h)
b2 = np.zeros((digits, 1))

# NOTE: this rebinds X, which previously held the full data set.
X = X_train
Y = Y_train
# Take the example count from the data actually being trained on, so the
# gradient scaling does not silently depend on a value set in an earlier cell.
m = X.shape[1]

for i in range(2000):

    # Forward pass.
    Z1 = np.matmul(W1, X) + b1
    A1 = sigmoid(Z1)
    Z2 = np.matmul(W2, A1) + b2
    # Softmax over each column. Subtracting the column maximum first leaves
    # the result unchanged mathematically but keeps exp() from overflowing,
    # which would otherwise produce inf / inf = NaN.
    exp_Z2 = np.exp(Z2 - np.max(Z2, axis=0, keepdims=True))
    A2 = exp_Z2 / np.sum(exp_Z2, axis=0, keepdims=True)

    cost = compute_multiclass_loss(Y, A2)

    # Backward pass: output layer.
    dZ2 = A2 - Y
    dW2 = (1./m) * np.matmul(dZ2, A1.T)
    db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)

    # Backward pass: hidden layer. The sigmoid derivative is s * (1 - s), and
    # s is already available as A1, so it is not recomputed from Z1.
    dA1 = np.matmul(W2.T, dZ2)
    dZ1 = dA1 * A1 * (1 - A1)
    dW1 = (1./m) * np.matmul(dZ1, X.T)
    db1 = (1./m) * np.sum(dZ1, axis=1, keepdims=True)

    # Gradient-descent update.
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1

    # Each iteration is one pass over the full training set; report the cost
    # every 100th pass.
    if i % 100 == 0:
        print("Epoch", i, "cost: ", cost)

print("Final cost:", cost)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Let’s see how we did:
# run the trained network forward on the test set and compare its predicted
# digits with the true ones.

Z1 = np.matmul(W1, X_test) + b1
A1 = sigmoid(Z1)
Z2 = np.matmul(W2, A1) + b2
# Column-wise softmax; subtracting the column maximum keeps exp() from
# overflowing without changing the result.
exp_Z2 = np.exp(Z2 - np.max(Z2, axis=0, keepdims=True))
A2 = exp_Z2 / np.sum(exp_Z2, axis=0, keepdims=True)

# Row index of the largest entry in each column = digit class.
predictions = np.argmax(A2, axis=0)
labels = np.argmax(Y_test, axis=0)

# Positions (within the test set) where prediction and true label disagree.
misclassifiedIndexes = np.flatnonzero(labels != predictions).tolist()

# scikit-learn expects (y_true, y_pred) in that order; passing them swapped
# transposes the confusion matrix and exchanges precision with recall.
print(confusion_matrix(labels, predictions))
print(classification_report(labels, predictions))

In [None]:
# Number of test examples whose predicted digit differs from the true label.
print(len(misclassifiedIndexes))

In [None]:
# Accuracy = 1 - (misclassified / total test examples). Both counts come from
# the current run rather than a number copied from an earlier run's output.
print("Accuracy:  ", 1 - len(misclassifiedIndexes) / len(labels))