In [60]:
import numpy as np
import pandas as pd
from keras.datasets import mnist
np.random.seed(42)

In [61]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [62]:
X_train = X_train.reshape((X_train.shape[0], 28*28)).astype('float32').T
X_test = X_test.reshape((X_test.shape[0], 28*28)).astype('float32').T

In [63]:
X_train = X_train / 255
X_test = X_test / 255

In [64]:
X_train.shape

(784, 60000)

In [65]:
def label_encoder(y):
    zeros = np.zeros((y.shape[0], np.max(y)+1))
    for i, num in enumerate(y):
        zeros[i][np.squeeze(num)] = 1
    return zeros.T

In [28]:
def initialize_parameters(n_x, n_h=10, n_y=10): # (number of inputs, number of units in hidden, number of outputs)
    # Need 2 sets of parameters for 2-layer network
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    parameters = {
        'W1': W1,
        'b1': b1,
        'W2': W2,
        'b2': b2
    }
    return parameters

In [29]:
def relu(Z):
    return np.maximum(0,Z)

In [30]:
def sigmoid(Z):
    sigmoid = 1 / (1 + np.exp(-Z))
    return sigmoid

In [31]:
def softmax(Z):
    softmax = np.exp(Z) / sum(np.exp(Z))
    return softmax

In [32]:
def forward_propagation(X, params):
    W1, b1, W2, b2 = params.values()

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = softmax(Z2)
    cache = {
        'Z1': Z1,
        'A1': A1,
        'Z2': Z2,
        'A2': A2
    }
    return A2, cache

In [33]:
params = initialize_parameters(X_train.shape[0])
A2, cache = forward_propagation(X_train, params)
A2.shape

(10, 60000)

In [34]:
def compute_cost(A2, y):
    m = y.shape[0] 
    cost = -np.sum(y * np.log(A2)) / m

    return cost

In [35]:
compute_cost(A2, y_train)

13814.147802701613

### Back Propagation

In [36]:
def relu_deriv(Z):
    return Z > 0

In [66]:
def back_propagation(params, cache, X, y):
    W1, b1, W2, b2 = params.values()
    Z1, A1, Z2, A2 = cache.values()
    
    m = X.shape[1]
    y = label_encoder(y)
    
    dZ2 = A2 - y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.dot(W2.T, dZ2) * relu_deriv(Z1)
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    grad_params = {
        'dZ2': dZ2,
        'dW2': dW2,
        'db2': db2,
        'dZ1': dZ1,
        'dW1': dW1,
        'db1': db1
    }

    return grad_params

In [67]:
def update_parameters(params, gradients, learning_rate):
    
    params['W1'] -= learning_rate * gradients['dW1']
    params['b1'] -= learning_rate * gradients['db1']
    params['W2'] -= learning_rate * gradients['dW2']
    params['b2'] -= learning_rate * gradients['db2']

### Putting together the model

In [71]:
def get_predictions(A2):
    return np.argmax(A2,0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

In [72]:
def model(X, y, learning_rate = 0.1, num_iterations = 3000):
    parameters = initialize_parameters(X.shape[0])
    costs = []
    for i in range(num_iterations):  
        A2, cache = forward_propagation(X, parameters)
        curr_cost = compute_cost(A2,y)

        gradients = back_propagation(parameters, cache, X, y)
        
        update_parameters(parameters, gradients, learning_rate)
        
        if i % 100 == 0:
            costs.append(curr_cost)
            print(f'Cost for iteration {i}: {curr_cost}')
            print(f'Accuracy for iteration {i}: {get_accuracy(get_predictions(A2), y)}')
    return parameters, costs

In [73]:
parameters, costs = model(X_train, y_train, num_iterations = 2500)

Cost for iteration 0: 102.55574267067732
[7 6 6 ... 6 7 6] [5 0 4 ... 5 6 8]
Accuracy for iteration 0: 0.0762
Cost for iteration 100: 113.687410776892
[3 0 4 ... 8 0 6] [5 0 4 ... 5 6 8]
Accuracy for iteration 100: 0.5751
Cost for iteration 200: 212.7216821639505
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy for iteration 200: 0.82365
Cost for iteration 300: 268.1081262203305
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy for iteration 300: 0.8677166666666667
Cost for iteration 400: 298.4930574853925
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy for iteration 400: 0.88375
Cost for iteration 500: 317.6917898051611
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy for iteration 500: 0.8912166666666667
Cost for iteration 600: 330.76889958870976
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy for iteration 600: 0.8975
Cost for iteration 700: 340.1318672904291
[3 0 4 ... 5 6 8] [5 0 4 ... 5 6 8]
Accuracy for iteration 700: 0.9014666666666666
Cost for iteration 800: 347.1753057754654
[3 0 4 ... 5 6 8] [

KeyboardInterrupt: 

In [74]:
def predict(parameters, X):
    A2, cache = forward_propagation(X, parameters)
    return A2

def accuracy(y_hat, y):
    return np.sum(y_hat == y) / y.shape[1]

In [75]:
y_hat = predict(parameters, X_test)

In [76]:
print(accuracy(y_hat, y_test))

IndexError: tuple index out of range

In [55]:
df = pd.DataFrame(y_hat)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999
0,0.0004856827,0.0003074045,2e-06,0.9996222,0.0002503206,7.464623e-08,3.661559e-06,5.930513e-08,0.0002033052,1.67819e-05,...,6.645386e-08,6.535278e-06,0.0001170034,0.9968092,5.721046e-09,1.155522e-08,2.443515e-06,1.561463e-07,0.000174,3.090937e-05
1,2.016717e-08,1.260299e-06,0.977771,5.995946e-13,3.081777e-07,0.9881065,1.447871e-06,0.0009422509,2.795576e-07,1.332611e-09,...,9.415699e-08,1.831838e-08,1.748437e-10,1.39375e-14,0.9930713,1.612256e-07,6.584914e-07,1.775654e-07,2.3e-05,1.58943e-13
2,0.002034124,0.9858677,0.01003,4.8935e-05,0.001887505,0.001940448,3.688496e-07,0.0001723185,0.0001845765,2.756038e-07,...,0.0002014379,0.0005907356,2.405008e-06,1.558797e-07,0.004018688,0.998921,0.0002796025,8.724582e-06,1.9e-05,6.800232e-05
3,0.006842393,0.009040977,0.003745,3.693621e-05,0.0003457087,0.0025584,6.6698e-05,0.002901665,2.487063e-07,4.074279e-06,...,0.0005400591,0.03301396,9.008343e-06,3.008626e-07,0.0005074407,0.0009428806,0.9993043,3.620368e-05,0.000468,3.761023e-09
4,2.813634e-06,8.693558e-12,6.4e-05,1.457269e-08,0.963784,8.478498e-06,0.9733093,0.008496978,0.00258363,0.02641582,...,5.290313e-08,3.409153e-08,0.1363392,9.018921e-10,2.389315e-07,1.177902e-11,4.089741e-10,0.9827593,0.000624,8.08656e-07
