In [3]:
import numpy as np
import pandas as pd
from keras.datasets import mnist
np.random.seed(42)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [4]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [4]:
X_train = X_train.reshape((X_train.shape[0], 28*28)).astype('float32').T
X_test = X_test.reshape((X_test.shape[0], 28*28)).astype('float32').T

In [5]:
X_train = X_train / 255
X_test = X_test / 255

In [6]:
X_train.shape

(784, 60000)

In [5]:
print(y_train.shape)

(60000,)


In [7]:
def label_encoder(y):
    zeros = np.zeros((y.shape[0], np.max(y)+1))
    for i, num in enumerate(y):
        zeros[i][np.squeeze(num)] = 1
    return zeros.T

In [8]:
def initialize_parameters(n_x, n_h=10, n_y=10): # (number of inputs, number of units in hidden, number of outputs)
    # Need 2 sets of parameters for 2-layer network
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    parameters = {
        'W1': W1,
        'b1': b1,
        'W2': W2,
        'b2': b2
    }
    return parameters

In [9]:
def relu(Z):
    return np.maximum(0,Z)

In [10]:
def sigmoid(Z):
    sigmoid = 1 / (1 + np.exp(-Z))
    return sigmoid

In [11]:
def softmax(Z):
    softmax = np.exp(Z) / sum(np.exp(Z))
    return softmax

In [12]:
def forward_propagation(X, params):
    W1, b1, W2, b2 = params.values()

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = softmax(Z2)
    cache = {
        'Z1': Z1,
        'A1': A1,
        'Z2': Z2,
        'A2': A2
    }
    return A2, cache

In [33]:
params = initialize_parameters(X_train.shape[0])
A2, cache = forward_propagation(X_train, params)
A2.shape

(10, 60000)

### Back Propagation

In [14]:
def relu_deriv(Z):
    return Z > 0

In [15]:
def back_propagation(params, cache, X, y):
    W1, b1, W2, b2 = params.values()
    Z1, A1, Z2, A2 = cache.values()
    
    m = X.shape[1]
    y = label_encoder(y)
    
    dZ2 = A2 - y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.dot(W2.T, dZ2) * relu_deriv(Z1)
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    grad_params = {
        'dZ2': dZ2,
        'dW2': dW2,
        'db2': db2,
        'dZ1': dZ1,
        'dW1': dW1,
        'db1': db1
    }

    return grad_params

In [16]:
def update_parameters(params, gradients, learning_rate):
    W1 = params['W1'].copy()
    b1 = params['b1'].copy()
    W2 = params['W2'].copy()
    b2 = params['b2'].copy()
    
    params['W1'] = W1 - learning_rate * gradients['dW1']
    params['b1'] = b1 - learning_rate * gradients['db1']
    params['W2'] = W2 - learning_rate * gradients['dW2']
    params['b2'] = b2 - learning_rate * gradients['db2']

### Putting together the model

In [17]:
def get_predictions(A2):
    return np.argmax(A2,0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

In [30]:
def model(X, y, learning_rate = 0.1, num_iterations = 3000):
    parameters = initialize_parameters(X.shape[0])
    for i in range(num_iterations):  
        A2, cache = forward_propagation(X, parameters)
        curr_cost = compute_cost(A2,y)

        gradients = back_propagation(parameters, cache, X, y)
        
        update_parameters(parameters, gradients, learning_rate)
        
        if i % 10 == 0:
            print(f'Accuracy for iteration {i}: {get_accuracy(get_predictions(A2), y)}')
    return parameters, costs

In [31]:
parameters, costs = model(X_train, y_train, num_iterations = 400)

[7 6 6 ... 6 7 6] [5 0 4 ... 5 6 8]
Accuracy for iteration 0: 0.0762
[7 7 1 ... 1 7 1] [5 0 4 ... 5 6 8]
Accuracy for iteration 10: 0.1533
[7 1 1 ... 1 7 1] [5 0 4 ... 5 6 8]
Accuracy for iteration 20: 0.1518


KeyboardInterrupt: 

In [25]:
def predict(parameters, X):
    A2, cache = forward_propagation(X, parameters)
    return A2

def accuracy(y_hat, y):
    return np.sum(y_hat == y) / y.shape[0]

In [26]:
y_hat = predict(parameters, X_test)

In [29]:
print(get_accuracy(get_predictions(y_hat), y_test))

[7 2 1 ... 4 5 6] [7 2 1 ... 4 5 6]
0.885


In [24]:
y_test.shape

(10000,)