In [59]:
import matplotlib.pyplot as plt
import numpy as np
from keras.datasets import mnist

# Load data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert data to float
x_train = x_train.astype(float)
y_train = y_train.astype(float)
x_test = x_test.astype(float)
y_test = y_test.astype(float)

# Flatten the input images
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Standardize the input data
x_train = (x_train - np.mean(x_train)) / np.std(x_train)
x_test = (x_test - np.mean(x_test)) / np.std(x_test)

def init_params(number_of_layers,size_of_layers, output_layer):
    weights = []

    for i in range(number_of_layers):
        if i == 0:
            w = int(size_of_layers[i])
            W1 = np.random.randn(w, x_train.shape[1])
            weights.append(W1)
        elif i == (number_of_layers - 1):
            output = np.random.randn(output_layer, len(weights[i - 1]))
            weights.append(output)
            break
        else:
            w = int(size_of_layers[i])
            W1 = np.random.randn(w, len(weights[i - 1]))
            weights.append(W1)
    return weights

def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def normalization(x):
    return x / np.sum(x)

def MSE(actual, predicted):
    n = len(actual)
    mse = np.sum((actual - predicted) ** 2) / n
    return mse

def forward_prop(weights, X):
    Z = []
    A = []
    Z.append(weights[0].dot(X))
    A.append(sigmoid(Z[0]))
    for i in range(1, len(weights)):
        Z.append(weights[i].dot(A[i - 1]))
        A.append(sigmoid(Z[i]))
    A[-1] = normalization(A[-1])
    return Z, A

def sigmoid_deriv(z):
    return sigmoid(z) * (1 - sigmoid(z))

def backward_prop(Z, A, weights, alpha, X, train_label):
    dweights = [0] * len(weights)
    dZ = [0] * len(weights)

    # Output layer
    error = train_label - A[-1]
    dZ[-1] = error * sigmoid_deriv(Z[-1]) #dl/da * da/dz
    dweights[-1] = alpha * np.outer(dZ[-1], A[-2])

    # All hidden layers except the first one
    for i in range(len(weights) - 2, 0, -1):
        dZ[i] = weights[i + 1].T.dot(dZ[i + 1]) * sigmoid_deriv(Z[i])
        dweights[i] = alpha * np.outer(dZ[i], A[i - 1])

    # The first hidden layer
    dZ[0] = weights[1].T.dot(dZ[1]) * sigmoid_deriv(Z[0])
    dweights[0] = alpha * np.outer(dZ[0], X)

    return dweights

def update_params(weights, dweights, alpha):
    for i in range(len(weights)):
        weights[i] = weights[i] + dweights[i]
    return weights

def make_predictions(X, weights):
    _, A = forward_prop(weights, X)
    return A[-1]

def get_accuracy(actual, predicted):
    correct = np.sum(np.argmax(actual , axis=1) == np.argmax(predicted , axis= 1))
    accuracy = correct / len(actual) * 100.0
    return accuracy

def training_fit(X, Y, hidden_layers,size_of_layers):
    
    weights = init_params(hidden_layers,size_of_layers, 10)

    #actual = np.argmax(Y, axis=1)
    alpha, iterations = 0.01,10
    for j in range(iterations):
        print("Iteration:", j)
        predictions = []
        error = 0
        for i in range(len(X)):
            Z, A = forward_prop(weights, X[i])
            error += MSE(A[-1], Y[i])
            dweights = backward_prop(Z, A, weights, alpha, X[i], Y[i])
            weights = update_params(weights, dweights, alpha)
            predictions.append(make_predictions(X[i], weights))
            
        print("Error:", error / len(X))
        print("Training Accuracy:", get_accuracy(Y, predictions), "%")

    return weights

# One-hot encode the target labels
y_encoded_train = np.zeros((y_train.shape[0], 10))
y_encoded_train[np.arange(len(y_train)), y_train.astype(int)] = 1


In [60]:
def test(weights):
    y_encoded_test = np.zeros((y_test.shape[0], 10))
    y_encoded_test[np.arange(len(y_test)), y_test.astype(int)] = 1
    predictions = []
    for i in range(len(x_test)):
        predictions.append(make_predictions(x_test[i], weights))
    print("Test Accuracy:", get_accuracy(y_encoded_test, predictions), "%")
    

In [61]:
weights = training_fit(x_train, y_encoded_train, 2, [20  , 10])

Iteration: 0
Error: 0.07319728296523174
Training Accuracy: 39.955 %
Iteration: 1
Error: 0.044776628230403376
Training Accuracy: 72.74166666666667 %
Iteration: 2
Error: 0.03604315209072853
Training Accuracy: 77.67833333333334 %
Iteration: 3
Error: 0.03150128995555681
Training Accuracy: 80.27333333333333 %
Iteration: 4
Error: 0.028921398279062677
Training Accuracy: 81.62666666666667 %
Iteration: 5
Error: 0.027261392656562952
Training Accuracy: 82.595 %
Iteration: 6
Error: 0.026094203922115138
Training Accuracy: 83.21499999999999 %
Iteration: 7
Error: 0.02522565185647608
Training Accuracy: 83.69333333333333 %
Iteration: 8
Error: 0.024569793373119608
Training Accuracy: 84.04833333333333 %
Iteration: 9
Error: 0.024031312554807886
Training Accuracy: 84.355 %


In [62]:
test(weights)

Test Accuracy: 84.44 %


In [63]:
weights = training_fit(x_train, y_encoded_train, 3, [20 , 10 , 10])

Iteration: 0
Error: 0.07880019212464948
Training Accuracy: 40.038333333333334 %
Iteration: 1
Error: 0.059745422203045635
Training Accuracy: 63.66333333333334 %
Iteration: 2
Error: 0.04622097980165848
Training Accuracy: 73.63166666666666 %
Iteration: 3
Error: 0.0373576229839677
Training Accuracy: 78.86333333333333 %
Iteration: 4
Error: 0.03180508484888139
Training Accuracy: 81.55666666666667 %
Iteration: 5
Error: 0.028244974664994735
Training Accuracy: 83.28333333333333 %
Iteration: 6
Error: 0.025833135228933095
Training Accuracy: 84.46833333333333 %
Iteration: 7
Error: 0.0241282160796598
Training Accuracy: 85.29666666666667 %
Iteration: 8
Error: 0.02288426392353467
Training Accuracy: 85.92666666666666 %
Iteration: 9
Error: 0.02192455588190358
Training Accuracy: 86.38666666666667 %


In [64]:
test(weights)

Test Accuracy: 85.75 %


In [65]:
weights = training_fit(x_train, y_encoded_train, 3, [10 , 20 , 10])

Iteration: 0
Error: 0.0772424236909884
Training Accuracy: 40.733333333333334 %
Iteration: 1
Error: 0.0530564220394624
Training Accuracy: 68.11666666666667 %
Iteration: 2
Error: 0.039600970622651006
Training Accuracy: 77.45 %
Iteration: 3
Error: 0.032397544424319934
Training Accuracy: 81.42166666666667 %
Iteration: 4
Error: 0.028184195690423765
Training Accuracy: 83.52833333333334 %
Iteration: 5
Error: 0.02562394996615574
Training Accuracy: 84.69166666666666 %
Iteration: 6
Error: 0.02410553095713617
Training Accuracy: 85.32 %
Iteration: 7
Error: 0.02328568457475208
Training Accuracy: 85.61833333333333 %
Iteration: 8
Error: 0.022646024815252352
Training Accuracy: 85.76833333333333 %
Iteration: 9
Error: 0.022355650856213723
Training Accuracy: 85.81333333333333 %


In [66]:
test(weights)

Test Accuracy: 85.74000000000001 %
