In [43]:
import numpy as np

In [44]:
from keras.datasets import mnist
from keras.utils import np_utils

In [45]:
from sklearn.metrics import classification_report

In [46]:
# fully connected (dense) layer; standalone class -- it does not subclass a Layer base class
class FCLayer:
    """Fully connected (dense) layer computing ``output = input @ weights + bias``.

    Attributes:
        weights: array of shape (input_size, output_size).
        bias: array of shape (1, output_size).
        input: the input cached by the most recent forward pass.
        output: the output produced by the most recent forward pass.
    """

    def __init__(self, input_size, output_size):
        """Create the layer with randomly initialised parameters.

        Args:
            input_size: number of input neurons.
            output_size: number of output neurons.
        """
        # np.random.rand samples from [0, 1); shifting by 0.5 centres the
        # initial parameters around zero, i.e. in [-0.5, 0.5).
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5
        self.input = None
        self.output = None

    def forward_propagation(self, input_data):
        """Return the layer output for ``input_data`` and cache both.

        Args:
            input_data: array whose last axis has length ``input_size``
                (one row per sample).

        Returns:
            ``np.dot(input_data, weights) + bias``.
        """
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and propagate the error backwards.

        Computes dE/dW and dE/dB for the given ``output_error`` (dE/dY),
        updates the parameters in place and returns dE/dX.

        Args:
            output_error: dE/dY, one row per sample of the cached input.
            learning_rate: step size applied to both parameter updates.

        Returns:
            input_error (dE/dX), with the same shape as the cached input.
        """
        # dE/dX must be computed with the weights *before* they are updated.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dE/dB is the error summed over the sample axis. For a single-row
        # input this equals output_error itself; for a batch of several rows
        # it keeps the (1, output_size) bias shape instead of failing to
        # broadcast in the in-place update below.
        bias_error = np.reshape(
            output_error, (-1, self.bias.shape[1])
        ).sum(axis=0, keepdims=True)

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

In [47]:
class ActivationLayer:
    """Layer that applies an activation callable; it has no trainable parameters.

    Attributes:
        activation: callable applied to the input in the forward pass.
        activation_prime: callable giving the activation's derivative,
            evaluated at the cached input in the backward pass.
        input: the input cached by the most recent forward pass.
        output: the activated value from the most recent forward pass.
    """

    def __init__(self, activation, activation_prime):
        self.input = None
        self.output = None
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Cache ``input_data`` and return ``activation(input_data)``."""
        self.input = input_data
        activated = self.activation(input_data)
        self.output = activated
        return activated

    def backward_propagation(self, output_error, learning_rate):
        """Return dE/dX for the given ``output_error`` (dE/dY).

        ``learning_rate`` is accepted only so the signature matches the other
        layers; it is unused because there is nothing to learn here.
        """
        local_gradient = self.activation_prime(self.input)
        return local_gradient * output_error

In [48]:
import numpy as np

# activation function and its derivative
def tanh(x):
    """Hyperbolic-tangent activation; delegates to ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh evaluated at ``x``: ``1 - tanh(x)**2``."""
    t = np.tanh(x)
    return 1 - t ** 2


In [49]:
# loss function and its derivative
def mse(y_true, y_pred):
    """Mean squared error: the mean of the squared residual ``y_true - y_pred``."""
    residual = y_true - y_pred
    return np.mean(np.power(residual, 2))

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``.

    The normaliser is the number of elements in ``y_true`` (its ``.size``).
    """
    n_elements = y_true.size
    delta = y_pred - y_true
    return 2 * delta / n_elements

In [59]:
class Network:
    """Sequential stack of layers trained by per-sample gradient descent.

    Every layer must provide ``forward_propagation(input)`` and
    ``backward_propagation(output_error, learning_rate)``.

    Attributes:
        layers: the layers, in the order they are applied.
        loss: callable ``loss(y_true, y_pred)``, set via ``use``.
        loss_prime: callable returning the derivative of ``loss`` with
            respect to ``y_pred``, set via ``use``.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative used by ``fit``."""
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Run one sample through every layer in order and return the output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for each sample.

        Args:
            input_data: indexable collection with the sample dimension first.
        """
        return [self._forward(input_data[i]) for i in range(len(input_data))]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train the network, updating the parameters after every sample.

        Prints the average loss over the training samples after each epoch.

        Args:
            x_train: indexable collection of inputs, sample dimension first.
            y_train: indexable collection of targets, same length as x_train.
            epochs: number of passes over the training set.
            learning_rate: step size handed to each layer's backward pass.

        Raises:
            RuntimeError: if ``use`` has not been called to set the loss.
            ValueError: if the training set is empty or the number of
                targets differs from the number of inputs.
        """
        # Validate up front so that bad input fails with a clear message
        # before any parameter has been touched.
        if self.loss is None or self.loss_prime is None:
            raise RuntimeError(
                'no loss configured: call use(loss, loss_prime) before fit()'
            )

        # sample dimension first
        samples = len(x_train)
        if samples == 0:
            raise ValueError('x_train is empty: nothing to train on')
        if len(y_train) != samples:
            raise ValueError(
                'x_train has %d samples but y_train has %d'
                % (samples, len(y_train))
            )

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                output = self._forward(x_train[j])

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation: walk the layers in reverse order,
                # each one consuming dE/dY and returning dE/dX
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

In [60]:
# Load MNIST through keras.datasets.mnist. load_data() yields a (train, test)
# pair, each unpacked here into inputs (x_*) and targets (y_*).
# NOTE(review): presumably the first call downloads the data ("from server")
# and later calls reuse a local copy -- confirm against the Keras docs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [61]:
# Flatten each image into a (1, 28*28) row vector, convert to float32 and
# rescale by 1/255.
x_train = x_train.reshape(x_train.shape[0], 1, 28*28).astype('float32') / 255

In [62]:
# One-hot encode the digit labels. num_classes is pinned to 10 so the target
# width always matches the 10-unit output layer instead of being inferred
# from the largest label that happens to be present in the data.
y_train = np_utils.to_categorical(y_train, num_classes=10)

In [63]:
# Same preprocessing as the training inputs: flatten each image into a
# (1, 28*28) row vector, convert to float32 and rescale by 1/255.
x_test = x_test.reshape(x_test.shape[0], 1, 28*28).astype('float32') / 255
# One-hot encode the labels; num_classes is pinned to 10 so the encoding width
# matches the 10-unit output layer even if some digit is absent from the data.
y_test = np_utils.to_categorical(y_test, num_classes=10)


In [64]:
# Network
# Seed NumPy's global RNG before building the layers: FCLayer draws its
# initial weights and biases from np.random.rand, so without a fixed seed
# every Restart & Run All starts from different parameters and the reported
# errors/metrics are not reproducible.
np.random.seed(42)

net = Network()
net.add(FCLayer(28*28, 100))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 50))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(50, 10))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

In [65]:

# Train on the full training set: the fit call that follows is given all of x_train.
# Network.fit updates the weights after every single sample (mini-batch gradient
# descent is not implemented), so each epoch over the 60000 samples is slow.

net.use(mse, mse_prime)
print('Training the model')


Training the model


In [66]:
# Run 10 passes over the training data with a step size of 0.1; fit prints
# the average loss over the training samples after each epoch.
net.fit(x_train, y_train, epochs=10, learning_rate=0.1)

epoch 1/10   error=0.039714
epoch 2/10   error=0.019463
epoch 3/10   error=0.015540
epoch 4/10   error=0.013287
epoch 5/10   error=0.011739
epoch 6/10   error=0.010673
epoch 7/10   error=0.009794
epoch 8/10   error=0.009084
epoch 9/10   error=0.008481
epoch 10/10   error=0.007998


In [67]:
# Predicted digit per training image: index of the largest network output,
# flattened to a 1-D array of labels.
y_train_pred = np.argmax(net.predict(x_train), axis=-1).reshape(-1,)

In [68]:
# Decode the one-hot targets into digit labels under a NEW name. Overwriting
# y_train would make this cell non-idempotent (a second run would argmax the
# already-decoded labels) and would destroy the one-hot targets that
# net.fit needs if training is re-run.
y_train_labels = np.argmax(y_train, axis=-1)
print('Training Data Performance :\n')
print(classification_report(y_train_labels, y_train_pred))

Training Data Performance :

              precision    recall  f1-score   support

           0       0.97      0.99      0.98      5923
           1       0.99      0.98      0.99      6742
           2       0.98      0.97      0.97      5958
           3       0.97      0.95      0.96      6131
           4       0.97      0.98      0.97      5842
           5       0.94      0.97      0.96      5421
           6       0.98      0.98      0.98      5918
           7       0.98      0.97      0.98      6265
           8       0.96      0.95      0.96      5851
           9       0.95      0.95      0.95      5949

    accuracy                           0.97     60000
   macro avg       0.97      0.97      0.97     60000
weighted avg       0.97      0.97      0.97     60000



In [69]:
# Predicted digit per test image: index of the largest network output,
# flattened to a 1-D array of labels.
y_test_pred = np.argmax(net.predict(x_test), axis=-1).reshape(-1,)

In [70]:
# Decode the one-hot targets into digit labels under a NEW name. Overwriting
# y_test would make this cell non-idempotent: a second run would argmax the
# already-decoded labels and feed a single scalar to classification_report.
y_test_labels = np.argmax(y_test, axis=-1)
print('Test Data Performance :\n')
print(classification_report(y_test_labels, y_test_pred))

Test Data Performance :

              precision    recall  f1-score   support

           0       0.96      0.99      0.97       980
           1       0.99      0.99      0.99      1135
           2       0.97      0.94      0.96      1032
           3       0.93      0.95      0.94      1010
           4       0.95      0.96      0.96       982
           5       0.93      0.96      0.94       892
           6       0.96      0.97      0.97       958
           7       0.97      0.94      0.96      1028
           8       0.95      0.93      0.94       974
           9       0.94      0.92      0.93      1009

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

