In [1]:
# Base class
class Layer:
    """Abstract interface shared by every layer that a network can chain.

    Subclasses override both propagation methods; the base versions only raise.
    """

    def __init__(self):
        # Placeholders for the most recent forward-pass input and output.
        self.input = self.output = None

    def forward_propagation(self, input):
        """Return the layer output Y for the given input X (must be overridden)."""
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Return dE/dX given output_error = dE/dY, updating parameters if the layer has any
        (must be overridden)."""
        raise NotImplementedError()

In [2]:
import numpy as np

# inherit from base class Layer
class FCLayer(Layer):
    """Fully connected layer computing ``Y = X . W + b``.

    Attributes:
        weights: array of shape (input_size, output_size).
        bias: array of shape (1, output_size).
    """

    def __init__(self, input_size, output_size):
        """Create the layer with random parameters.

        Args:
            input_size: number of input neurons.
            output_size: number of output neurons.
        """
        # Run the base initializer so `input` and `output` exist (as None) before the
        # first forward pass.
        super().__init__()
        # np.random.rand draws from [0, 1); subtracting 0.5 centres the values on zero.
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Store ``input_data`` and return ``input_data . weights + bias``."""
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient step and return the error with respect to the input.

        Args:
            output_error: dE/dY for the output of the last forward pass.
            learning_rate: step size applied to both weights and bias.

        Returns:
            input_error = dE/dX, computed with the weights as they were before the update.
        """
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dE/dB is the error summed over rows. For a single-row error this is the error
        # itself (the original behaviour); for a batch of rows it collapses to the
        # (1, output_size) shape of the bias so the in-place update below is valid.
        bias_error = np.sum(np.atleast_2d(output_error), axis=0, keepdims=True)

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

In [3]:


# inherit from base class Layer
class ActivationLayer(Layer):
    """Layer that applies a caller-supplied activation function; it has no learnable parameters.

    Attributes:
        activation: callable applied to the input in the forward pass.
        activation_prime: callable giving the derivative of ``activation``, evaluated at
            the stored input in the backward pass.
    """

    def __init__(self, activation, activation_prime):
        # Run the base initializer so `input` and `output` exist (as None) before the
        # first forward pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Store ``input_data`` and return ``activation(input_data)``."""
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return input_error = dE/dX for a given output_error = dE/dY.

        ``learning_rate`` is accepted for interface compatibility but unused, because
        there is nothing to update.
        """
        return self.activation_prime(self.input) * output_error

In [4]:
import numpy as np

# activation function and its derivative
def tanh(x):
    """Hyperbolic tangent activation, delegated to ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh, i.e. ``1 - tanh(x)**2``."""
    squashed = np.tanh(x)
    return 1 - squashed ** 2

In [5]:
# loss function and its derivative
def mse(y_true, y_pred):
    """Mean squared error: the mean of ``(y_true - y_pred)**2`` over all elements."""
    squared_diff = np.power(y_true - y_pred, 2)
    return np.mean(squared_diff)

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``.

    ``y_true`` must expose ``.size`` (e.g. a NumPy array); it is used as the divisor.
    """
    residual = y_pred - y_true
    return 2 * residual / y_true.size

In [6]:
class Network:
    """Sequential container that chains layers and trains them one sample at a time.

    Layers are any objects exposing ``forward_propagation(input)`` and
    ``backward_propagation(output_error, learning_rate)``.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative; both are called as ``f(y_true, y_pred)``."""
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Pass one sample through every layer in order and return the final output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for each sample in ``input_data``.

        ``input_data`` is indexed along its first dimension (sample dimension first).
        """
        return [self._forward(input_data[i]) for i in range(len(input_data))]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train the network and print the average loss after every epoch.

        Args:
            x_train: training inputs, sample dimension first.
            y_train: targets, indexed in parallel with ``x_train``.
            epochs: number of passes over the training data.
            learning_rate: step size forwarded to every layer's backward pass.

        Raises:
            ValueError: if ``x_train`` contains no samples (the per-epoch average
                would otherwise divide by zero).
        """
        # sample dimension first
        samples = len(x_train)
        if samples == 0:
            raise ValueError("x_train is empty; there is nothing to train on")

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = self._forward(x_train[j])

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation: walk the layers last-to-first, each one
                # turning dE/dY into dE/dX for the layer before it
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

In [7]:
from keras.datasets import mnist
from keras import utils

ModuleNotFoundError: No module named 'keras'

In [36]:
# Load the train/test splits through keras.datasets.mnist (imported in the cell above).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [37]:
# Flatten every image into a (1, 784) row vector. -1 lets NumPy infer the sample count
# instead of hard-coding 60000, so the cell also works on a subset of the data.
x_train = x_train.reshape(-1, 1, 28*28)

In [38]:
# Flatten every image into a (1, 784) row vector. -1 lets NumPy infer the sample count
# instead of hard-coding 10000, so the cell also works on a subset of the data.
x_test = x_test.reshape(-1, 1, 28*28)

In [39]:
# Divide by 255 (presumably mapping 0-255 pixel intensities into [0, 1] -- confirm).
# NOTE: this overwrites x_train, so re-running the cell divides by 255 again.
x_train = x_train/255

In [40]:
# Apply the same 1/255 scaling to the test inputs as to the training inputs.
# NOTE: this overwrites x_test, so re-running the cell divides by 255 again.
x_test = x_test/255

In [41]:
# Convert the training labels with keras' to_categorical so they can be compared with the
# 10-unit network output. y_test is left unconverted: eval() compares the argmax of each
# prediction against its entries directly.
y_train = utils.to_categorical(y_train)

In [42]:
# model_1: one fully connected layer (784 inputs -> 10 outputs) followed by tanh.
model_1 = Network()
model_1.add(FCLayer(28*28, 10))
model_1.add(ActivationLayer(tanh, tanh_prime))

In [43]:
# Use mean squared error (and its derivative) as the loss for model_1.
model_1.use(mse,mse_prime)

In [44]:
# Train model_1 for 10 epochs with learning rate 0.0003.
model_1.fit(x_train,y_train,10, 0.0003)

epoch 1/10   error=0.666038
epoch 2/10   error=0.542137
epoch 3/10   error=0.479088
epoch 4/10   error=0.431169
epoch 5/10   error=0.391267
epoch 6/10   error=0.358236
epoch 7/10   error=0.330878
epoch 8/10   error=0.307824
epoch 9/10   error=0.287984
epoch 10/10   error=0.270645


In [45]:
def eval(model, x, y):
    """Return the fraction of samples in ``x`` whose predicted class equals the label in ``y``.

    The predicted class is the argmax of the first row of each network output, so every
    output is expected to be indexable as ``output[0]``. ``y`` holds class indices.

    NOTE: this function shadows Python's built-in ``eval`` for the rest of the notebook.
    """
    outputs = model.predict(x)
    hits = sum(
        1
        for idx in range(len(y))
        if np.argmax(outputs[idx][0]) == y[idx]
    )
    return hits / len(y)

In [46]:
# Fraction of test samples that model_1 classifies correctly.
eval(model_1, x_test, y_test)

0.5393

In [47]:
# model_2: two fully connected layers (784 -> 20 -> 10), each followed by tanh.
model_2 = Network()
model_2.add(FCLayer(28*28, 20))
model_2.add(ActivationLayer(tanh, tanh_prime))
model_2.add(FCLayer(20, 10))
model_2.add(ActivationLayer(tanh, tanh_prime))

In [48]:
# Use mean squared error (and its derivative) as the loss for model_2.
model_2.use(mse,mse_prime)

In [49]:
# Train model_2 for 10 epochs with learning rate 0.0003 (same settings as model_1).
model_2.fit(x_train,y_train,10, 0.0003)

epoch 1/10   error=0.264866
epoch 2/10   error=0.112547
epoch 3/10   error=0.088710
epoch 4/10   error=0.081281
epoch 5/10   error=0.077621
epoch 6/10   error=0.075291
epoch 7/10   error=0.073546
epoch 8/10   error=0.072098
epoch 9/10   error=0.070822
epoch 10/10   error=0.069663


In [50]:
# Fraction of test samples that model_2 classifies correctly.
eval(model_2, x_test, y_test)

0.4808

In [51]:
# model_3: three fully connected layers (784 -> 20 -> 15 -> 10), each followed by tanh.
model_3 = Network()
model_3.add(FCLayer(28*28, 20))
model_3.add(ActivationLayer(tanh, tanh_prime))
model_3.add(FCLayer(20, 15))
model_3.add(ActivationLayer(tanh, tanh_prime))
model_3.add(FCLayer(15, 10))
model_3.add(ActivationLayer(tanh, tanh_prime))

In [52]:
# Use mean squared error (and its derivative) as the loss for model_3.
model_3.use(mse,mse_prime)

In [53]:
# Train model_3 for 10 epochs with learning rate 0.0003 (same settings as model_1).
model_3.fit(x_train,y_train,10, 0.0003)

epoch 1/10   error=0.175813
epoch 2/10   error=0.092494
epoch 3/10   error=0.083693
epoch 4/10   error=0.079647
epoch 5/10   error=0.076697
epoch 6/10   error=0.074275
epoch 7/10   error=0.072222
epoch 8/10   error=0.070457
epoch 9/10   error=0.068918
epoch 10/10   error=0.067552


In [54]:
# Fraction of test samples that model_3 classifies correctly.
eval(model_3, x_test, y_test)

0.5592

# Assignment 5 Work

In [55]:
# Baseline for the experiments below: single layer 784 -> 10 with tanh, MSE loss,
# 10 epochs, learning rate 0.0003 (the same configuration as model_1).
model_4 = Network()
model_4.add(FCLayer(28*28, 10))
model_4.add(ActivationLayer(tanh, tanh_prime))
model_4.use(mse,mse_prime)
model_4.fit(x_train,y_train,10, 0.0003)
eval(model_4, x_test, y_test)

epoch 1/10   error=0.663749
epoch 2/10   error=0.547334
epoch 3/10   error=0.481176
epoch 4/10   error=0.430406
epoch 5/10   error=0.388244
epoch 6/10   error=0.354013
epoch 7/10   error=0.326046
epoch 8/10   error=0.302461
epoch 9/10   error=0.282209
epoch 10/10   error=0.264593


0.533

In [55]:
# Hyperparameter ranges explored below: learning_rate from 0.0003 to 0.003, epochs from 5 to 20

In [56]:
# Same single-layer architecture; 5 epochs, learning rate 0.003.
model_5 = Network()
model_5.add(FCLayer(28*28, 10))
model_5.add(ActivationLayer(tanh, tanh_prime))
model_5.use(mse,mse_prime)
model_5.fit(x_train,y_train,5, 0.003)
eval(model_5, x_test, y_test)

epoch 1/5   error=0.432023
epoch 2/5   error=0.218201
epoch 3/5   error=0.144634
epoch 4/5   error=0.108433
epoch 5/5   error=0.087931


0.7322

In [63]:
# Same single-layer architecture; 10 epochs, learning rate 0.003.
model_6 = Network()
model_6.add(FCLayer(28*28, 10))
model_6.add(ActivationLayer(tanh, tanh_prime))
model_6.use(mse,mse_prime)
model_6.fit(x_train,y_train,10, 0.003)
eval(model_6, x_test, y_test)

epoch 1/10   error=0.417652
epoch 2/10   error=0.209368
epoch 3/10   error=0.139392
epoch 4/10   error=0.105417
epoch 5/10   error=0.086211
epoch 6/10   error=0.074387
epoch 7/10   error=0.066681
epoch 8/10   error=0.061434
epoch 9/10   error=0.057726
epoch 10/10   error=0.055021


0.8096

In [62]:
# Same single-layer architecture; 20 epochs, learning rate 0.003.
model_7 = Network()
model_7.add(FCLayer(28*28, 10))
model_7.add(ActivationLayer(tanh, tanh_prime))
model_7.use(mse,mse_prime)
model_7.fit(x_train,y_train,20, 0.003)
eval(model_7, x_test, y_test)

epoch 1/20   error=0.404282
epoch 2/20   error=0.205108
epoch 3/20   error=0.140616
epoch 4/20   error=0.107719
epoch 5/20   error=0.088320
epoch 6/20   error=0.076117
epoch 7/20   error=0.068075
epoch 8/20   error=0.062556
epoch 9/20   error=0.058631
epoch 10/20   error=0.055751
epoch 11/20   error=0.053579
epoch 12/20   error=0.051900
epoch 13/20   error=0.050575
epoch 14/20   error=0.049509
epoch 15/20   error=0.048638
epoch 16/20   error=0.047915
epoch 17/20   error=0.047307
epoch 18/20   error=0.046791
epoch 19/20   error=0.046347
epoch 20/20   error=0.045963


0.8234

In [64]:
# Same single-layer architecture; 20 epochs, learning rate 0.002.
model_8 = Network()
model_8.add(FCLayer(28*28, 10))
model_8.add(ActivationLayer(tanh, tanh_prime))
model_8.use(mse,mse_prime)
model_8.fit(x_train,y_train,20, 0.002)
eval(model_8, x_test, y_test)

epoch 1/20   error=0.469549
epoch 2/20   error=0.276007
epoch 3/20   error=0.195512
epoch 4/20   error=0.150295
epoch 5/20   error=0.121597
epoch 6/20   error=0.102458
epoch 7/20   error=0.089214
epoch 8/20   error=0.079749
epoch 9/20   error=0.072797
epoch 10/20   error=0.067571
epoch 11/20   error=0.063562
epoch 12/20   error=0.060432
epoch 13/20   error=0.057947
epoch 14/20   error=0.055946
epoch 15/20   error=0.054312
epoch 16/20   error=0.052962
epoch 17/20   error=0.051833
epoch 18/20   error=0.050879
epoch 19/20   error=0.050065
epoch 20/20   error=0.049364


0.8174

In [65]:
# Same single-layer architecture; 15 epochs, learning rate 0.002.
model_9 = Network()
model_9.add(FCLayer(28*28, 10))
model_9.add(ActivationLayer(tanh, tanh_prime))
model_9.use(mse,mse_prime)
model_9.fit(x_train,y_train,15, 0.002)
eval(model_9, x_test, y_test)

epoch 1/15   error=0.453458
epoch 2/15   error=0.263784
epoch 3/15   error=0.187924
epoch 4/15   error=0.145686
epoch 5/15   error=0.119296
epoch 6/15   error=0.101479
epoch 7/15   error=0.088870
epoch 8/15   error=0.079686
epoch 9/15   error=0.072851
epoch 10/15   error=0.067667
epoch 11/15   error=0.063667
epoch 12/15   error=0.060530
epoch 13/15   error=0.058032
epoch 14/15   error=0.056014
epoch 15/15   error=0.054364


0.8118

In [71]:
# Same single-layer architecture; 20 epochs, learning rate 0.001.
model_10 = Network()
model_10.add(FCLayer(28*28, 10))
model_10.add(ActivationLayer(tanh, tanh_prime))
model_10.use(mse,mse_prime)
model_10.fit(x_train,y_train,20, 0.001)
eval(model_10, x_test, y_test)

epoch 1/20   error=0.563057
epoch 2/20   error=0.379469
epoch 3/20   error=0.294327
epoch 4/20   error=0.240519
epoch 5/20   error=0.203598
epoch 6/20   error=0.176575
epoch 7/20   error=0.155807
epoch 8/20   error=0.139318
epoch 9/20   error=0.125943
epoch 10/20   error=0.114940
epoch 11/20   error=0.105795
epoch 12/20   error=0.098136
epoch 13/20   error=0.091676
epoch 14/20   error=0.086195
epoch 15/20   error=0.081520
epoch 16/20   error=0.077510
epoch 17/20   error=0.074054
epoch 18/20   error=0.071060
epoch 19/20   error=0.068454
epoch 20/20   error=0.066176


0.7821

In [74]:
# Same single-layer architecture; 15 epochs, learning rate 0.003.
model_11 = Network()
model_11.add(FCLayer(28*28, 10))
model_11.add(ActivationLayer(tanh, tanh_prime))
model_11.use(mse,mse_prime)
model_11.fit(x_train,y_train,15, 0.003)
eval(model_11, x_test, y_test)

epoch 1/15   error=0.433091
epoch 2/15   error=0.217418
epoch 3/15   error=0.144607
epoch 4/15   error=0.108563
epoch 5/15   error=0.088049
epoch 6/15   error=0.075445
epoch 7/15   error=0.067267
epoch 8/15   error=0.061724
epoch 9/15   error=0.057827
epoch 10/15   error=0.055000
epoch 11/15   error=0.052890
epoch 12/15   error=0.051278
epoch 13/15   error=0.050018
epoch 14/15   error=0.049013
epoch 15/15   error=0.048198


0.8201

In [76]:
# Same single-layer architecture; 10 epochs, learning rate 0.003 (a repeat of the
# model_6 configuration with a fresh random initialisation).
model_12 = Network()
model_12.add(FCLayer(28*28, 10))
model_12.add(ActivationLayer(tanh, tanh_prime))
model_12.use(mse,mse_prime)
model_12.fit(x_train,y_train,10, 0.003)
eval(model_12, x_test, y_test)

epoch 1/10   error=0.417471
epoch 2/10   error=0.211008
epoch 3/10   error=0.142466
epoch 4/10   error=0.108739
epoch 5/10   error=0.089119
epoch 6/10   error=0.076716
epoch 7/10   error=0.068465
epoch 8/10   error=0.062767
epoch 9/10   error=0.058709
epoch 10/10   error=0.055738


0.802

# Custom grid search to find the best combination

In [81]:
def bold_text(text):
    """Wrap ``text`` in the ANSI escape codes that switch bold on and then reset formatting."""
    bold_on, reset = "\033[1m", "\033[0m"
    return bold_on + text + reset

In [87]:
# Try every (learning rate, epoch count) pair on the single-layer 784 -> 10 tanh model
# and remember the pair with the highest accuracy.
# NOTE(review): the winner is picked using accuracy on x_test/y_test, so the reported
# best accuracy is measured on the same data used for selection -- confirm this is
# intended, or hold out a separate validation split.
learning_rates = [0.003, 0.001, 0.002, 0.0005, 0.0004, 0.0003]
epochs = [5, 10, 15, 20]
# Best accuracy seen so far and the hyperparameters that produced it.
accuracy_1 = 0
lr_1 = None
epochs_1 = None

for lr in learning_rates:
    for ep in epochs:
        # Build a fresh model for each combination so no trained weights carry over.
        model = Network()
        model.add(FCLayer(28*28, 10))
        model.add(ActivationLayer(tanh, tanh_prime))
        model.use(mse, mse_prime)
        model.fit(x_train, y_train, ep, lr)
        accuracy = eval(model, x_test, y_test)
        print(bold_text("Model with Learning Rate: {} and Epochs: {} has Accuracy: {}  " .format(lr, ep, accuracy)))
        # Strict '>' keeps the earliest combination when accuracies tie.
        if accuracy > accuracy_1:
            accuracy_1 = accuracy
            lr_1 = lr
            epochs_1 = ep

print(bold_text("Model with Learning Rate: {}, Epochs: {} has best Accuracy: {}".format(lr_1, epochs_1, accuracy_1)))

epoch 1/5   error=0.415367
epoch 2/5   error=0.207765
epoch 3/5   error=0.140478
epoch 4/5   error=0.106692
epoch 5/5   error=0.087154
[1mModel with Learning Rate: 0.003 and Epochs: 5 has Accuracy: 0.7405  [0m
epoch 1/10   error=0.413581
epoch 2/10   error=0.208386
epoch 3/10   error=0.140258
epoch 4/10   error=0.106257
epoch 5/10   error=0.086652
epoch 6/10   error=0.074544
epoch 7/10   error=0.066683
epoch 8/10   error=0.061357
epoch 9/10   error=0.057612
epoch 10/10   error=0.054891
[1mModel with Learning Rate: 0.003 and Epochs: 10 has Accuracy: 0.8081  [0m
epoch 1/15   error=0.414798
epoch 2/15   error=0.207773
epoch 3/15   error=0.139511
epoch 4/15   error=0.105921
epoch 5/15   error=0.086618
epoch 6/15   error=0.074646
epoch 7/15   error=0.066838
epoch 8/15   error=0.061528
epoch 9/15   error=0.057781
epoch 10/15   error=0.055049
epoch 11/15   error=0.053000
epoch 12/15   error=0.051424
epoch 13/15   error=0.050183
epoch 14/15   error=0.049187
epoch 15/15   error=0.048374
[1

# Conclusion

I tried various combinations of learning rate and number of epochs on the model. Across all of them, I observed that a higher number of epochs performs better than fewer epochs, and the same applies to the learning rate.
The model with learning rate 0.003 and 20 epochs gave the highest accuracy of ~0.83.