In [1]:
import numpy as np

* Base Layer

In [2]:
class Layer:
    """Abstract base for network layers.

    Subclasses override ``fwd_prop`` and ``back_prop``; both raise
    ``NotImplementedError`` here.
    """

    def __init__(self):
        # Placeholders for the most recent input/output of the layer.
        self.input = self.output = None

    def fwd_prop(self, input):
        """Compute the layer output for ``input`` (must be overridden)."""
        raise NotImplementedError()

    def back_prop(self, op_error, lr_rate):
        """Propagate ``op_error`` backwards using ``lr_rate`` (must be overridden)."""
        raise NotImplementedError()

* Defining the Fully Connected Layer and the functions for forward and backward propagation

In [3]:
class FullyConLayer(Layer):
    """Fully connected layer computing ``input . weights + bias``.

    ``weights`` has shape ``(ip_size, op_size)`` and ``bias`` has shape
    ``(1, op_size)``; both are drawn uniformly from ``[-0.5, 0.5)``.
    """

    def __init__(self, ip_size, op_size):
        # Initialise the input/output placeholders declared by the base class.
        super().__init__()
        self.weights = np.random.rand(ip_size, op_size) - 0.5
        self.bias = np.random.rand(1, op_size) - 0.5

    def fwd_prop(self, ip_data):
        """Store ``ip_data`` and return ``ip_data . weights + bias``."""
        self.input = ip_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def back_prop(self, op_error, lr_rate):
        """Apply one gradient-descent step and return the error w.r.t. the input.

        ``op_error`` is the loss gradient with respect to this layer's output.
        The returned value is the gradient with respect to this layer's input,
        computed with the weights as they were before the update.
        """
        input_error = np.dot(op_error, self.weights.T)
        w_error = np.dot(self.input.T, op_error)
        # Sum the bias gradient over rows so the bias keeps its (1, op_size)
        # shape when op_error holds more than one row. For a single row this
        # equals op_error itself.
        b_error = np.sum(np.atleast_2d(op_error), axis=0, keepdims=True)
        self.weights = self.weights - lr_rate * w_error
        self.bias = self.bias - lr_rate * b_error
        return input_error

* Defining the Activation layer and the functions for forward and backward propagation

In [4]:
class ActLayer(Layer):
    """Layer that applies an element-wise activation function.

    ``act_func`` is used in the forward pass and ``act_func_prime`` (its
    derivative) in the backward pass. The layer has no trainable parameters.
    """

    def __init__(self, act_func, act_func_prime):
        self.act_func, self.act_func_prime = act_func, act_func_prime

    def fwd_prop(self, ip_data):
        """Remember ``ip_data`` and return ``act_func(ip_data)``."""
        self.input = ip_data
        activated = self.act_func(self.input)
        self.output = activated
        return activated

    def back_prop(self, op_error, lr_rate):
        """Return ``act_func_prime(input) * op_error``; ``lr_rate`` is unused."""
        local_slope = self.act_func_prime(self.input)
        return local_slope * op_error

* Defining the Activation functions for forward and Backward derivatives

In [5]:
import numpy as np

# activation functions and their derivatives (used during back-propagation)
def tanh(x):
    """Hyperbolic tangent of ``x``, element-wise."""
    activated = np.tanh(x)
    return activated

def tanh_back(x):
    """Derivative of tanh at ``x``: ``1 - tanh(x)**2``."""
    squashed = np.tanh(x)
    return 1 - squashed ** 2

#------------------------------------------------
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_back(x):
    """Derivative of the logistic function at ``x``, element-wise.

    The derivative ``exp(-x) / (1 + exp(-x))**2`` is an even function of
    ``x``, so it is evaluated with ``exp(-|x|)``. That keeps the exponential
    in ``(0, 1]`` and avoids the ``inf / inf = nan`` result the direct formula
    produces for large negative ``x``.
    """
    decay = np.exp(-np.abs(x))
    return decay / (1 + decay) ** 2

#------------------------------------------------

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0."""
    floor = 0
    return np.maximum(x, floor)

def relu_back(x):
    """Derivative of ReLU as an integer array: 1 where ``x >= 0``, otherwise 0."""
    non_negative = np.array(x >= 0)
    return non_negative.astype('int')

* Defining the Cross Entropy loss class and the mean squared error (MSE) loss functions

In [6]:
import numpy as np

class CrossEntro:
    """Softmax followed by cross-entropy loss for integer class labels.

    ``X`` is indexed as a 2-D array of scores (one row per sample) and ``y``
    is used as an array of integer column indices, one per row.
    """

    @staticmethod
    def _softmax(X):
        """Row-wise softmax; the row maximum is subtracted first to avoid overflow."""
        scores = np.asarray(X, dtype=float)
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

    def forward(self, X, y):
        """Return the mean cross-entropy of ``softmax(X)`` against labels ``y``.

        Also stores the sample count in ``self.m`` and the probabilities in
        ``self.p``.
        """
        y = np.asarray(y)
        self.m = y.shape[0]
        self.p = self._softmax(X)
        cross_entropy = -np.log(self.p[np.arange(self.m), y])
        loss = np.sum(cross_entropy) / self.m
        return loss

    def backward(self, X, y):
        """Return the gradient of the mean loss with respect to ``X``.

        The sample count is taken from ``y`` itself, so this works without a
        prior call to ``forward``.
        """
        y = np.asarray(y)
        m = y.shape[0]
        grad = self._softmax(X)
        # d(loss)/d(scores) = softmax - one_hot(y), averaged over the batch.
        grad[np.arange(m), y] -= 1
        grad /= m
        return grad
    
def mse(y, y_pred):
    """Mean squared error between targets ``y`` and predictions ``y_pred``."""
    residual = y - y_pred
    return np.mean(np.power(residual, 2))

def mse_back(y, y_pred):
    """Gradient of ``mse`` with respect to ``y_pred``: ``2 * (y_pred - y) / y.size``."""
    residual = y_pred - y
    return 2 * residual / y.size

* Defining the Sequential class, the pipeline that ties together all the layers and loss functions defined above

In [7]:
class Sequential:
    """Ordered pipeline of layers trained one sample at a time.

    Layers are any objects offering ``fwd_prop(data)`` and
    ``back_prop(error, lr_rate)``.
    """

    def __init__(self):
        self.layers = []
        self.loss = self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the pipeline."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Register the loss function and its derivative used by ``fit``."""
        self.loss, self.loss_prime = loss, loss_prime

    def _forward(self, sample):
        """Feed one sample through every layer in order and return the result."""
        activation = sample
        for layer in self.layers:
            activation = layer.fwd_prop(activation)
        return activation

    def predict(self, ip_data):
        """Return a list with the network output for each item of ``ip_data``."""
        return [self._forward(ip_data[idx]) for idx in range(len(ip_data))]

    def fit(self, x_train, y_train, epochs, lr_rate):
        """Train for ``epochs`` passes, updating the layers after every sample.

        Prints the mean loss over the samples at the end of each epoch.
        """
        n_samples = len(x_train)

        for epoch in range(1, epochs + 1):
            running_loss = 0
            for idx in range(n_samples):
                prediction = self._forward(x_train[idx])
                target = y_train[idx]

                running_loss += self.loss(target, prediction)

                # Walk the layers back-to-front, handing each the error
                # returned by the layer after it.
                grad = self.loss_prime(target, prediction)
                for layer in self.layers[::-1]:
                    grad = layer.back_prop(grad, lr_rate)

            running_loss /= n_samples
            print(f'epoch: {epoch}/{epochs} error={running_loss}')

* Training the model on XOR data and testing

In [68]:
# XOR truth table: four samples, each input a (1, 2) row and each target a (1, 1) array
X_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# Build the Sequential pipeline: 2 inputs -> 3 hidden units -> 1 output, tanh after each dense layer
XOR_model = Sequential()
for xor_layer in (
    FullyConLayer(2, 3),
    ActLayer(tanh, tanh_back),
    FullyConLayer(3, 1),
    ActLayer(tanh, tanh_back),
):
    XOR_model.add(xor_layer)

# Register the MSE loss and its derivative, then train
XOR_model.use(loss=mse, loss_prime=mse_back)
XOR_model.fit(X_train, y_train, epochs=1000, lr_rate=0.1)

epoch: 1/1000 error=0.34337213312371573
epoch: 2/1000 error=0.30623378872505536
epoch: 3/1000 error=0.2981638018921073
epoch: 4/1000 error=0.29507896912321085
epoch: 5/1000 error=0.2933269776264407
epoch: 6/1000 error=0.2920372481553512
epoch: 7/1000 error=0.29093649123150006
epoch: 8/1000 error=0.28992011404512313
epoch: 9/1000 error=0.28894145445385744
epoch: 10/1000 error=0.2879768550505277
epoch: 11/1000 error=0.28701279188784307
epoch: 12/1000 error=0.2860405418969009
epoch: 13/1000 error=0.28505379026990757
epoch: 14/1000 error=0.2840475025652433
epoch: 15/1000 error=0.28301737803415944
epoch: 16/1000 error=0.28195958162756785
epoch: 17/1000 error=0.28087061355105297
epoch: 18/1000 error=0.2797472484308859
epoch: 19/1000 error=0.27858651078830715
epoch: 20/1000 error=0.27738567032088235
epoch: 21/1000 error=0.2761422487495466
epoch: 22/1000 error=0.27485403406705927
epoch: 23/1000 error=0.27351910001848273
epoch: 24/1000 error=0.2721358295840214
epoch: 25/1000 error=0.27070294162

epoch: 528/1000 error=0.000524470803680082
epoch: 529/1000 error=0.000523020090659819
epoch: 530/1000 error=0.0005215769143112409
epoch: 531/1000 error=0.0005201412175312359
epoch: 532/1000 error=0.0005187129437836846
epoch: 533/1000 error=0.0005172920370925365
epoch: 534/1000 error=0.000515878442034918
epoch: 535/1000 error=0.0005144721037344193
epoch: 536/1000 error=0.0005130729678544239
epoch: 537/1000 error=0.0005116809805915535
epoch: 538/1000 error=0.0005102960886692117
epoch: 539/1000 error=0.0005089182393312039
epoch: 540/1000 error=0.0005075473803354793
epoch: 541/1000 error=0.0005061834599479246
epoch: 542/1000 error=0.0005048264269362803
epoch: 543/1000 error=0.0005034762305641165
epoch: 544/1000 error=0.0005021328205849148
epoch: 545/1000 error=0.0005007961472362251
epoch: 546/1000 error=0.0004994661612339041
epoch: 547/1000 error=0.0004981428137664498
epoch: 548/1000 error=0.000496826056489378
epoch: 549/1000 error=0.0004955158415197301
epoch: 550/1000 error=0.000494212121

In [69]:
# Run the trained network on the XOR inputs and round each output to the nearest integer
y_pred = XOR_model.predict(X_train)
xor_labels = np.round(np.array(y_pred).reshape(-1,1)).astype(int)
print('Predictions:\n', xor_labels)

print('Actuals:\n', y_train)

Predictions:
 [[0]
 [1]
 [1]
 [0]]
Actuals:
 [[[0]]

 [[1]]

 [[1]]

 [[0]]]


* Saving the model for future use

In [41]:
import pickle

def saving_model(nn_model, file_name):
    """Pickle ``nn_model`` to the path ``file_name``, overwriting any existing file.

    The file is opened in a ``with`` block so the handle is closed (and the
    data flushed) even if pickling raises.
    """
    with open(file_name, 'wb') as model_file:
        pickle.dump(nn_model, model_file)

# Persist the trained XOR network to disk
saving_model(nn_model=XOR_model, file_name='XOR_solved.w')

## Training the model on MNIST dataset

In [89]:
from keras.datasets import mnist
from keras.utils import np_utils

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# train dataset preprocessing: one (1, 784) float32 row per image, divided by 255; labels one-hot encoded
X_train = X_train.reshape(X_train.shape[0], 1, 28*28).astype('float32') / 255
y_train = np_utils.to_categorical(y_train)

# test dataset preprocessing: same steps as for the training split
X_test = X_test.reshape(X_test.shape[0], 1, 28*28).astype('float32') / 255
y_test = np_utils.to_categorical(y_test)

# Defining the neural network layers: 784 -> 100 -> 50 -> 10, tanh after every dense layer
mnist_model = Sequential()
for n_in, n_out in [(28*28, 100), (100, 50), (50, 10)]:
    mnist_model.add(FullyConLayer(n_in, n_out))
    mnist_model.add(ActLayer(tanh, tanh_back))

# train the model on the first 1000 samples of the mnist train set
mnist_model.use(loss=mse, loss_prime=mse_back)
mnist_model.fit(X_train[0:1000], y_train[0:1000], epochs=35, lr_rate=0.1)


epoch: 1/35 error=0.24551294372210195
epoch: 2/35 error=0.10305116556541337
epoch: 3/35 error=0.08191568016366141
epoch: 4/35 error=0.06943232875969899
epoch: 5/35 error=0.059627881984354744
epoch: 6/35 error=0.05121772837278282
epoch: 7/35 error=0.044077427930826384
epoch: 8/35 error=0.0379760802899684
epoch: 9/35 error=0.03324317847177355
epoch: 10/35 error=0.02959530849113631
epoch: 11/35 error=0.026709091533013117
epoch: 12/35 error=0.02424382419818279
epoch: 13/35 error=0.022286899928113703
epoch: 14/35 error=0.020465167402540178
epoch: 15/35 error=0.018862694463439644
epoch: 16/35 error=0.017502447817286458
epoch: 17/35 error=0.016416733008211677
epoch: 18/35 error=0.015483241105835476
epoch: 19/35 error=0.014633016221215803
epoch: 20/35 error=0.013934790773665747
epoch: 21/35 error=0.01331859206029423
epoch: 22/35 error=0.0127628361397744
epoch: 23/35 error=0.012244954031920871
epoch: 24/35 error=0.011777108836217114
epoch: 25/35 error=0.011316551868634813
epoch: 26/35 error=0.0

In [155]:
# test the model on mnist test set and print some sample

y_pred = mnist_model.predict(X_test)
# predict() returns one output per sample; take the row count from the
# predictions instead of hard-coding the test-set size (10000).
y_pred = np.argmax(np.array(y_pred).reshape(len(y_pred), -1), axis=1)
print("predicted values : ")
print(y_pred[0:10], end="\n")
print("true values : ")
print(np.argmax(y_test, axis=1)[0:10])


# Checking the accuracy of the model

def check_accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``."""
    matches = y_pred == y_true
    return np.mean(matches)

# Compare predicted digits with the true digits recovered from the one-hot test labels
y_true_labels = np.argmax(y_test, axis=1)
print('Accuracy:', check_accuracy(y_true_labels, y_pred))

predicted values : 
[7 2 1 0 4 1 4 9 2 9]
true values : 
[7 2 1 0 4 1 4 9 5 9]
Accuracy: 0.7818


* checking the accuracy of the model

* Changing the learning rate to 0.01 and training the model for more epochs (40)

In [154]:
# Defining the neural network layers: 784 -> 100 -> 50 -> 10, tanh after every dense layer
mnist_model_2 = Sequential()
for n_in, n_out in [(28*28, 100), (100, 50), (50, 10)]:
    mnist_model_2.add(FullyConLayer(n_in, n_out))
    mnist_model_2.add(ActLayer(tanh, tanh_back))

# train the model on the first 1000 samples of the mnist train set
mnist_model_2.use(loss=mse, loss_prime=mse_back)
mnist_model_2.fit(X_train[0:1000], y_train[0:1000], epochs=40, lr_rate=0.01)

epoch: 1/40 error=0.55007203959056
epoch: 2/40 error=0.3470325520855173
epoch: 3/40 error=0.23637848214423227
epoch: 4/40 error=0.1713917627989513
epoch: 5/40 error=0.132059362235035
epoch: 6/40 error=0.10778082849118847
epoch: 7/40 error=0.09207234727115031
epoch: 8/40 error=0.08148698100746875
epoch: 9/40 error=0.07405286292757546
epoch: 10/40 error=0.06859740860174293
epoch: 11/40 error=0.06443102975098167
epoch: 12/40 error=0.061144011056642224
epoch: 13/40 error=0.058471193338339125
epoch: 14/40 error=0.05623404486532755
epoch: 15/40 error=0.0543141935670257
epoch: 16/40 error=0.05263214199342901
epoch: 17/40 error=0.05113422995334454
epoch: 18/40 error=0.04978395753884527
epoch: 19/40 error=0.04855489899288085
epoch: 20/40 error=0.047425833531899095
epoch: 21/40 error=0.04637882831600851
epoch: 22/40 error=0.04539897752284636
epoch: 23/40 error=0.044474327909048546
epoch: 24/40 error=0.043595621596876175
epoch: 25/40 error=0.042755946299708886
epoch: 26/40 error=0.041950382618991

In [156]:
# test the model on mnist test set and print some sample

y_pred = mnist_model_2.predict(X_test)
# predict() returns one output per sample; take the row count from the
# predictions instead of hard-coding the test-set size (10000).
y_pred = np.argmax(np.array(y_pred).reshape(len(y_pred), -1), axis=1)
# True digits recovered once from the one-hot test labels.
y_true = np.argmax(y_test, axis=1)
print("predicted values : ")
print(y_pred[0:10], end="\n")
print("true values : ")
print(y_true[0:10])

# Checking the accuracy of the model

print('Accuracy:', check_accuracy(y_true, y_pred))

predicted values : 
[7 0 1 0 4 1 5 5 6 9]
true values : 
[7 2 1 0 4 1 4 9 5 9]
Accuracy: 0.6906


* Keeping the learning rate at 0.01 (the code below uses `lr_rate=0.01`, not 0.001), adding one more hidden layer, and training the model for fewer epochs (20)

In [165]:
# Defining the neural network layers: 784 -> 100 -> 50 -> 25 -> 10, tanh after every dense layer
mnist_model_3 = Sequential()
for n_in, n_out in [(28*28, 100), (100, 50), (50, 25), (25, 10)]:
    mnist_model_3.add(FullyConLayer(n_in, n_out))
    mnist_model_3.add(ActLayer(tanh, tanh_back))

# train the model on the first 1000 samples of the mnist train set
mnist_model_3.use(loss=mse, loss_prime=mse_back)
mnist_model_3.fit(X_train[0:1000], y_train[0:1000], epochs=20, lr_rate=0.01)

epoch: 1/20 error=0.36159267598667993
epoch: 2/20 error=0.171647170308955
epoch: 3/20 error=0.11275175699638579
epoch: 4/20 error=0.09178352959424493
epoch: 5/20 error=0.08199428462243918
epoch: 6/20 error=0.07625534423797073
epoch: 7/20 error=0.07226927442912434
epoch: 8/20 error=0.06919006596561302
epoch: 9/20 error=0.06664330891267237
epoch: 10/20 error=0.06444743392892953
epoch: 11/20 error=0.06251688088222211
epoch: 12/20 error=0.06081045780787019
epoch: 13/20 error=0.059295734069074776
epoch: 14/20 error=0.05793730667478839
epoch: 15/20 error=0.056700512101865566
epoch: 16/20 error=0.055555817149054124
epoch: 17/20 error=0.054480019660823185
epoch: 18/20 error=0.05345567602977527
epoch: 19/20 error=0.05247013845101409
epoch: 20/20 error=0.051514719551094455


In [166]:
# test the model on mnist test set and print some sample

y_pred = mnist_model_3.predict(X_test)
# predict() returns one output per sample; take the row count from the
# predictions instead of hard-coding the test-set size (10000).
y_pred = np.argmax(np.array(y_pred).reshape(len(y_pred), -1), axis=1)
# True digits recovered once from the one-hot test labels.
y_true = np.argmax(y_test, axis=1)
print("predicted values : ")
print(y_pred[0:10], end="\n")
print("true values : ")
print(y_true[0:10])

# Checking the accuracy of the model
print('Accuracy:', check_accuracy(y_true, y_pred))

predicted values : 
[7 5 1 0 4 1 4 9 2 7]
true values : 
[7 2 1 0 4 1 4 9 5 9]
Accuracy: 0.5915


* training the model again with sigmoid activation function

In [167]:
# Defining the neural network layers: 784 -> 100 -> 50 -> 25 -> 10, sigmoid after every dense layer
mnist_model_4 = Sequential()
for n_in, n_out in [(28*28, 100), (100, 50), (50, 25), (25, 10)]:
    mnist_model_4.add(FullyConLayer(n_in, n_out))
    mnist_model_4.add(ActLayer(sigmoid, sigmoid_back))

# train the model on the first 1000 samples of the mnist train set
mnist_model_4.use(loss=mse, loss_prime=mse_back)
mnist_model_4.fit(X_train[0:1000], y_train[0:1000], epochs=20, lr_rate=0.1)

epoch: 1/20 error=0.10188016787406068
epoch: 2/20 error=0.08983237934743203
epoch: 3/20 error=0.08959921507096821
epoch: 4/20 error=0.08933994046723384
epoch: 5/20 error=0.08904034327853698
epoch: 6/20 error=0.08868626803355563
epoch: 7/20 error=0.08825999041501283
epoch: 8/20 error=0.08773804307679056
epoch: 9/20 error=0.08708775148106555
epoch: 10/20 error=0.08626424334029699
epoch: 11/20 error=0.08521328001007204
epoch: 12/20 error=0.08389061107611422
epoch: 13/20 error=0.08230236911121741
epoch: 14/20 error=0.08052926277379262
epoch: 15/20 error=0.07867198214261856
epoch: 16/20 error=0.07675761179579568
epoch: 17/20 error=0.0747190989236287
epoch: 18/20 error=0.07245211216316803
epoch: 19/20 error=0.0698900724758252
epoch: 20/20 error=0.06705998549722457


In [168]:
# test the model on mnist test set and print some sample

y_pred = mnist_model_4.predict(X_test)
# predict() returns one output per sample; take the row count from the
# predictions instead of hard-coding the test-set size (10000).
y_pred = np.argmax(np.array(y_pred).reshape(len(y_pred), -1), axis=1)
# True digits recovered once from the one-hot test labels.
y_true = np.argmax(y_test, axis=1)
print("predicted values : ")
print(y_pred[0:10], end="\n")
print("true values : ")
print(y_true[0:10])

# Checking the accuracy of the model
# (a stray `mnist_model_4` token after the print call made this line a syntax error)
print('Accuracy:', check_accuracy(y_true, y_pred))

predicted values : 
[7 6 1 0 7 1 7 7 6 7]
true values : 
[7 2 1 0 4 1 4 9 5 9]
Accuracy: 0.5614


## The best-fitting model is the first one, trained with a learning rate of 0.1 for 35 epochs: it reaches about 78% test accuracy, the highest of the four models