<a href="https://colab.research.google.com/github/karanm21/IITG_Research_Intern/blob/main/NN_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# base class

class Layer:
    """Abstract base class shared by every layer of the network.

    Concrete layers override both propagation methods. The ``input`` and
    ``output`` attributes are placeholders that start out as ``None``.
    """

    def __init__(self):
        # Both placeholders start empty.
        self.input = self.output = None

    def forward_propagation(self, input):
        """Compute the layer output Y for the input X (must be overridden)."""
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Compute dE/dX from dE/dY, updating parameters if any (must be overridden)."""
        raise NotImplementedError()

In [None]:
import numpy as np

In [None]:
class FCLayer(Layer):
    """Fully connected layer computing ``Y = X . W + B``.

    Parameters
    ----------
    input_size : int
        Number of input neurons (rows of the weight matrix).
    output_size : int
        Number of output neurons (columns of the weight matrix).

    Weights have shape ``(input_size, output_size)`` and the bias has shape
    ``(1, output_size)``; both are initialised uniformly in ``[-0.5, 0.5)``.
    """

    def __init__(self, input_size, output_size):
        # Initialise the input/output placeholders declared by the base class.
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Return ``input_data . W + B`` and cache the input for the backward pass."""
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return ``dE/dX``.

        Parameters
        ----------
        output_error : ndarray
            ``dE/dY`` with one row per sample, i.e. shape ``(B, output_size)``.
            The notebook always passes a single row (``B == 1``).
        learning_rate : float
            Step size of the update.

        Returns
        -------
        ndarray
            ``dE/dX`` computed with the weights as they were before the update.
        """
        # dE/dX must use the pre-update weights, so compute it first.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)

        # dE/dB is dE/dY reduced over the sample axis. For a single row this
        # is exactly output_error; for several rows it keeps the bias at
        # shape (1, output_size) instead of broadcasting it to (B, output_size).
        bias_error = np.sum(output_error, axis=0, keepdims=True)

        # update params
        self.weights = self.weights - learning_rate * weights_error
        self.bias = self.bias - learning_rate * bias_error

        return input_error

In [None]:
class ActivationLayer(Layer):
    """Layer that applies an activation callable; it has no learnable parameters.

    Parameters
    ----------
    activation : callable
        Function applied to the layer input in the forward pass.
    activation_prime : callable
        Derivative of ``activation``, evaluated at the cached input in the
        backward pass.
    """

    def __init__(self, activation, activation_prime):
        # Initialise the input/output placeholders declared by the base class
        # so they exist even before the first forward pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Return ``activation(input_data)`` and cache the input for the backward pass."""
        self.input = input_data
        self.output = self.activation(self.input)

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return ``dE/dX = activation_prime(X) * dE/dY``.

        ``learning_rate`` is accepted only to match the ``Layer`` interface;
        it is unused because there is nothing to update.
        """
        return self.activation_prime(self.input) * output_error

In [None]:
def tanh(x):
    """Hyperbolic-tangent activation, delegated to ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh: ``1 - tanh(x)**2``."""
    squashed = np.tanh(x)
    return 1 - squashed ** 2

In [None]:
# loss function and its derivative

def mse(y_true, y_pred):
    """Mean squared error between ``y_true`` and ``y_pred`` over all elements."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``.

    The difference is scaled by ``2 / y_true.size``, matching the mean taken
    over all elements in ``mse``.
    """
    doubled_residual = 2 * (y_pred - y_true)
    return doubled_residual / y_true.size

In [None]:
class Network:
    """Sequential container that chains layers and trains them one sample at a time.

    Layers are applied in the order they were added. Each layer must provide
    ``forward_propagation(input)`` and
    ``backward_propagation(output_error, learning_rate)``.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss ``loss(y_true, y_pred)`` and its derivative ``loss_prime``."""
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Push one sample through every layer in order and return the final output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list holding the network output for each sample of ``input_data``."""
        return [self._forward(input_data[i]) for i in range(len(input_data))]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train with per-sample gradient descent, printing the mean loss per epoch.

        Parameters
        ----------
        x_train, y_train : indexable sequences
            Samples and their targets; ``y_train[j]`` is the target for
            ``x_train[j]``. Targets beyond ``len(x_train)`` are ignored.
        epochs : int
            Number of full passes over ``x_train``.
        learning_rate : float
            Step size forwarded to every layer's backward pass.

        Raises
        ------
        RuntimeError
            If ``use`` has not been called to set the loss functions.
        ValueError
            If ``x_train`` is empty or ``y_train`` has fewer entries than
            ``x_train``.
        """
        # Fail early with a clear message instead of a "'NoneType' object is
        # not callable" error in the middle of the first epoch.
        if self.loss is None or self.loss_prime is None:
            raise RuntimeError("no loss configured: call use(loss, loss_prime) before fit()")

        samples = len(x_train)

        # An empty training set would otherwise end in a division by zero
        # when the epoch error is averaged.
        if samples == 0:
            raise ValueError("x_train is empty: nothing to train on")

        # Too few targets would otherwise raise IndexError after some
        # weights had already been updated.
        if len(y_train) < samples:
            raise ValueError(
                "y_train has %d entries but x_train has %d" % (len(y_train), samples)
            )

        for i in range(epochs):
            err = 0

            for j in range(samples):
                output = self._forward(x_train[j])

                # accumulate the loss for reporting
                err = err + self.loss(y_train[j], output)

                # back-propagate dE/dY from the last layer to the first
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # average error over all samples
            err = err / samples
            print('epoch %d/%d  error= %f' % (i + 1, epochs, err))


In [None]:
# Solve XOR:

In [None]:
# XOR truth table: four samples, each stored as a (1, 2) row vector,
# with the matching targets stored as (1, 1) arrays.
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 1, 2)
y_train = np.array([0, 1, 1, 0]).reshape(4, 1, 1)

In [None]:
# XOR network: 2 inputs -> 3 hidden units -> 1 output, tanh after each dense layer
net = Network()
net.add(FCLayer(input_size=2, output_size=3))
net.add(ActivationLayer(activation=tanh, activation_prime=tanh_prime))
net.add(FCLayer(input_size=3, output_size=1))
net.add(ActivationLayer(activation=tanh, activation_prime=tanh_prime))


In [None]:
# Train on the four XOR samples with MSE loss; fit prints one line per epoch
net.use(loss=mse, loss_prime=mse_prime)
net.fit(x_train=x_train, y_train=y_train, epochs=1000, learning_rate=0.1)

epoch 1/1000  error= 0.333777
epoch 2/1000  error= 0.299037
epoch 3/1000  error= 0.291114
epoch 4/1000  error= 0.288379
epoch 5/1000  error= 0.287154
epoch 6/1000  error= 0.286483
epoch 7/1000  error= 0.286051
epoch 8/1000  error= 0.285733
epoch 9/1000  error= 0.285476
epoch 10/1000  error= 0.285255
epoch 11/1000  error= 0.285055
epoch 12/1000  error= 0.284870
epoch 13/1000  error= 0.284695
epoch 14/1000  error= 0.284528
epoch 15/1000  error= 0.284368
epoch 16/1000  error= 0.284214
epoch 17/1000  error= 0.284065
epoch 18/1000  error= 0.283921
epoch 19/1000  error= 0.283782
epoch 20/1000  error= 0.283647
epoch 21/1000  error= 0.283517
epoch 22/1000  error= 0.283391
epoch 23/1000  error= 0.283270
epoch 24/1000  error= 0.283152
epoch 25/1000  error= 0.283038
epoch 26/1000  error= 0.282928
epoch 27/1000  error= 0.282822
epoch 28/1000  error= 0.282719
epoch 29/1000  error= 0.282620
epoch 30/1000  error= 0.282524
epoch 31/1000  error= 0.282431
epoch 32/1000  error= 0.282342
epoch 33/1000  er

In [None]:
# Run the trained network on the four XOR inputs; predict returns a list
# with one output array per sample, which is printed as plain text.
out= net.predict(x_train)
print(out)

[array([[0.52253822]]), array([[0.52558047]]), array([[0.5161126]]), array([[0.51919202]])]


In [None]:
# Solve MNIST:

In [None]:
from keras.datasets import mnist
from keras.utils import np_utils

In [None]:
# Load the MNIST train and test splits through keras.datasets (the recorded
# cell output shows mnist.npz being downloaded). This rebinds x_train and
# y_train, replacing the XOR arrays defined earlier in the notebook.
(x_train, y_train), (x_test, y_test)= mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Size of the first axis of a single training sample (the recorded output is 28)
x_train.shape[1]

28

In [None]:
# Flatten every image to a (1, 784) row vector, cast to float32 and divide by 255.
# NOTE: x_train is rebound from itself, so re-running this cell divides by 255 again.
x_train = x_train.reshape(x_train.shape[0], 1, 28 * 28).astype('float32') / 255

In [None]:
# One-hot encode the digit labels. The width is pinned to 10 classes so it
# always matches the 10-unit output layer instead of being inferred from the
# largest label present in the array.
# NOTE: y_train is rebound from itself, so this cell must not be re-run on
# labels that are already encoded.
y_train = np_utils.to_categorical(y_train, num_classes=10)

In [None]:
# Same preprocessing for the test split (10000 samples): flatten to (1, 784)
# row vectors, cast to float32 and divide by 255.
x_test = x_test.reshape(x_test.shape[0], 1, 28 * 28).astype('float32') / 255

# One-hot width pinned to 10 classes so the test encoding cannot differ from
# the 10-unit output layer, whatever labels happen to be present.
y_test = np_utils.to_categorical(y_test, num_classes=10)

In [None]:
# MNIST network: 784 inputs -> 100 -> 50 -> 10 outputs, tanh after each dense layer
net = Network()
net.add(FCLayer(input_size=28 * 28, output_size=100))
net.add(ActivationLayer(activation=tanh, activation_prime=tanh_prime))
net.add(FCLayer(input_size=100, output_size=50))
net.add(ActivationLayer(activation=tanh, activation_prime=tanh_prime))
net.add(FCLayer(input_size=50, output_size=10))
net.add(ActivationLayer(activation=tanh, activation_prime=tanh_prime))

In [None]:
# Train with MSE loss on the first 1000 training samples only
net.use(loss=mse, loss_prime=mse_prime)
net.fit(
    x_train=x_train[:1000],
    y_train=y_train[:1000],
    epochs=50,
    learning_rate=0.1,
)

epoch 1/50  error= 0.252711
epoch 2/50  error= 0.107296
epoch 3/50  error= 0.084824
epoch 4/50  error= 0.073104
epoch 5/50  error= 0.062761
epoch 6/50  error= 0.054203
epoch 7/50  error= 0.046867
epoch 8/50  error= 0.041059
epoch 9/50  error= 0.036314
epoch 10/50  error= 0.032416
epoch 11/50  error= 0.029023
epoch 12/50  error= 0.025685
epoch 13/50  error= 0.023020
epoch 14/50  error= 0.020995
epoch 15/50  error= 0.019249
epoch 16/50  error= 0.017627
epoch 17/50  error= 0.016315
epoch 18/50  error= 0.015120
epoch 19/50  error= 0.014064
epoch 20/50  error= 0.013281
epoch 21/50  error= 0.012280
epoch 22/50  error= 0.011536
epoch 23/50  error= 0.011069
epoch 24/50  error= 0.010610
epoch 25/50  error= 0.010355
epoch 26/50  error= 0.009894
epoch 27/50  error= 0.009490
epoch 28/50  error= 0.009237
epoch 29/50  error= 0.008922
epoch 30/50  error= 0.008743
epoch 31/50  error= 0.008353
epoch 32/50  error= 0.008156
epoch 33/50  error= 0.007915
epoch 34/50  error= 0.007849
epoch 35/50  error= 0.0

In [None]:
# Run the trained network on the first three test images; ending the cell
# with the bare name displays the resulting list of output arrays.
out = net.predict(x_test[:3])
out

[array([[-0.02159159,  0.00926768, -0.1385066 , -0.04340005,  0.05735532,
          0.10577981,  0.0884016 ,  0.97567502, -0.03194809, -0.07650442]]),
 array([[ 0.85287586, -0.00926977,  0.92485616,  0.09373894, -0.77022251,
         -0.08088378,  0.97554185, -0.10494602,  0.79455169, -0.25517857]]),
 array([[ 2.55549997e-02,  9.83872607e-01, -3.83235730e-02,
         -6.48982174e-04,  1.57093055e-02,  4.25262480e-02,
          2.23632490e-02,  2.12398496e-03, -4.51333440e-02,
         -4.30510247e-03]])]

In [None]:
# One-hot ground-truth labels of the same three test samples, for comparison
# with the predictions displayed above.
y_test[:3]

array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)