## Project #1
## Create a Neural Network In Python

In [None]:
import numpy as np

# Base class
class Layer:
    """Abstract interface implemented by every layer of the network.

    A layer exposes a forward pass (X -> Y) and a backward pass
    (dE/dY -> dE/dX). Both must be overridden by subclasses.
    """

    def __init__(self):
        # Nothing has been propagated through the layer yet.
        self.input, self.output = None, None

    def forward_propagation(self, input):
        """Compute the output Y of the layer for a given input X."""
        raise NotImplementedError

    def backward_propagation(self, output_error, learning_rate):
        """Compute dE/dX for a given dE/dY (and update parameters, if any)."""
        raise NotImplementedError

In [None]:
class FCLayer(Layer):
    """Fully connected layer computing ``Y = X . W + B``.

    Args:
        input_size: number of input neurons.
        output_size: number of output neurons.

    Weights have shape ``(input_size, output_size)`` and the bias has shape
    ``(1, output_size)``; both are drawn uniformly from [-0.5, 0.5).
    """

    def __init__(self, input_size, output_size):
        # Create the input/output slots declared by the base class.
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Cache ``input_data`` and return ``input_data . W + B``."""
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return ``input_error = dE/dX``.

        Args:
            output_error: dE/dY for the output of the last forward pass.
            learning_rate: step size applied to the weight and bias updates.
        """
        # dE/dX is computed with the weights as they were during the forward pass.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dE/dB: add up the rows of output_error so the bias keeps its
        # (1, output_size) shape. For a single-row error this is the row itself.
        bias_error = output_error.reshape(-1, self.bias.shape[1]).sum(axis=0, keepdims=True)

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

In [None]:
class ActivationLayer(Layer):
    """Parameter-free layer that applies an activation function to its input.

    Args:
        activation: callable applied to the input in the forward pass.
        activation_prime: callable giving the derivative of ``activation``.
    """

    def __init__(self, activation, activation_prime):
        # Create the input/output slots declared by the base class.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Cache ``input_data`` and return the activated input."""
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return ``input_error = dE/dX`` for a given ``output_error = dE/dY``.

        ``learning_rate`` is unused: this layer has no parameters to update.
        """
        return self.activation_prime(self.input) * output_error

# activation function and its derivative
def sigmoid(x):
    """Logistic function ``1 / (1 + e^(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_prime(x):
    """Derivative of the sigmoid: ``sigmoid(x) * (1 - sigmoid(x))``."""
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(x)
    return s * (1 - s)

# ReLU activation function and its derivative (this is the plain ReLU, not the leaky variant)
def relu(x):
    """Rectified linear unit: ``max(x, 0)``.

    ``np.maximum`` is used instead of ``x * (x > 0)`` so that ``-inf`` maps to
    0 (the product form yields NaN) and negative inputs give +0.0, not -0.0.
    """
    return np.maximum(x, 0)

def relu_prime(x):
    """Derivative of ReLU: 1 where ``x > 0`` and 0 elsewhere (including ``x == 0``)."""
    is_positive = x > 0
    return is_positive * 1

# error function and its derivative
def mse(y_true, y_pred):
    """Mean squared error between ``y_true`` and ``y_pred``."""
    residual = y_true - y_pred
    return np.mean(np.square(residual))

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``."""
    n_elements = y_true.size
    residual = y_pred - y_true
    return 2 * residual / n_elements

In [None]:
class Network:
    """Sequential stack of layers trained one sample at a time.

    Layers are applied in the order they were added. The loss and its
    derivative must be registered with ``use`` before calling ``fit``.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss ``loss(y_true, y_pred)`` and its derivative ``loss_prime``."""
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Run one sample through every layer in order and return the final output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list holding the network output for each sample of ``input_data``.

        The sample dimension comes first: ``input_data[i]`` is the i-th sample.
        """
        return [self._forward(input_data[i]) for i in range(len(input_data))]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train the network and print the average loss after every epoch.

        Args:
            x_train: training inputs, sample dimension first.
            y_train: targets, indexed the same way as ``x_train``.
            epochs: number of passes over the training samples.
            learning_rate: step size handed to every layer's backward pass.

        Raises:
            ValueError: if ``x_train`` is empty or no loss has been set.
        """
        # sample dimension first
        samples = len(x_train)

        # Fail early with a clear message instead of a ZeroDivisionError at the
        # end of the first epoch or a "'NoneType' object is not callable" error.
        if samples == 0:
            raise ValueError("x_train must contain at least one sample")
        if self.loss is None or self.loss_prime is None:
            raise ValueError("no loss set; call use(loss, loss_prime) before fit()")

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = self._forward(x_train[j])

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation: feed the error through the layers in reverse
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculating average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

## Test your model on the IRIS dataset and the MNIST dataset

# **MNIST Dataset**

In [None]:
from keras.datasets import mnist
from keras.utils import np_utils

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# training data : 60000 samples
# reshape every 28x28 image into a (1, 784) row vector and divide the pixel values by 255
x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
x_train = x_train.astype('float32')
x_train /= 255
# encoding output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = np_utils.to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
x_test = x_test.astype('float32')
x_test /= 255
y_test = np_utils.to_categorical(y_test)

# FCLayer draws its initial weights with np.random; fix the seed so the run is repeatable
np.random.seed(42)

# Network
net = Network()
net.add(FCLayer(28*28, 150))                # input_shape=(1, 28*28)    ;   output_shape=(1, 150)
net.add(ActivationLayer(sigmoid, sigmoid_prime))
net.add(FCLayer(150, 50))                   # input_shape=(1, 150)      ;   output_shape=(1, 50)
net.add(ActivationLayer(sigmoid, sigmoid_prime))
net.add(FCLayer(50, 10))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
net.add(ActivationLayer(sigmoid, sigmoid_prime))

# train on 1000 samples
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs=35, learning_rate=0.1)

# test on 500 samples (all of them are kept in `out` for the accuracy computation),
# but display only the first 3 so the cell output stays readable
out = net.predict(x_test[0:500])
print("\n")
print("predicted values : ")
print(out[0:3], end="\n")
print("true values : ")
print(y_test[0:3])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
epoch 1/35   error=0.102105
epoch 2/35   error=0.086834
epoch 3/35   error=0.082682
epoch 4/35   error=0.077596
epoch 5/35   error=0.071962
epoch 6/35   error=0.066172
epoch 7/35   error=0.060570
epoch 8/35   error=0.055426
epoch 9/35   error=0.050922
epoch 10/35   error=0.047086
epoch 11/35   error=0.043826
epoch 12/35   error=0.041033
epoch 13/35   error=0.038602
epoch 14/35   error=0.036453
epoch 15/35   error=0.034526
epoch 16/35   error=0.032782
epoch 17/35   error=0.031190
epoch 18/35   error=0.029728
epoch 19/35   error=0.028378
epoch 20/35   error=0.027126
epoch 21/35   error=0.025961
epoch 22/35   error=0.024873
epoch 23/35   error=0.023856
epoch 24/35   error=0.022903
epoch 25/35   error=0.022008
epoch 26/35   error=0.021165
epoch 27/35   error=0.020371
epoch 28/35   error=0.019621
epoch 29/35   error=0.018913
epoch 30/35   error=0.018244
epoch 31/35   error=0.017611
epoch 32/35   erro

In [None]:
# number of test samples whose predicted class (arg-max of the network output)
# equals the true class (arg-max of the one-hot label)
accuracy = sum(
    1 for i in range(len(out)) if np.argmax(out[i]) == np.argmax(y_test[i])
)

# accuracy in percent
accuracy/len(out)*100

83.39999999999999

**Using the sigmoid function as the activation function and two hidden layers, the accuracy on 500 test samples is found to be 83.4%.**

## **Testing ReLU as an activation function**
The same layers as in the MNIST network above are used, so that the accuracy obtained with the ReLU function can be compared with that of the sigmoid function.

In [None]:
import re  # same stdlib module that was previously imported through the private keras.utils.generic_utils

# FCLayer draws its initial weights with np.random; fix the seed so the run is repeatable
np.random.seed(42)

net = Network()
net.add(FCLayer(28*28, 150))                # input_shape=(1, 28*28)    ;   output_shape=(1, 150)
net.add(ActivationLayer(relu, relu_prime))
net.add(FCLayer(150, 50))                   # input_shape=(1, 150)      ;   output_shape=(1, 50)
net.add(ActivationLayer(relu, relu_prime))
net.add(FCLayer(50, 10))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
net.add(ActivationLayer(relu, relu_prime))

# train on 1000 samples
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs=35, learning_rate=0.1)

# test on 500 samples (all of them are kept in `out` for the accuracy computation),
# but display only the first 3 so the cell output stays readable
out = net.predict(x_test[0:500])
print("\n")
print("predicted values : ")
print(out[0:3], end="\n")
print("true values : ")
print(y_test[0:3])

epoch 1/35   error=1.288204
epoch 2/35   error=0.100000
epoch 3/35   error=0.100000
epoch 4/35   error=0.100000
epoch 5/35   error=0.100000
epoch 6/35   error=0.100000
epoch 7/35   error=0.100000
epoch 8/35   error=0.100000
epoch 9/35   error=0.100000
epoch 10/35   error=0.100000
epoch 11/35   error=0.100000
epoch 12/35   error=0.100000
epoch 13/35   error=0.100000
epoch 14/35   error=0.100000
epoch 15/35   error=0.100000
epoch 16/35   error=0.100000
epoch 17/35   error=0.100000
epoch 18/35   error=0.100000
epoch 19/35   error=0.100000
epoch 20/35   error=0.100000
epoch 21/35   error=0.100000
epoch 22/35   error=0.100000
epoch 23/35   error=0.100000
epoch 24/35   error=0.100000
epoch 25/35   error=0.100000
epoch 26/35   error=0.100000
epoch 27/35   error=0.100000
epoch 28/35   error=0.100000
epoch 29/35   error=0.100000
epoch 30/35   error=0.100000
epoch 31/35   error=0.100000
epoch 32/35   error=0.100000
epoch 33/35   error=0.100000
epoch 34/35   error=0.100000
epoch 35/35   error=0.1

In [None]:
# number of test samples whose predicted class (arg-max of the network output)
# equals the true class (arg-max of the one-hot label)
accuracy = sum(
    1 for i in range(len(out)) if np.argmax(out[i]) == np.argmax(y_test[i])
)

# accuracy in percent
accuracy/len(out)*100

8.4

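That's an accuracy of 8.4% :D I am really not sure why the accuracy is so low: is it because I implemented the ReLU function incorrectly, or is it supposed to be like this? I have read that there is a leaky ReLU function that improves the accuracy, so maybe that is the problem. Note that the error stays at exactly 0.1 from epoch 2 onward, which is the MSE of an all-zero output against a one-hot target of size 10, so the ReLU units have most likely "died" (they output 0 and therefore pass back a zero gradient).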

# **IRIS Dataset**

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas as pd

# load iris dataset
X, y = datasets.load_iris(return_X_y=True)
# 50/50 train/test split; random_state is fixed so the split is the same on every run
X_train,X_test,Y_train,Y_test=train_test_split(X,y,test_size=0.5,random_state=42)

# every sample becomes a (1, 4) row vector; the class labels are one-hot encoded
X_train=X_train.reshape(X_train.shape[0], 1, 4)
X_train = X_train.astype('float32')
Y_train = np_utils.to_categorical(Y_train)

X_test=X_test.reshape(X_test.shape[0], 1, 4)
X_test = X_test.astype('float32')
Y_test=np_utils.to_categorical(Y_test)

# FCLayer draws its initial weights with np.random; fix the seed so the run is repeatable
np.random.seed(42)

# Network
net = Network()
net.add(FCLayer(4,4))                # input_shape=(1, 4)    ;   output_shape=(1, 4)
net.add(ActivationLayer(sigmoid, sigmoid_prime))
net.add(FCLayer(4,3))                   # input_shape=(1, 4)      ;   output_shape=(1, 3)
net.add(ActivationLayer(sigmoid, sigmoid_prime))
net.use(mse, mse_prime)

net.fit(X_train, Y_train, epochs=50, learning_rate=0.4)

outX = net.predict(X_test)
print("\n")
print("predicted values : ")
# show the IRIS predictions (outX), not `out`, which holds the MNIST predictions
print(outX, end="\n")
print("true values : ")
print(Y_test)

epoch 1/50   error=0.238265
epoch 2/50   error=0.203472
epoch 3/50   error=0.161071
epoch 4/50   error=0.126772
epoch 5/50   error=0.111305
epoch 6/50   error=0.103892
epoch 7/50   error=0.099413
epoch 8/50   error=0.096172
epoch 9/50   error=0.093526
epoch 10/50   error=0.091176
epoch 11/50   error=0.088952
epoch 12/50   error=0.086730
epoch 13/50   error=0.084439
epoch 14/50   error=0.082074
epoch 15/50   error=0.079670
epoch 16/50   error=0.077258
epoch 17/50   error=0.074859
epoch 18/50   error=0.072492
epoch 19/50   error=0.070178
epoch 20/50   error=0.067940
epoch 21/50   error=0.065801
epoch 22/50   error=0.063779
epoch 23/50   error=0.061889
epoch 24/50   error=0.060136
epoch 25/50   error=0.058519
epoch 26/50   error=0.057041
epoch 27/50   error=0.055707
epoch 28/50   error=0.054512
epoch 29/50   error=0.053440
epoch 30/50   error=0.052482
epoch 31/50   error=0.051654
epoch 32/50   error=0.050872
epoch 33/50   error=0.049308
epoch 34/50   error=0.047905
epoch 35/50   error=0.0

In [None]:
# number of test samples whose predicted class (arg-max of the network output)
# equals the true class (arg-max of the one-hot label)
accuracy = sum(
    1 for i in range(len(outX)) if np.argmax(outX[i]) == np.argmax(Y_test[i])
)

# accuracy in percent
accuracy/len(outX)*100

96.0

**Using the sigmoid function as the activation function and one hidden layer, the accuracy on the IRIS test split is found to be 96.0%.**

## **Testing ReLU as an activation function**
The same layers as in the IRIS network above are used, so that the accuracy obtained with the ReLU function can be compared with that of the sigmoid function.

In [None]:
# FCLayer draws its initial weights with np.random; fix the seed so the run is repeatable
np.random.seed(42)

# Network
net = Network()
net.add(FCLayer(4,4))                # input_shape=(1, 4)    ;   output_shape=(1, 4)
net.add(ActivationLayer(relu, relu_prime))
net.add(FCLayer(4,3))                   # input_shape=(1, 4)      ;   output_shape=(1, 3)
net.add(ActivationLayer(relu, relu_prime))
net.use(mse, mse_prime)

net.fit(X_train, Y_train, epochs=50, learning_rate=0.4)

outX = net.predict(X_test)
print("\n")
print("predicted values : ")
# show the IRIS predictions (outX), not `out`, which holds the MNIST predictions
print(outX, end="\n")
print("true values : ")
print(Y_test)

epoch 1/50   error=0.347161
epoch 2/50   error=0.333333
epoch 3/50   error=0.333333
epoch 4/50   error=0.333333
epoch 5/50   error=0.333333
epoch 6/50   error=0.333333
epoch 7/50   error=0.333333
epoch 8/50   error=0.333333
epoch 9/50   error=0.333333
epoch 10/50   error=0.333333
epoch 11/50   error=0.333333
epoch 12/50   error=0.333333
epoch 13/50   error=0.333333
epoch 14/50   error=0.333333
epoch 15/50   error=0.333333
epoch 16/50   error=0.333333
epoch 17/50   error=0.333333
epoch 18/50   error=0.333333
epoch 19/50   error=0.333333
epoch 20/50   error=0.333333
epoch 21/50   error=0.333333
epoch 22/50   error=0.333333
epoch 23/50   error=0.333333
epoch 24/50   error=0.333333
epoch 25/50   error=0.333333
epoch 26/50   error=0.333333
epoch 27/50   error=0.333333
epoch 28/50   error=0.333333
epoch 29/50   error=0.333333
epoch 30/50   error=0.333333
epoch 31/50   error=0.333333
epoch 32/50   error=0.333333
epoch 33/50   error=0.333333
epoch 34/50   error=0.333333
epoch 35/50   error=0.3

In [None]:
# number of test samples whose predicted class (arg-max of the network output)
# equals the true class (arg-max of the one-hot label)
accuracy = sum(
    1 for i in range(len(outX)) if np.argmax(outX[i]) == np.argmax(Y_test[i])
)

# accuracy in percent
accuracy/len(outX)*100

26.666666666666668

26.7% 😭

## References
https://becominghuman.ai/understanding-neural-networks-2-the-math-of-neural-networks-in-3-equations-6085fd3f09df
https://towardsdatascience.com/how-to-define-a-neural-network-as-a-mathematical-function-f7b820cde3f
