[View in Colaboratory](https://colab.research.google.com/github/msk180001/NueralNetwork/blob/master/NeuralNetworkAssignment3.ipynb)

In [0]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [0]:
class NeuralNet:
    """Fully connected network with two hidden layers, trained by batch gradient descent.

    The network has three weight matrices (input -> hidden 1, hidden 1 ->
    hidden 2, hidden 2 -> output) and no bias terms.  The data set is read
    from a CSV file, preprocessed, and split into a training and a test part
    inside the constructor.

    Every method that takes an ``activation`` argument understands
    ``"sigmoid"`` and ``"tanh"``; any other value selects ReLU.
    """

    def __init__(self, train, header=True, h1=4, h2=2):
        """Load and split the data set and initialise the weights.

        Args:
            train: Path or URL of a comma-separated data file.  The last
                column is used as the target, all other columns as features.
            header: Accepted for backward compatibility but not used; the
                file is always read with ``header=None``.
            h1: Number of nodes in the first hidden layer.
            h2: Number of nodes in the second hidden layer.
        """
        # Seed first: the split below and the weight initialisation both draw
        # from numpy's global random state, so the order of calls matters for
        # reproducibility.
        np.random.seed(1)

        raw_input = pd.read_csv(train, sep=',', header=None)
        train_dataset = self.preprocess(raw_input)
        ncols = len(train_dataset.columns)
        self.X = train_dataset.iloc[:, :ncols - 1].values
        self.y = train_dataset.iloc[:, ncols - 1].values.reshape(-1, 1)
        # NOTE(review): preprocess() standardizes every column, including the
        # target in the last column, so targets can fall outside the output
        # range of sigmoid/tanh - confirm this is intended.

        # Split the data into a training and a test set.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y)

        # Layer sizes are derived from the data: one input node per feature
        # column, one output node per target column.
        input_layer_size = self.X_train.shape[1]
        output_layer_size = self.y_train.shape[1]

        # Weights are drawn uniformly from [-1, 1).  Each weight matrix has
        # shape (nodes in previous layer) x (nodes in following layer).
        # X.. holds the activations feeding a layer, delta.. the error term
        # computed for it during back-propagation.
        self.w01 = 2 * np.random.random((input_layer_size, h1)) - 1
        self.X01 = self.X_train
        self.delta01 = np.zeros((input_layer_size, h1))

        self.w12 = 2 * np.random.random((h1, h2)) - 1
        self.X12 = np.zeros((len(self.X_train), h1))
        self.delta12 = np.zeros((h1, h2))

        self.w23 = 2 * np.random.random((h2, output_layer_size)) - 1
        self.X23 = np.zeros((len(self.X_train), h2))
        self.delta23 = np.zeros((h2, output_layer_size))

        self.deltaOut = np.zeros((output_layer_size, 1))

    # ------------------------------------------------------------------
    # Activation functions and their derivatives
    # ------------------------------------------------------------------

    def __activation(self, x, activation):
        """Apply the named activation to ``x`` (ReLU for unrecognised names)."""
        if activation == "sigmoid":
            return self.__sigmoid(x)
        if activation == "tanh":
            return self.__tanh(x)
        return self.__Relu(x)

    def __activation_derivative(self, x, activation):
        """Return the derivative of the named activation.

        ``x`` must be the activation *output* (not the pre-activation input);
        that is what every caller in this class passes.
        """
        if activation == "sigmoid":
            return self.__sigmoid_derivative(x)
        if activation == "tanh":
            return self.__tanh_derivative(x)
        return self.__ReLu_derivative(x)

    def __sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def __sigmoid_derivative(self, x):
        # For a = sigmoid(z): da/dz = a * (1 - a).
        return x * (1 - x)

    def __tanh(self, x):
        return np.tanh(x)

    def __tanh_derivative(self, x):
        # For a = tanh(z): da/dz = 1 - a^2.
        return 1 - x * x

    def __Relu(self, x):
        # Build a new array; the argument must not be modified in place.
        return np.maximum(x, 0)

    def __ReLu_derivative(self, x):
        # 1 where the unit is active, 0 elsewhere.  A new array is returned
        # on purpose: callers pass the cached activations (self.X12,
        # self.X23), which are still needed for the weight updates.
        return np.where(x > 0, 1.0, 0.0)

    # ------------------------------------------------------------------
    # Pre-processing
    # ------------------------------------------------------------------

    def preprocess(self, X):
        """Encode and standardize a raw data frame.

        Values containing ``?`` are treated as missing and replaced by a
        placeholder, every column is converted to integer category codes, and
        all columns are then standardized with ``StandardScaler``.

        Args:
            X: Raw ``pandas.DataFrame``.

        Returns:
            A new ``pandas.DataFrame`` holding the standardized values; the
            argument itself is not modified.
        """
        # Mark '?' entries as missing, then give all missing entries one
        # shared placeholder so they end up in a category of their own.
        X = X.replace('[?]', np.nan, regex=True)
        X = X.fillna('0xx0')

        # Encode every column (numeric ones included) as category codes.
        for col in X.columns:
            X[col] = X[col].astype('category').cat.codes

        scaler = StandardScaler()
        return pd.DataFrame(scaler.fit_transform(X))

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------

    def train(self, activation, max_iterations=1000, learning_rate=0.1):
        """Train on the training split and print the error and final weights.

        Args:
            activation: ``"sigmoid"``, ``"tanh"``, or anything else for ReLU.
            max_iterations: Number of full-batch gradient steps.
            learning_rate: Step size applied to every weight update.
        """
        out = None
        for _ in range(max_iterations):
            out = self.forward_pass(activation)
            self.backward_pass(out, activation)

            # The deltas already carry the sign of (target - output), so the
            # updates are added to the weights.
            self.w23 += learning_rate * self.X23.T.dot(self.deltaOut)
            self.w12 += learning_rate * self.X12.T.dot(self.delta23)
            self.w01 += learning_rate * self.X01.T.dot(self.delta12)

        if out is None:
            # No iteration ran; report the error of the untrained network
            # instead of failing on an undefined value.
            out = self.forward_pass(activation)

        # Error of the last forward pass (taken before the final update).
        error = 0.5 * np.power((out - self.y_train), 2)

        print("After " + str(max_iterations) + " iterations, the total error is " + str(np.sum(error)))
        print("The final weight vectors are (starting from input to output layers)")
        print(self.w01)
        print(self.w12)
        print(self.w23)

    def __feed_forward(self, inputs, activation):
        """Propagate ``inputs`` through the network.

        Returns:
            Tuple ``(hidden1, hidden2, out)`` with the activations of both
            hidden layers and of the output layer.
        """
        hidden1 = self.__activation(np.dot(inputs, self.w01), activation)
        hidden2 = self.__activation(np.dot(hidden1, self.w12), activation)
        out = self.__activation(np.dot(hidden2, self.w23), activation)
        return hidden1, hidden2, out

    def forward_pass(self, activation):
        """Run the training set through the network and return its output.

        The hidden-layer activations are cached in ``self.X12`` and
        ``self.X23`` for the subsequent backward pass.
        """
        self.X12, self.X23, out = self.__feed_forward(self.X_train, activation)
        return out

    def testForwardPass(self, activation):
        """Run the test set through the network and return its output.

        The activations cached for training are left untouched.
        """
        _, _, out = self.__feed_forward(self.X_test, activation)
        return out

    def backward_pass(self, out, activation):
        """Compute the error terms of all layers, from the output backwards."""
        self.compute_output_delta(out, activation)
        self.compute_hidden_layer2_delta(activation)
        self.compute_hidden_layer1_delta(activation)

    def compute_output_delta(self, out, activation):
        """Store the output-layer error term in ``self.deltaOut``."""
        self.deltaOut = (self.y_train - out) * self.__activation_derivative(out, activation)

    def compute_hidden_layer2_delta(self, activation):
        """Store the error term of the second hidden layer in ``self.delta23``."""
        self.delta23 = (self.deltaOut.dot(self.w23.T)) * self.__activation_derivative(self.X23, activation)

    def compute_hidden_layer1_delta(self, activation):
        """Store the error term of the first hidden layer in ``self.delta12``."""
        self.delta12 = (self.delta23.dot(self.w12.T)) * self.__activation_derivative(self.X12, activation)

    def compute_input_layer_delta(self, activation):
        """Store an error term for the input layer in ``self.delta01``.

        Not called by ``backward_pass``.
        """
        # NOTE(review): the formula is kept as it was, but delta01 starts out
        # with shape (inputs, h1), so delta01.dot(w01.T) is (inputs, inputs)
        # and does not generally line up with X01 - confirm the intended
        # formula before using this method.
        self.delta01 = np.multiply(
            self.__activation_derivative(self.X01, activation),
            self.delta01.dot(self.w01.T),
        )

    # ------------------------------------------------------------------
    # Evaluation
    # ------------------------------------------------------------------

    def predict(self, activation):
        """Return the summed squared error (times 0.5) on the test split."""
        out = self.testForwardPass(activation)
        error = 0.5 * np.power((out - self.y_test), 2)
        return np.sum(error)


if __name__ == "__main__":
    # Data set the network is trained and evaluated on.  Other data sets that
    # can be plugged in here:
    #   https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data
    #   https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data
    #   https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data
    dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

    # Activation function used for both training and evaluation.
    # Implemented choices: "tanh", "sigmoid"; any other value selects ReLU.
    activation_name = "sigmoid"

    neural_network = NeuralNet(dataset_url)
    neural_network.train(activation_name)

    # Report the error measured on the held-out test split.
    testError = neural_network.predict(activation_name)
    print("Test Error: " + str(testError))

After 1000 iterations, the total error is 39.06004289477169
The final weight vectors are (starting from input to output layers)
[[-2.32839342  0.34854218  0.46360673  1.12103261]
 [ 0.84028522  0.22355759 -0.85352422 -0.15839862]
 [ 3.08555386 -1.81236435  1.49154536 -1.6478605 ]
 [ 2.41118277 -0.89845492  0.60004566 -2.12984024]]
[[-6.07502845 -5.42959156]
 [ 5.0155346   2.17705345]
 [-1.99181053 -3.03800078]
 [ 7.07171862  1.1664247 ]]
[[-6.89357088]
 [-2.73780885]]
Test Error: 12.577219998339707
