In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [20]:


class NeuralNet:
    def __init__(self, dataFile, header=True, h=4):

        # Take in the file
        raw_input = pd.read_csv(dataFile)
        # TODO checked: Remember to implement the preprocess method
        processed_data = self.preprocessData(raw_input)

        # Splitting the data to train and test
        self.train_dataset, self.test_dataset = train_test_split(
            processed_data)

        # Getting number of columns and rows in train data set
        ncols = len(self.train_dataset.columns)
        nrows = len(self.train_dataset.index)

        # Getting the Xs and Ys
        self.X = self.train_dataset.iloc[:, [13,29,30,31]].values.reshape(nrows, 4)
        self.y = self.train_dataset.iloc[:,[32]].values.reshape(nrows, 1)

        # Find number of input and output layers from the dataset
        input_layer_size = len(self.X[1])
        if not isinstance(self.y[0], np.ndarray):
            self.output_layer_size = 1
        else:
            self.output_layer_size = len(self.y[0])

        # assign random weights to matrices in network
        # number of weights connecting layers = (no. of nodes in previous layer) x (no. of nodes in following layer)
        self.W_hidden = 2 * np.random.random((input_layer_size, h)) - 1
        self.Wb_hidden = 2 * np.random.random((1, h)) - 1

        self.W_output = 2 * np.random.random((h, self.output_layer_size)) - 1
        self.Wb_output = np.ones((1, self.output_layer_size))

        self.deltaOut = np.zeros((self.output_layer_size, 1))
        self.deltaHidden = np.zeros((h, 1))
        self.h = h

    # Preprocessing raw data
    def preprocessData(self, data):
        data = data.dropna()
        data = data.drop_duplicates()

        # Creating correlation matrix
        cor_matrix = data.corr().abs()

        # Selecting upper triangle of correlation matrix
        upper_tri = cor_matrix.where(
            np.triu(np.ones(cor_matrix.shape), k=1).astype(np.bool))

        # Finding index of feature columns with correlation greater than 0.95
        to_drop = [column for column in upper_tri.columns if any(
            upper_tri[column] > 0.95)]

        # Dropping Marked Features
        data = data.drop(data.columns[to_drop], axis=1)
        return data

    # TODOchecked marked
    def __activation(self, x, activation):
        if activation == "sigmoid":
            self.__sigmoid(self, x)
        if activation == "ReLu":
            self.__relu(self, x)
        if activation == "tanh":
            self.__tanh(self, x)

    # activation methods
    def __sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def __relu(self, x):
        return np.maximum(0, x)

    def __tanh(self, x):
        return np.tanh(x)

    # TODO checked: Define the derivative function for tanh, ReLu and their derivatives
    def __activation_derivative(self, x, activation):
        if activation == "sigmoid":
            self.__sigmoid_derivative(self, x)
        elif activation == "ReLu":
            self.__relu_derivative(self, x)
        elif activation == "tanh":
            self.__tanh_derivative(self, x)

    # derivative of sigmoid function, indicates confidence about existing weight
    def __sigmoid_derivative(self, x):
        return x * (1 - x)

    def __relu_derivative(self, x):
        return np.greater(0, x)

    def __tanh_derivative(self, x):
        return 1 - np.tanh(x) ** 2

    # TRAINNING METHODS
    def train(self, max_iterations=60000, learning_rate=0.25):
        for iteration in range(max_iterations):
            out = self.forward_pass(activation="sigmoid")
            out = self.forward_pass(activation="ReLu")
            out = self.forward_pass(activation="tanh")

            error = 0.5 * np.power((out - self.y), 2)
            # TODO: I have coded the sigmoid activation, you have to do the rest
            self.backward_pass(out, activation="sigmoid")
            self.backward_pass(out, activation="ReLu")
            self.backward_pass(out, activation="tanh")

            update_weight_output = learning_rate * \
                                   np.dot(self.X_hidden.T, self.deltaOut)
            update_weight_output_b = learning_rate * \
                                     np.dot(np.ones((np.size(self.X, 0), 1)).T, self.deltaOut)

            update_weight_hidden = learning_rate * \
                                   np.dot(self.X.T, self.deltaHidden)
            update_weight_hidden_b = learning_rate * \
                                     np.dot(np.ones((np.size(self.X, 0), 1)).T, self.deltaHidden)

            self.W_output += update_weight_output
            self.Wb_output += update_weight_output_b
            self.W_hidden += update_weight_hidden
            self.Wb_hidden += update_weight_hidden_b

        print("After " + str(max_iterations) +
              " iterations, the total error is " + str(np.sum(error)))
        print("The final weight vectors are (starting from input to output layers) \n" + str(self.W_hidden))
        print("The final weight vectors are (starting from input to output layers) \n" + str(self.W_output))

        print("The final bias vectors are (starting from input to output layers) \n" +
              str(self.Wb_hidden))
        print("The final bias vectors are (starting from input to output layers) \n" +
              str(self.Wb_output))

    def forward_pass(self, activation):
        # pass our inputs through our neural network
        in_hidden = np.dot(self.X, self.W_hidden) + self.Wb_hidden
        # TODO: I have coded the sigmoid activation, you have to do the rest
        if activation == "sigmoid":
            self.X_hidden = self.__sigmoid(in_hidden)
        in_output = np.dot(self.X_hidden, self.W_output) + self.Wb_output
        if activation == "sigmoid":
            out = self.__sigmoid(in_output)

        if activation == "ReLu":
            self.X_hidden = self.__relu(in_hidden)
        in_output = np.dot(self.X_hidden, self.W_output) + self.Wb_output
        if activation == "ReLu":
            out = self.__relu(in_output)

        if activation == "tanh":
            self.X_hidden = self.__tanh(in_hidden)
        in_output = np.dot(self.X_hidden, self.W_output) + self.Wb_output
        if activation == "tanh":
            out = self.__tanh(in_output)

        return out

    def backward_pass(self, out, activation):
        # pass our inputs through our neural network
        self.compute_output_delta(out, activation)
        self.compute_hidden_delta(activation)

    # TODO: Implement other activation functions

    def compute_output_delta(self, out, activation):
        if activation == "sigmoid":
            delta_output = (self.y - out) * (self.__sigmoid_derivative(out))
        elif activation == "ReLu":
            delta_output = (self.y - out) * (self.__relu_derivative(out))
        elif activation == "tanh":
            delta_output = (self.y - out) * (self.__tanh_derivative(out))

        self.deltaOut = delta_output

    def compute_hidden_delta(self, activation):
        if activation == "sigmoid":
            delta_hidden_layer = (self.deltaOut.dot(
                self.W_output.T)) * (self.__sigmoid_derivative(self.X_hidden))
        elif activation == "ReLu":
            delta_hidden_layer = (self.deltaOut.dot(
                self.W_output.T)) * (self.__relu_derivative(self.X_hidden))
        elif activation == "tanh":
            delta_hidden_layer = (self.deltaOut.dot(
                self.W_output.T)) * (self.__tanh_derivative(self.X_hidden))

        self.deltaHidden = delta_hidden_layer

    # TODO: Implement the predict function for applying the trained model on the  test dataset.
    # You can assume that the test dataset has the same format as the training dataset
    # You have to output the test error from this function

    def predict(self, header=True):        
        test_cols = len(self.test_dataset.columns)
        test_rows = len(self.test_dataset.index)
        X = self.test_dataset.iloc[:, [13,29,30,31]].values.reshape(test_rows, 4)
        y = self.test_dataset.iloc[:,[32]].values.reshape(test_rows, 1)
        X = np.array(X)
        W_hidden1 = np.array(self.W_hidden)
        y_hat = np.dot(X, W_hidden1) + self.Wb_hidden
        return y_hat

In [21]:
if __name__ == "__main__":
    # Get data
    trainData = "https://raw.githubusercontent.com/daniel-le18/dataset/master/student-mat.csv"

    # Passing data to NeuralNet
    neural_network = NeuralNet(trainData)

    # Train data
    neural_network.train()

    # Predict
    testError = neural_network.predict()
    print("Test Values = " + str(testError))



After 60000 iterations, the total error is 16060.0
The final weight vectors are (starting from input to output layers) 
[[ 1.37065173e+14  1.37070073e+14 -1.37068532e+14 -1.37081171e+14]
 [ 3.92889187e+14  3.92903232e+14 -3.92898816e+14 -3.92935044e+14]
 [ 7.79087734e+14  7.79115586e+14 -7.79106829e+14 -7.79178669e+14]
 [ 7.91493294e+14  7.91521589e+14 -7.91512693e+14 -7.91585676e+14]]
The final weight vectors are (starting from input to output layers) 
[[ 17562695.66737175]
 [ 17563009.57934659]
 [-17562910.88075263]
 [-17563720.58987682]]
The final bias vectors are (starting from input to output layers) 
[[ 6.47691020e+13  6.47714174e+13 -6.47706894e+13 -6.47766617e+13]]
The final bias vectors are (starting from input to output layers) 
[[17563720.96786926]]


ValueError: operands could not be broadcast together with shapes (99,4) (4,4) 