# Multi Layer Perceptron

In [1]:
# importing Libraries
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets
import pandas as pd
import numpy as np

# Defining a class with required functions to train our neural model

In [2]:
# import the necessary packages
class NeuralNetwork:
    
# initializes the weights and frames the network architecture    
    def __init__(self, layers, alpha=0.1):
        self.W = []
        self.layers = layers
        self.alpha = alpha
        for i in np.arange(0, len(layers) - 2):
            """here we randomly initialize a weight matrix connecting the
               number of nodes in each respective layer together,
               adding an extra node for the bias"""
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            self.W.append(w / np.sqrt(layers[i]))
            """here,the last two layers are a special case where the input
               connections need a bias term but the output does not"""
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))
        
        
# constructing and returning a string that represents the network architecture
    def __repr__(self):
        return "NeuralNetwork: {}".format("-".join(str(l) for l in self.layers))
        
#here we compute and return the sigmoid activation value for a given input value
    def sigmoid(self, x):
        return 1.0 / (1 + np.exp(-x))    

#here compute the derivative of the sigmoid function ASSUMING that x has 
#already been passed through the 'sigmoid' function
    def sigmoid_deriv(self, x):
        return x * (1 - x)
    
# here we insert a column of 1's as the last entry in the feature
#this allows us to treat the biasas a trainable parameter within the weight matrix
    def fit(self, X, y, epochs=1000, displayUpdate=100):
        X = np.c_[X, np.ones((X.shape[0]))]
        # it loop over the desired number of epochs
        for epoch in np.arange(0, epochs):
            #  it loop over each individual data point and train our network on it
            for (x, target) in zip(X, y):
                self.fit_partial(x, target)
            # it checks to see if we should display a training update
            if epoch == 0 or (epoch + 1) % displayUpdate == 0:
                loss = self.calculate_loss(X, y)
        return self.W
                
                
#here we construct our list of output activations for each layer as our data point 
#flows through the network; the first activation is a special case -- it's just the input
#feature vector itself

    def fit_partial(self, x, y):
        A = [np.atleast_2d(x)]

    # FEEDFORWARD:
        # it loop over the layers in the network
        for layer in np.arange(0, len(self.W)):
            """feedforward the activation at the current layer by
               taking the dot product between the activation and
               the weight matrix -- this is called the "net input"
               to the current layer"""
            net = A[layer].dot(self.W[layer])
            # here we compute the "net output" is simply applying our
            # nonlinear activation function to the net input
            out = self.sigmoid(net)
            # once we have the net output, we add it to our list of
            # activations
            A.append(out)
            
        # BACKPROPAGATION
        # the first phase of backpropagation is to compute the
        # difference between our *prediction* (the final output
        # activation in the activations list) and the true target
        # value
        error = A[-1] - y
        # from here, we need to apply the chain rule and build our
        # list of deltas 'D'; the first entry in the deltas is
        # simply the error of the output layer times the derivative
        # of our activation function for the output value
        D = [error * self.sigmoid_deriv(A[-1])]
        # once you understand the chain rule it becomes super easy
        # to implement with a 'for' loop -- simply loop over the
        # layers in reverse order (ignoring the last two since we
        # already have taken them into account)
        
        for layer in np.arange(len(A) - 2, 0, -1):
            # the delta for the current layer is equal to the delta
            # of the *previous layer* dotted with the weight matrix
            # of the current layer, followed by multiplying the delta
            # by the derivative of the nonlinear activation function
            # for the activations of the current layer
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)
        # since we looped over our layers in reverse order we need to
        # reverse the deltas
        D = D[::-1]
        # WEIGHT UPDATE PHASE
        # it loop over the layers
        for layer in np.arange(0, len(self.W)):
            # we update our weights by taking the dot product of the layer
            # activations with their respective deltas, then multiplying
            # this value by some small learning rate and adding to our
            # weight matrix -- this is where the actual "learning" takes
            # place
            self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])

            
# here, we initialize the output prediction as the input features -- this
# value will be (forward) propagated through the network to  obtain the final prediction

    def predict(self, X, addBias=True):
        p = np.atleast_2d(X)
        # check to see if the bias column should be added
        if addBias:
            # here we insert a column of 1's as the last entry in the feature
            # matrix (bias)
            p = np.c_[p, np.ones((p.shape[0]))]
        # loop over our layers in the network
        for layer in np.arange(0, len(self.W)):
            # here we compute the output prediction is as simple as taking
            # the dot product between the current activation value 'p'
            # and the weight matrix associated with the current layer,
            # then passing this value through a nonlinear activation
            # function
            p = self.sigmoid(np.dot(p, self.W[layer]))
        # returns the predicted value
        return p

# makes predictions for the input data points then computethe loss    
    def calculate_loss(self, X, targets):
        targets = np.atleast_2d(targets)
        predictions = self.predict(X, addBias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)
        # return the loss
        return loss

# Loading the data and implementing the model

In [3]:
#Load the data
dataset =  pd.read_csv('train_data.csv')
dl =  pd.read_csv('train_labels.csv')

In [4]:
#Converting the given inputs to float 
data = np.float64(dataset)

# Converting the target class into an integer(0,1,2,3).
df=(dl["1.000000000000000000e+00"]*1+dl["0.000000000000000000e+00"]*2+dl["0.000000000000000000e+00.1"]*3+dl["0.000000000000000000e+00.2"]*4).astype('int')
df = list(map(lambda el:[el], df-1))
target = np.int64(df)
print("[INFO] samples: {}, dim: {}".format(data.shape[0],data.shape[1]))

[INFO] samples: 24753, dim: 784


In [5]:
# construct the training and Validation splits
(trainX, ValX, trainY, ValY) = train_test_split(data,target, test_size=0.20)

# convert the labels from integers to vectors
trainY = LabelBinarizer().fit_transform(trainY)
ValY = LabelBinarizer().fit_transform(ValY)
print("[INFO] samples: {}, dim: {}".format(trainY.shape[0],trainY.shape[1]))

[INFO] samples: 19802, dim: 4


## In our Model, we input

        Input Layers = 1
        Hidden Layer = 1
        Output Layer = 1
        Epoches.     = 5
        Hidden Layer nodes = 1000


In [6]:
# training the network
print("[INFO] training network...")
nn = NeuralNetwork([trainX.shape[1], 1000, 4])
print("[INFO] {}".format(nn))
weights = nn.fit(trainX, trainY, epochs=5)

[INFO] training network...
[INFO] NeuralNetwork: 784-1000-4


In [7]:
# evaluate the network with validation set

print("[INFO] evaluating network...")
predictions = nn.predict(ValX)
predictions = predictions.argmax(axis=1)
print(classification_report(ValY.argmax(axis=1), predictions))


[INFO] evaluating network...
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1217
           1       0.98      0.99      0.99      1368
           2       0.96      0.97      0.97      1196
           3       0.98      0.96      0.97      1170

    accuracy                           0.98      4951
   macro avg       0.98      0.98      0.98      4951
weighted avg       0.98      0.98      0.98      4951



#### Our Validation accuracy is 98%

### Now saving our weights and model into a .npy file

In [8]:
np.save( 'weights.npy' , weights )

  return array(a, dtype, copy=False, order=order, subok=True)
