In [1]:
import numpy as np

In [2]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

In [3]:
def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))

In [165]:
class NeuralNetwork:
    def __init__(self, x, y, learning_rate=2, reg=False, reg_parameter=0.5):
        self.input          =   x
        self.weights1       =   np.random.rand(self.input.shape[1],4) 
        self.weights2       =   np.random.rand(4,3)
        self.weights3       =   np.random.rand(3,1)
        self.y              =   y
        self.output         =   np.zeros(y.shape)
        self.learn_rate     =   learning_rate
        self.reg            =   reg
        self.reg_parameter  =   reg_parameter
        
    def feedforward(self):
        # Computing A_1
        self.z_1 = np.dot(self.input, self.weights1)
        self.layer1 = sigmoid(self.z_1)
        
        # Computing A_2
        self.z_2 = np.dot(self.layer1, self.weights2)
        self.layer2 = sigmoid(self.z_2)
        
        # Computing A_3
        self.z_3 = np.dot(self.layer2, self.weights3)
        self.output = sigmoid(self.z_3)
        
    def backprop(self):
        # application of the chain rule to find derivative of the loss function with respect to w1, w2 and w3

        # No regularization
        if(self.reg==False):
            d_error1 = 2*(self.y - self.output) * sigmoid_derivative(self.z_3)
            d_weights3 = np.dot(self.layer2.T, d_error1)
            
            d_error2 = np.dot(d_error1, self.weights3.T) * sigmoid_derivative(self.z_2)
            d_weights2 = np.dot(self.layer1.T, d_error2)
            
            d_error3 = np.dot(d_error2, self.weights2.T) * sigmoid_derivative(self.z_1)
            d_weights1 = np.dot(self.input.T, d_error3)

            # update the weights
            self.weights3 -= self.learn_rate*d_weights3
            self.weights2 -= self.learn_rate*d_weights2
            self.weights1 -= self.learn_rate*d_weights1


        # Using regularization
        else:

            d_error1 = 2*(self.y - self.output) * sigmoid_derivative(self.z_3)
            reg_term1 = np.dot((self.reg_parameter/2), self.weights3)
            d_weights3 = np.dot(self.layer2.T, d_error1) + reg_term1

            d_error2 = np.dot(d_error1, self.weights3.T) * sigmoid_derivative(self.z_2)
            reg_term2 = np.dot((self.reg_parameter/2), self.weights2)
            d_weights2 = np.dot(self.layer1.T, d_error2) + reg_term2
            
            
            d_error3 = np.dot(d_error2, self.weights2.T) * sigmoid_derivative(self.z_1)
            reg_term3 = np.dot((self.reg_parameter/2), self.weights1)
            d_weights1 = np.dot(self.input.T, d_error3) + reg_term3

            # update the weights
            self.weights3 -= self.learn_rate*d_weights3
            self.weights2 -= self.learn_rate*d_weights2
            self.weights1 -= self.learn_rate*d_weights1
        
        
    def predict(self, x):
        self.input = x
        self.feedforward()
        return self.output
        

In [166]:
x = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

In [167]:
x_test = np.array([[1, 0, 1],
                   [0, 0, 0],
                   [1, 1, 0],
                   [1, 1, 1]])

In [168]:
y = np.array([0, 1, 1, 0])
y = y.reshape([4,1])

In [169]:
my_nn = NeuralNetwork(x,y)

In [170]:
my_nn.feedforward()
my_nn.backprop()

In [171]:
niter = 20
for i in range(0, niter):
    my_nn.feedforward()
    my_nn.backprop()
    

In [172]:
my_nn.weights3

array([[2.59050154],
       [2.15122381],
       [2.10594455]])

In [173]:
my_nn.predict(x_test)

array([[0.99784531],
       [0.99604441],
       [0.99779067],
       [0.99815775]])