### Import packages

In [29]:
import numpy as np
import time

### Define the Sigmoid Function

In [30]:
#Sigmoid Function
def sigmoid(x):
    
    s = 1.0/(1 + np.exp(-x))
    
    return s

### Define the Sigmoid Derivative Function (Backpropagation) 

In [31]:
#Function to calculate derivative of the Sigmoid
def sigmoid_derivative(s):
    
    ds = s * (1 - s)
    
    return ds

### Input and Output variables

In [32]:
#Define the XOR inputs and outputs
XOR_input = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])

### Initialize the Weights and Bias 

In [33]:
#Define Weight and Bias of 1st Hidden Layer
w1 = np.random.uniform(size = [2, 2], low = 0.0, high = 1.0)
b1 = np.random.uniform(size = [1, 2], low = 0.0, high = 1.0)

#Define Weight and Bias of Output Neuron
w2 = np.random.uniform(size = [2, 1], low = 0.0, high = 1.0)
b2 = np.random.uniform(size = [1, 1], low = 0.0, high = 1.0)

In [34]:
print(w1)

[[0.71392619 0.19168285]
 [0.68863783 0.18098271]]


### Loop for 100001 times for training

In [35]:
#Define the learning rate
learning_rate = 0.01

s = time.clock()


for i in range(100001):
    
    #Forward Propagation
    
    #Compute the forward propagation of first hidden layer
    A1 = np.dot(XOR_input, w1) + b1
    #Activation Function
    Z1 = sigmoid(A1)
    
    #Compute the forward propagation
    A2 = np.dot(Z1, w2) + b2
    #Activation Function for output neuron
    Yhat = sigmoid(A2)
    
    #Backpropagation
    
    #Calculate error    
    E = Y - Yhat
    #Derivative of sigmoid at output layer
    DE = E * sigmoid_derivative(Yhat)
    
    #Calculate error 
    E2 = DE.dot(w2.T)
    #Derivative at hidden layer
    DE2 = E2 * sigmoid_derivative(Z1)
    
    #Update weights and bias of output layer
    w2 += Z1.T.dot(DE) * learning_rate
    b2 += np.sum(DE, axis = 0, keepdims = True) * learning_rate
    
    #Update weights and bias at hidden layer
    w1 += XOR_input.T.dot(DE2) * learning_rate
    b1 += np.sum(DE2, axis = 0, keepdims = True) * learning_rate
    
    #Printing the cost and epoch at each thousandth epoch
    if i % 1000 == 0:
        print('Epoch = {} \n Error = {}'.format(i, E))

e = time.clock()

print('Final prediction = {}'.format(Yhat))
print('Time Elapsed = {}'.format(e-s))    

  after removing the cwd from sys.path.


Epoch = 0 
 Error = [[-0.8538211 ]
 [ 0.13213061]
 [ 0.13167598]
 [-0.87717805]]
Epoch = 1000 
 Error = [[-0.50744671]
 [ 0.49044562]
 [ 0.49043072]
 [-0.51043945]]
Epoch = 2000 
 Error = [[-0.50002064]
 [ 0.4972714 ]
 [ 0.49725005]
 [-0.50391655]]
Epoch = 3000 
 Error = [[-0.49903521]
 [ 0.49716558]
 [ 0.49712421]
 [-0.50466263]]
Epoch = 4000 
 Error = [[-0.49804944]
 [ 0.49697622]
 [ 0.49691693]
 [-0.5055187 ]]
Epoch = 5000 
 Error = [[-0.49700002]
 [ 0.49674786]
 [ 0.49667326]
 [-0.5064433 ]]
Epoch = 6000 
 Error = [[-0.49586826]
 [ 0.49647431]
 [ 0.49638738]
 [-0.50744784]]
Epoch = 7000 
 Error = [[-0.49463341]
 [ 0.49614765]
 [ 0.49605171]
 [-0.50854533]]
Epoch = 8000 
 Error = [[-0.49327148]
 [ 0.49575826]
 [ 0.495657  ]
 [-0.50975016]]
Epoch = 9000 
 Error = [[-0.49175438]
 [ 0.49529453]
 [ 0.49519192]
 [-0.51107837]]
Epoch = 10000 
 Error = [[-0.49004871]
 [ 0.49474232]
 [ 0.49464263]
 [-0.51254798]]
Epoch = 11000 
 Error = [[-0.48811426]
 [ 0.49408447]
 [ 0.4939922 ]
 [-0.5141



### Observing the weight and bias matrices

In [36]:
w1

array([[5.75449282, 3.74088391],
       [5.7534533 , 3.74066222]])

In [37]:
b1

array([[-2.40777407, -5.73248172]])

In [38]:
w2

array([[ 7.52400619],
       [-8.1527696 ]])

In [39]:
b2

array([[-3.39544762]])

### Testing the neural network

In [40]:
# Input -> 0, 0
# Output -> 0
A1 = np.dot([[0, 0]], w1) + b1
Z1 = sigmoid(A1)

A2 = np.dot(Z1, w2) + b2
Yhat = sigmoid(A2)

print(Yhat)

[[0.0573013]]


In [41]:
# Input -> 1, 0
# Output -> 1
A1 = np.dot([[1, 0]], w1) + b1
Z1 = sigmoid(A1)

A2 = np.dot(Z1, w2) + b2
Yhat = sigmoid(A2)

print(Yhat)

[[0.94753275]]
