### Import packages

In [1]:
import numpy as np
import time

### Define the Sigmoid Function

In [2]:
#Sigmoid Function
def sigmoid(x):
    
    s = 1.0/(1 + np.exp(-x))
    
    return s

### Define the Sigmoid Derivative Function (Backpropagation) 

In [3]:
#Function to calculate derivative of the Sigmoid
def sigmoid_derivative(s):
    
    ds = s * (1 - s)
    
    return ds

### Input and Output variables

In [4]:
#Define the XOR inputs and outputs
XOR_input = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])

### Initialize the Weights and Bias 

In [5]:
#Define Weight and Bias of 1st Hidden Layer
w1 = np.random.uniform(size = [2, 2])
b1 = np.random.uniform(size = [1, 2])

#Define Weight and Bias of Output Neuron
w2 = np.random.uniform(size = [2, 1])
b2 = np.random.uniform(size = [1, 1])

### Loop for 100001 times for training

In [6]:
#Define the learning rate
learning_rate = 0.01

s = time.clock()


for i in range(100001):
    
    #Forward Propagation
    
    #Compute the forward propagation of first hidden layer
    A1 = np.dot(XOR_input, w1) + b1
    #Activation Function
    Z1 = sigmoid(A1)
    
    #Compute the forward propagation
    A2 = np.dot(Z1, w2) + b2
    #Activation Function for output neuron
    Yhat = sigmoid(A2)
    
    #Backpropagation
    
    #Calculate error    
    E = Y - Yhat
    #Derivative of sigmoid at output layer
    DE = E * sigmoid_derivative(Yhat)
    
    #Calculate error 
    E2 = DE.dot(w2.T)
    #Derivative at hidden layer
    DE2 = E2 * sigmoid_derivative(Z1)
    
    #Update weights and bias of output layer
    w2 += Z1.T.dot(DE) * learning_rate
    b2 += np.sum(DE, axis = 0, keepdims = True) * learning_rate
    
    #Update weights and bias at hidden layer
    w1 += XOR_input.T.dot(DE2) * learning_rate
    b1 += np.sum(DE2, axis = 0, keepdims = True) * learning_rate
    
    #Printing the cost and epoch at each thousandth epoch
    if i % 1000 == 0:
        print('Epoch = {} \n Error = {}'.format(i, E))

e = time.clock()

print('Final prediction = {}'.format(Yhat))
print('Time Elapsed = {}'.format(e-s))    

Epoch = 0 
 Error = [[-0.74195473]
 [ 0.24959364]
 [ 0.24808825]
 [-0.75942342]]
Epoch = 1000 
 Error = [[-0.49832786]
 [ 0.49840965]
 [ 0.49686893]
 [-0.50615891]]
Epoch = 2000 
 Error = [[-0.4960636 ]
 [ 0.50038258]
 [ 0.49892911]
 [-0.50435006]]
Epoch = 3000 
 Error = [[-0.4957527 ]
 [ 0.50033138]
 [ 0.49895708]
 [-0.50463413]]
Epoch = 4000 
 Error = [[-0.49543877]
 [ 0.50026655]
 [ 0.49896638]
 [-0.50494741]]
Epoch = 5000 
 Error = [[-0.49510815]
 [ 0.50019968]
 [ 0.49896886]
 [-0.50527908]]
Epoch = 6000 
 Error = [[-0.4947588 ]
 [ 0.50013005]
 [ 0.49896401]
 [-0.50563108]]
Epoch = 7000 
 Error = [[-0.49438851]
 [ 0.50005673]
 [ 0.49895113]
 [-0.50600568]]
Epoch = 8000 
 Error = [[-0.49399479]
 [ 0.49997867]
 [ 0.49892931]
 [-0.50640543]]
Epoch = 9000 
 Error = [[-0.49357481]
 [ 0.49989461]
 [ 0.49889747]
 [-0.50683321]]
Epoch = 10000 
 Error = [[-0.49312529]
 [ 0.49980309]
 [ 0.49885427]
 [-0.5072923 ]]
Epoch = 11000 
 Error = [[-0.49264249]
 [ 0.49970238]
 [ 0.49879807]
 [-0.5077

### Observing the weight and bias matrices

In [7]:
w1

array([[3.47768534, 5.73630316],
       [3.48062822, 5.7527168 ]])

In [8]:
b1

array([[-5.3154707 , -2.34812277]])

In [9]:
w2

array([[-7.80067818],
       [ 7.15732431]])

In [10]:
b2

array([[-3.1871882]])

### Testing the neural network

In [11]:
# Input -> 0, 0
# Output -> 0
A1 = np.dot([[0, 0]], w1) + b1
Z1 = sigmoid(A1)

A2 = np.dot(Z1, w2) + b2
Yhat = sigmoid(A2)

print(Yhat)

[[0.06906676]]


In [12]:
# Input -> 1, 0
# Output -> 1
A1 = np.dot([[1, 0]], w1) + b1
Z1 = sigmoid(A1)

A2 = np.dot(Z1, w2) + b2
Yhat = sigmoid(A2)

print(Yhat)

[[0.93494111]]
