### Import packages

In [1]:
import numpy as np
import time

### Define the Sigmoid function and its derivative

In [2]:
#Sigmoid Function
def sigmoid(x):
    """Apply the logistic sigmoid 1 / (1 + exp(-x)) to x.

    Operates element-wise when x is a NumPy array and also accepts scalars.
    """
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator

#Function to calculate derivative of the Sigmoid
def sigmoid_derivative(s):
    """Return s * (1 - s).

    This is the sigmoid's slope written in terms of the sigmoid's own
    output, so s should be an activated value rather than the raw input.
    """
    complement = 1 - s
    return s * complement

### Define the TanH function and its derivative

In [3]:
#TanH Function
def tanh(x):
    """Hyperbolic tangent of x, delegated to np.tanh (element-wise on arrays)."""
    return np.tanh(x)

#Function to calculate derivative of the TanH
def tanh_derivative(t):
    """Return 1 - t**2.

    This is the tanh slope written in terms of the tanh output, so t
    should be an activated value rather than the raw input.
    """
    squared = t ** 2
    return 1 - squared

### Define the ReLU function and its derivative

In [4]:
#ReLU Function
def ReLU(x):
    """Rectified linear unit: element-wise maximum of 0 and x."""
    return np.maximum(0, x)

#Function to calculate derivative of the ReLU
def ReLU_derivative(r):
    """Element-wise derivative of ReLU, evaluated from the ReLU output r.

    Parameters
    ----------
    r : array-like
        Values produced by ReLU (or any array whose sign pattern should
        drive the gradient mask).

    Returns
    -------
    numpy.ndarray
        Same shape as r, holding 1.0 where r > 0 and 0.0 everywhere else.
        The result broadcasts against an error term of the same shape,
        e.g. ``E * ReLU_derivative(Yhat)``.
    """
    # The previous version tested r.all() (a single bool) against 0, which
    # never produced a per-element gradient and left the result unassigned
    # (UnboundLocalError) as soon as any entry of r was zero.
    r = np.asarray(r)
    return (r > 0).astype(float)

### Input and output values

In [5]:
#XOR truth table: one row per sample, two input bits each
XOR_input = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
#Expected XOR result for each row of XOR_input, kept as a column vector
Y = np.array([
    [0],
    [1],
    [1],
    [0],
])

### Initialize Weights and Bias

In [6]:
#Hidden layer: 2 inputs -> 2 hidden units, values drawn uniformly from [0, 1)
w1 = np.random.uniform(size=(2, 2))
b1 = np.random.uniform(size=(1, 2))

#Output neuron: 2 hidden units -> 1 output, values drawn uniformly from [0, 1)
w2 = np.random.uniform(size=(2, 1))
b2 = np.random.uniform(size=(1, 1))

### Train for 100001 iterations; uncomment the lines for your preferred activation function (Sigmoid/ReLU/Tanh)

In [7]:
#Define the learning rate
learning_rate = 0.01

#Start the timer. time.clock() was removed in Python 3.8, so use
#time.perf_counter(), the supported high-resolution timer for elapsed time.
s = time.perf_counter()


for i in range(100001):

    #Forward Propagation

    #Compute the pre-activation of the first hidden layer
    A1 = np.dot(XOR_input, w1) + b1
    #Activation Function for the hidden layer
    #Choose between Sigmoid/ReLU/tanh
    Z1 = sigmoid(A1)
    #Z1 = ReLU(A1)
    #Z1 = tanh(A1)

    #Compute the pre-activation of the output neuron
    A2 = np.dot(Z1, w2) + b2
    #Activation Function for output neuron
    #Choose between Sigmoid/ReLU/tanh
    #Yhat = sigmoid(A2)
    #Yhat = ReLU(A2)
    Yhat = tanh(A2)

    #Backpropagation

    #Calculate error at the output
    E = Y - Yhat
    #Scale the error by the derivative of the output activation
    #Choose between Sigmoid/ReLU/tanh (must match the activation used for Yhat)
    #DE = E * sigmoid_derivative(Yhat)
    #DE = E * ReLU_derivative(Yhat)
    DE = E * tanh_derivative(Yhat)

    #Propagate the output delta back through w2 to the hidden layer
    E2 = DE.dot(w2.T)
    #Scale by the derivative of the hidden activation
    #Choose between Sigmoid/ReLU/tanh (must match the activation used for Z1)
    DE2 = E2 * sigmoid_derivative(Z1)
    #DE2 = E2 * ReLU_derivative(Z1)
    #DE2 = E2 * tanh_derivative(Z1)

    #Update weights and bias of output layer
    #(E is Y - Yhat, so adding the scaled delta moves Yhat towards Y)
    w2 += Z1.T.dot(DE) * learning_rate
    b2 += np.sum(DE, axis = 0, keepdims = True) * learning_rate

    #Update weights and bias at hidden layer
    w1 += XOR_input.T.dot(DE2) * learning_rate
    b1 += np.sum(DE2, axis = 0, keepdims = True) * learning_rate

    #Print the epoch and per-sample error at every thousandth epoch
    if i % 1000 == 0:
        print('Epoch = {} \n Error = {}'.format(i, E))

#Stop the timer
e = time.perf_counter()

print('Final prediction = {}'.format(Yhat))
print('Time Elapsed = {}'.format(e-s))

  after removing the cwd from sys.path.


Epoch = 0 
 Error = [[-0.41234119]
 [ 0.49979367]
 [ 0.53347261]
 [-0.53208444]]
Epoch = 1000 
 Error = [[-0.41814235]
 [ 0.47881676]
 [ 0.49887444]
 [-0.56363829]]
Epoch = 2000 
 Error = [[-0.38732537]
 [ 0.47958514]
 [ 0.48244395]
 [-0.57523572]]
Epoch = 3000 
 Error = [[-0.33761012]
 [ 0.47613446]
 [ 0.45872056]
 [-0.58975142]]
Epoch = 4000 
 Error = [[-0.26910993]
 [ 0.46593374]
 [ 0.42689016]
 [-0.6053232 ]]
Epoch = 5000 
 Error = [[-0.19693981]
 [ 0.45146699]
 [ 0.39176214]
 [-0.61422664]]
Epoch = 6000 
 Error = [[-0.14344922]
 [ 0.4412499 ]
 [ 0.36314289]
 [-0.60420729]]
Epoch = 7000 
 Error = [[-0.11563498]
 [ 0.4364614 ]
 [ 0.34645496]
 [-0.56815995]]
Epoch = 8000 
 Error = [[-0.10673392]
 [ 0.42283175]
 [ 0.34255755]
 [-0.49791184]]
Epoch = 9000 
 Error = [[-0.10468494]
 [ 0.37802299]
 [ 0.33968338]
 [-0.38306181]]
Epoch = 10000 
 Error = [[-0.09006188]
 [ 0.30996926]
 [ 0.30208777]
 [-0.25257389]]
Epoch = 11000 
 Error = [[-0.06456137]
 [ 0.24331   ]
 [ 0.24247077]
 [-0.1530

Epoch = 97000 
 Error = [[-0.00052806]
 [ 0.01850384]
 [ 0.01859771]
 [-0.00084499]]
Epoch = 98000 
 Error = [[-0.00052129]
 [ 0.01838201]
 [ 0.01847522]
 [-0.00083377]]
Epoch = 99000 
 Error = [[-0.00051468]
 [ 0.01826242]
 [ 0.01835497]
 [-0.00082282]]
Epoch = 100000 
 Error = [[-0.00050823]
 [ 0.01814499]
 [ 0.0182369 ]
 [-0.00081215]]
Final prediction = [[5.08227819e-04]
 [9.81855014e-01]
 [9.81763103e-01]
 [8.12146016e-04]]
Time Elapsed = 22.288644


