### Importing Libraries <br>
Import __numpy__ library for matrix manipulations and __math__ for defining sigmoid function  

In [31]:
import numpy as k
import math

### Define the Sigmoid function

In [32]:
def sigmoid(x):
    return 1/(1+math.exp(-x))

### Function to find the derivative of sigmoid

In [34]:
def derivative_sigmoid(y):
    derivative = sigmoid(y)*(1-sigmoid(y))
    return derivative

In [35]:
print(derivative_sigmoid(1))

0.19661193324148185


### Initializing Input Variables(X) and output variable(Y) using numpy array.

In [36]:
X=k.array([[1,0,1,0],[1,0,1,1],[0,1,0,1]])
Y=k.array([[1],[1],[0]])

### Initializing Weights and biases with random values generated by built-in random() function.

In [37]:
Wh=k.random.rand(4,3)
wout=k.random.rand(3,1)
bh=k.random.rand(1,3)
bout=k.random.rand(1,1)

In [38]:
print(Wh, wout, bh, bout)

[[ 0.65915104  0.03445008  0.73383636]
 [ 0.97574663  0.84346261  0.77737003]
 [ 0.43325089  0.69201282  0.86267384]
 [ 0.706186    0.58752663  0.04859208]] [[ 0.28891578]
 [ 0.90022544]
 [ 0.88719054]] [[ 0.28773861  0.6550478   0.84366073]] [[ 0.8725344]]


### calculate hidden layer input

In [39]:
hidden_layer_input = k.dot(X,Wh)+bh
print(hidden_layer_input)

[[ 1.38014054  1.3815107   2.44017093]
 [ 2.08632653  1.96903733  2.48876301]
 [ 1.96967123  2.08603705  1.66962284]]


### calculate nonlinear activation on hidden linear input using sigmoid function

In [40]:
hidden_layer_activation=k.vectorize(sigmoid)(hidden_layer_input)
print(hidden_layer_activation)

[[ 0.79901357  0.79923352  0.91983969]
 [ 0.88956707  0.87750768  0.9233503 ]
 [ 0.8775758   0.88953863  0.84152553]]


### perform linear and nonlinear transformation of hidden layer at output layer

In [41]:
output_layer_input = k.dot(hidden_layer_activation,wout)+bout
output = k.vectorize(sigmoid)(output_layer_input)
print(output_layer_input)
print(output)

[[ 2.63894545]
 [ 2.73868675]
 [ 2.67345869]]
[[ 0.93332637]
 [ 0.93927123]
 [ 0.93544222]]


### calculate the gradient of error at output layer


In [42]:
Error = Y-output
print(Error)

[[ 0.06667363]
 [ 0.06072877]
 [-0.93544222]]


### compute slope at output and hidden layer

In [43]:
slope_output_layer = k.vectorize(derivative_sigmoid)(output)
slope_hidden_layer= k.vectorize(derivative_sigmoid)(hidden_layer_activation)
print(slope_output_layer)
print(slope_hidden_layer)

[[ 0.20258509]
 [ 0.20205984]
 [ 0.20239832]]
[[ 0.21398984  0.21397197  0.20377094]
 [ 0.20640222  0.20743805  0.20346304]
 [ 0.20743222  0.20640467  0.21048396]]


### compute delta at output layer

In [44]:
lr=0.1
d_output = Error*slope_output_layer*lr
print(d_output)

[[ 0.00135071]
 [ 0.00122708]
 [-0.01893319]]


### calculate Error at hidden layer

In [45]:
wout_transpose = k.transpose(wout)
Error_hidden_layer=k.dot(d_output, wout_transpose)
print(Error_hidden_layer)

[[ 0.00039024  0.00121594  0.00119834]
 [ 0.00035452  0.00110465  0.00108866]
 [-0.0054701  -0.01704414 -0.01679735]]


### compute delta hidden layer

In [46]:
d_hidden_layer = Error_hidden_layer * slope_hidden_layer
print(d_hidden_layer)

[[  8.35075971e-05   2.60177506e-04   2.44185980e-04]
 [  7.31745551e-05   2.29146995e-04   2.21501622e-04]
 [ -1.13467463e-03  -3.51799055e-03  -3.53557283e-03]]


### update weight at both output and hidden layer

In [47]:
hidden_layer_activation_transpose=k.transpose(hidden_layer_activation)
wout=wout+k.dot(hidden_layer_activation_transpose, d_output) *lr 
print(wout)
X_transpose = k.transpose(X)
Wh = Wh + k.dot(X_transpose, d_hidden_layer) *lr 
print(Wh)

[[ 0.28747133]
 [ 0.89875689]
 [ 0.88583481]]
[[ 0.65916671  0.03449901  0.73388293]
 [ 0.97563316  0.84311082  0.77701648]
 [ 0.43326656  0.69206175  0.86272041]
 [ 0.70607985  0.58719774  0.04826067]]


### update biases at both output and hidden layer

In [48]:
bh = bh + k.sum(d_hidden_layer, axis=0)*lr
bout=bout+ k.sum(d_output, axis=0)*lr
print(bh,bout)

[[ 0.28764081  0.65474494  0.84335374]] [[ 0.87089886]]
