Preparing the dataframe for training

In [111]:
import numpy as np
import pandas as pd
x_train = np.array([
    [0,0],
    [1,1],
    [1,0],
    [0,1]
])
y_train = np.array([
    [0],
    [1],
    [0],
    [0]
])
r,c = x_train.shape
# y_train.shape
#Combining the features and respective outputs
data = np.hstack((x_train,y_train))
#Converting it into dataframe 
df = pd.DataFrame(data, columns=["x1","x2","y"])
print(df)
print(x_train[0][1])

   x1  x2  y
0   0   0  0
1   1   1  1
2   1   0  0
3   0   1  0
0


Preparing the weight matrix for Wxh (between input and hidden layer) and Who (between hidden and output layer)

In [112]:
#defining the number of neurons in each layer to make it more systematic. The ANN consists of 3 layers, in input layer : 2, hidden layer : 3, output layer : 1 neurons respectively
x_n = 2
h_n = 3
o_n = 1

#defining the weight matrix 
np.random.seed(42)
Wxh = np.random.randn(x_n,h_n)   #randn (n) for normalization  shape (2,3)
Who = np.random.randn(h_n,o_n)   #randn (n) for normalization  shape (3,1)
print(Wxh)

[[ 0.49671415 -0.1382643   0.64768854]
 [ 1.52302986 -0.23415337 -0.23413696]]


Equations for forward propagation 

We have 4 forward propagation equations. In the hidden layer, we have sigmoid activation function, then in the output layer we have sigmoid activation func as well.

In [113]:
def sigmoid(x):
    result = 1/(1 + np.exp(-x))
    return result 

In [114]:
def unactivated_hidden(x_train,Wxh):
    ua_h = x_train @ Wxh         #shape = (4,3)
    return ua_h

def activated_hidden():
    ua_h = unactivated_hidden(x_train,Wxh)
    a_h = sigmoid(ua_h)          #shape = (4,3)
    return a_h

def unactivated_output(Who):
    a_h = activated_hidden()
    ua_o = a_h @ Who             #shape = (4,1)
    return ua_o

def activated_output():
    ua_o = unactivated_output(Who)
    output = sigmoid(ua_o)       #shape = (4,1)
    return output



Cost error used here : Mean Squared Error

In [115]:
def cost(output,y_train):
    sq_term = np.square(output - y_train)
    mse = np.mean(sq_term)
    return mse



Backward Propagation

In [116]:
def transpose(x):
      r,c = x.shape
      L = []
      
      for i in range(c):
            l = []
            for j in range(r):
                  l.append(x[j][i])
            L.append(l)
            del(l)
      result = np.array(L)
      return result
      
# print(transpose(x_train))

sigmoid derivative funtion

In [117]:
def sigmoid_derivative(x):
    num = np.exp(-x)
    term = 1 + np.exp(-x)
    denom = term ** 2
    return num/denom
# result = sigmoid_derivative(x_train)
# print(result)

In [118]:
#finding gradients wrt Wxh and Who, considering j is the mean squared error
#to find out first, wrt Wxh, we need to find dj/do, do/dua_o, du_o/da_h, da_h/dua_h, dua_h/dWxh
def gradient_hiddden(y_train):
    first_term = (activated_output() - y_train) * sigmoid_derivative(unactivated_output(Who))             #(4,1)
    second_term = first_term @ transpose(Who)                                                             #(4,3)
    third_term = second_term * sigmoid_derivative(unactivated_hidden(x_train,Wxh))                        #(4,3)
    djdWxh = transpose(x_train) @ third_term
    return djdWxh

def gradient_output(y_train):
    first_term = (activated_output() - y_train) * sigmoid_derivative(unactivated_output(Who))             #(4,1)
    djdWho = transpose(activated_hidden()) @ first_term                                                   #(3,1)
    return djdWho
# first_term = (activated_output() - y_train) * sigmoid_derivative(unactivated_output(Who))             #(4,1)
# second_term = first_term @ transpose(Who)                                                             #(4,3)
# third_term = second_term * sigmoid_derivative(unactivated_hidden(x_train,Wxh))                        #(4,3)
# djdWxh = transpose(x_train) @ third_term                                                              #(2,3)

# djdWho = transpose(activated_hidden()) @ first_term                                                                  #(3,1)

# djdWho.shape

Learning the weights

In [119]:
alpha = 0.01
def update_Wxh(alpha,Wxh):
    Wxh = Wxh - (alpha * gradient_hiddden(y_train))
    return Wxh

def update_Who(alpha,Who):
    Who = Who - (alpha * gradient_output(y_train))
    return Who




Training the artificial neural network

In [120]:
output = activated_output()
costs = []
for i in range(500):
    Wxh = update_Wxh(alpha,Wxh)
    Who = update_Who(alpha,Who)
    output = activated_output()
    if (i%100==0):
        new_cost = cost(output,y_train)
        costs.append(new_cost)
print(costs)


[np.float64(0.43637137864188325), np.float64(0.3682834782470167), np.float64(0.30857108709280534), np.float64(0.26555873673709374), np.float64(0.23824305939461687)]
