# Implementation of a Recurrent Neural Network (RNN) from scratch using only NumPy

In [1]:
# Only thing we need is Numpy...
import numpy as np

In [2]:
# Seed NumPy's global pseudo-random number generator.
# np.random.seed() fixes the generator's starting state, so the pseudo-random
# numbers drawn afterwards are the same on every run.
np.random.seed(1234)

In [3]:
# Toy dataset for the RNN.
# Each row of `x` is one input sequence of three time steps;
# `y` holds one target value per sequence, stored as a (3, 1) column.

x = np.array([[1, 0, 0],
              [1, 1, 0],
              [1, 1, 1]])

y = np.array([1, 2, 3]).reshape(-1, 1)

In [4]:
# Starting weights for the linear RNN; for this dataset the best set of
# weights is wx = 1 and wrec = 1.
# They are created as NumPy arrays rather than Python lists so that they have
# the same type before and after the first gradient update (the update
# expression `wx - lr * grad` always yields an ndarray).

wx = np.array([0.2])
wrec = np.array([1.5])

In [5]:
# Hyper-parameters for the linear (activation-free) RNN.

# How many times the training loop repeats the forward/backward pass.
number_of_epoch = 50000

# Number of sequences in the dataset; the cost and its gradient are divided by it.
number_of_training_data = 3

# Separate step sizes: one for the input weight, one for the recurrent weight.
learning_rate_x, learning_rate_rec = 0.02, 0.0006

In [6]:
# Pre-allocate two independent 3x4 float buffers filled with zeros:
# `states` for the hidden state at each step and `grad_over_time` for the
# gradient at each step.

states = np.zeros(shape=(3, 4))
grad_over_time = np.zeros_like(states)

# print('States: ', states)
# print('\nGradients over time: ', grad_over_time)

In [8]:
# Train the linear RNN with gradient descent, propagating the error back
# through the three unrolled time steps.
# The loop variable is named `epoch` so it does not shadow the builtin `iter`.

for epoch in range(number_of_epoch):

    # Forward pass: column t+1 of `states` holds the state after input x[:, t];
    # column 0 is never written here and serves as the initial state.
    layer_1 = x[:,0] * wx + states[:,0] * wrec
    states[:,1] = layer_1

    layer_2 = x[:,1] * wx + states[:,1] * wrec
    states[:,2] = layer_2

    layer_3 = x[:,2] * wx + states[:,2] * wrec
    states[:,3] = layer_3

    # Mean squared error between the final state and the targets.
    # `y` has shape (3, 1) while states[:,3] has shape (3,); without the
    # squeeze the subtraction broadcasts to a (3, 3) matrix and the sum is wrong.
    cost = np.square(states[:,3] - np.squeeze(y)).sum() / number_of_training_data

    # Backward pass: gradient of the cost w.r.t. the final state ...
    grad_out = (states[:,3] - np.squeeze(y)) * 2 / number_of_training_data
    grad_over_time[:,3] = grad_out
    # ... and one factor of wrec for every step further back in time.
    grad_over_time[:,2] = grad_over_time[:,3] * wrec
    grad_over_time[:,1] = grad_over_time[:,2] * wrec

    # NOTE: grad_over_time[:,0] is not used by the weight gradients below.
    grad_over_time[:,0] = grad_over_time[:,1] * wrec

    # Gradient of the cost w.r.t. wx: each step's gradient times that step's input.
    grad_wx = np.sum(grad_over_time[:,3] * x[:,2] +
                     grad_over_time[:,2] * x[:,1] +
                     grad_over_time[:,1] * x[:,0])

    # Gradient of the cost w.r.t. wrec: each step's gradient times the previous state.
    grad_rec = np.sum(grad_over_time[:,3] * states[:,2] +
                      grad_over_time[:,2] * states[:,1] +
                      grad_over_time[:,1] * states[:,0])

    # Gradient-descent update of both weights.
    wx = wx - learning_rate_x * grad_wx
    wrec = wrec - learning_rate_rec * grad_rec

    # Report every 1000 epochs; layer_3 was computed before this epoch's update.
    if epoch % 1000 == 0:
        print('\nCurrent Epoch: ',epoch, '  current predition :' ,layer_3)


Current Epoch:  0   current predition : [1. 2. 3.]

Current Epoch:  1000   current predition : [1. 2. 3.]

Current Epoch:  2000   current predition : [1. 2. 3.]

Current Epoch:  3000   current predition : [1. 2. 3.]

Current Epoch:  4000   current predition : [1. 2. 3.]

Current Epoch:  5000   current predition : [1. 2. 3.]

Current Epoch:  6000   current predition : [1. 2. 3.]

Current Epoch:  7000   current predition : [1. 2. 3.]

Current Epoch:  8000   current predition : [1. 2. 3.]

Current Epoch:  9000   current predition : [1. 2. 3.]

Current Epoch:  10000   current predition : [1. 2. 3.]

Current Epoch:  11000   current predition : [1. 2. 3.]

Current Epoch:  12000   current predition : [1. 2. 3.]

Current Epoch:  13000   current predition : [1. 2. 3.]

Current Epoch:  14000   current predition : [1. 2. 3.]

Current Epoch:  15000   current predition : [1. 2. 3.]

Current Epoch:  16000   current predition : [1. 2. 3.]

Current Epoch:  17000   current predition : [1. 2. 3.]

Curr

In [9]:
# One more forward pass through the three unrolled steps, using the current
# wx / wrec, so that `states` and `layer_3` reflect the final weights.
states[:, 1] = layer_1 = states[:, 0] * wrec + x[:, 0] * wx

states[:, 2] = layer_2 = states[:, 1] * wrec + x[:, 1] * wx

states[:, 3] = layer_3 = states[:, 2] * wrec + x[:, 2] * wx

In [10]:
# Final output and rounded results.
# `layer_3` is the network's output, not the target `y`, so it is labelled
# as output rather than as ground truth.

print('Final output Raw: ',layer_3)
print('Final output Rounded: ',np.round(layer_3))
print("Final weight X : ",wx)
print("Final weight Rec : ",wrec)

Ground Truth:  [1. 2. 3.]
Rounded Truth:  [1. 2. 3.]
Final weight X :  [1.]
Final weight Rec :  [1.]


----------------------That's it for this cycle-----------------------

## Let's do the same thing with activations...

In [11]:
# Here we use the logistic (sigmoid) activation function.
# NOTE: despite its name, `log` is the logistic function, not a logarithm.

def log(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of `x`."""
    decay = np.exp(-1 * x)
    return 1 / (1 + decay)

def d_log(x):
    """Return the sigmoid's derivative at `x`, s * (1 - s) with s = log(x)."""
    s = log(x)
    return s * (1 - s)

In [12]:
# The training data ('x' and 'y') stays the same as in the first part.

In [13]:
# Initial weights and hyper-parameters for the RNN with activations.

# Both weights start from standard-normal draws; wx is drawn first, then wrec.
wx, wrec = np.random.randn(), np.random.randn()

# Number of training iterations.
number_of_epoch = 15000

# The same step size is used for the input and the recurrent weight.
lr_wx = lr_wrec = 0.001

In [14]:
# Zero-filled work buffers sized from the data:
# `state` has one row per row of `x` and one column more than `x`;
# `grad_over_time` has exactly the shape of `x`.
state = np.zeros((len(x), x.shape[1] + 1))
grad_over_time = np.zeros(shape=x.shape)

In [15]:
# Train the RNN with sigmoid activations on the first two steps.
# The third step is left linear (no activation) and is used as the output.
# The loop variable is named `epoch` so it does not shadow the builtin `iter`.

for epoch in range(number_of_epoch):

    # Forward pass. Column 0 of `state` is never written and acts as the
    # initial state; columns 1 and 2 hold the activated states.
    state_1_in  = state[:,0]*wrec + x[:,0]*wx
    state_1_out = log(state_1_in)
    state[:,1] = state_1_out

    state_2_in  = state[:,1]*wrec + x[:,1]*wx
    state_2_out = log(state_2_in)
    state[:,2] = state_2_out

    # Output step: stored without applying the activation.
    state_3_in  = state[:,2]*wrec + x[:,2]*wx
    state[:,3] = state_3_in

    # Mean squared error; y is squeezed from (3, 1) to (3,) so the
    # subtraction is element-wise.
    cost = np.square(state[:,3] - np.squeeze(y)).sum() / len(x)

    if epoch % 1000 == 0:
        print("Current iter : ", epoch, " Current cost: ", cost)

    # Backward pass. Column t of `grad_over_time` is the gradient of the cost
    # w.r.t. the pre-activation input of step t+1; going one step back
    # multiplies by wrec and by the sigmoid derivative at that step's input.
    grad_over_time[:,2] = (state[:,3] - np.squeeze(y)) * (2/len(x))
    grad_over_time[:,1] = grad_over_time[:,2] * wrec  * d_log(state_2_in)
    grad_over_time[:,0] = grad_over_time[:,1] * wrec  * d_log(state_1_in)

    # Weight gradients: per-step gradient times that step's input (for wx)
    # or times the previous state (for wrec), summed over steps and sequences.
    grad_wx = np.sum(grad_over_time[:,2]*x[:,2]+
                    grad_over_time[:,1]*x[:,1]+
                    grad_over_time[:,0]*x[:,0])

    grad_wrec = np.sum(grad_over_time[:,2]*state[:,2]+
                    grad_over_time[:,1]*state[:,1]+
                    grad_over_time[:,0]*state[:,0])

    # Gradient-descent update of both weights.
    wx = wx - lr_wx * grad_wx
    wrec = wrec - lr_wrec * grad_wrec

Current iter :  0  Current cost:  5.847872199212813
Current iter :  1000  Current cost:  0.9951704943204343
Current iter :  2000  Current cost:  0.1419578070514743
Current iter :  3000  Current cost:  0.11273245295695294
Current iter :  4000  Current cost:  0.10307442764032371
Current iter :  5000  Current cost:  0.09869720852304842
Current iter :  6000  Current cost:  0.09670312471301536
Current iter :  7000  Current cost:  0.09579497336489533
Current iter :  8000  Current cost:  0.09538147825666778
Current iter :  9000  Current cost:  0.09519323778665428
Current iter :  10000  Current cost:  0.09510755212566056
Current iter :  11000  Current cost:  0.0950685515584827
Current iter :  12000  Current cost:  0.09505080101461244
Current iter :  13000  Current cost:  0.09504272238954642
Current iter :  14000  Current cost:  0.09503904573259021


In [16]:
# Final output: one more forward pass with the current wx / wrec.
# Steps 1 and 2 go through the sigmoid `log`; step 3 is stored without it.

state_1_in = x[:, 0] * wx + state[:, 0] * wrec
state[:, 1] = state_1_out = log(state_1_in)

state_2_in = x[:, 1] * wx + state[:, 1] * wrec
state[:, 2] = state_2_out = log(state_2_in)

state[:, 3] = state_3_in = x[:, 2] * wx + state[:, 2] * wrec

In [17]:
# Show the raw and rounded network outputs next to the targets.

print("Final output Raw: ", state_3_in)
print("Final output Rounded: ", state_3_in.round())
print("Ground Truth : ", y.transpose())

Final output Raw:  [1.36735465 1.61291427 3.01809184]
Final output Rounded:  [1. 2. 3.]
Ground Truth :  [[1 2 3]]
