# RNN from scratch, only using numpy
#### By: Teddy Ordoñez
##### Source: https://pythonalgos.com/build-a-recurrent-neural-network-from-scratch-in-python-3/

In [6]:
# imports
import math
import numpy as np
import matplotlib.pyplot as plt

#### Defining RNN architecture

The RNN will have a learning rate of 0.0001, the sequence length of the sine wave is 50, the maximum number of epochs will be 25, the hidden dimension will have a size of 100, and the output dimension will have a size of 1. The error is backpropagated through `bptt` = 5 timesteps (change this value to the sequence length, 50, to backpropagate through every timestep of the sequence), with max and min clipping values of 10 and -10, respectively.

In [10]:
# Training hyperparameters
learning_rate = 1e-4    # same value as 0.0001
epochs_max = 25         # maximum number of epochs

# Sequence and layer sizes
sequence_length = 50    # number of timesteps iterated per sequence
hidden_dimension = 100  # size of the hidden activation vector
output_dimension = 1    # size of the output

# Backpropagation settings
bptt = 5                # backpropagating the error; change value to sequence_length (50)
max_clip_val = 10       # upper clipping bound
min_clip_val = -max_clip_val    # lower clipping bound (-10)

#### Activation Function

For this RNN, we will use the sigmoid function, $\sigma(x) = \frac{1}{1 + e^{-x}}$, as our activation function.

In [9]:
def sigmoid(x):
    """Apply the logistic sigmoid 1 / (1 + e^(-x)) element-wise.

    Accepts a scalar or a numpy array and returns a value of matching shape.
    """
    exp_of_negated = np.exp(-x)
    return 1 / (1 + exp_of_negated)

#### Loss Calculation

Next we create a loss calculation function. It receives the input (**X**) and result (**Y**) matrices, the input-to-hidden layer weights (**U**), the hidden-to-output layer weights (**V**) and the hidden-to-hidden weights (**W**).

In [11]:
# Where X = data matrix, Y = output matrix, U = input-to-hidden weights, V = hidden-to-output weights and W = hidden-to-hidden weights
def loss_calculation(X, Y, U, V, W):
    """Run a forward pass over every sample and accumulate the loss.

    For each sample the hidden state starts at zero and is advanced one
    timestep at a time. The prediction is taken from the hidden state after
    the last timestep, and the sample contributes ``(y - prediction)**2 / 2``
    to the total. The result is a *sum* of half squared errors over the
    samples; it is not divided by the number of samples.

    Relies on the module-level ``hidden_dimension``, ``sequence_length`` and
    ``sigmoid``.

    Parameters
    ----------
    X : indexable of input sequences; ``X[i]`` is the i-th sequence.
    Y : array of targets; ``Y.shape[0]`` is the number of samples processed.
    U : input-to-hidden weights.
    V : hidden-to-output weights.
    W : hidden-to-hidden weights.

    Returns
    -------
    loss : float
        Summed half squared error over all samples.
    new_activation : numpy.ndarray
        Hidden activation after the last timestep of the last sample. If no
        timestep was processed (e.g. ``Y`` is empty) this is the all-zero
        initial state of shape ``(hidden_dimension, 1)``.
    """
    loss = 0.0
    # Defined up front so that an empty Y still yields a valid return value.
    new_activation = np.zeros((hidden_dimension, 1))

    for i in range(Y.shape[0]):
        x, y = X[i], Y[i]   # one input sequence and its target
        # The hidden state is reset to zero at the start of every sample.
        new_activation = np.zeros((hidden_dimension, 1))

        for timestep in range(sequence_length):
            # Feed a single timestep: a zero vector shaped like x with only
            # the current timestep's value filled in.
            new_input = np.zeros(x.shape)
            new_input[timestep] = x[timestep]
            multiplied_u = np.dot(U, new_input)         # input contribution
            multiplied_w = np.dot(W, new_activation)    # recurrent contribution
            new_activation = sigmoid(multiplied_u + multiplied_w)

        # Only the output of the final timestep enters the loss, so the
        # hidden-to-output product is computed once, after the loop.
        multiplied_v = np.dot(V, new_activation)

        # Squeeze to 0-d before float(): converting an array with ndim > 0 to
        # a Python scalar is deprecated since NumPy 1.25.
        loss += float(np.squeeze((y - multiplied_v) ** 2 / 2))

    return loss, new_activation

#### Layer Activation Calculation

With this function we will calculate the activation values of the recurrent layers created by the recurrence relation of the RNN. This function receives the input matrix (**x**), the input-to-hidden weights (**U**), the hidden-to-output weights (**V**), the hidden-to-hidden weights (**W**) and the previous activation values (**previous_activation**).

In [12]:
# Where x = data matrix, U = input-to-hidden weights, V = hidden-to-output weights, W = hidden-to-hidden weights and previous_activation = previous activation values (used as the starting state for the first timestep)
def layer_activation_calc(x, U, V, W, previous_activation):
    """Run the forward pass for one sequence and record every timestep's state.

    Relies on the module-level ``sequence_length`` and ``sigmoid``.

    Parameters
    ----------
    x : input sequence; fed to the network one timestep at a time.
    U : input-to-hidden weights.
    V : hidden-to-output weights.
    W : hidden-to-hidden weights.
    previous_activation : hidden activation used as the state before the
        first timestep.

    Returns
    -------
    layers : list of dict
        One entry per timestep with keys ``'activation'`` (hidden activation
        produced at that timestep) and ``'previous_activation'`` (hidden
        activation it was computed from).
    multiplied_u, multiplied_w, multiplied_v : numpy.ndarray
        The ``U``, ``W`` and ``V`` products from the last timestep only.
    """
    layers = []
    for timestep in range(sequence_length):
        # Feed a single timestep: a zero vector shaped like x with only the
        # current timestep's value filled in.
        new_input = np.zeros(x.shape)
        new_input[timestep] = x[timestep]

        multiplied_u = np.dot(U, new_input)             # input contribution
        # The recurrent contribution must use the hidden-to-hidden weights W.
        # Using V here (the hidden-to-output weights) would still broadcast
        # but would compute a different, incorrect recurrence.
        multiplied_w = np.dot(W, previous_activation)
        activation = sigmoid(multiplied_u + multiplied_w)
        multiplied_v = np.dot(V, activation)            # output for this timestep

        layers.append({'activation': activation, 'previous_activation': previous_activation})
        previous_activation = activation    # becomes the state for the next timestep

    return layers, multiplied_u, multiplied_w, multiplied_v