<a href="https://colab.research.google.com/github/saranshikens/Basic-ML/blob/main/Recurrent_Neural_Network_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**IMPLEMENTING RECURRENT NEURAL NETWORKS FROM SCRATCH**

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
import os
import numpy as np
import pandas as pd
# Download the dataset; `path` is whatever location kagglehub returns for it.
path = kagglehub.dataset_download('rakannimer/air-passengers')

print('Data source import complete.')

# Walk the downloaded tree and take the first file whose name ends in ".csv".
# `next(..., None)` replaces the nested loop + double `break` of a manual search.
csv_file_path = next(
    (
        os.path.join(root, file)
        for root, dirs, files in os.walk(path)
        for file in files
        if file.endswith(".csv")
    ),
    None,
)

# Fail loudly here: every later cell needs `data`, so merely printing a message
# would only postpone the failure to a confusing NameError further down.
if csv_file_path is None:
    raise FileNotFoundError(
        f"No CSV file found in the downloaded directory: {path}"
    )

data = pd.read_csv(csv_file_path)
print("CSV file loaded successfully.")


Data source import complete.
CSV file loaded successfully.


In [None]:
class RNN:
  """Single-layer tanh recurrent network with a linear read-out, trained by SGD.

  The network consumes a sequence one step at a time, keeps only the final
  hidden state for prediction, and is trained with backpropagation through
  time, updating the weights after every individual sequence.

  Args:
    input_size: number of features in each time step.
    hidden_size: number of hidden units.
    output_size: number of values predicted per sequence.
    lr: learning rate used by `update`.
    n_iter: number of passes over the training data made by `train`.
  """

  def __init__(self, input_size, hidden_size, output_size, lr, n_iter):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.lr = lr
    self.n_iter = n_iter
    self.initialize()

  def initialize(self):
    """(Re)draw small random weights and zero the biases."""
    self.W_input_hidden = np.random.randn(self.hidden_size, self.input_size)*0.01
    self.W_hidden_hidden = np.random.randn(self.hidden_size, self.hidden_size)*0.01
    self.W_hidden_output = np.random.randn(self.output_size, self.hidden_size)*0.01
    self.b_hidden = np.zeros((self.hidden_size, 1))
    self.b_output = np.zeros((self.output_size, 1))

  def tanh(self, X):
    """Element-wise hyperbolic tangent."""
    return np.tanh(X)

  def tanh_deriv(self, X):
    """Derivative of tanh evaluated at the PRE-activation `X`.

    Do not pass an already-activated value `h = tanh(a)` here; for that case
    the derivative is simply `1 - h**2`.
    """
    return 1 - np.tanh(X)**2

  def forward_prop(self, inputs):
    """Run the network over one sequence.

    Args:
      inputs: sequence of time steps; each step is reshaped to a column vector.

    Returns:
      (y_pred, hidden_layers): `y_pred` is the (output_size, 1) read-out of the
      last hidden state; `hidden_layers` maps step index -> hidden state, with
      key 0 holding the all-zero initial state and key t+1 the state after
      input t.
    """
    hidden_layer = np.zeros((self.hidden_size, 1))
    hidden_layers = {0: hidden_layer}
    for time in range(len(inputs)):
      X = np.asarray(inputs[time]).reshape(-1, 1)
      hidden_layer = self.tanh(np.dot(self.W_input_hidden, X) + np.dot(self.W_hidden_hidden, hidden_layer) + self.b_hidden)
      hidden_layers[time+1] = hidden_layer
    y_pred = np.dot(self.W_hidden_output, hidden_layer) + self.b_output
    return y_pred, hidden_layers

  def backward_prop(self, inputs, target, y_pred, hidden_layers):
    """Backpropagation through time for one sequence.

    The gradients are those of the loss 0.5 * ||y_pred - target||^2, so the
    output error is simply `y_pred - target`.

    Args:
      inputs: the sequence that was passed to `forward_prop`.
      target: expected output for the sequence (scalar or array-like).
      y_pred: prediction returned by `forward_prop`.
      hidden_layers: hidden-state dict returned by `forward_prop`.

    Returns:
      Tuple (d_W_input_hidden, d_W_hidden_hidden, d_W_hidden_output,
      d_b_hidden, d_b_output), each shaped like the matching parameter.
    """
    d_W_hidden_output = np.zeros_like(self.W_hidden_output)
    d_W_hidden_hidden = np.zeros_like(self.W_hidden_hidden)
    d_W_input_hidden = np.zeros_like(self.W_input_hidden)
    d_b_hidden = np.zeros_like(self.b_hidden)
    d_b_output = np.zeros_like(self.b_output)

    n_steps = len(inputs)

    d_y = y_pred - np.asarray(target).reshape(-1, 1)
    d_W_hidden_output += np.dot(d_y, hidden_layers[n_steps].T)
    d_b_output += d_y

    # The prediction is read from the LAST hidden state only, so the read-out
    # error enters the recurrence exactly once, as the seed of the backward
    # pass. Earlier states receive gradient solely through W_hidden_hidden.
    d_hidden = np.dot(self.W_hidden_output.T, d_y)

    for time in reversed(range(n_steps)):
      hidden_state = hidden_layers[time+1]
      # hidden_state is already tanh(pre-activation), so the local derivative
      # is 1 - h^2; running it through tanh again would be wrong.
      d_tanh = d_hidden * (1.0 - hidden_state**2)
      d_b_hidden += d_tanh
      d_W_input_hidden += np.dot(d_tanh, np.asarray(inputs[time]).reshape(1, -1))
      d_W_hidden_hidden += np.dot(d_tanh, hidden_layers[time].T)
      # Gradient flowing into the previous hidden state.
      d_hidden = np.dot(self.W_hidden_hidden.T, d_tanh)

    return d_W_input_hidden, d_W_hidden_hidden, d_W_hidden_output, d_b_hidden, d_b_output

  def update(self, d_W_input_hidden, d_W_hidden_hidden, d_W_hidden_output, d_b_hidden, d_b_output):
    """Apply one in-place gradient-descent step with learning rate `self.lr`."""
    self.W_input_hidden -= self.lr * d_W_input_hidden
    self.W_hidden_hidden -= self.lr * d_W_hidden_hidden
    self.W_hidden_output -= self.lr * d_W_hidden_output
    self.b_hidden -= self.lr * d_b_hidden
    self.b_output -= self.lr * d_b_output

  def train(self, X, y):
    """Train for `self.n_iter` passes, updating after every sequence.

    Prints the mean squared error averaged over the sequences every 10th pass.

    Args:
      X: collection of input sequences.
      y: collection of targets, one per sequence in `X`.

    Raises:
      ValueError: if `X` is empty or `X` and `y` differ in length.
    """
    n_sequences = len(X)
    if n_sequences == 0:
      raise ValueError("Cannot train on an empty set of sequences.")
    if n_sequences != len(y):
      raise ValueError(
        f"X and y must have the same length, got {n_sequences} and {len(y)}."
      )

    for i in range(self.n_iter):
      loss = 0
      for x_seq, y_true in zip(X, y):
        y_pred, hidden_layers = self.forward_prop(x_seq)
        loss += np.mean((y_pred - np.asarray(y_true).reshape(-1, 1))**2)
        gradients = self.backward_prop(x_seq, y_true, y_pred, hidden_layers)
        self.update(*gradients)
      if i%10 == 0:
        print(f"Iteration {i}: MSE Loss = {loss/n_sequences:.4f}")

  def predict(self, X_seq):
    """Return the flattened prediction for a single input sequence."""
    y_pred, _ = self.forward_prop(X_seq)
    return y_pred.flatten()

**PREPROCESSING THE DATA**

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Parse the 'Month' column into timestamps, then promote it to the row index.
data['Month'] = data['Month'].pipe(pd.to_datetime)
data.set_index(keys='Month', inplace=True)

# Rescale the passenger counts with a min-max scaler; `scaler` is kept at
# module level so the transformation can be inverted later.
# NOTE(review): the scaler is fit on the complete series, i.e. before the
# train/test split done further down, so test-period extremes influence the
# scaling of the training data.
scaler = MinMaxScaler()
data['#Passengers'] = scaler.fit(data[['#Passengers']]).transform(data[['#Passengers']])

**CREATING SEQUENCES**

In [None]:
def create_sequences(data, seq_length):
    """Slice a series into overlapping windows and their next-step targets.

    Window ``k`` is ``data[k:k+seq_length]`` and its target is
    ``data[k+seq_length]``; there are ``len(data) - seq_length`` such pairs
    (none when the series is not longer than ``seq_length``).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the windows and the targets.
    """
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    targets = [data[start + seq_length] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Each sample is a window of `seq_length` consecutive values; the target is
# the value that immediately follows the window.
seq_length = 12

# From here on `data` is the plain array of passenger values rather than the
# DataFrame it was before this cell.
data = data['#Passengers'].values
X, y = create_sequences(data, seq_length)

# Ordered split: the first 80% of the windows train the model, the rest test it.
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Turn every window into a column of single-feature time steps.
X_train_seq = [window.reshape(-1, 1) for window in X_train]
X_test_seq = [window.reshape(-1, 1) for window in X_test]

**TRAINING THE MODEL**

In [None]:
# Seed NumPy's global generator before the model draws its initial weights
# (RNN.initialize uses np.random.randn), so a Restart & Run All reproduces the
# same training trajectory and loss log.
np.random.seed(42)

rnn = RNN(input_size=1, hidden_size=16, output_size=1, lr=0.01, n_iter=100)
rnn.train(X_train_seq, y_train)

Iteration 0: MSE Loss = 0.0588
Iteration 10: MSE Loss = 0.0287
Iteration 20: MSE Loss = 0.0201
Iteration 30: MSE Loss = 0.0058
Iteration 40: MSE Loss = 0.0061
Iteration 50: MSE Loss = 0.0058
Iteration 60: MSE Loss = 0.0057
Iteration 70: MSE Loss = 0.0081
Iteration 80: MSE Loss = 0.0090
Iteration 90: MSE Loss = 0.0091
