<a href="https://colab.research.google.com/github/PhatHuynhTranSon99/Neural-Networks-From-Scratch/blob/main/RNN_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# About this notebook

In this notebook, I implement a simple neural network architecture, the Recurrent Neural Network (RNN), from scratch and use it for an NLP task: sentiment analysis on a small set of sentences.

# Library import

In [None]:
import numpy as np

# Data generation

In [None]:
# Dataset creation
# Create a dataset for sentiment analysis.
# Each key is a lowercase sentence whose words are separated by single spaces;
# each value is the sentence's sentiment label (True = positive,
# False = negative).
train_data = {
  'good': True,
  'bad': False,
  'happy': True,
  'sad': False,
  'not good': False,
  'not bad': True,
  'not happy': False,
  'not sad': True,
  'very good': True,
  'very bad': False,
  'very happy': True,
  'very sad': False,
  'i am happy': True,
  'this is good': True,
  'i am bad': False,
  'this is bad': False,
  'i am sad': False,
  'this is sad': False,
  'i am not happy': False,
  'this is not good': False,
  'i am not bad': True,
  'this is not sad': True,
  'i am very happy': True,
  'this is very good': True,
  'i am very bad': False,
  'this is very sad': False,
  'this is very happy': True,
  'i am good not bad': True,
  'this is good not bad': True,
  'i am bad not good': False,
  'i am good and happy': True,
  'this is not good and not happy': False,
  'i am not at all good': False,
  'i am not at all bad': True,
  'i am not at all happy': False,
  'this is not at all sad': True,
  'this is not at all happy': False,
  'i am good right now': True,
  'i am bad right now': False,
  'this is bad right now': False,
  'i am sad right now': False,
  'i was good earlier': True,
  'i was happy earlier': True,
  'i was bad earlier': False,
  'i was sad earlier': False,
  'i am very bad right now': False,
  'this is very good right now': True,
  'this is very sad right now': False,
  'this was bad earlier': False,
  'this was very good earlier': True,
  'this was very bad earlier': False,
  'this was very happy earlier': True,
  'this was very sad earlier': False,
  'i was good and not bad earlier': True,
  'i was not good and not happy earlier': False,
  'i am not at all bad or sad right now': True,
  'i am not at all good or happy right now': False,
  'this was not happy and not good earlier': False,
}

# Evaluation sentences, in the same sentence -> boolean label format as the
# training dictionary (True = positive, False = negative).
test_data = {
  'this is happy': True,
  'i am good': True,
  'this is not happy': False,
  'i am not good': False,
  'this is not bad': True,
  'i am not sad': True,
  'i am very good': True,
  'this is very bad': False,
  'i am very sad': False,
  'this is bad not good': False,
  'this is good and happy': True,
  'i am not good and not happy': False,
  'i am not at all sad': True,
  'this is not at all good': False,
  'this is not at all bad': True,
  'this is good right now': True,
  'this is sad right now': False,
  'this is very bad right now': False,
  'this was good earlier': True,
  'i was not happy and not good earlier': False,
}

# Build the vocabulary: give every distinct word found in the training and
# test sentences an integer index, numbered in order of first appearance
word_to_index = {}

for corpus in (train_data, test_data):
  for sentence in corpus:
    for word in sentence.split():
      # setdefault keeps the index of a word that was already seen, so only
      # new words receive the next free index
      word_to_index.setdefault(word, len(word_to_index))

# Number of distinct words, i.e. the vocabulary size
current_index = len(word_to_index)

# Function to one-hot-encode word
def encode(word, word_to_index, vocab_size):
  """Return the one-hot vector for `word`.

  Args:
    word: the word to encode; must be a key of `word_to_index`.
    word_to_index: mapping from word to its integer position.
    vocab_size: length of the returned vector.

  Returns:
    A float NumPy array of length `vocab_size` that is 0 everywhere except
    for a 1 at the word's position.

  Raises:
    KeyError: if `word` is not in `word_to_index`.
  """
  one_hot = np.zeros(vocab_size)
  position = word_to_index[word]
  one_hot[position] = 1
  return one_hot

# Build the dataset from the sentences:
def build_dataset(data, word_to_index, vocab_size):
  """Turn a sentence -> label dictionary into a list of training samples.

  Args:
    data: dictionary mapping each sentence (a whitespace-separated string)
      to its label.
    word_to_index: mapping from word to its integer position.
    vocab_size: length of each one-hot word vector.

  Returns:
    A list with one `(vectors, target)` tuple per sentence, where `vectors`
    is the list of one-hot encodings of the sentence's words, in order, and
    `target` is the label converted to an int.
  """
  samples = []

  for sentence, label in data.items():
    # Convert the label first, then encode the words one by one
    target = int(label)
    vectors = [
      encode(token, word_to_index, vocab_size)
      for token in sentence.split()
    ]
    samples.append((vectors, target))

  return samples

# Define dimensions: one input unit per vocabulary word
input_size = len(word_to_index)
hidden_size = 64

# Create datasets for training and testing; both splits are encoded with the
# same vocabulary
train_dataset, test_dataset = [
  build_dataset(split, word_to_index, vocab_size=current_index)
  for split in (train_data, test_data)
]

# Utility functions

In [None]:
def sigmoid(x):
  """Return the logistic function 1 / (1 + exp(-x)), element-wise.

  The value is computed through exp(-|x|), which is never larger than 1, so
  inputs of large magnitude cannot overflow inside np.exp (the direct formula
  overflows for large negative x).

  Args:
    x: a scalar or array-like of real numbers.

  Returns:
    A NumPy scalar for scalar input, otherwise an array with the shape of x.
  """
  x = np.asarray(x)
  decay = np.exp(-np.abs(x))
  # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the equivalent
  # e^x / (1 + e^x). Both branches are safe to evaluate for every x.
  result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
  # Indexing with () unwraps a 0-d array to a scalar and leaves arrays as is
  return result[()]

def tanh(x):
  """Return the hyperbolic tangent of x, computed element-wise by NumPy."""
  activation = np.tanh(x)
  return activation

# Logistic regression layer

In [None]:
class LogisticRegression:
  """Single sigmoid output unit trained with per-sample gradient descent.

  forward() maps a feature vector to a probability; backward() takes the true
  label, updates the weights in place and returns the gradient with respect
  to the feature vector so that an upstream layer can continue
  backpropagation.
  """

  def __init__(self, input_size, learning_rate):
    self.input_size = input_size
    self.learning_rate = learning_rate
    # Weights are drawn from a standard normal distribution; the bias is 0
    self.u = np.random.randn(input_size)
    self.b_y = 0

  def forward(self, h):
    """Return sigmoid(u . h + b_y) and cache h and the output for backward()."""
    self.h = h
    self.y_hat = sigmoid(self.u.dot(h) + self.b_y)
    return self.y_hat

  def backward(self, y):
    """Apply one gradient step for label y; return the gradient w.r.t. h."""
    # y_hat - y is the gradient of the cross-entropy loss with respect to
    # the sigmoid's input
    error = self.y_hat - y

    # Must be computed before u is modified below
    grad_h = error * self.u

    self.u -= self.learning_rate * (error * self.h)
    self.b_y -= self.learning_rate * error

    return grad_h

# RNN layer


In [None]:
class RNN:
  """Single-layer tanh recurrent network trained by backpropagation through time.

  forward() consumes a sequence of input vectors and returns the final hidden
  state; backward() receives the gradient with respect to that final state
  and applies one gradient-descent step to the weights and bias.
  """

  def __init__(self, input_size, hidden_size, learning_rate):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.learning_rate = learning_rate

    # Standard-normal samples scaled down by 1000; the bias starts at zero
    self.W_hh = np.random.randn(hidden_size, hidden_size) / 1000
    self.W_xh = np.random.randn(hidden_size, input_size) / 1000
    self.b_h = np.zeros(hidden_size)

  def forward(self, inputs):
    """Run the sequence through the network and return the last hidden state.

    The inputs and every hidden state (including the all-zero initial one)
    are cached on the instance for use by backward().
    """
    self.x = inputs

    state = np.zeros(self.hidden_size)
    self.h = [state]

    for x_t in inputs:
      pre_activation = self.W_hh @ state + self.W_xh @ x_t + self.b_h
      state = tanh(pre_activation)
      self.h.append(state)

    return state

  def backward(self, dh_T):
    """Backpropagate dh_T through every time step and update the parameters.

    dh_T is the gradient of the loss with respect to the final hidden state.
    Gradients are accumulated over all steps, then applied once.
    """
    grad_W_hh = np.zeros_like(self.W_hh)
    grad_W_xh = np.zeros_like(self.W_xh)
    grad_b_h = np.zeros_like(self.b_h)

    grad_h = dh_T
    # Walk the sequence from the last step back to the first;
    # self.h[step] is the state before input `step`, self.h[step + 1] after it
    for step in range(len(self.x) - 1, -1, -1):
      h_before = self.h[step]
      h_after = self.h[step + 1]

      # Derivative of tanh expressed through its output: 1 - tanh^2
      grad_pre = grad_h * (1 - h_after**2)

      grad_W_hh += np.outer(grad_pre, h_before)
      grad_W_xh += np.outer(grad_pre, self.x[step])
      grad_b_h += grad_pre

      # Gradient flowing into the previous hidden state
      grad_h = grad_pre @ self.W_hh

    self.W_hh -= self.learning_rate * grad_W_hh
    self.W_xh -= self.learning_rate * grad_W_xh
    self.b_h -= self.learning_rate * grad_b_h

# Putting things together

In [None]:
# Build the two layers. The hidden width comes from hidden_size rather than a
# repeated literal, so the recurrent layer's output and the output layer's
# input cannot drift apart when the dimension is changed.
rnn = RNN(input_size, hidden_size, 0.01)
log = LogisticRegression(hidden_size, 0.01)


def _is_correct(y, y_hat):
  # A prediction is correct when thresholding y_hat at 0.5 reproduces label y
  return (y == 0 and y_hat < 0.5) or (y == 1 and y_hat >= 0.5)


def train(rnn_layer, logistic_regression_layer, epochs = 2000):
  """Train the two layers sample by sample and print progress.

  Each epoch makes one pass over the module-level `train_dataset`, updating
  both layers after every sample, then measures accuracy on the module-level
  `test_dataset` without updating. Statistics are printed every 100 epochs
  and after the final epoch.

  Args:
    rnn_layer: recurrent layer exposing forward(inputs) and backward(dh_T).
    logistic_regression_layer: output layer exposing forward(h) and
      backward(y), where backward returns the gradient passed on to
      `rnn_layer`.
    epochs: number of passes over the training set.
  """
  # Keeps the logarithms in the loss finite once the sigmoid saturates to
  # exactly 0 or 1
  eps = 1e-12
  last_epoch = epochs - 1

  for i in range(epochs):
    loss = 0
    train_correct = 0
    test_correct = 0

    # Loop through items in train dataset and train
    for x, y in train_dataset:
      h_T = rnn_layer.forward(x)
      y_hat = logistic_regression_layer.forward(h_T)

      # Binary cross-entropy; clipping affects only the reported loss, the
      # gradients below still use the unclipped prediction
      p = np.clip(y_hat, eps, 1 - eps)
      loss += - y * np.log(p) - (1 - y) * np.log(1 - p)

      if _is_correct(y, y_hat):
        train_correct += 1

      dh_T = logistic_regression_layer.backward(y)
      rnn_layer.backward(dh_T)

    # Calculate test accuracy (forward passes only, no parameter updates)
    for x, y in test_dataset:
      h_T = rnn_layer.forward(x)
      y_hat = logistic_regression_layer.forward(h_T)

      if _is_correct(y, y_hat):
        test_correct += 1

    if i % 100 == 0 or i == last_epoch:
      print(f"Epochs: {i}")
      print(f"Loss: {loss / len(train_dataset)}")
      print(f"Train Accuracy: {train_correct / len(train_dataset)}")
      print(f"Test Accuracy: {test_correct / len(test_dataset)}")
      print("--------------------")

# Run the full training loop on the two layers created above
train(rnn_layer=rnn, logistic_regression_layer=log, epochs=2000)

Epochs: 0
Loss: 0.8520817873878638
Train Accuracy: 0.39655172413793105
Test Accuracy: 0.5
--------------------
Epochs: 100
Loss: 0.7482468796809268
Train Accuracy: 0.43103448275862066
Test Accuracy: 0.55
--------------------
Epochs: 200
Loss: 0.5228762863833267
Train Accuracy: 0.6551724137931034
Test Accuracy: 0.65
--------------------
Epochs: 300
Loss: 0.42500039945212914
Train Accuracy: 0.7241379310344828
Test Accuracy: 0.7
--------------------
Epochs: 400
Loss: 0.03182226590207598
Train Accuracy: 1.0
Test Accuracy: 1.0
--------------------
Epochs: 500
Loss: 0.010655381822194141
Train Accuracy: 1.0
Test Accuracy: 1.0
--------------------
Epochs: 600
Loss: 0.006112877237748109
Train Accuracy: 1.0
Test Accuracy: 1.0
--------------------
Epochs: 700
Loss: 0.004207682869785306
Train Accuracy: 1.0
Test Accuracy: 1.0
--------------------
Epochs: 800
Loss: 0.0031769381321984772
Train Accuracy: 1.0
Test Accuracy: 1.0
--------------------
Epochs: 900
Loss: 0.0025367817047869824
Train Accuracy