In [None]:
import numpy as np
import random

In [None]:
# Training set: maps each phrase (lowercase words separated by single spaces)
# to a boolean sentiment label. Phrases built on 'good'/'happy' are True and
# those built on 'bad'/'sad' are False; a preceding 'not' flips the label
# (e.g. 'not bad' -> True).
train_data = {
  'good': True,
  'bad': False,
  'happy': True,
  'sad': False,
  'not good': False,
  'not bad': True,
  'not happy': False,
  'not sad': True,
  'very good': True,
  'very bad': False,
  'very happy': True,
  'very sad': False,
  'i am happy': True,
  'this is good': True,
  'i am bad': False,
  'this is bad': False,
  'i am sad': False,
  'this is sad': False,
  'i am not happy': False,
  'this is not good': False,
  'i am not bad': True,
  'this is not sad': True,
  'i am very happy': True,
  'this is very good': True,
  'i am very bad': False,
  'this is very sad': False,
  'this is very happy': True,
  'i am good not bad': True,
  'this is good not bad': True,
  'i am bad not good': False,
  'i am good and happy': True,
  'this is not good and not happy': False,
  'i am not at all good': False,
  'i am not at all bad': True,
  'i am not at all happy': False,
  'this is not at all sad': True,
  'this is not at all happy': False,
  'i am good right now': True,
  'i am bad right now': False,
  'this is bad right now': False,
  'i am sad right now': False,
  'i was good earlier': True,
  'i was happy earlier': True,
  'i was bad earlier': False,
  'i was sad earlier': False,
  'i am very bad right now': False,
  'this is very good right now': True,
  'this is very sad right now': False,
  'this was bad earlier': False,
  'this was very good earlier': True,
  'this was very bad earlier': False,
  'this was very happy earlier': True,
  'this was very sad earlier': False,
  'i was good and not bad earlier': True,
  'i was not good and not happy earlier': False,
  'i am not at all bad or sad right now': True,
  'i am not at all good or happy right now': False,
  'this was not happy and not good earlier': False,
}

# Held-out evaluation set in the same format as train_data:
# phrase (space-separated lowercase words) -> boolean sentiment label.
test_data = {
  'this is happy': True,
  'i am good': True,
  'this is not happy': False,
  'i am not good': False,
  'this is not bad': True,
  'i am not sad': True,
  'i am very good': True,
  'this is very bad': False,
  'i am very sad': False,
  'this is bad not good': False,
  'this is good and happy': True,
  'i am not good and not happy': False,
  'i am not at all sad': True,
  'this is not at all good': False,
  'this is not at all bad': True,
  'this is good right now': True,
  'this is sad right now': False,
  'this is very bad right now': False,
  'this was good earlier': True,
  'i was not happy and not good earlier': False,
}

In [None]:
# Build the vocabulary: every distinct word that occurs in a training phrase.
# sorted() fixes the word order. Iterating a set of strings directly depends on
# per-process string hashing, so list(set(...)) could assign different indices
# to the same word on every kernel restart.
vocab = sorted({word for phrase in train_data for word in phrase.split(' ')})
vocab

['at',
 'sad',
 'bad',
 'and',
 'now',
 'not',
 'happy',
 'am',
 'all',
 'good',
 'is',
 'was',
 'right',
 'i',
 'or',
 'this',
 'very',
 'earlier']

In [None]:
# Two lookup tables over `vocab`: position -> word, and the inverse word -> position.
idx_to_word = dict(enumerate(vocab))
word_to_idx = {word: position for position, word in idx_to_word.items()}

print(word_to_idx)
print(idx_to_word)

{'at': 0, 'sad': 1, 'bad': 2, 'and': 3, 'now': 4, 'not': 5, 'happy': 6, 'am': 7, 'all': 8, 'good': 9, 'is': 10, 'was': 11, 'right': 12, 'i': 13, 'or': 14, 'this': 15, 'very': 16, 'earlier': 17}
{0: 'at', 1: 'sad', 2: 'bad', 3: 'and', 4: 'now', 5: 'not', 6: 'happy', 7: 'am', 8: 'all', 9: 'good', 10: 'is', 11: 'was', 12: 'right', 13: 'i', 14: 'or', 15: 'this', 16: 'very', 17: 'earlier'}


In [None]:
def softmax(y):
  '''Return the softmax of `y`, normalised over ALL elements of the array.

  y: array-like of logits (the notebook passes a column of shape output_size,1).
  Returns an array of the same shape whose entries are positive and sum to 1.

  The maximum logit is subtracted before exponentiating. This leaves the
  result mathematically unchanged but keeps np.exp from overflowing to inf
  (which would produce NaN probabilities) when logits are large.
  '''
  y = np.asarray(y, dtype=float)
  if y.size == 0:
    # Nothing to normalise; mirror the input rather than failing in np.max.
    return y
  exps = np.exp(y - np.max(y))
  return exps / np.sum(exps)

## later
def softmax_der(p, y_true):
  '''Gradient of the loss -ln(p[true class]) with respect to the logits.

  p: softmax probabilities.
  y_true: one-hot target, shape (output_size, 1).
  Returns `p` with 1 subtracted at the positions where `y_true` equals 1.
  '''
  p_minus_one = p - 1
  return np.where(y_true == 1, p_minus_one, p)

<div style="display: flex;">
    <img src="imgs/RNN1.jpg" alt="RNN1" style="width:400px; margin-right: 10px;"/>
    <img src="imgs/RNN2.jpg" alt="RNN2" style="width:400px;"/>
</div>


<div style="display: flex;">
    <img src="imgs/RNN3.jpg" alt="RNN3" style="width:400px; margin-right: 10px;"/>
    <img src="imgs/RNN4.jpg" alt="RNN4" style="width:400px;"/>
</div>


In [None]:
class RNN:
  '''Many-to-one vanilla (tanh) RNN trained with backpropagation through time.

  Recurrence:  h_t = tanh(Wxh @ x_t + Whh @ h_{t-1} + bh),  with h_0 = 0
  Output:      y   = Why @ h_n + by   (computed from the final hidden state only)
  '''

  def __init__(self, vocab_size, hidden_size, output_size):
    '''Create the parameters.

    vocab_size:  length of each input column vector x_t.
    hidden_size: length of the hidden state h_t.
    output_size: length of the output (logit) vector y.
    '''
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.output_size = output_size

    # Weights: small random values (standard normal scaled by 1/1000).
    self.Wxh = np.random.randn(self.hidden_size, self.vocab_size)/1000
    self.Whh = np.random.randn(self.hidden_size, self.hidden_size)/1000
    self.Why = np.random.randn(self.output_size, self.hidden_size)/1000
    # Biases: unscaled standard normal.
    # NOTE(review): biases are ~1000x larger than the weights at init; zeros
    # are the more common choice - confirm this is intentional.
    self.bh = np.random.randn(self.hidden_size, 1)
    self.by = np.random.randn(self.output_size, 1)

  def forward(self, inputs):
    '''Run the sequence through the network and return the output logits.

    inputs: sequence of column vectors, each of shape (vocab_size, 1).
    Returns y with shape (output_size, 1).

    Caches `last_inputs`, `hs` (dict: step -> hidden state, hs[0] is the
    initial zero state) and `h_last` for use by `backprop`.
    '''
    # Use the instance's own hidden size, not a module-level global, so the
    # model works for any hidden_size passed to the constructor.
    h = np.zeros((self.hidden_size, 1))

    # caching for backprop
    self.last_inputs = inputs
    self.hs = {0: h}

    for t, x in enumerate(inputs):
      h = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, h) + self.bh)
      self.hs[t+1] = h

    # The output is computed from the final hidden state only (many-to-one).
    self.h_last = h
    y = np.dot(self.Why, self.h_last) + self.by

    return y

  def backprop(self, dL_dy, lr):
    '''Backpropagate through time and apply one gradient-descent step.

    Must be called after `forward`, whose cached values it reads.

    dL_dy: gradient of the loss w.r.t. the output y, shape (output_size, 1).
           It is not modified.
    lr:    learning rate.
    Returns None; the parameters are updated in place.
    '''
    # Output layer is only used at the last step: y = Why @ h_last + by
    #   dL/dWhy = dL/dy @ h_last^T
    dL_dWhy = np.dot(dL_dy, self.h_last.T)

    #   dL/dby = dL/dy. Copy it so the in-place clipping below cannot
    #   overwrite the caller's array.
    dL_dby = np.array(dL_dy, dtype=float)

    # Wxh, Whh and bh are used at every timestep, so their gradients are
    # accumulated across all steps (BPTT).
    n = len(self.last_inputs)
    dL_dWxh = np.zeros(self.Wxh.shape)
    dL_dWhh = np.zeros(self.Whh.shape)
    dL_dbh = np.zeros(self.bh.shape)

    # Gradient w.r.t. the last hidden state: dL/dh_n = Why^T @ dL/dy
    dL_dh = np.dot(self.Why.T, dL_dy)

    for t in reversed(range(n)):
      # Gradient w.r.t. the pre-activation of step t: tanh'(z) = 1 - h^2
      temp = (1 - self.hs[t+1]**2) * dL_dh
      dL_dbh += temp
      dL_dWhh += temp @ self.hs[t].T
      dL_dWxh += temp @ self.last_inputs[t].T

      # Pre-activation z_t = ... + Whh @ h_{t-1}, hence
      # dL/dh_{t-1} = Whh^T @ dL/dz_t  (the transpose is required).
      dL_dh = self.Whh.T @ temp

    # Clip gradients element-wise to limit exploding gradients
    # (clipping does nothing against vanishing gradients).
    for d in [dL_dWxh, dL_dWhh, dL_dWhy, dL_dbh, dL_dby]:
      np.clip(d, -1, 1, out=d)

    # Update weights and biases using gradient descent.
    self.Whh -= lr * dL_dWhh
    self.Wxh -= lr * dL_dWxh
    self.Why -= lr * dL_dWhy
    self.bh -= lr * dL_dbh
    self.by -= lr * dL_dby



In [None]:
def phrase_to_x(text):
  '''Encode a sequence of words as a list of one-hot column vectors.

  text: iterable of words; each must be a key of the global `word_to_idx`
        (an unknown word raises KeyError).
  Returns a list with one array of shape (vocab_size, 1) per word, where
  `vocab_size` is read from the module-level global.
  '''
  def encode(word):
    # Column of zeros with a single 1 at the word's vocabulary index.
    column = np.zeros((vocab_size, 1))
    column[word_to_idx[word]] = 1
    return column

  return [encode(word) for word in text]

def processData(data):
  '''Turn a {phrase: sentiment} mapping into model-ready lists.

  data: dict mapping a space-separated phrase to a truthy/falsy label.
  Returns (X, Y, Y_), aligned by position in the dict's iteration order:
    X  - list of one-hot word sequences (see phrase_to_x),
    Y  - list of integer class labels (int(sentiment), i.e. 0 or 1 for bools),
    Y_ - list of one-hot targets of shape (output_size, 1), using the
         module-level global `output_size`.
  '''
  X, Y, Y_ = [], [], []
  for phrase, sentiment in data.items():
    X.append(phrase_to_x(phrase.split(' ')))

    label = int(sentiment)
    Y.append(label)

    target = np.zeros((output_size, 1))
    target[label] = 1
    Y_.append(target)
  return X, Y, Y_


In [None]:
# Dimensions. `vocab_size` and `output_size` are read as globals by
# phrase_to_x and processData, so they must be set before encoding the data.
output_size = 2
hidden_size = 64
vocab_size = len(word_to_idx)

# Encode both datasets: one-hot word sequences, integer labels, one-hot labels.
(train_X, train_Y, train_Y_), (test_X, test_Y, test_Y_) = (
  processData(dataset) for dataset in (train_data, test_data)
)

In [None]:
def loop(model, X, Y, backprop=True, lr=0.02):
  '''Run one pass over a dataset, optionally training the model on each sample.

  model:    object exposing forward(x) -> logits and backprop(dL_dy=..., lr=...).
  X:        list of input sequences.
  Y:        list of one-hot targets of shape (output_size, 1), aligned with X.
  backprop: when True, update the model after every sample (per-sample SGD);
            when False, only evaluate.
  lr:       learning rate passed to model.backprop (default 0.02, the value
            that was previously hard-coded).

  Returns (mean_loss, num_correct):
    mean_loss   - average cross-entropy -ln(p[true class]) over the samples;
                  a (1, 1) array, because it is accumulated from a dot product
                  of two column vectors.
    num_correct - number of samples whose arg-max prediction matches the
                  target (a count, not a fraction).
  '''
  num_correct = 0
  loss = 0
  for x, y_true in zip(X, Y):
    logit = model.forward(x)
    probab = softmax(logit)
    num_correct += int(np.argmax(probab) == np.argmax(y_true))
    loss -= np.log(np.dot(probab.T, y_true)) # -ln(Pc) if c = true class
    if backprop:
      # The gradient is only needed when training; reuse the shared helper
      # instead of repeating its formula here.
      model.backprop(dL_dy=softmax_der(probab, y_true), lr=lr)

  return loss/len(X), num_correct




In [None]:
# Seed NumPy's global RNG so the random weight initialisation in RNN.__init__
# (np.random.randn) is the same on every run.
np.random.seed(0)

# Build the model from the shared configuration values, so its dimensions
# always match those used to encode the data.
model = RNN(vocab_size=vocab_size, hidden_size=hidden_size, output_size=output_size)

epochs = 1000
for epoch in range(epochs):
  # loop() returns (mean loss, number of correct predictions).
  train_loss, train_acc = loop(model, train_X, train_Y_, True)
  test_loss, test_acc = loop(model, test_X, test_Y_, False)
  if epoch%50==0:
    # The loss comes back as a (1, 1) array; squeeze it to print a plain number.
    # The "acc" values are counts, so show them against the dataset size.
    print(f'Epoch: {epoch}, train_loss = {float(np.squeeze(train_loss)):.4f}, train_acc = {train_acc}/{len(train_X)}')
    print(f'Epoch: {epoch}, test_loss = {float(np.squeeze(test_loss)):.4f}, test_acc = {test_acc}/{len(test_X)}')

Epoch: 0, train_loss = [[0.81082529]], train_acc = 23
Epoch: 0, test_loss = [[0.77319327]], test_acc = 10
Epoch: 50, train_loss = [[0.74207345]], train_acc = 21
Epoch: 50, test_loss = [[0.72460972]], test_acc = 10
Epoch: 100, train_loss = [[0.73354868]], train_acc = 25
Epoch: 100, test_loss = [[0.72111949]], test_acc = 10
Epoch: 150, train_loss = [[0.72627744]], train_acc = 22
Epoch: 150, test_loss = [[0.71934677]], test_acc = 10
Epoch: 200, train_loss = [[0.71875362]], train_acc = 23
Epoch: 200, test_loss = [[0.71560538]], test_acc = 9
Epoch: 250, train_loss = [[0.71186055]], train_acc = 29
Epoch: 250, test_loss = [[0.70686889]], test_acc = 10
Epoch: 300, train_loss = [[0.68495242]], train_acc = 30
Epoch: 300, test_loss = [[0.63875268]], test_acc = 11
Epoch: 350, train_loss = [[0.31242298]], train_acc = 49
Epoch: 350, test_loss = [[0.28931403]], test_acc = 18
Epoch: 400, train_loss = [[0.03335168]], train_acc = 58
Epoch: 400, test_loss = [[0.04897648]], test_acc = 20
Epoch: 450, train