In [4]:
from data import train_data, test_data

#create vocab (data corpus) 
vocab = list(set([w for text in train_data.keys() for w in text.split(' ')]))
vocab_size = len(vocab)
print("%d unique words found" % vocab_size)

18 unique words found


In [6]:
#Assign indices to each word
word_to_idx = {w: i for i, w in enumerate(vocab)}
idx_to_word = {i: w for i, w in enumerate(vocab)}

#test it
print(word_to_idx['good'])
print(idx_to_word[1])

14
is


In [7]:
#one-hot encoding
import numpy as np

def createInputs(datum) -> list[int]:
    """Creates an array of one-hot vectors for a datum 

    Args:
        datum (str): a particular datum from the train_data 
    """
    inputs = []
    for word in datum.split(' '):
        tmp_arr = np.zeros((vocab_size, 1)) #create an 18 x 1 vector for each word
        tmp_arr[word_to_idx[word]] = 1
        inputs.append(tmp_arr)
    return inputs



In [21]:
from numpy.random import randn

class RNN:

    def __init__(self, input_size, output_size, hidden_size=64):
        self.Whh = randn(hidden_size, hidden_size) / 1000
        self.Wxh = randn(hidden_size, input_size) / 1000
        self.Why = randn(output_size, hidden_size) / 1000

        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs : list[list[int]]):
        """Perform forward pass of the RNN using the given inputs.
        Returns the final output and hidden state.

        Args:
            inputs list[list[int]]: one-hot input vectors
        """

        h = np.zeros((self.Whh.shape[0], 1))

        for i, x in enumerate(inputs):
            h = np.tanh(self.Wxh @ x + self.Whh @ h + self.bh)

        y = self.Why @ h + self.by

        return y, h

In [None]:
def softmax(xs):
    return np.exp(xs) / sum(np.exp(xs))

rnn = RNN(vocab_size, 2)

inputs = createInputs('i am very good')
out, h = rnn.forward(inputs)
probs = softmax(out)
print(probs)