In [2]:
import random
import numpy as np
from mygrad.nn import RNN

from data.rnn_data import train_data, test_data

In [3]:
np.random.seed(1337)
random.seed(1337)

## Data Preparation

In [19]:
# Check what the text data looks like 
# for the first 10 items
list(train_data.items())[:10]

[('good', True),
 ('bad', False),
 ('happy', True),
 ('sad', False),
 ('not good', False),
 ('not bad', True),
 ('not happy', False),
 ('not sad', True),
 ('very good', True),
 ('very bad', False)]

In [None]:
# binary categories
all_categories = [0, 1]

In [5]:
# get the vocabulary from training data
vocab = list(set([w for text in train_data.keys() for w in text.split()]))
vocab_size = len(vocab)

In [6]:
# dict maps words to idx
word_to_idx = {w: i for i, w in enumerate(vocab)}
# dict maps idx to words
idx_to_word = {i : w for i, w in enumerate(vocab)}

In [9]:
# create one-hot vector that represent a word in
# the vocabulary
def createInputs(text):
    inputs = []
    for w in text.split(' '):
        v = np.zeros((1, vocab_size))
        v[0][word_to_idx[w]] = 1
        inputs.append(v)
        
    return inputs

In [22]:
# We can see what the vector for the word "good" looks like
sample = createInputs('good')

In [23]:
sample

[array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.]])]

## Getting RNN Ready

In [24]:
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)] 

In [None]:
def randomTrainingExample():
    category = randomChoice()

In [13]:
input_sz = vocab_size # size of the input vector
hidden_sz = 64 # size of the hidden state
output_sz = 2 # size of rnn output

In [14]:
# init the RNN with input_sz of vocab
rnn = RNN(input_sz, hidden_sz, output_sz)

In [15]:
# check the number of parameters
len(rnn.parameters())

5478

In [20]:
# init the hidden state
hidden = rnn.initHidden()