# Intro to Recurrent Neural Nets (RNNs)

Reference: https://victorzhou.com/blog/intro-to-rnns/

## Instructions

1. Create Virtual Environment: `python3 -m venv datascience-venv`
2. Set Virtual Environment: `source datascience-venv/bin/activate`
3. Install JupyterLab in your Virtual Env using pip: `pip3 install jupyterlab`
4. Install dependencies (`numpy`, `pandas`, `scikit-learn`) into the virtual environment
   * `pip3 install pandas`, `pip3 install scikit-learn`
5. Add your Virtual Environment as a kernel to Jupyterlab: `python3 -m ipykernel install --user --name=datascience-venv`
6. Start JupyterLab from the virtual environment: `jupyter-lab --notebook-dir <location of your notebooks>`
7. Make sure your set your Virtual Env's kernel in the notebook that you're using

In [53]:
import sys
import pandas as pd
import numpy as np
from test_data.rnns_testdata import train_data, test_data
from functools import reduce

In [31]:
# Reduce all training data into set of unique words
# Note: the lambda does x + y - as [1,2,3] + [4,5,6] appends 2 lists together
vocabulary = list(set(reduce(
    lambda list_elem1, list_elem2: list_elem1+list_elem2,
    [key.split(' ') for key in train_data.keys()], 
    []
)))
assert len(vocabulary) == 18

In [59]:
train_data

{'good': True,
 'bad': False,
 'happy': True,
 'sad': False,
 'not good': False,
 'not bad': True,
 'not happy': False,
 'not sad': True,
 'very good': True,
 'very bad': False,
 'very happy': True,
 'very sad': False,
 'i am happy': True,
 'this is good': True,
 'i am bad': False,
 'this is bad': False,
 'i am sad': False,
 'this is sad': False,
 'i am not happy': False,
 'this is not good': False,
 'i am not bad': True,
 'this is not sad': True,
 'i am very happy': True,
 'this is very good': True,
 'i am very bad': False,
 'this is very sad': False,
 'this is very happy': True,
 'i am good not bad': True,
 'this is good not bad': True,
 'i am bad not good': False,
 'i am good and happy': True,
 'this is not good and not happy': False,
 'i am not at all good': False,
 'i am not at all bad': True,
 'i am not at all happy': False,
 'this is not at all sad': True,
 'this is not at all happy': False,
 'i am good right now': True,
 'i am bad right now': False,
 'this is bad right now': Fa

In [58]:
map_vocab_to_idx

{'bad': 0,
 'i': 1,
 'sad': 2,
 'earlier': 3,
 'all': 4,
 'right': 5,
 'happy': 6,
 'good': 7,
 'is': 8,
 'this': 9,
 'very': 10,
 'and': 11,
 'now': 12,
 'or': 13,
 'am': 14,
 'was': 15,
 'not': 16,
 'at': 17}

In [60]:
# Build a map of idx to word in the vocab
map_idx_to_vocab = {x[0]: x[1] for x in list(enumerate(vocabulary))}

# Build a map of word in the vocab to idx
map_vocab_to_idx = {x[1]: x[0] for x in list(enumerate(vocabulary))}

# Create one-hot encodings for these 18 features based on the input training data
feature_matrix = np.zeros(shape=(len(train_data), len(vocabulary)))
assert feature_matrix.shape == (58, 18)

train_data_l = list(train_data.keys())
for _iter in range(len(train_data)):
    for _elem in train_data_l[_iter].split(' '):
        feature_matrix[_iter][map_vocab_to_idx.get(_elem)] = 1
        
if False:
    np.set_printoptions(threshold=sys.maxsize)
    print(feature_matrix)