# RNN model for Sentiment Analysis

Based on the earlier preprocessing technique I have implemented a very basic RNN model in order to get an intuition and implementational details of RNN using tensorflow.

## Load Dependencies

In [1]:
import tensorflow as tf
from tqdm import tqdm, trange
import numpy as np

In [66]:
# load the saved numpy models of dataset
train = np.load('data/train_set.npz')
# reviews in form of their word indices
train_X = train['train_X']
# sentiment for reviews
train_y = train['train_y'].reshape(-1,1)
# load numpy aray containing word vectors
embed = np.load('data/embedding.npz')['embed']

The dataset is in this form 

**train_x**: (25000, seq_length)
```
[
[2, 435, 23, 34, 234, 324, 0,  0],
[1, 2,   43, 67, 23 , 20,  21, 0]
[3, 4,   2,  4,  6,   234, 45, 324],
]
```

**train_y**: (25000, 1)
```
[1, 1, 0, 0, 1]
```

**embed**: (voabulary_size, 200)
```
[
[0.06, ................. 0.04],
[0.1, 0.23 ....... 0.01, 0.06],
]
```

In [67]:
# one-hot encoding of class labels
train_y = np.concatenate([train_y, 1-train_y], axis=1)

In order to classify we use 2 nodes in output layer thus the train_y array is expanded
```
[[1,0],
 [1,0],
 [0,1],
 [1,0]]

```

In [83]:
# length of the sequences
seq_length = train_X.shape[1]
# size of each training batch
batch_size = 10
# number of iterations in training
epochs = 1000
# learning rate of optimizer
learning_rate = 0.1
# shape of input layer
input_shape = 200
# shape of hidden layer
hidden_shape = 256
# shape of output layer
output_shape = 2

In [23]:
# create mini batches for training
def get_batch(x, y, batch_size, random=False, start=0, end=0):
    if random:
        # shuffled indices of batch_size
        idx = np.random.choice(range(len(x)), size=batch_size, replace=False)
    else:
        # unshuffled indices of batch_size
        idx = np.arange(start, end)
    return x[idx], y[idx]

In [80]:
tf.reset_default_graph()
with tf.Graph().as_default() as graph:
    
    # parameters for RNN
    W_xh = tf.Variable(
                        tf.random_normal(mean=0.0, stddev=0.1, shape=[input_shape, hidden_shape]),
                        dtype=tf.float32
                    )
    
    W_hh = tf.Variable(
                        tf.random_normal(mean=0.0, stddev=0.1, shape=[hidden_shape, hidden_shape]),
                        dtype=tf.float32
                    )
    
    W_yh = tf.Variable(
                        tf.random_normal(mean=0.0, stddev=0.1, shape=[hidden_shape, output_shape]),
                        dtype=tf.float32
                    )
    
    # TODO : add bias
    
    # embedding tensors which contains word vectors
    embeds = tf.constant(embed, dtype=tf.float32)
    
    # placeholder for inputs (inputs are indices of words in sequences)
    X = tf.placeholder(shape=[None, seq_length], dtype=tf.int32)
    Y = tf.placeholder(shape=[None, output_shape], dtype=tf.float32)
    
    # placeholder for initial hidden state
    h_in = tf.placeholder(shape=[None, hidden_shape], dtype=tf.float32)
    
    # inputs in form of vectors (inputs are word vectors for words in sequences)
    X_embed = tf.nn.embedding_lookup(embeds, X)
    
    # the dimension of embedding vector is [batch_size, sequence_length, input_shape]
    # we need to convert it into [sequence_length, batch_size, input_shape]
    # this is done in order to perform the time unfolding of the RNN graph which is performed by tf.scan() below
    X_embed = tf.transpose(X_embed, [1, 0, 2])
    
    
    # define RNN
    def RNN(h_prev, x_t):
        # reshape input batch of words
        x_t = tf.reshape(x_t, [batch_size, input_shape])
        
        z_t = tf.matmul(x_t, W_xh) + tf.matmul(h_prev, W_hh)
        h_t = tf.tanh(z_t)
        
        # reshape the hidden state output 
        h_t = tf.reshape(h_t, [batch_size, hidden_shape])
        return h_t
    
    
    # tf.scan() function is used to perform an iterative RNN operation over the time steps
    # in this case the time unfolding of RNN is done upto sequence length
    # tf.scan() function performs RNN operation on a every value of X_embed
    # by using the returned value from the previous time step which is the hidden state here
    # the final output is the hidden state for every time step
    h_out = tf.scan(RNN, X_embed, initializer=h_in)
    
    # we only need take the hidden state output from the last time step h_out[-1]
    # this is fed into a softmax layer
    preds = tf.matmul(h_out[-1], W_yh)
    
    # mean square loss function
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=preds))
    #loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(preds, Y))))
    
    # Adam optimizer for backpropagation
    optimize_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    
    with tf.Session() as sess:
        # initialize variables
        sess.run(tf.global_variables_initializer())
        
        # initialize hidden state
        h_init = np.zeros((batch_size, hidden_shape))
        
        # iterate
        for i in trange(epochs):
            
            # generate batches randomly form the train set
            batch_X, batch_y = get_batch(train_X, train_y, batch_size, random=True)
            
            # run the graph for optimize_op and calculate loss
            _, cost, pred = sess.run([optimize_op, loss, preds], {X:batch_X, Y:batch_y, h_in:h_init})
            
            if i%100==0:
                print(cost)

  0%|          | 1/1000 [00:02<34:28,  2.07s/it]

1.03355


 10%|█         | 101/1000 [04:01<36:56,  2.47s/it]

0.706189


 20%|██        | 201/1000 [08:00<29:27,  2.21s/it]

5.93933


 30%|███       | 301/1000 [12:00<25:25,  2.18s/it]

1.04647


 40%|████      | 401/1000 [16:06<24:35,  2.46s/it]

2.52397


 50%|█████     | 501/1000 [20:14<20:50,  2.51s/it]

4.14904


 60%|██████    | 601/1000 [24:34<16:39,  2.50s/it]

3.77803


 70%|███████   | 701/1000 [28:43<12:59,  2.61s/it]

1.77753


 80%|████████  | 801/1000 [32:32<07:21,  2.22s/it]

4.51753


 90%|█████████ | 901/1000 [36:23<04:01,  2.44s/it]

1.68995


100%|██████████| 1000/1000 [40:20<00:00,  2.42s/it]
