# About
We are going to build a recurrent neural network with two stacked LSTM layers. Please read the even_or_odd_rnn example first, since this notebook builds on it.  
TensorFlow provides `tf.contrib.rnn.MultiRNNCell`, which chains several LSTM cells into a single multi-layer cell.

In [1]:
import numpy as np
import tensorflow as tf

  return f(*args, **kwds)


In [2]:
# --- Hyper-parameters shared by the cells below ---

# Data layout
time_steps = 6      # every sentence is padded to this many tokens
element_size = 1    # not referenced by the cells visible in this notebook
n_class = 2         # width of the one-hot label (odd / even)

# Model size
embedding_dim = 64  # width of each word-embedding vector
h_size = 128        # hidden units per LSTM layer

# Optimisation
batch_size = 128    # sentences fed per training / evaluation step

In [3]:
# Digit -> English word lookup for 1..9, built by numbering the words from 1.
num_to_word_map = dict(enumerate(
    ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine'],
    start=1,
))
# 0 is reserved as the padding token for sentences shorter than time_steps (6).
num_to_word_map[0] = 'PAD'

In [4]:
# Containers filled by the generation loop:
#   even_sent - sentences made only of even digits
#   odd_sent  - sentences made only of odd digits
#   seq_lens  - true (unpadded) length of each generated sentence
even_sent, odd_sent, seq_lens = [], [], []

### Generate odd and even sentences

We zero-pad the sentences shorter than `time_steps` (6) so that every input fed to the RNN has the same shape. However, the padding is unnecessary noise that the network would otherwise have to learn to ignore. So make sure to let `tf.nn.dynamic_rnn()` know where each sentence ends (through its `sequence_length` argument) so that it only considers the real tokens.

In [5]:
# Start from empty containers so that re-running this cell regenerates the
# dataset instead of appending to (and re-doubling) whatever is already there.
even_sent, odd_sent, seq_lens = [], [], []

n_per_class = 10000  # sentences generated for each class (even / odd)
min_len = 3          # shortest sentence; the longest is `time_steps`

for _ in range(n_per_class):
    # True length of this pair of sentences: min_len ~ time_steps (inclusive).
    rand_seq_len = np.random.randint(min_len, time_steps + 1)
    seq_lens.append(rand_seq_len)

    odd_nums = np.random.choice(range(1, 10, 2), rand_seq_len)   # e.g. 3, 5, 1, 3
    even_nums = np.random.choice(range(2, 10, 2), rand_seq_len)  # e.g. 8, 2, 2, 4

    # Right-pad with 0 (mapped to 'PAD') up to `time_steps` tokens.
    # np.pad keeps the integer dtype and is a no-op when n_pad == 0.
    n_pad = time_steps - rand_seq_len
    odd_nums = np.pad(odd_nums, (0, n_pad), mode='constant')
    even_nums = np.pad(even_nums, (0, n_pad), mode='constant')

    odd_sent.append(" ".join(num_to_word_map[num] for num in odd_nums))
    even_sent.append(" ".join(num_to_word_map[num] for num in even_nums))

data = even_sent + odd_sent  # remember this order. even first. odd later!
# The i-th even and i-th odd sentence share one length, so the length list is
# simply repeated to line up with `data`.
seq_lens *= 2

##### Test

In [6]:
# Peek at the first ten even sentences; sentences shorter than time_steps end in 'PAD' tokens.
even_sent[0:10]

['eight two two four PAD PAD',
 'eight eight eight PAD PAD PAD',
 'eight two four two eight four',
 'two eight four two eight PAD',
 'six four eight two PAD PAD',
 'eight eight two six eight PAD',
 'eight four four eight two PAD',
 'four eight two PAD PAD PAD',
 'eight six four PAD PAD PAD',
 'four two two two eight PAD']

## Map "Token Ids" to each digit
The ids have no special meaning in themselves. They are just indices assigned to each digit word in the order it is first encountered. <strong>This means that id 1 doesn't necessarily stand for the digit one.</strong>

In [7]:
# Assign consecutive integer ids to tokens in order of first appearance in `data`.
word2index = {}
idx = 0
for word in (token for sent in data for token in sent.split()):
    if word in word2index:
        continue  # token already has an id
    word2index[word] = idx
    idx += 1

# Reverse lookup: id -> token.
index2word = dict(zip(word2index.values(), word2index.keys()))

In [8]:
# Number of distinct tokens seen in `data`; used as the row count of the embedding matrix.
vocab_size = len(word2index)

In [9]:
# Inspect the id -> token mapping; ids follow first-appearance order, not digit value.
index2word

{0: 'eight',
 1: 'two',
 2: 'four',
 3: 'PAD',
 4: 'six',
 5: 'five',
 6: 'three',
 7: 'one',
 8: 'nine',
 9: 'seven'}

## Make one-hot labels and split data into train and test datasets

In [10]:
# Label ids: 1 = even sentence, 0 = odd sentence.
# The label is read from the sentence itself (its first token is always a real
# digit word, never 'PAD', because every sentence has at least 3 digits) rather
# than from the position in `data`. This removes the hard-coded sample count and
# keeps the labels correct even if this cell is re-run after `data` was shuffled.
even_words = {num_to_word_map[num] for num in range(2, 10, 2)}
label_ids = np.array([int(sent.split()[0] in even_words) for sent in data])

# One-hot encode: label 1 -> [0, 1] (even), label 0 -> [1, 0] (odd).
labels = np.eye(n_class, dtype=int)[label_ids]

# Sentences, labels and true lengths must stay aligned row by row.
assert len(data) == len(labels) == len(seq_lens), "data / labels / seq_lens are misaligned"

# Shuffle all three with the same permutation.
indices = np.random.permutation(len(data))

data = np.array(data)[indices]
labels = labels[indices]
seq_lens = np.array(seq_lens)[indices]

num_test = len(data) // 10  # train: test = 9: 1

# The first `num_test` shuffled rows form the test set, the rest the training set.
train_x = data[num_test:]
train_y = labels[num_test:]
train_seq_lens = seq_lens[num_test:]

test_x = data[:num_test]
test_y = labels[:num_test]
test_seq_lens = seq_lens[:num_test]

In [11]:
def get_sent_batch(batch_size, data_x, data_y, data_seq_lens, word_to_index=None):
    """Draw a random mini-batch (without replacement) and encode it as token ids.

    Args:
        batch_size: Number of samples to draw. If it exceeds ``len(data_x)``,
            all samples are returned (in random order).
        data_x: Indexable collection of whitespace-separated sentences.
        data_y: Labels aligned with ``data_x``.
        data_seq_lens: True (unpadded) sentence lengths aligned with ``data_x``.
        word_to_index: Optional token -> id mapping. Defaults to the
            notebook-level ``word2index`` vocabulary.

    Returns:
        A tuple ``(x, y, seqlens)`` of Python lists: ``x`` holds one list of
        token ids per sentence, ``y`` the matching labels and ``seqlens`` the
        matching true lengths.

    Raises:
        KeyError: If a sentence contains a token missing from the vocabulary.
    """
    if word_to_index is None:
        # Resolved at call time so the vocabulary cell may be (re)run first.
        word_to_index = word2index

    temp_indices = list(range(len(data_x)))
    np.random.shuffle(temp_indices)
    batch_indices = temp_indices[:batch_size]

    x = [[word_to_index[word] for word in data_x[i].split()] for i in batch_indices]
    y = [data_y[i] for i in batch_indices]
    seqlens = [data_seq_lens[i] for i in batch_indices]
    return x, y, seqlens

In [12]:
# Graph inputs. The batch dimension is fixed to `batch_size`, so every feed
# must contain exactly that many rows.
X = tf.placeholder(dtype=tf.int32, shape=[batch_size, time_steps], name='X')   # token ids
Y = tf.placeholder(dtype=tf.float32, shape=[batch_size, n_class], name='Y')    # one-hot labels

# True (unpadded) length of each sentence in the batch.
_seqlens = tf.placeholder(dtype=tf.int32, shape=[batch_size])

## Embed the words to vectors

In [13]:
with tf.name_scope('embeddings'):
    # Trainable lookup table: one `embedding_dim`-wide row per token id,
    # initialised uniformly in [-1, 1).
    # NOTE(review): `name='embedding'` labels the initializer op, not the
    # Variable itself - confirm whether that was intended.
    initial_embeddings = tf.random_uniform(
        [vocab_size, embedding_dim], minval=-1., maxval=1., name='embedding')
    embeddings = tf.Variable(initial_embeddings)
    # Replace each token id in X with its embedding row.
    embed = tf.nn.embedding_lookup(embeddings, X)

In [16]:
n_LSTM_layers = 2
with tf.variable_scope('LSTM'):
    # One BasicLSTMCell per layer, stacked into a single multi-layer cell.
    lstm_cells = []
    for _ in range(n_LSTM_layers):
        lstm_cells.append(tf.contrib.rnn.BasicLSTMCell(h_size, forget_bias=1.))
    cell = tf.contrib.rnn.MultiRNNCell(lstm_cells, state_is_tuple=True)

    # `sequence_length` tells dynamic_rnn where each sentence really ends.
    outputs, states = tf.nn.dynamic_rnn(
        cell,
        embed,
        sequence_length=_seqlens,
        dtype=tf.float32,
    )

    # Linear read-out from the hidden state to the class scores.
    W = tf.get_variable(
        'W_linear', [h_size, n_class],
        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable(
        'b_linear', [n_class],
        initializer=tf.zeros_initializer())

    # `states` holds one (c, h) tuple per layer; classify from the hidden
    # state `h` of the top layer.
    top_layer_h = states[-1].h
    logits = tf.matmul(top_layer_h, W) + b

    # Mean softmax cross-entropy over the batch.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y)
    loss = tf.reduce_mean(per_example_loss)

In [17]:
# One optimisation step: Adam (constructed with no arguments, i.e. library defaults) minimising `loss`.
train_op = tf.train.AdamOptimizer().minimize(loss)

In [18]:
# Batch accuracy in percent: compare the arg-max class of the logits with the
# arg-max of the one-hot labels.
predicted_class = tf.argmax(logits, axis=1)
true_class = tf.argmax(Y, axis=1)
correct_preds = tf.equal(predicted_class, true_class)
acc_op = tf.reduce_mean(tf.cast(correct_preds, tf.float32)) * 100

In [19]:
# Build the prediction op once, before the session loops: calling tf.argmax
# inside the evaluation loop would add a new node to the graph every iteration.
pred_op = tf.argmax(logits, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # --- Training ---
    # Each "epoch" here is a single mini-batch update, not a full pass over the
    # training set.
    for epoch in range(1000):
        x_batch, y_batch, seqlen_batch = get_sent_batch(batch_size, train_x, train_y, train_seq_lens)
        train_feed = {X: x_batch, Y: y_batch, _seqlens: seqlen_batch}
        sess.run(train_op, feed_dict=train_feed)

        # Every 100 steps, report loss / accuracy on the batch just trained on.
        if epoch % 100 == 0:
            _loss, acc = sess.run([loss, acc_op], feed_dict=train_feed)
            print('Epoch: ', epoch, ' Loss: ', _loss, ' Acc: ', acc)

    # --- Evaluation on five random test batches ---
    for i in range(5):
        x_test, y_test, seqlen_test = get_sent_batch(batch_size, test_x, test_y, test_seq_lens)
        test_feed = {X: x_test, Y: y_test, _seqlens: seqlen_test}
        pred, test_acc = sess.run([pred_op, acc_op], feed_dict=test_feed)
        # Report the accuracy of THIS test batch (not the last training `acc`).
        print('Acc: ', test_acc)

    # Inspect the raw RNN outputs and final states for the last test batch.
    output_ex, state_ex = sess.run([outputs, states], feed_dict=test_feed)
    print('output: ', output_ex)
    print('state: ', state_ex)

Epoch:  0  Loss:  0.6586384  Acc:  92.1875
Epoch:  100  Loss:  5.045624e-05  Acc:  100.0
Epoch:  200  Loss:  2.7901591e-05  Acc:  100.0
Epoch:  300  Loss:  2.6127353e-05  Acc:  100.0
Epoch:  400  Loss:  2.0469604e-05  Acc:  100.0
Epoch:  500  Loss:  1.0959508e-05  Acc:  100.0
Epoch:  600  Loss:  8.933061e-06  Acc:  100.0
Epoch:  700  Loss:  7.7735995e-06  Acc:  100.0
Epoch:  800  Loss:  5.2032083e-06  Acc:  100.0
Epoch:  900  Loss:  3.9003335e-06  Acc:  100.0
Acc:  100.0
Acc:  100.0
Acc:  100.0
Acc:  100.0
Acc:  100.0
output:  [[[ 7.9929784e-02  6.8176682e-03  5.7104353e-02 ...  3.5779770e-02
   -3.0442338e-02 -9.0788461e-02]
  [ 3.1010464e-01 -2.5449570e-03  2.3223484e-01 ...  2.3181106e-01
   -8.2969993e-02 -3.5609621e-01]
  [ 6.6815746e-01 -2.2718480e-03  5.2944988e-01 ...  6.4440733e-01
   -7.6559059e-02 -6.7584026e-01]
  [ 8.8369554e-01 -4.3445607e-04  7.3655599e-01 ...  8.9715606e-01
   -5.2254144e-02 -8.5723937e-01]
  [ 9.4818819e-01 -7.6858414e-05  8.3095914e-01 ...  9.6909106e