# LSTM on self made sequence data

In [1]:
import tensorflow as tf
import numpy as np 

In [2]:
num_classes = 2
batch_size = 128
embedding_dimension = 64
hidden_layer_size = 32
time_steps = 6
element_size = 1

In [3]:
digit_to_word_map = {1:"One",2:"Two", 3:"Three", 4:"Four", 5:"Five",
                     6:"Six",7:"Seven",8:"Eight",9:"Nine"}

digit_to_word_map[0]="PAD"

seqlens = []
even_sentences = []
odd_sentences = []

for i in range(10000):
    rand_seq_len = np.random.choice(range(3, 7))
    seqlens.append(rand_seq_len)
    
    rand_odd_ints = np.random.choice(range(1, 10, 2), rand_seq_len)
    rand_even_ints = np.random.choice(range(2, 10, 2), rand_seq_len)
    
    if rand_seq_len < 6:
        rand_odd_ints = np.append(rand_odd_ints, [0]*(6-rand_seq_len))
        rand_even_ints = np.append(rand_even_ints, [0]*(6-rand_seq_len))
    
    even_sentences.append(" ".join([digit_to_word_map[w] for w in rand_even_ints]))
    odd_sentences.append(" ".join([digit_to_word_map[w] for w in rand_odd_ints]))

data = even_sentences + odd_sentences
seqlens *= 2

In [4]:
even_sentences[0:6]

['Eight Two Four PAD PAD PAD',
 'Six Two Two Six Four Two',
 'Eight Four Four PAD PAD PAD',
 'Six Six Four Eight Four Eight',
 'Six Two Six Four PAD PAD',
 'Four Four Six Six Six PAD']

In [5]:
odd_sentences[:6]

['Nine Three Five PAD PAD PAD',
 'Seven One Seven One Five Seven',
 'Nine Five One PAD PAD PAD',
 'Three Seven Seven Five One Nine',
 'Five Five Seven Nine PAD PAD',
 'One One Seven Three Nine PAD']

In [6]:
word2index_map = {}

index = 0 
for sent in data:
    for word in sent.lower().split():
        if word not in word2index_map:
            word2index_map[word] = index
            index += 1
            
index2word_map = {index: word for word, index in word2index_map.iteritems()}
vocabulary_size = len(index2word_map)

In [7]:
word2index_map, index2word_map

({'eight': 0,
  'five': 7,
  'four': 2,
  'nine': 5,
  'one': 9,
  'pad': 3,
  'seven': 8,
  'six': 4,
  'three': 6,
  'two': 1},
 {0: 'eight',
  1: 'two',
  2: 'four',
  3: 'pad',
  4: 'six',
  5: 'nine',
  6: 'three',
  7: 'five',
  8: 'seven',
  9: 'one'})

In [8]:
labels = [0]*10000 + [1]*10000
for i in range(len(labels)):
    label = labels[i]
    one_hot = [0]*2
    one_hot[label] = 1
    labels[i] = one_hot

data_indices = list(range(len(labels)))
np.random.shuffle(data_indices)

data = np.array(data)[data_indices]

labels = np.array(labels)[data_indices]

seqlens = np.array(seqlens)[data_indices]

train_x = data[:10000]
train_y = labels[:10000]
train_seqlens = seqlens[:10000]

test_x = data[10000:]
test_y = labels[10000:]
test_seqlens = seqlens[10000:]

In [9]:
def generate_batch(batch_size,data_x, data_y, data_seqlens):
    instance_indices = list(range(len(data_x)))
    
    np.random.shuffle(instance_indices)
    batch = instance_indices[:batch_size]
    
    x = [[word2index_map[word] for word in data_x[i].lower().split()]
        for i in batch]
    y = [data_y[i] for i in batch]
    
    seql = [data_seqlens[i] for i in batch]
    
    return x, y, seql

In [10]:
_inputs = tf.placeholder(tf.int32, shape=[None, time_steps])
_labels = tf.placeholder(tf.float32, shape=[None, num_classes])
_seqlens = tf.placeholder(tf.int32, shape=[None])

In [11]:
with tf.name_scope("word_embeddings"):
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_dimension], -1.0, 1.0, name="embedding")
    )
    
    embed = tf.nn.embedding_lookup(embeddings, _inputs)

In [12]:
with tf.variable_scope("lstm"):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size, forget_bias=1.0)
    outputs, states = tf.nn.dynamic_rnn(lstm_cell, embed, sequence_length=_seqlens, dtype=tf.float32)

weights = {
    "linear_layer": tf.Variable(tf.truncated_normal([hidden_layer_size, num_classes], mean=0, stddev=0.01))
}

biases = {
    "linear_layer": tf.Variable(tf.truncated_normal([num_classes], mean=0, stddev=0.01))
}

In [13]:
final_output = tf.matmul(states[1], weights["linear_layer"]) + biases["linear_layer"]

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=final_output, labels=_labels))
optimizer = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(loss)

correct_predictions = tf.equal(tf.argmax(final_output, 1), tf.argmax(_labels, 1))
accuracy = (tf.reduce_mean(tf.cast(correct_predictions, tf.float32)))*100

In [14]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for i in range(1000):
        x_batch, y_batch, seq_batch = generate_batch(batch_size, train_x, train_y, train_seqlens)
        
        sess.run(optimizer, feed_dict={_inputs: x_batch, _labels: y_batch, _seqlens: seq_batch})
        
        if i%100 == 0:
            acc, train_loss =  sess.run([accuracy, loss], feed_dict=
                                        {_inputs: x_batch, _labels: y_batch, _seqlens: seq_batch})
            
            print "Iteration: " + str(i) +", Minibatch loss: {:.6f}".format(train_loss)\
            + ", Training Accuracy: {:.5f}".format(acc)
        
    for test_batch in range(5):
        x_test, y_test,seqlen_test = generate_batch(batch_size, test_x,test_y, test_seqlens)
        batch_pred, batch_acc = sess.run([tf.argmax(final_output,1),accuracy], 
                                        feed_dict={
                                            _inputs:x_test, _labels:y_test, _seqlens:seqlen_test
                                        })
        print("Test batch accuracy %d: %.5f" % (test_batch, batch_acc))
    
    x_test, y_test,seqlen_test = generate_batch(batch_size, test_x,test_y, test_seqlens)
    output_example = sess.run([outputs],feed_dict={_inputs:x_test,
                                                   _labels:y_test,
                                                   _seqlens:seqlen_test})

    states_example = sess.run([states[1]],feed_dict={_inputs:x_test, 
                                                     _labels:y_test,
                                                     _seqlens:seqlen_test})


Iteration: 0, Minibatch loss: 0.692232, Training Accuracy: 58.59375
Iteration: 100, Minibatch loss: 0.494559, Training Accuracy: 100.00000
Iteration: 200, Minibatch loss: 0.007550, Training Accuracy: 100.00000
Iteration: 300, Minibatch loss: 0.000129, Training Accuracy: 100.00000
Iteration: 400, Minibatch loss: 0.000007, Training Accuracy: 100.00000
Iteration: 500, Minibatch loss: 0.000002, Training Accuracy: 100.00000
Iteration: 600, Minibatch loss: 0.000001, Training Accuracy: 100.00000
Iteration: 700, Minibatch loss: 0.000001, Training Accuracy: 100.00000
Iteration: 800, Minibatch loss: 0.000001, Training Accuracy: 100.00000
Iteration: 900, Minibatch loss: 0.000000, Training Accuracy: 100.00000
Test batch accuracy 0: 100.00000
Test batch accuracy 1: 100.00000
Test batch accuracy 2: 100.00000
Test batch accuracy 3: 100.00000
Test batch accuracy 4: 100.00000


In [15]:
print seqlen_test[2]
x_test[2], y_test[2]

5


([9, 8, 6, 9, 6, 3], array([0, 1]))

In [16]:
output_example[0][2].shape

(6, 32)

In [17]:
output_example[0][2][:6,0:3]


array([[-0.29690939,  0.44627053, -0.39435387],
       [-0.58720136,  0.71543616, -0.63482565],
       [-0.76689607,  0.82111651, -0.64382738],
       [-0.87321401,  0.89006484, -0.74903172],
       [-0.79940873,  0.84842414, -0.66278774],
       [ 0.        ,  0.        ,  0.        ]], dtype=float32)

In [18]:
states_example[0][2][0:3]



array([-0.79940873,  0.84842414, -0.66278774], dtype=float32)

## stacking multiple RNNS

In [20]:
num_lstm_layers = 2
with tf.variable_scope("lstm"):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size, forget_bias=1.0)
    cell = tf.contrib.rnn.MultiRNNCell(cells=[lstm_cell]*num_lstm_layers, state_is_tuple=True)
    outputs, states = tf.nn.dynamic_rnn(cell, embed, sequence_length=_seqlens, dtype=tf.float32)

ValueError: Trying to share variable lstm/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (64, 128) and found shape (96, 128).

In [21]:
final_output = tf.matmul(states[num_lstm_layers-1][1], weights["linear_layer"]) + biases["linear_layer"]

ValueError: Shape must be rank 2 but is rank 1 for 'MatMul_1' (op: 'MatMul') with input shapes: [32], [32,2].