### Task: sorting digits (e.g. [3,1,2] -> [1,2,3])

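* Bidirectional LSTM, framed as sequence tagging (like NER tagging): output position i predicts the index, within the input, of the i-th smallest digit (e.g. input [3,1,2] -> target [1,2,0]).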

In [1]:
# Add custom import path
# NOTE(review): this is an absolute, machine-specific directory. The later
# `import utils` presumably resolves from it -- confirm, and adjust the path
# before running the notebook on another machine.

import sys
sys.path.insert(0, '/home/jacobsuwang/Documents/UTA2018/NEURAL-NETS/ATTENTION/CODE/01-import-folder')

### MAKING DATA

In [74]:
import utils
import random
import numpy as np

# Vocabulary definition. All lookup tables are derived from one ordered token
# list so they cannot drift apart.

# Digit tokens, in the order that fixes their ids ('1' -> 2, ..., '0' -> 11).
NUMBERS = ['1','2','3','4','5','6','7','8','9','0']

# Every sequence is padded to this fixed length (number of digits + 2).
MAX_LEN = len(NUMBERS) + 2

# Token -> integer id. The special tokens come first: 'PAD' is 0, 'EOS' is 1.
WORD2IDX = {word: idx for idx, word in enumerate(['PAD', 'EOS'] + NUMBERS)}

# Set of all tokens; its size is used as the embedding vocabulary size.
VOCAB = set(WORD2IDX)

# Integer id -> token. `.items()` (rather than the Python-2-only
# `.iteritems()`) keeps this line valid on both Python 2 and Python 3.
IDX2WORD = {idx: word for word, idx in WORD2IDX.items()}


def random_datum(n):
    """Generate one padded (input, target) pair made of n distinct digits.

    Args:
        n: number of digit tokens to draw from NUMBERS (without replacement).

    Returns:
        (input_seq, output_seq), both of length MAX_LEN:
          * input_seq  -- the n drawn digit tokens followed by 'PAD' tokens,
                          e.g. ['5', '6', '3', '9', '1', 'PAD', ...].
          * output_seq -- the positions of the drawn digits listed in
                          ascending digit order, e.g. [4, 2, 0, 1, 3] for the
                          input above, followed by MAX_LEN-1 as filler.
    """
    digits = list(np.random.choice(NUMBERS, n, replace=False))
    filler_count = MAX_LEN - n

    # Argsort of the drawn digits: position of the smallest digit first.
    # The digits are unique, so this matches looking each sorted digit up
    # again with .index().
    ascending_positions = sorted(range(len(digits)), key=lambda pos: digits[pos])

    output_seq = ascending_positions + [MAX_LEN-1] * filler_count
    input_seq = digits + ['PAD'] * filler_count
    return input_seq, output_seq

def random_batch(batch_size):
    """Draw batch_size random examples for training.

    Each example gets its own random length. The upper bound passed to
    np.random.randint, MAX_LEN-2 (i.e. len(NUMBERS)), is exclusive, so the
    drawn lengths range from 2 to MAX_LEN-3 inclusive.

    Args:
        batch_size: number of examples to generate.

    Returns:
        (input_seqs, output_seqs, seq_lengths):
          * input_seqs  -- list of id-encoded, padded input sequences.
          * output_seqs -- list of padded target index sequences.
          * seq_lengths -- list with the unpadded length of each example.
    """
    examples = []
    for _ in range(batch_size):
        length = np.random.randint(2, MAX_LEN-2)
        tokens, target = random_datum(length)
        examples.append((encode_seq(tokens), target, length))

    input_seqs = [example[0] for example in examples]
    output_seqs = [example[1] for example in examples]
    seq_lengths = [example[2] for example in examples]
    return input_seqs, output_seqs, seq_lengths

def encode_seq(seq):
    """Translate a sequence of tokens into their integer ids via WORD2IDX.

    Raises KeyError if a token is not in WORD2IDX.
    """
    ids = []
    for token in seq:
        ids.append(WORD2IDX[token])
    return ids

def decode_seq(seq):
    """Translate a sequence of integer ids back into tokens via IDX2WORD.

    Raises KeyError if an id is not in IDX2WORD.
    """
    tokens = []
    for token_id in seq:
        tokens.append(IDX2WORD[token_id])
    return tokens

In [75]:
# Sanity check: draw 10 examples and show each padded input as tokens.
# a = encoded inputs, b = target index sequences, c = unpadded lengths.
a, b, c = random_batch(10)
for a_ in a:
    print(decode_seq(a_))

['7', '5', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['4', '9', '8', '0', '6', '1', '3', '7', '5', 'PAD', 'PAD', 'PAD']
['4', '5', '3', '1', '2', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['1', '5', '4', '9', '2', '7', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['2', '7', '0', '5', '1', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['6', '4', '8', '1', '3', '9', '7', '0', '2', 'PAD', 'PAD', 'PAD']
['2', '0', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['9', '3', '1', '4', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['7', '6', '1', '0', '4', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['0', '2', '8', '3', '7', '1', '6', '9', 'PAD', 'PAD', 'PAD', 'PAD']


In [70]:
random_batch(10)

([[6, 4, 7, 10, 2, 9, 5, 0, 0, 0, 0, 0],
  [7, 2, 11, 9, 10, 8, 5, 3, 0, 0, 0, 0],
  [10, 9, 8, 7, 11, 2, 0, 0, 0, 0, 0, 0],
  [7, 10, 11, 8, 4, 6, 0, 0, 0, 0, 0, 0],
  [11, 4, 7, 3, 6, 0, 0, 0, 0, 0, 0, 0],
  [7, 2, 4, 8, 6, 3, 10, 11, 5, 0, 0, 0],
  [3, 11, 6, 10, 7, 4, 9, 5, 0, 0, 0, 0],
  [7, 11, 4, 5, 2, 0, 0, 0, 0, 0, 0, 0],
  [8, 10, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  [8, 4, 9, 2, 7, 3, 0, 0, 0, 0, 0, 0]],
 [[4, 1, 6, 0, 2, 5, 3, 11, 11, 11, 11, 11],
  [2, 1, 7, 6, 0, 5, 3, 4, 11, 11, 11, 11],
  [4, 5, 3, 2, 1, 0, 11, 11, 11, 11, 11, 11],
  [2, 4, 5, 0, 3, 1, 11, 11, 11, 11, 11, 11],
  [0, 3, 1, 4, 2, 11, 11, 11, 11, 11, 11, 11],
  [7, 1, 5, 2, 8, 4, 0, 3, 6, 11, 11, 11],
  [1, 0, 5, 7, 2, 4, 6, 3, 11, 11, 11, 11],
  [1, 4, 2, 3, 0, 11, 11, 11, 11, 11, 11, 11],
  [2, 0, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11],
  [3, 5, 1, 4, 0, 2, 11, 11, 11, 11, 11, 11]],
 [7, 8, 6, 6, 5, 9, 8, 5, 3, 6])

### MAKING MODEL

In [58]:
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple

In [102]:
# Build the tagging model: embedding -> bidirectional LSTM -> per-position
# softmax over MAX_LEN classes (an input position, or the MAX_LEN-1 filler).
tf.reset_default_graph()
sess = tf.InteractiveSession()

vocab_size = len(VOCAB)
embedding_size = 20
hidden_size = 20
# Width of the concatenated forward + backward LSTM outputs. This depends on
# hidden_size, not embedding_size; the two only happen to be equal here.
rnn_output_size = 2 * hidden_size

# Token ids, unpadded lengths and target indices; batch dimension is dynamic.
inputs = tf.placeholder(shape=(None,MAX_LEN), dtype=tf.int32, name='inputs')
inputs_length = tf.placeholder(shape=(None,), dtype=tf.int32, name='inputs_length')
targets = tf.placeholder(shape=(None,MAX_LEN), dtype=tf.int32, name='targets')

embeddings = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), dtype=tf.float32)
inputs_embedded = tf.nn.embedding_lookup(embeddings, inputs)

fw_cell = LSTMCell(hidden_size)
bw_cell = LSTMCell(hidden_size)
((fw_outputs,bw_outputs),
 (fw_final_state,bw_final_state)) = (
        tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_cell,
                                        cell_bw=bw_cell,
                                        inputs=inputs_embedded,
                                        sequence_length=inputs_length,
                                        dtype=tf.float32)
    )
outputs = tf.concat((fw_outputs, bw_outputs), 2) # (batch_size, MAX_LEN, rnn_output_size)
# Flatten batch and time so one dense layer scores every position at once.
dense_inputs = tf.reshape(outputs, [-1, rnn_output_size])

W = tf.get_variable('W', shape=(rnn_output_size, MAX_LEN),
                    initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.constant(0.1, shape=(MAX_LEN,)), name='b')

logits = tf.nn.xw_plus_b(dense_inputs, W, b) # (batch_size*MAX_LEN, 12)
pred_probs = tf.nn.softmax(logits) # (batch_size*MAX_LEN, 12)
# `axis` replaces the deprecated `dimension` keyword of tf.argmax.
y_pred = tf.cast(tf.argmax(pred_probs, axis=1), tf.int32) # (batch_size*MAX_LEN,)
y_pred_structured = tf.reshape(y_pred, [-1, MAX_LEN])
y_true = tf.reshape(targets, shape=[-1]) # (batch_size*MAX_LEN,)

# NOTE: the loss is averaged over every position, padded ones included; only
# the accuracy below is restricted to real (non-padded) positions.
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_true))

correct = tf.cast(tf.equal(y_pred, y_true), tf.int32)
mask = tf.cast(tf.not_equal(y_true, MAX_LEN-1), tf.int32) # now MAX_LEN=12, padding with 11 in target.
total_seqlen = tf.cast(tf.reduce_sum(inputs_length), tf.float32)
correct = tf.multiply(correct, mask) # zero out hits on padded positions
# Fraction of real positions predicted correctly across the batch.
accuracy = tf.cast(tf.reduce_sum(correct), tf.float32) / total_seqlen
train = tf.train.AdamOptimizer().minimize(loss)

# Training configuration.
num_epochs = 50
num_batches_per_epoch = 10000
batch_size = 5
verbose = 5000  # print the running average loss every `verbose` batches

sess.run(tf.global_variables_initializer())

# Every batch is freshly generated, so an "epoch" is simply a fixed number
# of random batches. All print calls use the single-argument form, which
# produces the same text on Python 2 and Python 3.
for e in range(num_epochs):
    print('Epoch: {}'.format(e+1))
    print('')
    loss_track = []
    accuracy_track = []
    for i in range(num_batches_per_epoch):
        inputs_, targets_, inputs_length_ = random_batch(batch_size)
        # Plain dict assignment (previously `fd = feed_dict={...}`, which also
        # created a stray `feed_dict` global).
        fd = {inputs:inputs_, targets:targets_, inputs_length:inputs_length_}
        # `train_` only receives the result of running the train op; it is
        # not used afterwards.
        loss_, train_, accuracy_ = sess.run([loss, train, accuracy], fd)
        loss_track.append(loss_)
        accuracy_track.append(accuracy_)
        if i%verbose==0:
            print('Average Loss after {} batches = {}'.format(i,np.mean(loss_track)))
    print('')
    print('Epoch {} Average Loss = {} | Accuracy = {}'.format(e+1,np.mean(loss_track),np.mean(accuracy_track)))
    # NOTE: `fd` still holds the last training batch of this epoch, so these
    # "test" examples are not held-out data.
    preds = sess.run(y_pred_structured, fd)
    print('')
    print('Test results:')
    for input_,pred,target_ in zip(inputs_,preds,targets_):
        print('input =  {}'.format(decode_seq(input_)))
        print('pred =  {}'.format(pred))
        print('target =  {}'.format(target_))
        print('')

Epoch: 1

Average Loss after 0 batches = 2.50531888008
Average Loss after 5000 batches = 1.0928106308

Epoch 1 Average Loss = 0.90045517683 | Accuracy = 0.361017435789

Test results:
input =  ['4', '3', '0', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 2  1  0 11 11 11 11 11 11 11 11 11]
target =  [2, 1, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['2', '6', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 0  0 11 11 11 11 11 11 11 11 11 11]
target =  [0, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['6', '5', '2', '9', '1', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 4  3  2  0  0 11 11 11 11 11 11 11]
target =  [4, 2, 1, 0, 3, 11, 11, 11, 11, 11, 11, 11]

input =  ['4', '6', '1', '9', '5', '7', '8', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 2  1  0  1  4  5  5 11 11 11 11 11]
target =  [2, 0, 4, 1, 5, 6, 3, 11, 11, 11, 11, 11]

input =  ['0', '5', '2', '7', '3', '8', '9', '1', 'PAD', 'PAD', 'PAD

Average Loss after 5000 batches = 0.56467294693

Epoch 9 Average Loss = 0.564023435116 | Accuracy = 0.495810896158

Test results:
input =  ['8', '1', '0', '2', '7', '6', '5', '9', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 2  1  3  5  6  4  0  7 11 11 11 11]
target =  [2, 1, 3, 6, 5, 4, 0, 7, 11, 11, 11, 11]

input =  ['4', '9', '2', '6', '8', '0', '5', '3', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 5  4  3  6  5  4  1  1 11 11 11 11]
target =  [5, 2, 7, 0, 6, 3, 4, 1, 11, 11, 11, 11]

input =  ['0', '4', '5', '2', '3', '6', '1', '9', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 0  5  5  2  2  5  2  7 11 11 11 11]
target =  [0, 6, 3, 4, 1, 2, 5, 7, 11, 11, 11, 11]

input =  ['1', '7', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 0  1 11 11 11 11 11 11 11 11 11 11]
target =  [0, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['6', '7', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 1  1 11 11 11 11 11 11 11 11 11 11]
target =  [0

Average Loss after 5000 batches = 0.542657792568

Epoch 17 Average Loss = 0.541326582432 | Accuracy = 0.506732583046

Test results:
input =  ['5', '4', '8', '0', '6', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 3  1  0  1  0 11 11 11 11 11 11 11]
target =  [3, 1, 0, 4, 2, 11, 11, 11, 11, 11, 11, 11]

input =  ['4', '0', '1', '3', '8', '7', '5', '6', '2', 'PAD', 'PAD', 'PAD']
pred =  [ 1  2  8  7  6  6  5  5  4 11 11 11]
target =  [1, 2, 8, 3, 0, 6, 7, 5, 4, 11, 11, 11]

input =  ['9', '4', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 1  1 11 11 11 11 11 11 11 11 11 11]
target =  [1, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['4', '7', '3', '8', '6', '1', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 0  0  2  0  1  3 11 11 11 11 11 11]
target =  [5, 2, 0, 4, 1, 3, 11, 11, 11, 11, 11, 11]

input =  ['7', '6', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 1  1 11 11 11 11 11 11 11 11 11 11

Average Loss after 5000 batches = 0.536569774151

Epoch 25 Average Loss = 0.535996496677 | Accuracy = 0.511342585087

Test results:
input =  ['0', '9', '6', '1', '7', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 0  2  1  4  2 11 11 11 11 11 11 11]
target =  [0, 3, 2, 4, 1, 11, 11, 11, 11, 11, 11, 11]

input =  ['6', '1', '8', '2', '0', '3', '4', '7', '5', 'PAD', 'PAD', 'PAD']
pred =  [ 4  3  2  6  8  8  7  2  2 11 11 11]
target =  [4, 1, 3, 5, 6, 8, 0, 7, 2, 11, 11, 11]

input =  ['0', '4', '5', '6', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 0  1  1  1 11 11 11 11 11 11 11 11]
target =  [0, 1, 2, 3, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['2', '8', '0', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 0  0  0 11 11 11 11 11 11 11 11 11]
target =  [2, 0, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['3', '9', '8', '5', '1', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 1  0  0  0  2 11 11 11 11 11 11 11]
t

Average Loss after 5000 batches = 0.528970658779

Epoch 33 Average Loss = 0.528125703335 | Accuracy = 0.516998946667

Test results:
input =  ['5', '0', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 1  0 11 11 11 11 11 11 11 11 11 11]
target =  [1, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['4', '5', '2', '6', '8', '0', '7', '3', '1', 'PAD', 'PAD', 'PAD']
pred =  [ 5  8  7  6  1  4  6  4  4 11 11 11]
target =  [5, 8, 2, 7, 0, 1, 3, 6, 4, 11, 11, 11]

input =  ['8', '3', '1', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 2  1  0 11 11 11 11 11 11 11 11 11]
target =  [2, 1, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['1', '5', '0', '8', '4', '7', '6', '9', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 2  0  4  1  4  5  3  7 11 11 11 11]
target =  [2, 0, 4, 1, 6, 5, 3, 7, 11, 11, 11, 11]

input =  ['2', '0', '3', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 1  0  0 11 11 11 11 11 11 11 11 11]


Average Loss after 5000 batches = 0.524687767029

Epoch 41 Average Loss = 0.525186002254 | Accuracy = 0.516975104809

Test results:
input =  ['1', '7', '2', '6', '4', '5', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 0  0  4  3  3  1 11 11 11 11 11 11]
target =  [0, 2, 4, 5, 3, 1, 11, 11, 11, 11, 11, 11]

input =  ['1', '0', '4', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 0  0  2 11 11 11 11 11 11 11 11 11]
target =  [1, 0, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['5', '8', '7', '2', '3', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 3  2  1  0  0 11 11 11 11 11 11 11]
target =  [3, 4, 0, 2, 1, 11, 11, 11, 11, 11, 11, 11]

input =  ['4', '2', '5', '3', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 1  0  1  0 11 11 11 11 11 11 11 11]
target =  [1, 3, 0, 2, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['3', '2', '9', '7', '1', '4', '5', '0', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 5  6  0  4  3  2  2  2 11 11 11 11

Average Loss after 5000 batches = 0.525617539883

Epoch 49 Average Loss = 0.522006750107 | Accuracy = 0.517024099827

Test results:
input =  ['9', '8', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 1  2 11 11 11 11 11 11 11 11 11 11]
target =  [1, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11]

input =  ['6', '9', '5', '1', '2', '7', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 3  2  1  0  0  0 11 11 11 11 11 11]
target =  [3, 4, 2, 0, 5, 1, 11, 11, 11, 11, 11, 11]

input =  ['4', '7', '5', '9', '6', '3', '2', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 6  5  4  3  3  1  1 11 11 11 11 11]
target =  [6, 5, 0, 2, 4, 1, 3, 11, 11, 11, 11, 11]

input =  ['0', '1', '3', '2', '5', '9', '7', '8', '4', 'PAD', 'PAD', 'PAD']
pred =  [ 0  1  2  8  4  5  6  7  5 11 11 11]
target =  [0, 1, 3, 2, 8, 4, 6, 7, 5, 11, 11, 11]

input =  ['4', '9', '3', '7', '1', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
pred =  [ 4  3  2  0  0 11 11 11 11 11 11 11]
target =  

In [97]:
print targets
print y_true

Tensor("targets:0", shape=(?, 12), dtype=int32)
Tensor("Reshape_2:0", shape=(?,), dtype=int32)


In [95]:
tf.nn.softmax?

In [94]:
pred_probs

<tf.Tensor 'Softmax:0' shape=(?, 12) dtype=float32>

In [90]:
outputs

<tf.Tensor 'concat:0' shape=(?, 12, 40) dtype=float32>

In [91]:
outputs.get_shape()

TensorShape([Dimension(None), Dimension(12), Dimension(40)])

In [88]:
print y_pred
print y_true

Tensor("Cast:0", shape=(?,), dtype=int32)
Tensor("Reshape_2:0", shape=(?,), dtype=int32)


In [93]:
logits

<tf.Tensor 'xw_plus_b:0' shape=(?, 12) dtype=float32>

In [84]:
targets_

[[1, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11],
 [0, 8, 6, 4, 1, 7, 2, 3, 5, 11, 11, 11],
 [0, 1, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11],
 [6, 0, 4, 3, 2, 5, 1, 7, 11, 11, 11, 11],
 [2, 4, 1, 0, 3, 11, 11, 11, 11, 11, 11, 11]]

In [39]:
loss

<tf.Tensor 'Mean:0' shape=() dtype=float32>

In [37]:
pred_probs

<tf.Tensor 'Softmax:0' shape=(?, 10) dtype=float32>

In [36]:
y_pred

<tf.Tensor 'Cast:0' shape=(?,) dtype=int32>

In [5]:
inputs_embedded

<tf.Tensor 'embedding_lookup:0' shape=(?, ?, 20) dtype=float32>

In [9]:
fw_outputs

<tf.Tensor 'bidirectional_rnn/fw/fw/transpose:0' shape=(?, ?, 20) dtype=float32>

In [11]:
outputs

<tf.Tensor 'concat:0' shape=(?, ?, 40) dtype=float32>

In [14]:
int(outputs.get_shape()[2])

40

In [16]:
dense_inputs

<tf.Tensor 'Reshape:0' shape=(?, 40) dtype=float32>

In [30]:
W.get_shape()

TensorShape([Dimension(40), Dimension(10)])

In [31]:
dense_inputs

<tf.Tensor 'Reshape:0' shape=(?, 40) dtype=float32>

In [33]:
logits

<tf.Tensor 'xw_plus_b:0' shape=(?, 10) dtype=float32>