##  Ptr-Net second pass

* Input: accident and earthquake dataset scrambled documents.
* Output: ordered documents.
* Gong et al. (2016)

In [1]:
# Add custom import path

import sys
sys.path.insert(0, '/home/jacobsuwang/Documents/UTA2018/NEURAL-NETS/ATTENTION/CODE/01-import-folder')

### LOAD DATA

In [2]:
import utils
import dill
import random
import numpy as np

bl08_path = '/home/jacobsuwang/Documents/UTA2018/NEURAL-NETS/ATTENTION/DATA/COHERENCE/data1-train-encoded-data.p'

data_dict = dill.load(open(bl08_path, 'rb'))

print data_dict['readme']


README

@ 'tar-order':targets_orders

List of sentence order lists; each list = integers indexing permuted sentences in the doc.

@ 'inp-encode':inputs_encoded

List of documents; each doc = a list of sentences; each sent = a list of word indices.

@ 'inp-slen':inputs_sent_lengths

List of length info of documents; each info = a list of sentence lengths.

@ 'w-indexer':word_indexer

Indexer() class. word <-> word index.

@ 'idx-emb':idx2emb

dict() class. index <-> glove embeddings.

@ 'glove-init':glove_init

Initializer of embedding matrix. 



In [6]:
print data_dict['inp-encode'][0]

[[2, 3, 4, 3, 5, 6, 7, 8, 9, 10, 6, 11, 12, 13], [14, 15, 4, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 10, 19, 32, 6, 11, 33, 13], [34, 30, 35, 36, 37, 38, 39, 40, 41, 42, 6, 15, 29, 43, 19, 44, 45, 37, 46, 13], [11, 33, 47, 6, 48, 49, 50, 51, 52, 53, 37, 25, 28, 37, 54, 15, 55, 56, 16, 57, 58, 18, 59, 37, 42, 60, 61, 13], [53, 62, 43, 63, 14, 64, 65, 15, 51, 42, 66, 67, 30, 68, 42, 69, 70, 71, 13], [4, 47, 72, 73, 74, 51, 75, 76, 74, 77, 78, 79, 21, 3, 80, 81, 82, 83, 42, 84, 85, 13], [16, 86, 3, 87, 18]]


In [4]:
max_sent, max_doc = 0, 0
for doc in data_dict['inp-encode']:
    if len(doc)>max_doc:
        max_doc = len(doc)
    for sent in doc:
        if len(sent)>max_sent:
            max_sent = len(sent)
print max_sent, max_doc

61 26


In [161]:
# Padding inputs

MAX_SENT = 61
MAX_DOC = 26

data_inputs = []

def pad_doc(doc):
    padded_doc = []
    doc_length = len(doc)
    for sent in doc:
        padded_sent = sent + [0]*(MAX_SENT-len(sent))
        padded_doc.append(padded_sent)
    padded_doc += [[0]*MAX_SENT]*(MAX_DOC-doc_length)
    return padded_doc, doc_length

def pad_sents_length(slen):
    return slen + [0]*(MAX_DOC-len(slen))

def pad_tars_order(tord):
    return tord + [0]*3 # +1 EOS, +2 PADs

### MAKE MODEL

In [3]:
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple

In [181]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

sent_emb_size = 20
doc_enc_emb_size = sent_emb_size*2
doc_dec_emb_size = doc_enc_emb_size*2

sent_vocab_size = MAX_DOC + 2 # 26 + PAD + EOS.

doc_inputs = tf.placeholder(tf.int32, shape=[MAX_DOC, MAX_SENT], name='doc-inputs')
    # MAX_DOC: pseudo-batch-size
    # MAX_SENT: pseudo-max-time.
doc_sents_length = tf.placeholder(tf.int32, shape=[MAX_DOC], name='doc-sents-length')

embeddings = tf.get_variable('word-embeddings', shape=data_dict['glove-init'].shape, # (1809,300)
                             initializer=tf.constant_initializer())
glove_feed = tf.placeholder(tf.float32, shape=data_dict['glove-init'].shape)
glove_init = embeddings.assign(glove_feed)
doc_inputs_embedded = tf.transpose(tf.nn.embedding_lookup(embeddings, doc_inputs),[1,0,2]) 
    # (61, 26, 300), (mt,bc,emb), time-major

with tf.variable_scope('sent-enc'):
    sent_enc_cell = LSTMCell(sent_emb_size)
    ((sent_enc_fw_outputs,sent_enc_bw_outputs), 
     (sent_enc_fw_final_state,sent_enc_bw_final_state)) = ( 
            tf.nn.bidirectional_dynamic_rnn(cell_fw=sent_enc_cell,
                                            cell_bw=sent_enc_cell,
                                            inputs=doc_inputs_embedded,
                                            sequence_length=doc_sents_length,
                                            dtype=tf.float32, time_major=True)
        )
    sent_enc_outputs = tf.concat((sent_enc_fw_outputs,sent_enc_bw_outputs), 2) 
        # <tf.Tensor 'sent-enc/concat:0' shape=(61, 26, 40) dtype=float32>
    sent_enc_final_state_c = tf.concat((sent_enc_fw_final_state.c,sent_enc_bw_final_state.c), 1)
    sent_enc_final_state_h = tf.concat((sent_enc_fw_final_state.c,sent_enc_bw_final_state.h), 1)
    sent_enc_final_state = LSTMStateTuple(
        c=sent_enc_final_state_c,
        h=sent_enc_final_state_h
    )    
        # LSTMStateTuple(c=<tf.Tensor 'sent-enc/concat_1:0' shape=(?, 40) dtype=float32>, 
        #                h=<tf.Tensor 'sent-enc/concat_2:0' shape=(?, 40) dtype=float32>)
        # .h should be (bc=MAX_DOC,emb), but why it gives "?"?
        
## TEST ##

# sess.run(tf.global_variables_initializer())
# doc_inputs_, _ = pad_doc(data_dict['inp-encode'][0]) # _ is doc_length, used later.
# doc_sents_length_ = pad_sents_length(data_dict['inp-slen'][0])
# fd = {doc_inputs:doc_inputs_, doc_sents_length:doc_sents_length_,
#       glove_feed:data_dict['glove-init']}
# t1, t2 = sess.run([sent_enc_outputs, sent_enc_final_state], feed_dict=fd)

# assert 1==0
        
doc_final_embedded = tf.expand_dims(sent_enc_final_state.h, 1)
    # <tf.Tensor 'ExpandDims:0' shape=(?, 1, 40) dtype=float32>
    # (mt=#sents in doc, bc=1, emb)
    # e.g. (26, 1, 40)
doc_length = tf.placeholder(tf.int32, shape=[1], name='doc-length')
# doc_targets = tf.placeholder(tf.int32, shape=[MAX_DOC], name='doc-targets')
doc_targets = tf.placeholder(tf.int32, shape=[None, 1], name='doc-targets') # [mt,bc]
# data_dict['tar-order'][0]

with tf.variable_scope('doc-enc'):
    doc_enc_cell = LSTMCell(doc_enc_emb_size)
    ((doc_enc_fw_outputs,doc_enc_bw_outputs), 
     (doc_enc_fw_final_state,doc_enc_bw_final_state)) = ( 
            tf.nn.bidirectional_dynamic_rnn(cell_fw=doc_enc_cell,
                                            cell_bw=doc_enc_cell,
                                            inputs=doc_final_embedded,
                                            sequence_length=doc_length,
                                            dtype=tf.float32, time_major=True)
        )    
    doc_enc_outputs = tf.concat((doc_enc_fw_outputs,doc_enc_bw_outputs), 2) 
        # <tf.Tensor 'doc-enc/concat:0' shape=(?, 1, 80) dtype=float32>
    doc_enc_final_state_c = tf.concat((doc_enc_fw_final_state.c,doc_enc_bw_final_state.c), 1)
    doc_enc_final_state_h = tf.concat((doc_enc_fw_final_state.c,doc_enc_bw_final_state.h), 1)
    doc_enc_final_state = LSTMStateTuple(
        c=doc_enc_final_state_c,
        h=doc_enc_final_state_h
    )
        # LSTMStateTuple(c=<tf.Tensor 'doc-enc/concat_1:0' shape=(?, 80) dtype=float32>, 
        #                h=<tf.Tensor 'doc-enc/concat_2:0' shape=(?, 80) dtype=float32>)
        # .h should be (bc, emb)
        
## TEST ##

# sess.run(tf.global_variables_initializer())
# doc_inputs_, _ = pad_doc(data_dict['inp-encode'][0]) # _ is doc_length, used later.
# doc_sents_length_ = pad_sents_length(data_dict['inp-slen'][0])
# doc_length_ = [len(data_dict['inp-encode'][0])] # NB: must be [1]!!!
# fd = {doc_inputs:doc_inputs_, doc_sents_length:doc_sents_length_,
#       doc_length:doc_length_,
#       glove_feed:data_dict['glove-init']}
# t3, t4 = sess.run([doc_enc_outputs, doc_enc_final_state], feed_dict=fd)

# assert 1==0


doc_dec_cell = LSTMCell(doc_dec_emb_size)
doc_max_time, doc_batch_size, _ = tf.unstack(tf.shape(doc_final_embedded))
doc_dec_length = doc_length + 3 # +2 steps, +1 for EOS. i.e. 26 + 3 = 29

## TEST ##

# sess.run(tf.global_variables_initializer())
# doc_inputs_, _ = pad_doc(data_dict['inp-encode'][0]) # _ is doc_length, used later.
# doc_sents_length_ = pad_sents_length(data_dict['inp-slen'][0])
# doc_length_ = [len(data_dict['inp-encode'][0])] # NB: must be [1]!!!
# fd = {doc_inputs:doc_inputs_, doc_sents_length:doc_sents_length_,
#       doc_length:doc_length_,
#       glove_feed:data_dict['glove-init']}
# t5, t6 = sess.run([doc_final_embedded, doc_max_time], feed_dict=fd)

# assert 1==0


W = tf.Variable(tf.random_uniform([doc_dec_emb_size, sent_vocab_size], -1, 1), dtype=tf.float32) # for dec only!
b = tf.Variable(tf.zeros([sent_vocab_size]), dtype=tf.float32)

eos_step_embedded = tf.ones([doc_batch_size, doc_enc_emb_size], dtype=tf.float32, name='EOS')
pad_step_embedded = tf.zeros([doc_batch_size, doc_enc_emb_size], dtype=tf.float32, name='PAD')

def loop_fn_initial():
    initial_elements_finished = (0 >= doc_dec_length) 
    initial_input = eos_step_embedded                   
    initial_cell_state = doc_enc_final_state
    initial_cell_output = None 
    initial_loop_state = None 
    return (initial_elements_finished,
            initial_input,
            initial_cell_state,
            initial_cell_output,
            initial_loop_state)

# TODO: adjust dimensions later
W1 = tf.Variable(tf.random_uniform([doc_enc_emb_size, doc_enc_emb_size], -1, 1),
                 dtype=tf.float32) 
W2 = tf.Variable(tf.random_uniform([doc_dec_emb_size, doc_enc_emb_size], -1, 1), 
                 dtype=tf.float32) 
v = tf.Variable(tf.random_uniform([doc_enc_emb_size, 1], -1, 1),
                dtype=tf.float32) 

def loop_fn_transition(time, previous_output, previous_state, previous_loop_state):
    
    def get_next_input(): 
        
        mt, bc, _ = tf.unstack(tf.shape(doc_final_embedded))
        # mt here is for #sents-in-doc!!
#         print doc_final_embedded
#         assert 1==0
#         Tensor("ExpandDims:0", shape=(?, 1, 40), dtype=float32)
        
        EW1 = tf.reshape(tf.tensordot(doc_final_embedded, W1, axes=[[2],[0]]),
                         [mt, bc, doc_enc_emb_size]) 
#         print EW1
#         assert 1==0
#         Tensor("rnn/while/cond/Reshape:0", shape=(?, ?, 40), dtype=float32)
        
        DW2 = tf.matmul(previous_state.h, W2) 
#         print DW2
#         assert 1==0
#         Tensor("rnn/while/cond/MatMul:0", shape=(?, 40), dtype=float32)
        
        EW1_add_DW2 = tf.add(EW1, DW2)
#         print EW1_add_DW2
#         assert 1==0
#         Tensor("rnn/while/cond/Add:0", shape=(?, ?, 40), dtype=float32)
        
        attention_mat = tf.reshape(tf.nn.tanh(tf.squeeze(tf.tensordot(EW1_add_DW2, v, axes=[[2],[0]]), 
                                                         axis=2)), [mt,bc])
#         print attention_mat
#         assert 1==0
#         Tensor("rnn/while/cond/Reshape_1:0", shape=(?, ?), dtype=float32) [mt,bc]
        
        attention_norm_mat = tf.nn.softmax(attention_mat, dim=0) 
#         print attention_norm_mat
#         assert 1==0
#         Tensor("rnn/while/cond/transpose_1:0", shape=(?, ?), dtype=float32) [mt,bc]
        
        
        selector = tf.one_hot(tf.argmax(attention_norm_mat, axis=0), depth=doc_max_time,
                              on_value=1.0, off_value=0.0, axis=0) 
#         print selector
#         assert 1==0
#         Tensor("rnn/while/cond/one_hot:0", shape=(?, ?), dtype=float32)
        
        inputs_embedded_selected = tf.transpose(
            tf.multiply(
                tf.transpose(doc_final_embedded, [2,0,1]), 
                selector), 
            [1,2,0]
        ) 
#         print inputs_embedded_selected
#         assert 1==0
#         Tensor("rnn/while/cond/transpose_3:0", shape=(?, ?, 40), dtype=float32)
        
        inputs_embedded_selected = tf.reduce_sum(
            tf.reshape(inputs_embedded_selected, [mt, bc, doc_enc_emb_size]), 
            axis=0 
        )   
#         print previous_output
#         print inputs_embedded_selected
#         assert 1==0
#         Tensor("rnn/while/lstm_cell/mul_2:0", shape=(?, 80), dtype=float32)
#         Tensor("rnn/while/cond/Sum:0", shape=(?, 40), dtype=float32)
        
        next_input = inputs_embedded_selected
        return next_input
    
    elements_finished = (time >= doc_dec_length)
    finished = tf.reduce_all(elements_finished) 
    inpt = tf.cond(finished, lambda: pad_step_embedded, get_next_input)
    state = previous_state
    output = previous_output
    loop_state = None
    return (elements_finished,
            inpt, 
            state,
            output,
            loop_state)        

def loop_fn(time, previous_output, previous_state, previous_loop_state):
    if previous_state is None:
        assert previous_output is None and previous_state is None
        return loop_fn_initial()
    else:
        return loop_fn_transition(time, previous_output, previous_state, previous_loop_state)

# k = tf.nn.raw_rnn(doc_dec_cell, loop_fn)
# print k
# assert 1==0
    
doc_dec_outputs_ta, doc_dec_final_state, _ = tf.nn.raw_rnn(doc_dec_cell, loop_fn)
doc_dec_outputs = doc_dec_outputs_ta.stack()
# print doc_dec_outputs
# Tensor("TensorArrayStack/TensorArrayGatherV3:0", shape=(?, ?, 80), dtype=float32)
doc_dec_max_step, doc_dec_batch_size, doc_dec_dim = tf.unstack(tf.shape(doc_dec_outputs))
doc_dec_outputs_flat = tf.reshape(doc_dec_outputs, (-1, doc_dec_dim))

# print doc_dec_outputs_flat
# print doc_dec_dim
# assert 1==0

doc_dec_logits_flat = tf.add(tf.matmul(doc_dec_outputs_flat, W), b)

# print doc_dec_logits_flat

doc_dec_logits = tf.reshape(doc_dec_logits_flat, (doc_dec_max_step, doc_dec_batch_size, sent_vocab_size))

# print doc_dec_logits
# assert 1==0

doc_dec_prediction = tf.cast(tf.argmax(doc_dec_logits, 2), dtype=tf.int32)

# print doc_dec_prediction

## TEST ##

# sess.run(tf.global_variables_initializer())
# doc_inputs_, _ = pad_doc(data_dict['inp-encode'][1]) # _ is doc_length, used later.
# doc_sents_length_ = pad_sents_length(data_dict['inp-slen'][1])
# doc_length_ = [len(data_dict['inp-encode'][1])] # NB: must be [2]!!!
# doc_targets_ = np.array(pad_tars_order(data_dict['tar-order'][1]))[:,np.newaxis]
# fd = {doc_inputs:doc_inputs_, doc_sents_length:doc_sents_length_,
#       doc_length:doc_length_, 
#       doc_targets:doc_targets_,
#       glove_feed:data_dict['glove-init']}
# t7, t8 = sess.run([doc_dec_outputs, doc_targets], feed_dict=fd)
# print t7.shape
# print t8.shape
# print sess.run([doc_dec_max_step, doc_dec_batch_size, doc_dec_dim], feed_dict=fd)
# t7, t8 = sess.run([doc_dec_prediction, doc_targets], feed_dict=fd)
# print t7
# print t7.shape
# print t8
# print t8.shape
# assert 1==0

# Accuracy
correct_raw = tf.cast(tf.equal(doc_dec_prediction, doc_targets), tf.int32)

# print doc_dec_prediction
# print doc_targets
# print correct_raw
# assert 1==0

mask = tf.cast(tf.not_equal(doc_targets, 0), tf.int32) # 0 for PAD
total_seqlen = tf.cast(doc_length, tf.float32)
correct = tf.multiply(correct_raw, mask)
accuracy = tf.cast(tf.reduce_sum(correct)-1, tf.float32) / total_seqlen # batch_size = 1

## TEST ##

# sess.run(tf.global_variables_initializer())
# doc_inputs_, _ = pad_doc(data_dict['inp-encode'][1]) # _ is doc_length, used later.
# doc_sents_length_ = pad_sents_length(data_dict['inp-slen'][1])
# doc_length_ = [len(data_dict['inp-encode'][1])] # NB: must be [2]!!!
# doc_targets_ = np.array(pad_tars_order(data_dict['tar-order'][1]))[:,np.newaxis]
# fd = {doc_inputs:doc_inputs_, doc_sents_length:doc_sents_length_,
#       doc_length:doc_length_, 
#       doc_targets:doc_targets_,
#       glove_feed:data_dict['glove-init']}
# t9,t10,t11 = sess.run([doc_dec_prediction, doc_targets, accuracy], feed_dict=fd)
# print t9
# print t10
# print t11

# Optimization
stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf.one_hot(doc_targets, depth=sent_vocab_size, dtype=tf.float32),
    logits=doc_dec_logits
)
loss = tf.reduce_mean(stepwise_cross_entropy)
train_op = tf.train.AdamOptimizer().minimize(loss)

init = tf.global_variables_initializer()
sess.run(init)

In [176]:
print len(data_dict['inp-encode'])
print len(data_dict['inp-slen'])
print len(data_dict['tar-order'])

2035
2035
2035


In [None]:
loss_track = []
accuracy_track = []

num_epochs = 100
verbose = 500

sess.run(glove_feed, feed_dict={glove_feed:data_dict['glove-init']})

for e in range(num_epochs):
    print 'Epoch {}:'.format(e+1)
    print
    for i in range(len(data_dict['inp-encode'])):
        doc_inputs_, _ = pad_doc(data_dict['inp-encode'][i])
        doc_sents_length_ = pad_sents_length(data_dict['inp-slen'][i])
        doc_length_ = [len(data_dict['inp-encode'][i])] # NB: must be [2]!!!
        doc_targets_ = np.array(pad_tars_order(data_dict['tar-order'][i]))[:,np.newaxis]
        fd = {doc_inputs:doc_inputs_, 
              doc_sents_length:doc_sents_length_,
              doc_length:doc_length_, 
              doc_targets:doc_targets_}
        _, l, a = sess.run([train_op, loss, accuracy], feed_dict=fd)
        loss_track.append(l)
        accuracy_track.append(a)
        if i % verbose == 0:
            print 'Current mean loss = {} | mean accuracy = {}'.format(np.mean(loss_track),np.mean(accuracy_track))
    print

Epoch 1:

Current mean loss = 0.00481224153191 | mean accuracy = 0.857142865658
Current mean loss = 0.83429569006 | mean accuracy = 0.551808357239
Current mean loss = 0.810026943684 | mean accuracy = 0.561738312244
Current mean loss = 0.683873176575 | mean accuracy = 0.601092517376
Current mean loss = 0.686348259449 | mean accuracy = 0.597848057747

Epoch 2:

Current mean loss = 0.677813529968 | mean accuracy = 0.600679934025
Current mean loss = 0.700254976749 | mean accuracy = 0.594907283783
Current mean loss = 0.708626627922 | mean accuracy = 0.5940528512
Current mean loss = 0.665521979332 | mean accuracy = 0.607996881008
Current mean loss = 0.665780544281 | mean accuracy = 0.607267439365

Epoch 3:

Current mean loss = 0.661509215832 | mean accuracy = 0.608664155006
Current mean loss = 0.673916637897 | mean accuracy = 0.605031013489
Current mean loss = 0.679312288761 | mean accuracy = 0.604320406914
Current mean loss = 0.652481198311 | mean accuracy = 0.612894177437
Current mean loss