In [1]:
#Kuhan Wang 17-09-15

from tensorflow.python.ops import array_ops

import matplotlib.pyplot as plt
from tensorflow.python.layers.core import Dense
import numpy as np
import tensorflow as tf
import pandas as pd

import helpers
import math
import pickle
import random

def encodeSent(sent):

    if type(sent) == str: sent = sent.split(' ')
    
    return [vocab_dict[word] if word in vocab_dict else 2 for word in sent]

def decodeSent(sent):
    return [inv_map[i] for i in sent]

def prepare_batch(seqs_x, maxlen=None):
    # seqs_x: a list of sentences
    lengths_x = [len(s) for s in seqs_x]
    if maxlen is not None:
        new_seqs_x = []
        new_lengths_x = []
        for l_x, s_x in zip(lengths_x, seqs_x):
            if l_x <= maxlen:
                new_seqs_x.append(s_x)
                new_lengths_x.append(l_x)
        lengths_x = new_lengths_x
        seqs_x = new_seqs_x
        
        if len(lengths_x) < 1:
            return None, None

    batch_size = len(seqs_x)
    x_lengths = np.array(lengths_x)
    maxlen_x = np.max(x_lengths)
    x = np.ones((batch_size, maxlen_x)).astype('int32') * PAD
    for idx, s_x in enumerate(seqs_x):
        x[idx, :lengths_x[idx]] = s_x
    return x, x_lengths

def prepare_train_batch(seqs_x, seqs_y, maxlen=None):
    # seqs_x, seqs_y: a list of sentences
    lengths_x = [len(s) for s in seqs_x]
    lengths_y = [len(s) for s in seqs_y]

    if maxlen is not None:
        new_seqs_x = []
        new_seqs_y = []
        new_lengths_x = []
        new_lengths_y = []
        for l_x, s_x, l_y, s_y in zip(lengths_x, seqs_x, lengths_y, seqs_y):
            if l_x <= maxlen and l_y <= maxlen:
                new_seqs_x.append(s_x)
                new_lengths_x.append(l_x)
                new_seqs_y.append(s_y)
                new_lengths_y.append(l_y)
        lengths_x = new_lengths_x
        seqs_x = new_seqs_x
        lengths_y = new_lengths_y
        seqs_y = new_seqs_y

        if len(lengths_x) < 1 or len(lengths_y) < 1:
            return None, None, None, None

    batch_size = len(seqs_x)
    
    x_lengths = np.array(lengths_x)
    y_lengths = np.array(lengths_y)

    maxlen_x = np.max(x_lengths)
    maxlen_y = np.max(y_lengths)

    x = np.ones((batch_size, maxlen_x)).astype('int32') * PAD
    y = np.ones((batch_size, maxlen_y)).astype('int32') * PAD
    
    for idx, [s_x, s_y] in enumerate(zip(seqs_x, seqs_y)):
        x[idx, :lengths_x[idx]] = s_x
        y[idx, :lengths_y[idx]] = s_y
    return x, x_lengths, y, y_lengths

def generateRandomSeqBatchMajor(length_from, length_to, vocab_lower, vocab_upper, batch_size):
    return [
            [random.randint(vocab_lower, vocab_upper-2) for digit in range(random.randint(length_from, length_to))] + [1]
                for batch in range(batch_size)]

In [2]:
tf.__version__

'1.3.0'

In [3]:
# Extra vocabulary symbols
_GO = '_GO'
EOS = '_EOS' # also function as PAD
UNK = '_UNK'

extra_tokens = [_GO, EOS, UNK]

start_token = extra_tokens.index(_GO)	# start_token = 0
end_token = extra_tokens.index(EOS)	# end_token = 1
unk_token = extra_tokens.index(UNK)

In [4]:
#df_all = pd.read_pickle('processed_data_v01_EN-DE_py35_seq_length_5_15_sample_540659_limited_vocab.pkl')
#vocab_dict = pickle.load(open('word_dict_v01_EN-DE_py35_seq_length_5_15_sample_540659_limited_vocab.pkl', 'rb'))

#df_all = pd.read_pickle('../processed_data/processed_data_v02_enron_py35_seq_length_3_49_sample_4256_limited_vocab.pkl')
#vocab_dict = pickle.load(open('../processed_data/word_dict_v02_enron_py35_seq_length_3_49_sample_4256_limited_vocab.pkl', 'rb'))

dataset = 'twitter'

df_all = pd.read_pickle('../processed_data/processed_data_v02_twitter_py35_seq_length_3_19_sample_22028_lem.pkl')
vocab_dict = pickle.load(open('../processed_data/word_dict_v02_twitter_py35_seq_length_3_19_sample_22028_lem.pkl', 'rb'))

#Encode sequences
#df_all['alpha_Pair_1_encoding'] =  df_all['alpha_Pair_1_tokens'].apply(numericEncode)
#df_all['alpha_Pair_0_encoding'] = df_all['alpha_Pair_0_tokens'].apply(numericEncode)

df_all['alpha_Pair_1_encoding'] =  df_all['alpha_Pair_1_tokens'].apply(encodeSent)
df_all['alpha_Pair_0_encoding'] = df_all['alpha_Pair_0_tokens'].apply(encodeSent)

df_all['Index'] = df_all.index.values

df_all_train = df_all.sample(frac=0.90, random_state=0)

df_all_dev = df_all[df_all['Index'].isin(df_all_train['Index'].values) == False]

df_all_test = df_all_dev.sample(frac=0.10, random_state=0)

df_all_dev = df_all_dev[df_all_dev['Index'].isin(df_all_test['Index'].values) == False]

inv_map = {v: k for k, v in vocab_dict.items()}

In [5]:
dev_encoded_text = df_all_dev['alpha_Pair_0_encoding'].values
dev_decoded_text = df_all_dev['alpha_Pair_1_encoding'].values

In [6]:
test_encoded_text = df_all_test['alpha_Pair_0_encoding'].values
test_decoded_text = df_all_test['alpha_Pair_1_encoding'].values

In [7]:
df_all_train.shape, df_all_dev.shape, df_all_test.shape, len(vocab_dict)

((15898, 5), (1590, 5), (177, 5), 17181)

In [8]:
tf.reset_default_graph()

PAD = 0
EOS = 1
UNK = 2
vocab_size = len(vocab_dict) + 1
#vocab_size = 30
input_embedding_size = 1024

length_from = 3
length_to = 19
vocab_lower = 0
vocab_upper = vocab_size
n_batch_size = 32

batches_in_epoch = df_all_train.shape[0]/n_batch_size
#batches_in_epoch=100
n_cells = 128
num_layers = 1

n_epochs = 1000
n_beam_width = 1

batches_in_epoch

496.8125

In [9]:
copy_batches = helpers.random_sequences(length_from=length_from, length_to=length_to,
                                       vocab_lower=vocab_lower, vocab_upper=vocab_size,
                                       batch_size=n_batch_size)

In [10]:
#Create handles for encoder and decoders
encoder_inputs = tf.placeholder(
            shape=(None, None),
            dtype=tf.int32,
            name='encoder_inputs',
        )

encoder_inputs_length = tf.placeholder(
            shape=(None,),
            dtype=tf.int32,
            name='encoder_inputs_length',
        )

# required for training, not required for testing
decoder_targets = tf.placeholder(
            shape=(None, None),
            dtype=tf.int32,
            name='decoder_targets'
        )

decoder_targets_length = tf.placeholder(
            shape=(None,),
            dtype=tf.int32,
            name='decoder_targets_length',
        )

In [11]:
batch_size = tf.shape(encoder_inputs)[0]

In [12]:
#Make EOS and PAD matrices to concatenate with targets
EOS_SLICE = tf.ones([batch_size, 1], dtype=tf.int32) * EOS
PAD_SLICE = tf.ones([batch_size, 1], dtype=tf.int32) * PAD

In [13]:
#Adding EOS to the beginning of the decoder targets
decoder_train_inputs = tf.concat([EOS_SLICE, decoder_targets], axis=1, name='decoder_train_inputs_concat')
#[1,10], [10, 16]
decoder_train_length = decoder_targets_length + 1

In [14]:
decoder_train_targets = tf.concat([decoder_targets, PAD_SLICE], axis=1, name='decoder_train_targets')

In [15]:
max_decoder_length = tf.reduce_max(decoder_train_length)

In [16]:
#Create word embeddings
sqrt3 = math.sqrt(3)
initializer = tf.random_uniform_initializer(-sqrt3, sqrt3)

#Randomly initialize a embedding vector for each term in the vocabulary
embedding_matrix = tf.get_variable(name="embedding_matrix", shape=[vocab_size, input_embedding_size],
                                   initializer=initializer, 
                                   dtype=tf.float32)

In [17]:
#Map each input unit to a column in the embedding matrix
encoder_inputs_embedded = tf.nn.embedding_lookup(embedding_matrix, encoder_inputs)

decoder_train_inputs_embedded = tf.nn.embedding_lookup(embedding_matrix, decoder_train_inputs)

In [18]:
#input_layer = Dense(n_cells, name='input_projection')

# Output projection layer to convert cell_outputs to logits
output_layer = Dense(vocab_size, name='output_projection')

In [19]:
#Create a bi-directional encoder, encoding the forward and backward states
#The core abstraction is in tf.nn.bidirectional_dynamic_rnn!

In [20]:
encoder_output_keep = 1
decoder_output_keep = 1

In [21]:
#encoder_cell_fw = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(n_cells), input_keep_prob=1, 
#                                             output_keep_prob=encoder_output_keep)

encoder_cell_list = []

for layer in range(num_layers):
    cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(n_cells, state_is_tuple=True), input_keep_prob=1, 
                                            output_keep_prob=encoder_output_keep)
    encoder_cell_list.append(cell)

encoder_cell =  tf.contrib.rnn.MultiRNNCell(encoder_cell_list)
#encoder_cell_fw = tf.contrib.rnn.MultiRNNCell([encoder_cell_fw for _ in range(num_layers)])

In [22]:
#encoder_cell_bw = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(n_cells), input_keep_prob=1, 
#                                             output_keep_prob=encoder_output_keep)

#encoder_cell = tf.contrib.rnn.LSTMCell(n_cells)
#encoder_cell_bw = tf.contrib.rnn.MultiRNNCell([encoder_cell_bw for _ in range(num_layers)])

In [23]:
encoder_outputs, encoder_last_state = tf.nn.dynamic_rnn(
        cell=encoder_cell, inputs=encoder_inputs_embedded,
        sequence_length=encoder_inputs_length, dtype=tf.float32,
        time_major=False)

In [24]:
#encoder_last_state_beam = tf.contrib.framework.nest.map_structure(
#        lambda s: tf.contrib.seq2seq.tile_batch(s, n_beam_width), encoder_last_state)

encoder_last_state_beam = tf.contrib.seq2seq.tile_batch(encoder_last_state, n_beam_width)

encoder_outputs_beam = tf.contrib.seq2seq.tile_batch(encoder_outputs, multiplier=n_beam_width)

encoder_inputs_length_beam = tf.contrib.seq2seq.tile_batch(encoder_inputs_length, multiplier=n_beam_width)

In [25]:
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
    num_units=n_cells, 
    memory=encoder_outputs, 
    memory_sequence_length=encoder_inputs_length) 

decoder_cell_list = []

for layer in range(num_layers):
    cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(n_cells, state_is_tuple=True), input_keep_prob=1, 
                                            output_keep_prob=decoder_output_keep)
    decoder_cell_list.append(cell)

decoder_cell =  tf.contrib.rnn.MultiRNNCell(decoder_cell_list)

decoder_initial_state = encoder_last_state

In [26]:
attn_input_feeding = True

In [27]:
def attn_decoder_input_fn(inputs, attention):
    if not attn_input_feeding:
        return inputs

    # Essential when use_residual=True
    _input_layer = Dense(n_cells, dtype=tf.float32,
                         name='attn_input_feeding')
    return _input_layer(array_ops.concat([inputs, attention], -1))

In [28]:
decoder_cell_list[-1] = tf.contrib.seq2seq.AttentionWrapper(
            cell=decoder_cell_list[-1],
            attention_mechanism=attention_mechanism,
            attention_layer_size=n_cells,
          #  cell_input_fn=attn_decoder_input_fn,
            #initial_cell_state=encoder_last_state[-1],
            initial_cell_state=encoder_last_state[-1],                   
            alignment_history=False,
            name='Attention_Wrapper')

In [29]:
initial_state = [state for state in encoder_last_state]

In [30]:
initial_state[-1] = decoder_cell_list[-1].zero_state(batch_size=batch_size*n_beam_width, dtype=tf.float32)

In [31]:
decoder_initial_state = tuple(initial_state)

In [32]:
decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_cell_list)

In [33]:
#encoder_cell.state_size, encoder_inputs_embedded, encoder_inputs_length

In [34]:
#(
# (encoder_fw_outputs, encoder_bw_outputs),
# (encoder_fw_state, encoder_bw_state)
#) = (
#     tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell, 
#                                     cell_bw=encoder_cell,
#                                     inputs=encoder_inputs_embedded,
#                                     sequence_length=encoder_inputs_length,
#                                     time_major=True, dtype=tf.float32)
#    )

In [35]:
#concatenate backward and forward outputs
#encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)

In [36]:
#encoder_state_c = tf.concat((encoder_fw_state.c, encoder_bw_state.c), 1, name='bidirectional_concat_c')
#encoder_state_h = tf.concat((encoder_fw_state.h, encoder_bw_state.h), 1, name='bidirectional_concat_h')

In [37]:
#encoder_state = tf.contrib.rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)

In [38]:
#encoder_state = []

#for i in range(num_layers):
    
#    encoder_state_c = tf.concat((encoder_fw_state[i].c, encoder_bw_state[i].c), 1, name='bidirectional_concat_c')
#    encoder_state_h = tf.concat((encoder_fw_state[i].h, encoder_bw_state[i].h), 1, name='bidirectional_concat_h')
    
#    current_encoder_state = tf.contrib.rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)

#    encoder_state.append(current_encoder_state)

#encoder_state = tuple(encoder_state)

In [39]:
attention_states = tf.transpose(encoder_outputs, perm=[1, 0, 2])

In [40]:
# Helper to feed inputs for training: read inputs from dense ground truth vectors
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_train_inputs_embedded,
                                   sequence_length=decoder_train_length,
                                   time_major=False,
                                   name='training_helper')

training_decoder = tf.contrib.seq2seq.BasicDecoder(cell=decoder_cell,
                                   helper=training_helper,
                                   initial_state=decoder_initial_state, 
                                   output_layer=output_layer)

In [41]:
(decoder_outputs_train, decoder_last_state_train, decoder_outputs_length_train) = \
              (tf.contrib.seq2seq.dynamic_decode(
                                                decoder=training_decoder,
                                                output_time_major=False,
                                                impute_finished=True,
                                                maximum_iterations=max_decoder_length
                                                )
              )

In [42]:
# More efficient to do the projection on the batch-time-concatenated tensor
# logits_train: [batch_size, max_time_step + 1, num_decoder_symbols]
# self.decoder_logits_train = output_layer(self.decoder_outputs_train.rnn_output)
decoder_logits_train = tf.identity(decoder_outputs_train.rnn_output) 

In [43]:
# Use argmax to extract decoder symbols to emit
decoder_pred_train = tf.argmax(decoder_logits_train, axis=-1, name='decoder_pred_train')

In [44]:
start_tokens = tf.ones([batch_size*n_beam_width,], tf.int32) * EOS
end_token = EOS

In [56]:
# Helper to feed inputs for greedy decoding: uses the argmax of the output
decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(start_tokens=start_tokens,
                                                end_token=end_token,
                                                embedding=embedding_matrix)

In [58]:
# Basic decoder performs greedy decoding at each time step
inference_decoder = tf.contrib.seq2seq.BasicDecoder(cell=decoder_cell,
                                         helper=decoding_helper,
                                         initial_state=decoder_initial_state,
                                         output_layer=output_layer)

In [59]:
#inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(cell=decoder_cell,
#                                                               embedding=embedding_matrix,
#                                                               start_tokens=start_tokens,
#                                                               end_token=end_token,
#                                                               initial_state=decoder_initial_state,
#                                                               beam_width=n_beam_width,
#                                                               output_layer=output_layer,)

In [60]:
max_decode_step = tf.reduce_max(encoder_inputs_length) + 3

In [61]:
(decoder_outputs_decode, decoder_last_state_decode,
         decoder_outputs_length_decode) = (tf.contrib.seq2seq.dynamic_decode(
            decoder=inference_decoder,
            output_time_major=False,
            #impute_finished=True,	# error occurs --why?
            maximum_iterations=max_decode_step))

In [79]:
decoder_pred_decode = tf.argmax(decoder_outputs_decode.rnn_output, axis=-1, name='decoder_pred_decode')

decoder_pred_decode_prob = tf.nn.softmax(decoder_outputs_decode.rnn_output, name='decoder_pred_decode_prob')

In [None]:
#decoder_pred_decode_prob = tf.nn.softmax(decoder_outputs_decode.rnn_output, name='decoder_pred_decode_prob')
#decoder_pred_decode_prob = decoder_outputs_decode.beam_search_decoder_output.scores

In [None]:
#####################

In [65]:
# masks: masking for valid and padded time steps, [batch_size, max_time_step + 1]
masks = tf.sequence_mask(lengths=decoder_train_length, 
                         maxlen=max_decoder_length, dtype=tf.float32, name='masks')

In [66]:
# Computes per word average cross-entropy over a batch
# Internally calls 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default
loss = tf.contrib.seq2seq.sequence_loss(logits=decoder_logits_train, 
                                  targets=decoder_train_targets,
                                  weights=masks,
                                  average_across_timesteps=True,
                                  average_across_batch=True,)

In [67]:
entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=decoder_train_targets,
    logits=decoder_logits_train
)

In [68]:
encoded_text = df_all_train['alpha_Pair_0_encoding'].values
decoded_text = df_all_train['alpha_Pair_1_encoding'].values
text_index = df_all_train['Index'].values

input_batches = ([encoded_text[block_idx*n_batch_size:(block_idx+1)*n_batch_size], 
         decoded_text[block_idx*n_batch_size:(block_idx+1)*n_batch_size], 
                 text_index[block_idx*n_batch_size:(block_idx+1)*n_batch_size]]\
            for block_idx in range(len(encoded_text)))

In [69]:
ran_seq = generateRandomSeqBatchMajor(length_from=length_from, length_to=length_to,
                                       vocab_lower=2, vocab_upper=vocab_upper,
                                       batch_size=n_batch_size)
input_batch_data = ran_seq
target_batch_data = input_batch_data

In [70]:
vocab_size

17182

In [71]:
#fd = make_train_inputs(input_batch_data, target_batch_data)
fd = prepare_train_batch([[4, 5, 7, 2, 5, 1], [4, 5, 7, 4, 5, 1]], [[4, 5, 7, 2, 30, 1],[4, 5, 7, 9, 5, 1]])
feed_dict = {encoder_inputs: fd[0],
        encoder_inputs_length: fd[1],
        decoder_targets: fd[2],
        decoder_targets_length: fd[3]}

In [72]:
init = tf.global_variables_initializer()

with tf.Session() as session:
    init.run()
    for i in range(1):
        print (i)
        #epoch_batches = next(input_batches)

        #input_batch_data = epoch_batches[0]
        #target_batch_data = epoch_batches[1]
        #batch_data_index = epoch_batches[2]
        #print ([inv_map[i] for i in input_batch_data[0]])
       #fd = prepare_train_batch(input_batch_data, target_batch_data)
        
        #feed_dict = {encoder_inputs: fd[0],
        #encoder_inputs_length: fd[1],
        #decoder_targets: fd[2],
        #decoder_targets_length: fd[3]}
        
        t = session.run([loss], feed_dict)
        #y  = session.run([encoder_outputs_original, encoder_outputs], feed_dict)
        #if t[1] !=t[1]: 
        #    print (loss)
         #   break

0


In [74]:
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.01
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, \
                                           n_epochs*int(batches_in_epoch), 0.0001, staircase=False)

In [75]:
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

train_op = optimizer.minimize(loss, global_step=global_step)

In [76]:
copy_task = False

In [None]:
train_loss_track = []
dev_loss_track = []

all_weights = []
dev_test_results = []
metric_results = []
with tf.Session() as session:
    
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    #saver.restore(session, \
    #    './seq2seq_CornelMovies_encode_100_decode_200_vocab_13679_embedding_200_seq_5_15_batch_64_layers_3_v5-210')
    #saver.restore(session, \
    #    './seq2seq_CornelMovies_encode_100_decode_200_vocab_13679_embedding_200_seq_5_15_batch_64_layers_3_v5-100')
    #saver.restore(session, \
    #    'd:\coding\seq2seq_CornelMovies_encode_200_decode_400_vocab_13679_embedding_200_seq_5_15_batch_128_layers_6_v6-990')
    #saver.restore(session, \
    #    './seq2seq_CornelMovies_encode_500_decode_1000_vocab_13679_embedding_1024_seq_5_15_batch_32_layers_6_v6-30')
    #saver.restore(session, \
    #'d:\coding\chkpt\seq2seq_Cornell_encode_128_decode_256_vocab_13679_embedding_256_seq_5_15_batch_32_layers_3_enkeep_10_dekeep_10-203320')
    #saver.restore(session, \
    #'d:\coding\seq2seq\chkpt\seq2seq_twitter_encode_128_decode_128_vocab_29164_embedding_512_seq_3_29_batch_32_layers_3_enkeep_10_dekeep_10-8858')
    
    
    for epoch in range(n_epochs):
        
        df_all_train = df_all_train.sample(frac=1, random_state=tf.train.global_step(session, global_step))
        #tf.train.global_step(session, global_step))
       
        encoded_text = df_all_train['alpha_Pair_0_encoding'].values
        decoded_text = df_all_train['alpha_Pair_1_encoding'].values
        text_index = df_all_train['Index'].values

        input_batches = ([encoded_text[block_idx*n_batch_size:(block_idx+1)*n_batch_size], 
                 decoded_text[block_idx*n_batch_size:(block_idx+1)*n_batch_size], 
                         text_index[block_idx*n_batch_size:(block_idx+1)*n_batch_size]]\
                    for block_idx in range(len(encoded_text)))
        
        for batch in range(int(batches_in_epoch)):
            mean_metric_train = []
            mean_metric_dev = []

            if copy_task == False:
                
                epoch_batches = next(input_batches)
                
                #input_batch_data = next(encoding_batches)
                #target_batch_data = next(decoding_batches)
                
                input_batch_data = epoch_batches[0]
                target_batch_data = epoch_batches[1]
                batch_data_index = epoch_batches[2]

            else:
                ran_seq = generateRandomSeqBatchMajor(length_from=length_from, length_to=length_to,
                                       vocab_lower=2, vocab_upper=vocab_upper,
                                       batch_size=n_batch_size)
                input_batch_data = ran_seq
                target_batch_data = input_batch_data
            
            fd = prepare_train_batch(input_batch_data, target_batch_data)
            feed_dict = {encoder_inputs: fd[0],
                        encoder_inputs_length: fd[1],
                        decoder_targets: fd[2],
                        decoder_targets_length: fd[3]}
           
            _, l = session.run([train_op, loss], feed_dict)
            
            if batch % 50 == 0: 
                
                print ('global_step: %s' % tf.train.global_step(session, global_step))
                print ('learning rate', session.run(optimizer._lr))
                
                print ('epoch', epoch)
                print ('batch {}'.format(batch))
                print ('training minibatch loss: {}'.format(l))
                
                train_loss_track.append([tf.train.global_step(session, global_step), l])

                for i, (e_in, dt_targ, dt_pred) in enumerate(zip(feed_dict[encoder_inputs], 
                                                                 feed_dict[decoder_targets], 
                                                                 session.run(decoder_pred_train, feed_dict))):

                    print('  sample {}:'.format(i + 1))
                    #print('    Index', batch_data_index[i])
                    #print('    enc input           > {}'.format(e_in))
                    print('    enc input           > {}'.format(' '.join([inv_map[i] for i in e_in if i!=0])))

                    #print('    dec input           > {}'.format(dt_targ))
                    print('    dec input           > {}'.format(' '.join([inv_map[i] for i in dt_targ if i!=0])))

                    #print('    dec train predicted > {}'.format(dt_pred))
                    print('    dec train predicted > {}'.format(' '.join([inv_map[i] for i in dt_pred if i!=0])))
                
                    if i >= 0: break
                        
                #DEV CHECK
                df_all_dev_check = df_all_dev.sample(n=32, random_state=tf.train.global_step(session, global_step))

                dev_encoded_text = df_all_dev_check['alpha_Pair_0_encoding'].values
                dev_decoded_text = df_all_dev_check['alpha_Pair_1_encoding'].values

                fd_dev = prepare_train_batch([i for i in dev_encoded_text], [i for i in dev_decoded_text])

                feed_dict_dev = {encoder_inputs: fd_dev[0],
                                 encoder_inputs_length: fd_dev[1],
                                 decoder_targets: fd_dev[2],
                                 decoder_targets_length: fd_dev[3]}

                #fd_inf = prepare_batch([i for i in dev_encoded_text])

                feed_dict_inf = {encoder_inputs: fd_dev[0],
                                 encoder_inputs_length: fd_dev[1]}

                dev_inf_out = session.run([decoder_pred_decode, decoder_pred_decode_prob], feed_dict_inf) 
                dev_loss = session.run(loss, feed_dict_dev)
                
                dev_loss_track.append([tf.train.global_step(session, global_step), dev_loss])
                print ('dev minibatch loss: {}'.format(dev_loss))

                for i, (e_in, dt_targ, dt_pred, dt_inf, df_inf_out_prob) in enumerate(zip(feed_dict_dev[encoder_inputs], 
                                                                 feed_dict_dev[decoder_targets], 
                                                                 session.run(decoder_pred_train, feed_dict_dev),
                                                                 dev_inf_out[0], dev_inf_out[1])):

                    print('  DEV sample {}:'.format(i + 1))
                    #print('    Index', batch_data_index[i])
                    #print('    DEV enc input           > {}'.format(e_in))
                    print('    DEV enc input           > {}'.format(' '.join([inv_map[i] for i in e_in if i!=0])))

                   # print('    DEV dec input           > {}'.format(dt_targ))
                    print('    DEV dec input           > {}'.format(' '.join([inv_map[i] for i in dt_targ if i!=0])))

                    #print('    DEV dec train predicted > {}'.format(dt_pred))
                    print('    DEV dec train predicted > {}'.format(' '.join([inv_map[i] for i in dt_pred if i!=0])))
                    
                    #print('    DEV dec train infer > {}'.format(dt_inf))
                    print('    DEV dec train infer > {}'.format(' '.join([inv_map[i] for i in dt_inf if i!=0])))
                
                    if i >= 0: break

             
                 #   df_prediction_train = predictionCheck(mean_metric_train)
                 #   print (df_prediction_train['meanCheckList'].describe()['mean'])

                 #   df_prediction_dev = predictionCheck(mean_metric_dev)
                #    print (df_prediction_dev['meanCheckList'].describe()['mean'])

                 #   metric_results.append([df_prediction_train, df_prediction_dev])
                
        if epoch % 3 == 0: 
            print ('Saving session')
            #eval_dev = devCheck(dev_encoded_text, dev_decoded_text, True)
            
            #dev_test_results.append(eval_dev)
            
            #pickle.dump(dev_test_results, open('d:\coding\chkpt\dev_test_results_epoch_%d.pkl' % epoch, 'wb'))
            
            saver.save(session, \
'chkpt/seq2seq_%s_encode_%d_decode_%d_vocab_%d_embedding_%d_seq_%d_%d_batch_%d_layers_%d_enkeep_%d_dekeep_%d' % \
                (dataset, n_cells, n_cells, vocab_size, input_embedding_size, length_from, length_to, n_batch_size, num_layers,
                int(encoder_output_keep*10), int(decoder_output_keep*10)), \
                       global_step = tf.train.global_step(session, global_step))
            #saver.save(session, 'd:\coding\seq2seq\chkpt\copy_task', global_step = tf.train.global_step(session, global_step))
       # variables_names =[v.name for v in tf.trainable_variables()]
       # values = session.run(variables_names)
       # all_weights.append([values[1], values[3]])

global_step: 1
learning rate 0.00999981
epoch 0
batch 0
training minibatch loss: 9.749956130981445
  sample 1:
    enc input           > his name is ghost we shall wait and see <EOS>
    dec input           > that s the worse part how is he gunna prove that he didn t do it ? <EOS>
    dec train predicted > that he he part how he he he that that he he that that he that <EOS>
dev minibatch loss: 9.567595481872559
  DEV sample 1:
    DEV enc input           > just think in month he will be off the stage thank you lord <EOS>
    DEV dec input           > on stage on the importance of investing in africa amp inspiring entrepreneur in africa u <EOS>
    DEV dec train predicted > a a a hillary opposing some camp camp do camp
    DEV dec train infer > that that that that that that that that that that that that that that that that that that that
global_step: 51
learning rate 0.00999053
epoch 0
batch 50
training minibatch loss: 6.092986106872559
  sample 1:
    enc input           > and they ain

In [1]:
plt.plot(list(zip(*dev_loss_track))[0], list(zip(*dev_loss_track))[1], label='Dev')
plt.plot(list(zip(*loss_track))[0], list(zip(*loss_track))[1], label='Train')
plt.xlabel('Global Step')
plt.ylabel('Sequence Loss')
plt.legend(loc='best')
plt.show()

NameError: name 'plt' is not defined

In [None]:
#Create inference
mean_metric = []
chunk_size = 500
#n_chunks = int(df_all_train.shape[0]/chunk_size)
n_chunks=10
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(session, \
'd:\coding\seq2seq\chkpt\seq2seq_enron_encode_24_decode_48_vocab_10_embedding_32_seq_3_10_batch_32_layers_1_enkeep_10_dekeep_10-9639')
  #  saver.restore(session, 'd:\coding\seq2seq\chkpt\copy_task-19100')
    
    for chunk in range(n_chunks):
        if chunk>0: break
        ran_seq = generateRandomSeqBatchMajor(length_from=length_from, length_to=length_to,
                       vocab_lower=2, vocab_upper=vocab_upper,
                       batch_size=n_batch_size)
        
        #input_batch_data = ran_seq
        input_batch_data = [[3,4,5,6,7,8, 9]]
        #input_batch_data = df_all_dev['alpha_Pair_0_encoding'].values[:32]
        
        fd_inf = prepare_batch(input_batch_data)
        feed_dict_inf = {encoder_inputs: fd_inf[0],
                    encoder_inputs_length: fd_inf[1]}
        inf_out = session.run([decoder_pred_decode, decoder_pred_decode_prob], feed_dict_inf)

        #print (df_all_train.values[0][1], df_all_train.values[1][1])
        #print (feed_dict_inf)
        for i, (e_in, dt_inf) in enumerate(zip(feed_dict_inf[encoder_inputs], inf_out[0])):
            #mean_metric.append([df_all_train.values[i][0], df_all_train.values[i][1], dt_inf])
            print('    sample {}:'.format(i + 1))
            print('    enc input                > {}'.format([inv_map[k] for k in e_in]))
            print('    dec input                > {}'.format([inv_map[k] for k in df_all_dev['alpha_Pair_1_encoding'].values[i]]))
            print('    dec train inference      > {}'.format([inv_map[k] for k in dt_inf]))
            #print('    dec train inference prob > {}'.format([inf_out[1][j][i].max() for j in range((len(inf_out[1])))]))
            
            #if i>0: break
        
       # print ('Save Model')
       # builder = tf.saved_model.builder.SavedModelBuilder('d:\coding\seq2seq\model')
       # builder.add_meta_graph_and_variables(session, ['serve'])

        #builder.save()
    ops = session.graph.get_operations()

    feed_ops = [op for op in ops if op.type=='Placeholder']

    print(feed_ops)
        #if n_chunks >0: 
         #   break        