In [1]:
#Kuhan Wang 17-09-15

from tensorflow.python.ops import array_ops

import matplotlib.pyplot as plt
from tensorflow.python.layers.core import Dense
import numpy as np
import tensorflow as tf
import pandas as pd

import helpers
import math
import pickle
import random

def make_train_inputs(input_seq, target_seq):
    """Build the training feed dict from raw input and target sequences.

    Both sequence lists go through ``helpers.batch``; the two values it
    returns for each list are fed to the matching data placeholder and
    ``*_length`` placeholder.
    """
    enc_batch, enc_lengths = helpers.batch(input_seq)
    dec_batch, dec_lengths = helpers.batch(target_seq)

    feed = {}
    feed[encoder_inputs] = enc_batch
    feed[encoder_inputs_length] = enc_lengths
    feed[decoder_targets] = dec_batch
    feed[decoder_targets_length] = dec_lengths
    return feed

def make_inference_inputs(input_seq):
    """Build the inference feed dict (encoder side only) from raw sequences.

    ``helpers.batch`` supplies the batched inputs and their lengths; no
    decoder placeholders are fed.
    """
    enc_batch, enc_lengths = helpers.batch(input_seq)

    feed = {}
    feed[encoder_inputs] = enc_batch
    feed[encoder_inputs_length] = enc_lengths
    return feed

def encodeSent(sent):
    """Convert a sentence into a list of vocabulary ids.

    A plain ``str`` is first split on single spaces; any other input is
    iterated as given. Words missing from ``vocab_dict`` map to id 2.
    """
    words = sent.split(' ') if type(sent) is str else sent
    return [vocab_dict.get(word, 2) for word in words]

def decodeSent(sent):
    """Map a sequence of ids back to vocabulary entries through ``inv_map``."""
    words = []
    for token_id in sent:
        words.append(inv_map[token_id])
    return words

def prepare_batch(seqs_x, maxlen=None):
    """Pad a list of id sequences into a dense, batch-major int32 array.

    Args:
        seqs_x: a list of sentences, each a sequence of integer ids.
        maxlen: optional length cap; sentences longer than ``maxlen`` are
            dropped (not truncated).

    Returns:
        ``(x, x_lengths)``: ``x`` has shape ``(n_sentences, longest_length)``
        and is filled with ``PAD`` past the end of each sentence;
        ``x_lengths`` holds the unpadded lengths.
        ``(None, None)`` when no sentence is left, whether the input was
        empty or ``maxlen`` filtered everything out.
    """
    if maxlen is not None:
        seqs_x = [s_x for s_x in seqs_x if len(s_x) <= maxlen]

    # np.max raises on an empty array, so bail out before building the batch
    # regardless of whether maxlen filtering was requested.
    if len(seqs_x) < 1:
        return None, None

    x_lengths = np.array([len(s_x) for s_x in seqs_x])
    x = np.full((len(seqs_x), np.max(x_lengths)), PAD, dtype='int32')
    for idx, s_x in enumerate(seqs_x):
        x[idx, :x_lengths[idx]] = s_x
    return x, x_lengths

def prepare_train_batch(seqs_x, seqs_y, maxlen=None):
    """Pad paired source/target id sequences into dense int32 arrays.

    Args:
        seqs_x: a list of source sentences (sequences of integer ids).
        seqs_y: a list of target sentences, paired position-wise with
            ``seqs_x``.
        maxlen: optional length cap; a pair is dropped when either side is
            longer than ``maxlen``.

    Returns:
        ``(x, x_lengths, y, y_lengths)``: ``x`` and ``y`` are batch-major
        arrays filled with ``PAD`` past the end of each sentence, and the
        ``*_lengths`` arrays hold the unpadded lengths.
        ``(None, None, None, None)`` when no pair is left, whether the input
        was empty or ``maxlen`` filtered everything out.
    """
    if maxlen is not None:
        kept_pairs = [(s_x, s_y) for s_x, s_y in zip(seqs_x, seqs_y)
                      if len(s_x) <= maxlen and len(s_y) <= maxlen]
        seqs_x = [pair[0] for pair in kept_pairs]
        seqs_y = [pair[1] for pair in kept_pairs]

    # np.max raises on an empty array, so bail out before building the batch
    # regardless of whether maxlen filtering was requested.
    if len(seqs_x) < 1 or len(seqs_y) < 1:
        return None, None, None, None

    batch_size = len(seqs_x)

    x_lengths = np.array([len(s_x) for s_x in seqs_x])
    y_lengths = np.array([len(s_y) for s_y in seqs_y])

    x = np.full((batch_size, np.max(x_lengths)), PAD, dtype='int32')
    y = np.full((batch_size, np.max(y_lengths)), PAD, dtype='int32')

    for idx, (s_x, s_y) in enumerate(zip(seqs_x, seqs_y)):
        x[idx, :x_lengths[idx]] = s_x
        y[idx, :y_lengths[idx]] = s_y
    return x, x_lengths, y, y_lengths

def generateRandomSeqBatchMajor(length_from, length_to, vocab_lower, vocab_upper, batch_size):
    """Generate ``batch_size`` random id sequences, each terminated by id 1.

    Each sequence gets a random number of ids in ``[length_from, length_to]``
    drawn from ``[vocab_lower, vocab_upper - 2]`` (both ranges inclusive, as
    ``random.randint`` is), followed by a trailing 1.
    """
    sequences = []
    for _ in range(batch_size):
        seq_len = random.randint(length_from, length_to)
        tokens = [random.randint(vocab_lower, vocab_upper - 2) for _ in range(seq_len)]
        tokens.append(1)
        sequences.append(tokens)
    return sequences

In [2]:
tf.__version__

'1.3.0'

In [3]:
# Extra vocabulary symbols and their ids (position in extra_tokens).
# NOTE(review): EOS and UNK are rebound to the integers 1 and 2 in the
# hyper-parameter cell, where PAD is set to 0 -- the id _GO receives here --
# so EOS does not double as PAD in the padding code of this notebook.
_GO = '_GO'
EOS = '_EOS'
UNK = '_UNK'

extra_tokens = [_GO, EOS, UNK]

start_token = extra_tokens.index(_GO)	# start_token = 0
end_token = extra_tokens.index(EOS)	# end_token = 1
unk_token = extra_tokens.index(UNK)	# unk_token = 2

In [4]:
# Alternative datasets that were loaded the same way:
#df_all = pd.read_pickle('processed_data_v01_EN-DE_py35_seq_length_5_15_sample_540659_limited_vocab.pkl')
#vocab_dict = pickle.load(open('word_dict_v01_EN-DE_py35_seq_length_5_15_sample_540659_limited_vocab.pkl', 'rb'))

#df_all = pd.read_pickle('../processed_data/processed_data_v02_enron_py35_seq_length_3_49_sample_4256_limited_vocab.pkl')
#vocab_dict = pickle.load(open('../processed_data/word_dict_v02_enron_py35_seq_length_3_49_sample_4256_limited_vocab.pkl', 'rb'))

dataset = 'twitter'

# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df_all = pd.read_pickle('../processed_data/processed_data_v02_twitter_py35_seq_length_3_19_sample_22028_lem.pkl')

# Use a context manager so the vocabulary file handle is closed after loading.
with open('../processed_data/word_dict_v02_twitter_py35_seq_length_3_19_sample_22028_lem.pkl', 'rb') as vocab_file:
    vocab_dict = pickle.load(vocab_file)

# Encode the token sequences of both sides of each pair as vocabulary ids.
df_all['alpha_Pair_1_encoding'] = df_all['alpha_Pair_1_tokens'].apply(encodeSent)
df_all['alpha_Pair_0_encoding'] = df_all['alpha_Pair_0_tokens'].apply(encodeSent)

# Keep the row label as a column so the splits can be compared by membership.
df_all['Index'] = df_all.index.values

# 90% of the rows go to training; the held-out 10% is split again into
# test (10% of the held-out rows) and dev (the rest).
df_all_train = df_all.sample(frac=0.90, random_state=0)

df_all_heldout = df_all[~df_all['Index'].isin(df_all_train['Index'].values)]

df_all_test = df_all_heldout.sample(frac=0.10, random_state=0)

df_all_dev = df_all_heldout[~df_all_heldout['Index'].isin(df_all_test['Index'].values)]

# id -> word lookup used when printing decoded sequences.
inv_map = {v: k for k, v in vocab_dict.items()}

In [5]:
# Dev split: Pair_0 encodings (encoder side) and Pair_1 encodings (decoder side).
# NOTE(review): both names are rebound to a 32-row sample inside the training loop.
dev_encoded_text = df_all_dev['alpha_Pair_0_encoding'].values
dev_decoded_text = df_all_dev['alpha_Pair_1_encoding'].values

In [6]:
# Test split: Pair_0 encodings (encoder side) and Pair_1 encodings (decoder side).
test_encoded_text = df_all_test['alpha_Pair_0_encoding'].values
test_decoded_text = df_all_test['alpha_Pair_1_encoding'].values

In [7]:
df_all_train.shape, df_all_dev.shape, df_all_test.shape, len(vocab_dict)

((15898, 5), (1590, 5), (177, 5), 17181)

In [8]:
# Start from an empty default graph so re-running the graph-building cells
# does not stack onto a previous build.
tf.reset_default_graph()

# Integer ids of the special tokens. EOS and UNK replace the string values
# assigned in the special-token cell above.
PAD = 0
EOS = 1
UNK = 2
# One more than the number of vocabulary entries.
# NOTE(review): presumably the extra slot covers an id missing from vocab_dict -- confirm.
vocab_size = len(vocab_dict) + 1
#vocab_size = 30
input_embedding_size = 512

# Parameters for the random copy-task sequences.
length_from = 3
length_to = 19
vocab_lower = 0
vocab_upper = vocab_size
n_batch_size = 32

# True division: this is a float (see the displayed value); consumers wrap it in int().
batches_in_epoch = df_all_train.shape[0]/n_batch_size
#batches_in_epoch=100
# LSTM units per layer and number of stacked layers (used by encoder and decoder).
n_cells = 128
num_layers = 2

n_epochs = 1000
n_beam_width = 1

# Dropout keep probabilities on cell outputs; 1 disables dropout.
encoder_output_keep = 1
decoder_output_keep = 1

batches_in_epoch

496.8125

In [9]:
# Random-sequence source from the helpers module for the copy task.
# NOTE(review): copy_batches is not referenced by any other visible cell.
copy_batches = helpers.random_sequences(length_from=length_from, length_to=length_to,
                                       vocab_lower=vocab_lower, vocab_upper=vocab_size,
                                       batch_size=n_batch_size)

In [10]:
#Create handles for encoder and decoders
# Rank-2 int32 ids; axis 0 is the batch (batch_size is later read from
# tf.shape(encoder_inputs)[0] and the RNN calls use time_major=False).
encoder_inputs = tf.placeholder(
            shape=(None, None),
            dtype=tf.int32,
            name='encoder_inputs',
        )

# Rank-1 int32 lengths, one per batch row.
encoder_inputs_length = tf.placeholder(
            shape=(None,),
            dtype=tf.int32,
            name='encoder_inputs_length',
        )

# required for training, not required for testing
decoder_targets = tf.placeholder(
            shape=(None, None),
            dtype=tf.int32,
            name='decoder_targets'
        )

# Rank-1 int32 lengths of the decoder targets, one per batch row.
decoder_targets_length = tf.placeholder(
            shape=(None,),
            dtype=tf.int32,
            name='decoder_targets_length',
        )

In [11]:
# Dynamic batch size, read from the fed encoder inputs.
batch_size = tf.shape(encoder_inputs)[0]

#Make EOS and PAD matrices to concatenate with targets
EOS_SLICE = tf.ones([batch_size, 1], dtype=tf.int32) * EOS
PAD_SLICE = tf.ones([batch_size, 1], dtype=tf.int32) * PAD

#Decoder inputs: an EOS column prepended to the decoder targets
decoder_train_inputs = tf.concat([EOS_SLICE, decoder_targets], axis=1, name='decoder_train_inputs_concat')
#The prepended column makes every decoder sequence one step longer
decoder_train_length = decoder_targets_length + 1

#Decoder training targets: a PAD column appended to the decoder targets, so
#they have the same width as decoder_train_inputs
decoder_train_targets = tf.concat([decoder_targets, PAD_SLICE], axis=1, name='decoder_train_targets')

max_decoder_length = tf.reduce_max(decoder_train_length)

#Create word embeddings
sqrt3 = math.sqrt(3)
initializer = tf.random_uniform_initializer(-sqrt3, sqrt3)

#Randomly initialize an embedding vector for each term in the vocabulary;
#the same matrix is shared by the encoder and decoder lookups below
embedding_matrix = tf.get_variable(name="embedding_matrix", shape=[vocab_size, input_embedding_size],
                                   initializer=initializer, 
                                   dtype=tf.float32)

#Map each input id to its row in the embedding matrix
encoder_inputs_embedded = tf.nn.embedding_lookup(embedding_matrix, encoder_inputs)

decoder_train_inputs_embedded = tf.nn.embedding_lookup(embedding_matrix, decoder_train_inputs)

In [12]:
#input_layer = Dense(n_cells, name='input_projection')

# Output projection layer to convert cell_outputs to logits.
# The same layer object is handed to both the training and inference decoders.
output_layer = Dense(vocab_size, name='output_projection')

In [13]:
#Create a multi-layer, unidirectional LSTM encoder
#The core abstraction is tf.nn.dynamic_rnn (tf.nn.bidirectional_dynamic_rnn is not used here)

In [14]:
# Encoder: num_layers stacked LSTM cells, each wrapped with output dropout
# (keep probability encoder_output_keep).
encoder_cell_list = []

for layer in range(num_layers):
    cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(n_cells, state_is_tuple=True), input_keep_prob=1, 
                                            output_keep_prob=encoder_output_keep)
    encoder_cell_list.append(cell)

encoder_cell =  tf.contrib.rnn.MultiRNNCell(encoder_cell_list)

# Run the encoder over the embedded, batch-major inputs. encoder_last_state
# is indexed per layer by the decoder cells ([-1] is the top layer).
encoder_outputs, encoder_last_state = tf.nn.dynamic_rnn(
        cell=encoder_cell, inputs=encoder_inputs_embedded,
        sequence_length=encoder_inputs_length, dtype=tf.float32,
        time_major=False)

In [15]:
# Attention decoder, first build.
# NOTE(review): the same construction is repeated in the cells that follow
# and every name bound here is rebound there; the bindings that the loss and
# inference cells end up using are the last ones made.

# Bahdanau attention over the encoder outputs, masked by the input lengths.
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
    num_units=n_cells, 
    memory=encoder_outputs, 
    memory_sequence_length=encoder_inputs_length) 

# Decoder stack: num_layers LSTM cells with output dropout.
decoder_cell_list = []

for layer in range(num_layers):
    cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(n_cells, state_is_tuple=True), input_keep_prob=1, 
                                            output_keep_prob=decoder_output_keep)
    decoder_cell_list.append(cell)

#decoder_initial_state = encoder_last_state

# Only the last (top) decoder layer is wrapped with attention.
decoder_cell_list[-1] = tf.contrib.seq2seq.AttentionWrapper(
            cell=decoder_cell_list[-1],
            attention_mechanism=attention_mechanism,
            attention_layer_size=n_cells,
          #  cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=encoder_last_state[-1],                   
            alignment_history=False,
            name='Attention_Wrapper')

# Initial decoder state: the encoder's final state for every layer except the
# last, whose entry is replaced by the attention wrapper's zero_state.
# NOTE(review): presumably zero_state starts from initial_cell_state given
# above -- confirm against the AttentionWrapper docs for this TF version.
initial_state = [state for state in encoder_last_state]

initial_state[-1] = decoder_cell_list[-1].zero_state(batch_size=batch_size*n_beam_width, dtype=tf.float32)

decoder_initial_state = tuple(initial_state)

decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_cell_list)

#encoder_cell.state_size, encoder_inputs_embedded, encoder_inputs_length

# Helper to feed inputs for training: read inputs from dense ground truth vectors
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_train_inputs_embedded,
                                   sequence_length=decoder_train_length,
                                   time_major=False,
                                   name='training_helper')

# Training decoder; output_layer is applied to the cell outputs.
training_decoder = tf.contrib.seq2seq.BasicDecoder(cell=decoder_cell,
                                   helper=training_helper,
                                   initial_state=decoder_initial_state, 
                                   output_layer=output_layer)

In [15]:
# Attention decoder, second build.
# NOTE(review): this cell repeats the construction of the previous cell with
# different formatting and rebinds the same names; later cells rebind them
# once more.

# Bahdanau attention over the encoder outputs, masked by the input lengths.
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                                    num_units=n_cells, 
                                    memory=encoder_outputs, 
                                    memory_sequence_length=encoder_inputs_length)

# Decoder stack: num_layers LSTM cells with output dropout.
decoder_cell_list = []

for layer in range(num_layers):
    cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(n_cells, state_is_tuple=True), input_keep_prob=1, 
                                                    output_keep_prob=decoder_output_keep)
    decoder_cell_list.append(cell)

#decoder_initial_state = encoder_last_state

#Last layer of decoders is wrapped in attention
decoder_cell_list[-1] = tf.contrib.seq2seq.AttentionWrapper(
                                 cell=decoder_cell_list[-1],
                                 attention_mechanism=attention_mechanism,
                                 attention_layer_size=n_cells,
                                 #  cell_input_fn=attn_decoder_input_fn,
                                 initial_cell_state=encoder_last_state[-1],                   
                                 alignment_history=False,
                                 name='Attention_Wrapper')

# Initial decoder state: encoder final state per layer, with the last entry
# replaced by the attention wrapper's zero_state.
initial_state = [state for state in encoder_last_state]

initial_state[-1] = decoder_cell_list[-1].zero_state(batch_size=batch_size*n_beam_width, dtype=tf.float32)

decoder_initial_state = tuple(initial_state)

decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_cell_list)

# Teacher forcing: feed the embedded ground-truth decoder inputs.
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_train_inputs_embedded,
                           sequence_length=decoder_train_length,
                           time_major=False,
                           name='training_helper')

# Training decoder; output_layer is applied to the cell outputs.
training_decoder = tf.contrib.seq2seq.BasicDecoder(cell=decoder_cell,
                                                   helper=training_helper,
                                                   initial_state=decoder_initial_state, 
                                                   output_layer=output_layer)

In [16]:
# Unroll the training decoder for at most max_decoder_length steps.
# NOTE(review): the three result names are rebound by an identical call in a
# later cell, after the decoder has been rebuilt.
(decoder_outputs_train, decoder_last_state_train, decoder_outputs_length_train) = \
      (tf.contrib.seq2seq.dynamic_decode(
                                        decoder=training_decoder,
                                        output_time_major=False,
                                        impute_finished=True,
                                        maximum_iterations=max_decoder_length
                                        )
      )

In [16]:
# Attention decoder, third build, split across the next several cells.
# The attention mechanism and the decoder cell list created here are the
# ones wrapped and stacked in the cells that follow.
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
    num_units=n_cells, 
    memory=encoder_outputs, 
    memory_sequence_length=encoder_inputs_length) 

decoder_cell_list = []

for layer in range(num_layers):
    cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(n_cells, state_is_tuple=True), input_keep_prob=1, 
                                            output_keep_prob=decoder_output_keep)
    decoder_cell_list.append(cell)

# Provisional value: overwritten a few cells below, once the attention
# wrapper's zero_state is available for the last layer.
decoder_initial_state = encoder_last_state

In [17]:
attn_input_feeding = True

In [18]:
def attn_decoder_input_fn(inputs, attention):
    """Combine decoder inputs with the attention vector (input feeding).

    When the ``attn_input_feeding`` flag is off the inputs pass through
    unchanged. Otherwise inputs and attention are concatenated on the last
    axis and projected to ``n_cells`` units by a Dense layer created on each
    call.
    """
    if attn_input_feeding:
        # Essential when use_residual=True
        projection = Dense(n_cells, dtype=tf.float32,
                           name='attn_input_feeding')
        return projection(array_ops.concat([inputs, attention], -1))

    return inputs

In [19]:
# Wrap only the last (top) decoder layer with attention. The input-feeding
# hook (cell_input_fn) is left disabled.
decoder_cell_list[-1] = tf.contrib.seq2seq.AttentionWrapper(
            cell=decoder_cell_list[-1],
            attention_mechanism=attention_mechanism,
            attention_layer_size=n_cells,
          #  cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=encoder_last_state[-1],                   
            alignment_history=False,
            name='Attention_Wrapper')

In [20]:
# Initial decoder state: the encoder's final state for every layer except the
# last, whose entry is replaced by the attention wrapper's zero_state.
initial_state = [state for state in encoder_last_state]

initial_state[-1] = decoder_cell_list[-1].zero_state(batch_size=batch_size*n_beam_width, dtype=tf.float32)

decoder_initial_state = tuple(initial_state)

# Stacked decoder cell shared by the training and inference decoders below.
decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_cell_list)

In [24]:
#encoder_cell.state_size, encoder_inputs_embedded, encoder_inputs_length

In [26]:
# Helper to feed inputs for training: read inputs from dense ground truth vectors
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_train_inputs_embedded,
                                   sequence_length=decoder_train_length,
                                   time_major=False,
                                   name='training_helper')

# Training decoder built on the final decoder_cell / decoder_initial_state;
# output_layer is applied to the cell outputs.
training_decoder = tf.contrib.seq2seq.BasicDecoder(cell=decoder_cell,
                                   helper=training_helper,
                                   initial_state=decoder_initial_state, 
                                   output_layer=output_layer)

In [27]:
# Unroll the training decoder for at most max_decoder_length steps. These
# bindings are the ones read by the logits, loss and prediction cells below.
(decoder_outputs_train, decoder_last_state_train, decoder_outputs_length_train) = \
              (tf.contrib.seq2seq.dynamic_decode(
                                                decoder=training_decoder,
                                                output_time_major=False,
                                                impute_finished=True,
                                                maximum_iterations=max_decoder_length
                                                )
              )

In [28]:
# rnn_output is used as the logits directly: output_layer was handed to the
# BasicDecoder, so no separate projection is applied here.
# NOTE(review): presumably shaped [batch_size, max_time_step + 1, vocab_size] -- confirm.
decoder_logits_train = tf.identity(decoder_outputs_train.rnn_output) 

# Use argmax to extract decoder symbols to emit
decoder_pred_train = tf.argmax(decoder_logits_train, axis=-1, name='decoder_pred_train')

In [30]:
# Inference starts every sequence from EOS -- the same token that is
# prepended to the decoder inputs during training -- and stops on EOS.
start_tokens = tf.ones([batch_size*n_beam_width,], tf.int32) * EOS
# Rebinds end_token from the special-token cell (the value is 1 in both places).
end_token = EOS

In [31]:
# Helper to feed inputs for greedy decoding: uses the argmax of the output,
# looked up in the shared embedding matrix, as the next input
decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(start_tokens=start_tokens,
                                                end_token=end_token,
                                                embedding=embedding_matrix)

In [32]:
# Basic decoder performs greedy decoding at each time step.
# It reuses the decoder_cell, decoder_initial_state and output_layer objects
# of the training decoder.
inference_decoder = tf.contrib.seq2seq.BasicDecoder(cell=decoder_cell,
                                         helper=decoding_helper,
                                         initial_state=decoder_initial_state,
                                         output_layer=output_layer)

In [33]:
#inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(cell=decoder_cell,
#                                                               embedding=embedding_matrix,
#                                                               start_tokens=start_tokens,
#                                                               end_token=end_token,
#                                                               initial_state=decoder_initial_state,
#                                                               beam_width=n_beam_width,
#                                                               output_layer=output_layer,)

In [34]:
# Cap inference at the longest encoder input in the batch plus 3 steps.
max_decode_step = tf.reduce_max(encoder_inputs_length) + 3

In [35]:
# Unroll the greedy inference decoder for at most max_decode_step steps.
# TODO: impute_finished=True raised an error here and is left disabled;
# the cause is still unexplained.
(decoder_outputs_decode, decoder_last_state_decode,
         decoder_outputs_length_decode) = (tf.contrib.seq2seq.dynamic_decode(
            decoder=inference_decoder,
            output_time_major=False,
            #impute_finished=True,
            maximum_iterations=max_decode_step))

In [36]:
# Predicted token ids at inference time: argmax over the last axis.
decoder_pred_decode = tf.argmax(decoder_outputs_decode.rnn_output, axis=-1, name='decoder_pred_decode')

# Softmax of the same inference outputs (tf.nn.softmax default axis).
decoder_pred_decode_prob = tf.nn.softmax(decoder_outputs_decode.rnn_output, name='decoder_pred_decode_prob')

In [37]:
#decoder_pred_decode_prob = tf.nn.softmax(decoder_outputs_decode.rnn_output, name='decoder_pred_decode_prob')
#decoder_pred_decode_prob = decoder_outputs_decode.beam_search_decoder_output.scores

In [38]:
#####################

In [39]:
# masks: masking for valid and padded time steps, [batch_size, max_time_step + 1]
# Built from decoder_train_length, so the step added after each target is counted as valid.
masks = tf.sequence_mask(lengths=decoder_train_length, 
                         maxlen=max_decoder_length, dtype=tf.float32, name='masks')

In [40]:
# Computes per word average cross-entropy over a batch
# Internally calls 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default
# masks zero-weights the padded time steps.
loss = tf.contrib.seq2seq.sequence_loss(logits=decoder_logits_train, 
                                  targets=decoder_train_targets,
                                  weights=masks,
                                  average_across_timesteps=True,
                                  average_across_batch=True,)

In [41]:
# Unmasked cross-entropy between the training logits and targets (no
# weights are applied, unlike `loss`).
# NOTE(review): entropy is not referenced by any other visible cell.
entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=decoder_train_targets,
    logits=decoder_logits_train
)

In [42]:
# Training-split sequences: Pair_0 feeds the encoder, Pair_1 is the decoder
# target, and Index identifies the source row.
encoded_text = df_all_train['alpha_Pair_0_encoding'].values
decoded_text = df_all_train['alpha_Pair_1_encoding'].values
text_index = df_all_train['Index'].values

# Lazily yield [encoder_seqs, decoder_seqs, row_indices] for each minibatch.
# Stepping over start offsets produces exactly one block per minibatch (the
# last one may be shorter); iterating over range(len(encoded_text)) instead
# would yield one block per example, nearly all of them empty.
input_batches = ([encoded_text[start:start + n_batch_size],
                  decoded_text[start:start + n_batch_size],
                  text_index[start:start + n_batch_size]]
                 for start in range(0, len(encoded_text), n_batch_size))

In [43]:
# Copy-task batch: random id sequences used as both input and target
# (target_batch_data is the same list object as input_batch_data).
ran_seq = generateRandomSeqBatchMajor(length_from=length_from, length_to=length_to,
                                       vocab_lower=2, vocab_upper=vocab_upper,
                                       batch_size=n_batch_size)
input_batch_data = ran_seq
target_batch_data = input_batch_data

In [44]:
vocab_size

17182

In [45]:
# Tiny hand-written batch for a smoke test of the graph.
# prepare_train_batch returns (x, x_lengths, y, y_lengths), which map onto
# the four placeholders in that order.
#fd = make_train_inputs(input_batch_data, target_batch_data)
fd = prepare_train_batch([[4, 5, 7, 2, 5, 1], [4, 5, 7, 4, 5, 1]], [[4, 5, 7, 2, 30, 1],[4, 5, 7, 9, 5, 1]])
feed_dict = {encoder_inputs: fd[0],
        encoder_inputs_length: fd[1],
        decoder_targets: fd[2],
        decoder_targets_length: fd[3]}

In [46]:
# Smoke test: initialise all variables in a throwaway session and evaluate
# the loss once on the hand-written feed_dict from the previous cell.
# Only the loop counter is printed; the fetched loss stays in `t`.
init = tf.global_variables_initializer()

with tf.Session() as session:
    init.run()
    for i in range(1):
        print (i)
        # Disabled alternative: pull a real minibatch from input_batches.
        #epoch_batches = next(input_batches)

        #input_batch_data = epoch_batches[0]
        #target_batch_data = epoch_batches[1]
        #batch_data_index = epoch_batches[2]
        #print ([inv_map[i] for i in input_batch_data[0]])
       #fd = prepare_train_batch(input_batch_data, target_batch_data)
        
        #feed_dict = {encoder_inputs: fd[0],
        #encoder_inputs_length: fd[1],
        #decoder_targets: fd[2],
        #decoder_targets_length: fd[3]}
        
        t = session.run([loss], feed_dict)
        # Disabled NaN check (x != x is true only for NaN).
        #y  = session.run([encoder_outputs_original, encoder_outputs], feed_dict)
        #if t[1] !=t[1]: 
        #    print (loss)
         #   break

0


In [47]:
# Step counter incremented by the optimizer; excluded from training.
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.001
# Smooth (non-staircase) exponential decay with decay rate 0.0001 and a decay
# period of n_epochs * int(batches_in_epoch) steps, i.e. the learning rate
# reaches starter_learning_rate * 0.0001 after the full planned run.
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, \
                                           n_epochs*int(batches_in_epoch), 0.0001, staircase=False)

In [48]:
# Adam driven by the decaying learning-rate tensor.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

# One optimisation step on `loss`; also increments global_step.
train_op = optimizer.minimize(loss, global_step=global_step)

In [49]:
copy_task = False

In [None]:
def _ids_to_text(ids):
    """Join the vocabulary words for ``ids`` with spaces, skipping id 0 (PAD)."""
    return ' '.join([inv_map[i] for i in ids if i != 0])


# [global_step, loss] pairs recorded at every logging step.
train_loss_track = []
dev_loss_track = []

all_weights = []
dev_test_results = []
metric_results = []

# Whole minibatches per epoch (batches_in_epoch may be fractional; a trailing
# partial batch is not trained on).
n_batches = int(batches_in_epoch)
# Number of decoded samples printed at each logging step.
n_samples_to_print = 1
# Rows sampled from the dev split for each dev check.
n_dev_check = 32

# Checkpoints are written under checkpoint_dir; the global step is appended
# to the prefix by saver.save.
checkpoint_dir = 'chkpt'
checkpoint_prefix = \
    '%s/seq2seq_%s_encode_%d_decode_%d_vocab_%d_embedding_%d_seq_%d_%d_batch_%d_layers_%d_enkeep_%d_dekeep_%d' % \
    (checkpoint_dir, dataset, n_cells, n_cells, vocab_size, input_embedding_size, length_from, length_to,
     n_batch_size, num_layers, int(encoder_output_keep*10), int(decoder_output_keep*10))

with tf.Session() as session:

    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # To resume from a checkpoint, call saver.restore(session, <checkpoint path>) here.

    for epoch in range(n_epochs):

        # Reshuffle the training rows every epoch, seeded by the current global step.
        df_all_train = df_all_train.sample(frac=1, random_state=tf.train.global_step(session, global_step))

        encoded_text = df_all_train['alpha_Pair_0_encoding'].values
        decoded_text = df_all_train['alpha_Pair_1_encoding'].values
        text_index = df_all_train['Index'].values

        # One [encoder_seqs, decoder_seqs, row_indices] block per minibatch.
        # Stepping over start offsets avoids yielding one (mostly empty)
        # block per example.
        input_batches = ([encoded_text[start:start + n_batch_size],
                          decoded_text[start:start + n_batch_size],
                          text_index[start:start + n_batch_size]]
                         for start in range(0, len(encoded_text), n_batch_size))

        for batch in range(n_batches):

            if not copy_task:
                epoch_batches = next(input_batches)

                input_batch_data = epoch_batches[0]
                target_batch_data = epoch_batches[1]
                batch_data_index = epoch_batches[2]
            else:
                # Copy task: random sequences serve as both input and target.
                ran_seq = generateRandomSeqBatchMajor(length_from=length_from, length_to=length_to,
                                                      vocab_lower=2, vocab_upper=vocab_upper,
                                                      batch_size=n_batch_size)
                input_batch_data = ran_seq
                target_batch_data = input_batch_data

            fd = prepare_train_batch(input_batch_data, target_batch_data)
            feed_dict = {encoder_inputs: fd[0],
                         encoder_inputs_length: fd[1],
                         decoder_targets: fd[2],
                         decoder_targets_length: fd[3]}

            _, batch_loss = session.run([train_op, loss], feed_dict)

            if batch % 50 == 0:
                step = tf.train.global_step(session, global_step)

                print ('global_step: %s' % step)
                # Read the schedule tensor directly rather than the optimizer's
                # private _lr attribute (it is the same tensor).
                print ('learning rate', session.run(learning_rate))

                print ('epoch', epoch)
                print ('batch {}'.format(batch))
                print ('training minibatch loss: {}'.format(batch_loss))

                train_loss_track.append([step, batch_loss])

                # Predictions are fetched after the update step, as a separate run.
                train_pred = session.run(decoder_pred_train, feed_dict)

                for i, (e_in, dt_targ, dt_pred) in enumerate(zip(feed_dict[encoder_inputs],
                                                                 feed_dict[decoder_targets],
                                                                 train_pred)):
                    if i >= n_samples_to_print:
                        break

                    print('  sample {}:'.format(i + 1))
                    print('    enc input           > {}'.format(_ids_to_text(e_in)))
                    print('    dec input           > {}'.format(_ids_to_text(dt_targ)))
                    print('    dec train predicted > {}'.format(_ids_to_text(dt_pred)))

                #DEV CHECK: loss and sample output on a random slice of the dev split
                df_all_dev_check = df_all_dev.sample(n=n_dev_check, random_state=step)

                dev_encoded_text = df_all_dev_check['alpha_Pair_0_encoding'].values
                dev_decoded_text = df_all_dev_check['alpha_Pair_1_encoding'].values

                fd_dev = prepare_train_batch(list(dev_encoded_text), list(dev_decoded_text))

                feed_dict_dev = {encoder_inputs: fd_dev[0],
                                 encoder_inputs_length: fd_dev[1],
                                 decoder_targets: fd_dev[2],
                                 decoder_targets_length: fd_dev[3]}

                # Inference needs only the encoder side.
                feed_dict_inf = {encoder_inputs: fd_dev[0],
                                 encoder_inputs_length: fd_dev[1]}

                dev_inf_out = session.run([decoder_pred_decode, decoder_pred_decode_prob], feed_dict_inf)
                # Teacher-forced dev loss and predictions come from one forward pass.
                dev_loss, dev_pred_train = session.run([loss, decoder_pred_train], feed_dict_dev)

                dev_loss_track.append([step, dev_loss])
                print ('dev minibatch loss: {}'.format(dev_loss))

                for i, (e_in, dt_targ, dt_pred, dt_inf) in enumerate(zip(feed_dict_dev[encoder_inputs],
                                                                         feed_dict_dev[decoder_targets],
                                                                         dev_pred_train,
                                                                         dev_inf_out[0])):
                    if i >= n_samples_to_print:
                        break

                    print('  DEV sample {}:'.format(i + 1))
                    print('    DEV enc input           > {}'.format(_ids_to_text(e_in)))
                    print('    DEV dec input           > {}'.format(_ids_to_text(dt_targ)))
                    print('    DEV dec train predicted > {}'.format(_ids_to_text(dt_pred)))
                    print('    DEV dec train infer > {}'.format(_ids_to_text(dt_inf)))

        if epoch % 3 == 0:
            print ('Saving session')
            # Create the checkpoint directory up front so the save cannot fail
            # on a fresh checkout where it does not exist yet.
            tf.gfile.MakeDirs(checkpoint_dir)
            saver.save(session, checkpoint_prefix,
                       global_step=tf.train.global_step(session, global_step))

global_step: 1
learning rate 0.000999982
epoch 0
batch 0
training minibatch loss: 9.750731468200684
  sample 1:
    enc input           > i think they gon na go straight to the it s the th anniversary of the iphone <EOS>
    dec input           > i m not sure if i want the or to wait for s <EOS>
    dec train predicted > 
dev minibatch loss: 9.74228286743164
  DEV sample 1:
    DEV enc input           > just think in month he will be off the stage thank you lord <EOS>
    DEV dec input           > on stage on the importance of investing in africa amp inspiring entrepreneur in africa u <EOS>
    DEV dec train predicted > plant plant is desean is advice advice advice alright dutch shimmering shimmering tinder newsworthy tinder quesadilla
    DEV dec train infer > plant plant plant a capitalist capitalist capitalist capitalist alright alright socal socal praised tinder tinder tinder audi audi audi establishes
global_step: 51
learning rate 0.000999053
epoch 0
batch 50
training minibatch lo

dev minibatch loss: 5.753232479095459
  DEV sample 1:
    DEV enc input           > no fool you are not that lit <EOS>
    DEV dec input           > my birthday is the day project x come out coincidence ? ? <EOS>
    DEV dec train predicted > i the to the the <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
    DEV dec train infer > i m the the the the the the <EOS>
global_step: 597
learning rate 0.000988975
epoch 1
batch 100
training minibatch loss: 5.988624572753906
  sample 1:
    enc input           > we are doomed then ugh goodnight <EOS>
    dec input           > total resistance came from the people the offered no resistance and the court went along <EOS>
    dec train predicted > i the <EOS> <EOS> the the <EOS> the <EOS> <EOS> <EOS> the the <EOS> <EOS> <EOS>
dev minibatch loss: 5.713786602020264
  DEV sample 1:
    DEV enc input           > thanks for the rt <EOS>
    DEV dec input           > rt reach for the sky <EOS>
    DEV dec train predicted > i is the the the <EOS>
    DE

global_step: 1193
learning rate 0.000978091
epoch 2
batch 200
training minibatch loss: 5.159919738769531
  sample 1:
    enc input           > not when ur taking a dump lol <EOS>
    dec input           > someone s always watching everything you do <EOS>
    dec train predicted > i is a be the <EOS> re the
dev minibatch loss: 5.705637454986572
  DEV sample 1:
    DEV enc input           > i would like to know where my country is going still thank you <EOS>
    DEV dec input           > why do nothing but bitch about the candidate when you can t even vote ? <EOS>
    DEV dec train predicted > i is you to i <EOS> the time <EOS> you re have be be <EOS> <EOS>
    DEV dec train infer > i m a good day <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
global_step: 1243
learning rate 0.000977183
epoch 2
batch 250
training minibatch loss: 5.298300743103027
  sample 1:
    enc input           > how do you create that illusion ? lol <EOS>
    dec input           > if you don t know how to twerk you have 

dev minibatch loss: 6.0095930099487305
  DEV sample 1:
    DEV enc input           > how adorable have a wonderful day <EOS>
    DEV dec input           > seeing mommy properly for the first time beautiful <EOS>
    DEV dec train predicted > i you for <EOS> the great day <EOS> <EOS>
    DEV dec train infer > i m just just just to be a great day <EOS> <EOS> <EOS> <EOS>
global_step: 1739
learning rate 0.000968224
epoch 3
batch 250
training minibatch loss: 5.198957920074463
  sample 1:
    enc input           > they also got divorced when i wa but you can t prove those thing are directly linked <EOS>
    dec input           > who even are you guy ? <EOS>
    dec train predicted > i s like it a <EOS> <EOS>
dev minibatch loss: 5.628173351287842
  DEV sample 1:
    DEV enc input           > why would you talk trash on your gfs school lol bye <EOS>
    DEV dec input           > supporting my girl on jv dawgggg screw the varsity <EOS>
    DEV dec train predicted > i the favorite <EOS> the <EOS

global_step: 2285
learning rate 0.000958457
epoch 4
batch 300
training minibatch loss: 4.9669880867004395
  sample 1:
    enc input           > yes it is haha thank you <EOS>
    dec input           > you are so beautiful is this you ? xd <EOS>
    dec train predicted > i re a much <EOS> it <EOS> re <EOS> <EOS>
dev minibatch loss: 5.623543739318848
  DEV sample 1:
    DEV enc input           > like every day <EOS>
    DEV dec input           > how often are you in manhattan ? <EOS>
    DEV dec train predicted > i is i you <EOS> the <EOS> <EOS>
    DEV dec train infer > i m just going to be a bit <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
global_step: 2335
learning rate 0.000957567
epoch 4
batch 350
training minibatch loss: 4.903368949890137
  sample 1:
    enc input           > i originally read that a bug catching <EOS>
    dec input           > friday night is for bug writing <EOS>
    dec train predicted > i of is a a the <EOS>
dev minibatch loss: 5.651825904846191
  DEV sample 1:
  

dev minibatch loss: 6.1120285987854
  DEV sample 1:
    DEV enc input           > wait both of these should be not <EOS>
    DEV dec input           > then the bridge is maybe <EOS>
    DEV dec train predicted > the you best is getting you
    DEV dec train infer > the new headphone is a great time for a great time of <EOS> <EOS> <EOS>
global_step: 2881
learning rate 0.000947908
epoch 5
batch 400
training minibatch loss: 4.413819789886475
  sample 1:
    enc input           > bad idea wouldn t recommend <EOS>
    dec input           > maybe i should light myself on fire too <EOS>
    dec train predicted > this the m be the to the <EOS> <EOS>
dev minibatch loss: 5.825194835662842
  DEV sample 1:
    DEV enc input           > you spelled shitbag of humanity wrong <EOS>
    DEV dec input           > lt oh look another ignorant racist let s have twitter follower block him <EOS>
    DEV dec train predicted > the the the a the a in s be a own have <EOS> <EOS>
    DEV dec train infer > the la

dev minibatch loss: 6.365240573883057
  DEV sample 1:
    DEV enc input           > very important info to figure out very confused amp a speechless over here cc <EOS>
    DEV dec input           > i m with some gentleman who need an education in <EOS>
    DEV dec train predicted > is think not the problem conference s to apology via the
    DEV dec train infer > is you been tired and trump is a lot of rapist to wx <EOS> <EOS> <EOS>
global_step: 3427
learning rate 0.000938346
epoch 6
batch 450
training minibatch loss: 4.123628616333008
  sample 1:
    enc input           > the public see right through the establishment fluff after year of keeping u down stop it <EOS>
    dec input           > thanks for watching <EOS>
    dec train predicted > the for the <EOS>
dev minibatch loss: 5.823311805725098
  DEV sample 1:
    DEV enc input           > old black english teacher hello ? vampire student fuck me daddy <EOS>
    DEV dec input           > what s up do you have game on your phone <EO

dev minibatch loss: 6.294200420379639
  DEV sample 1:
    DEV enc input           > yea that wa rough <EOS>
    DEV dec input           > oh god help you i hated that movie <EOS>
    DEV dec train predicted > i i i you have had the <EOS> <EOS>
    DEV dec train infer > i m not a lot of a year of a bit of a of job <EOS> <EOS>
global_step: 3969
learning rate 0.000928949
epoch 8
batch 0
training minibatch loss: 3.4951188564300537
  sample 1:
    enc input           > that s hideous <EOS>
    dec input           > if you have a weak stomach look away <EOS>
    dec train predicted > i you re a good thing like in <EOS>
dev minibatch loss: 6.508508205413818
  DEV sample 1:
    DEV enc input           > dude seriously read his debate he should not be your favorite writer <EOS>
    DEV dec input           > funny how my fav author get into quarrel with the other thinker i like such a pinker and <EOS>
    DEV dec train predicted > just the the campaign opinion <EOS> the the and the debate <EOS> 

global_step: 4515
learning rate 0.000919578
epoch 9
batch 50
training minibatch loss: 2.8853261470794678
  sample 1:
    enc input           > repeal and replace with what sir ? <EOS>
    dec input           > but you definitely should not have to pay this tax after your co op fails <EOS>
    dec train predicted > how the re will have have to tax the and ? the tv set fails <EOS>
dev minibatch loss: 7.230550289154053
  DEV sample 1:
    DEV enc input           > that s le than an iphone dongle <EOS>
    DEV dec input           > an issue these day ? ? ? <EOS>
    DEV dec train predicted > you iphone ? ? <EOS> <EOS> <EOS> <EOS>
    DEV dec train infer > you re a sadness <EOS> ? <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
global_step: 4565
learning rate 0.000918725
epoch 9
batch 100
training minibatch loss: 2.9936718940734863
  sample 1:
    enc input           > omg i did that friday they wont wash out <EOS>
    dec input           > i spilled popper on my fitted sheet so fuck my sleep ton

global_step: 5061
learning rate 0.000910302
epoch 10
batch 100
training minibatch loss: 2.726698637008667
  sample 1:
    enc input           > receipt on that ? <EOS>
    dec input           > sound just like hillary and her email while secretary of state <EOS>
    dec train predicted > just a seen the and clinton debate secretary secretary of muslim ?
dev minibatch loss: 7.703882217407227
  DEV sample 1:
    DEV enc input           > before the season started i would have said siemian now i would say keenum <EOS>
    DEV dec input           > who will be first qb to be benched ? blaine gabbert case keenum kirk cousin trevor siemian <EOS>
    DEV dec train predicted > the i the a a to get a <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
    DEV dec train infer > the best thing wa not a good thing i can t have a good position <EOS> <EOS> <EOS>
global_step: 5111
learning rate 0.000909457
epoch 10
batch 150
training minibatch loss: 2.8318440914154053
  sample 1:
    enc inpu

global_step: 5607
learning rate 0.000901119
epoch 11
batch 150
training minibatch loss: 2.5784432888031006
  sample 1:
    enc input           > that s the best tweet for the night <EOS>
    dec input           > we really cant <EOS>
    dec train predicted > i are s <EOS>
dev minibatch loss: 8.986289978027344
  DEV sample 1:
    DEV enc input           > i didn t know what to call it <EOS>
    DEV dec input           > i don t think of the console a the dash ? <EOS>
    DEV dec train predicted > no m t have it you same trade single business whip <EOS>
    DEV dec train infer > no wonder you re going to say it s in the franchise <EOS> <EOS> <EOS> <EOS> <EOS>
global_step: 5657
learning rate 0.000900283
epoch 11
batch 200
training minibatch loss: 2.557586431503296
  sample 1:
    enc input           > beautiful picture <EOS>
    dec input           > good evening it s how could i forget ? enjoy <EOS>
    dec train predicted > good morning i s so i you follow <EOS> <EOS> <EOS>
dev minibat

dev minibatch loss: 9.231988906860352
  DEV sample 1:
    DEV enc input           > u mean pregnant with his child again ? <EOS>
    DEV dec input           > omg omg omg omg the girl who s baby father punched her in the stomach is pregnant <EOS>
    DEV dec train predicted > followed anthony let i <EOS> <EOS> i didn <EOS> <EOS> ha <EOS> pm nyc <EOS> <EOS> now <EOS>
    DEV dec train infer > followed yelling goat but i have <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
global_step: 6153
learning rate 0.000892029
epoch 12
batch 200
training minibatch loss: 2.5042667388916016
  sample 1:
    enc input           > it wa shameful to watch those question happens the people that asked them should feel ashamed <EOS>
    dec input           > and no one care about your electric bus <EOS>
    dec train predicted > and i one think about the own debate <EOS>
dev minibatch loss: 8.666864395141602
  DEV sample 1:
    DEV enc input           > i think the whole off bway prod might be on youtube <EOS>
  

global_step: 6649
learning rate 0.000883851
epoch 13
batch 200
training minibatch loss: 2.2528178691864014
  sample 1:
    enc input           > it a wednesday <EOS>
    dec input           > ah but february is a tuesday <EOS>
    dec train predicted > ah is the is the time <EOS>
dev minibatch loss: 9.075677871704102
  DEV sample 1:
    DEV enc input           > colin powell real housewife of the potomac ? <EOS>
    DEV dec input           > powell will be going to fewer dinner party in washington <EOS>
    DEV dec train predicted > we where we a to you the <EOS> <EOS> nyc party
    DEV dec train infer > we are at a a we became a good day in <EOS> <EOS> <EOS> <EOS> <EOS>
global_step: 6699
learning rate 0.000883031
epoch 13
batch 250
training minibatch loss: 2.271639108657837
  sample 1:
    enc input           > get out of my mention with this <EOS>
    dec input           > hell yeah d <EOS>
    dec train predicted > my sometimes often <EOS>
dev minibatch loss: 8.821416854858398
  DEV

dev minibatch loss: 9.411482810974121
  DEV sample 1:
    DEV enc input           > fyi they look like boob <EOS>
    DEV dec input           > i have bumped down this price so many time pls someone just fuckin buy it <EOS>
    DEV dec train predicted > black m a of attack morning is not accurate to the economy agree prepared with <EOS>
    DEV dec train infer > black people colored a bit report to the race economy in the background <EOS> <EOS> <EOS> <EOS>
global_step: 7245
learning rate 0.000874123
epoch 14
batch 300
training minibatch loss: 1.775445818901062
  sample 1:
    enc input           > todd is the best <EOS>
    dec input           > knocking my sock off amp learning to present like a <EOS>
    dec train predicted > knocking the mets off for learning to present like a present
dev minibatch loss: 9.31064224243164
  DEV sample 1:
    DEV enc input           > rewarding hard work <EOS>
    DEV dec input           > i m remember those day <EOS>
    DEV dec train predicted > i m

dev minibatch loss: 10.575803756713867
  DEV sample 1:
    DEV enc input           > tight i really started gronk over ebron <EOS>
    DEV dec input           > we suck well at least compared to pat <EOS>
    DEV dec train predicted > brown should in ? clinton i to be <EOS>
    DEV dec train infer > brown reaction is in the row and d cancel which guy to start with <EOS> <EOS> <EOS>
global_step: 7791
learning rate 0.000865305
epoch 15
batch 350
training minibatch loss: 1.8340617418289185
  sample 1:
    enc input           > so you more petty than before <EOS>
    dec input           > i changed a lot dj khaled voice <EOS>
    dec train predicted > i wa the lot dj khaled voice <EOS>
dev minibatch loss: 9.779804229736328
  DEV sample 1:
    DEV enc input           > that kid close deal <EOS>
    DEV dec input           > saw this today amp now my perception of the term young professional is screwed up <EOS>
    DEV dec train predicted > can the a elizabeth not bruce opinion return pay qu

dev minibatch loss: 10.911680221557617
  DEV sample 1:
    DEV enc input           > i love that she s such a big kid at heart <EOS>
    DEV dec input           > katy at shanghai disneyland resort the complete instagram story <EOS>
    DEV dec train predicted > update tyler my <EOS> <EOS> i mm angel work <EOS>
    DEV dec train infer > update hello still so cute made it <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
global_step: 8337
learning rate 0.000856576
epoch 16
batch 400
training minibatch loss: 1.8529126644134521
  sample 1:
    enc input           > did you ever make it to alameda ? there re some nifty house there <EOS>
    dec input           > all the house in san francisco look like giant frosted cake <EOS>
    dec train predicted > the my th and san francisco just like new frosted cake <EOS>
dev minibatch loss: 9.259542465209961
  DEV sample 1:
    DEV enc input           > very nice man <EOS>
    DEV dec input           > shout out to for hitting me with that new hotness <EOS

dev minibatch loss: 10.939848899841309
  DEV sample 1:
    DEV enc input           > excellent article i posted a comment on the power of small viral action h t <EOS>
    DEV dec input           > the force that surround u just tested the vulnerability of the st amendment and failed <EOS>
    DEV dec train predicted > no latest ha ha not saw saw though latest are a street list are quite right
    DEV dec train infer > no you re cutting right of them do you do right they are supposed to help they are right
global_step: 8883
learning rate 0.000847936
epoch 17
batch 450
training minibatch loss: 1.6261651515960693
  sample 1:
    enc input           > thanks shannon <EOS>
    dec input           > congrats man <EOS>
    dec train predicted > congrats man <EOS>
dev minibatch loss: 10.346907615661621
  DEV sample 1:
    DEV enc input           > also they re only ? ? <EOS>
    DEV dec input           > lol they don t go on sale until the th so not sold out we got ta go <EOS>
    DEV dec trai

dev minibatch loss: 11.226786613464355
  DEV sample 1:
    DEV enc input           > new revenue idea we re taking of every purchase so expect lot of ad for jet fighter <EOS>
    DEV dec input           > so do you think i m in the market for a th generation military fighter jet ? <EOS>
    DEV dec train predicted > we many you know about wanted willing a mass <EOS> your level via <EOS> truly <EOS> <EOS> <EOS>
    DEV dec train infer > we should make america wanted to yourself <EOS> for spotting <EOS> <EOS> <EOS> <EOS> <EOS>
Saving session
global_step: 9425
learning rate 0.000839444
epoch 19
batch 0
training minibatch loss: 1.115816354751587
  sample 1:
    enc input           > feeling safe in the world is an anacronym <EOS>
    dec input           > lot of gray swan this morning <EOS>
    dec train predicted > lot of gray swan that morning <EOS>
dev minibatch loss: 10.279783248901367
  DEV sample 1:
    DEV enc input           > i love him and joe started this <EOS>
    DEV dec input

global_step: 9921
learning rate 0.000831748
epoch 20
batch 0
training minibatch loss: 1.1469051837921143
  sample 1:
    enc input           > you should <EOS>
    dec input           > really miss maybe someday i ll be able to go back <EOS>
    dec train predicted > just check we someday i ll be able to go up <EOS>
dev minibatch loss: 11.821510314941406
  DEV sample 1:
    DEV enc input           > keeping it real on what we say versus what we do <EOS>
    DEV dec input           > hah love this too <EOS>
    DEV dec train predicted > not like bed white then
    DEV dec train infer > not like those easiest bot or all u will tell s cool tweet u ? <EOS> <EOS>
global_step: 9971
learning rate 0.000830976
epoch 20
batch 50
training minibatch loss: 1.0702896118164062
  sample 1:
    enc input           > glad you re okay curious to see what this wa is <EOS>
    dec input           > so crazy wa in an uber on nd amp th <EOS>
    dec train predicted > so good wa in a uber in nd amp th <EOS>
d

global_step: 10467
learning rate 0.000823358
epoch 21
batch 50
training minibatch loss: 1.1270636320114136
  sample 1:
    enc input           > happy belated birthday <EOS>
    dec input           > thanks you guy for the birthday wish yesterday d i had a really nice day <EOS>
    dec train predicted > thanks you all in the birthday birthday thx d i had a great fun day <EOS>
dev minibatch loss: 12.921459197998047
  DEV sample 1:
    DEV enc input           > what the ever loving fuck <EOS>
    DEV dec input           > mark halperin just said clinton never fought successfully in a presidential campaign to win <EOS>
    DEV dec train predicted > yeah lame <EOS> posted he pro fine hand head the bit walmart waking my rn
    DEV dec train infer > yeah lame lame i pick <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>
global_step: 10517
learning rate 0.000822594
epoch 21
batch 100
training minibatch loss: 1.0442323684692383
  sample 1:
    enc input           > sorry will do computer c

dev minibatch loss: 11.734525680541992
  DEV sample 1:
    DEV enc input           > wow thanks <EOS>
    DEV dec input           > just say no to the container store cc <EOS>
    DEV dec train predicted > your made generous keith building previous and hello <EOS>
    DEV dec train infer > your awkward article ha new side of great paper had a tremendously robot for a great song <EOS> <EOS>
global_step: 11013
learning rate 0.000815052
epoch 22
batch 100
training minibatch loss: 1.0898010730743408
  sample 1:
    enc input           > i wa jealous of you before for so many reason add this one to the list <EOS>
    dec input           > a shake shack walking distance from my house open tomorrow so see you later <EOS>
    dec train predicted > a shake shack walking distance from my new show today so help you <EOS> <EOS>
dev minibatch loss: 13.142420768737793
  DEV sample 1:
    DEV enc input           > the little baby bomber sanchez didn t lip off or it would have happened <EOS>
    DEV d

global_step: 11509
learning rate 0.00080758
epoch 23
batch 100
training minibatch loss: 1.087098479270935
  sample 1:
    enc input           > lol bc when i stopped watching this summer i just got all my news from you <EOS>
    dec input           > tbh i don t watch i just go through bb account and rt funny stuff <EOS>
    dec train predicted > thanks i don t need i go lost down bb account and rt funny stuff <EOS>
dev minibatch loss: 11.804234504699707
  DEV sample 1:
    DEV enc input           > hey i left the name out <EOS>
    DEV dec input           > i can t believe you posted this you re nut <EOS>
    DEV dec train predicted > all can t tell the can <EOS> <EOS> re a <EOS>
    DEV dec train infer > all this day i got a at my album ride this whole time yup <EOS> <EOS> <EOS> <EOS>
global_step: 11559
learning rate 0.00080683
epoch 23
batch 150
training minibatch loss: 0.9880844950675964
  sample 1:
    enc input           > nobody is doing anything about it what action can be take

global_step: 12005
learning rate 0.000800176
epoch 24
batch 100
training minibatch loss: 0.9887622594833374
  sample 1:
    enc input           > my mom is here too <EOS>
    dec input           > about to watch with my mom we reset her twitter password in case she need to tweet <EOS>
    dec train predicted > i to watch between my mom instead reset her boy password in case i need to go <EOS>
dev minibatch loss: 13.078783988952637
  DEV sample 1:
    DEV enc input           > straight up <EOS>
    DEV dec input           > lord paulie just put the key in lol <EOS>
    DEV dec train predicted > why my of feeling over new with a <EOS>
    DEV dec train infer > why do i doubt this wa on word but i d fail <EOS> <EOS> <EOS> <EOS> <EOS>
global_step: 12055
learning rate 0.000799433
epoch 24
batch 150
training minibatch loss: 0.9487783908843994
  sample 1:
    enc input           > and you never know maybe one day i will enjoy some texas <EOS>
    dec input           > impossible i thoroughly 

In [1]:
# Plot the recorded dev and training loss tracks on one set of axes.
# Each track is transposed once: component 0 of every entry goes on the
# x-axis (labelled 'Global Step') and component 1 on the y-axis.
dev_columns = list(zip(*dev_loss_track))
plt.plot(dev_columns[0], dev_columns[1], label='Dev')
train_columns = list(zip(*loss_track))
plt.plot(train_columns[0], train_columns[1], label='Train')
plt.xlabel('Global Step')
plt.ylabel('Sequence Loss')
plt.legend(loc='best')
plt.show()

NameError: name 'plt' is not defined

In [None]:
#Create inference
# Restores a saved checkpoint into a fresh session, evaluates the inference
# outputs (decoder_pred_decode, decoder_pred_decode_prob) on one input batch,
# prints the decoded tokens next to the encoder input, and finally lists the
# Placeholder ops of the session graph.
mean_metric = []   # not used in this cell (its only use below is commented out)
chunk_size = 500   # not used in this cell (only referenced by the commented n_chunks line)
#n_chunks = int(df_all_train.shape[0]/chunk_size)
n_chunks=10

# Raw string literal: the Windows path contains "\c" and "\s", which are not
# valid escape sequences in an ordinary string literal. The resulting path
# value is unchanged.
checkpoint_path = r'd:\coding\seq2seq\chkpt\seq2seq_enron_encode_24_decode_48_vocab_10_embedding_32_seq_3_10_batch_32_layers_1_enkeep_10_dekeep_10-9639'

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(session, checkpoint_path)
    #  saver.restore(session, 'd:\coding\seq2seq\chkpt\copy_task-19100')

    for chunk in range(n_chunks):
        # Only the first chunk is processed; the loop exits on the second pass.
        if chunk>0: break
        # ran_seq is only consumed by the commented-out alternative below.
        ran_seq = generateRandomSeqBatchMajor(length_from=length_from, length_to=length_to,
                       vocab_lower=2, vocab_upper=vocab_upper,
                       batch_size=n_batch_size)

        #input_batch_data = ran_seq
        input_batch_data = [[3,4,5,6,7,8, 9]]
        #input_batch_data = df_all_dev['alpha_Pair_0_encoding'].values[:32]

        # prepare_batch returns (padded id matrix, per-row lengths).
        fd_inf = prepare_batch(input_batch_data)
        feed_dict_inf = {encoder_inputs: fd_inf[0],
                    encoder_inputs_length: fd_inf[1]}
        inf_out = session.run([decoder_pred_decode, decoder_pred_decode_prob], feed_dict_inf)

        #print (df_all_train.values[0][1], df_all_train.values[1][1])
        #print (feed_dict_inf)
        for i, (e_in, dt_inf) in enumerate(zip(feed_dict_inf[encoder_inputs], inf_out[0])):
            #mean_metric.append([df_all_train.values[i][0], df_all_train.values[i][1], dt_inf])
            print('    sample {}:'.format(i + 1))
            print('    enc input                > {}'.format([inv_map[k] for k in e_in]))
            # NOTE(review): this row is looked up in df_all_dev by batch position i,
            # independently of input_batch_data. With the hard-coded batch above it
            # is not derived from the encoder input shown on the previous line --
            # confirm this is intended.
            print('    dec input                > {}'.format([inv_map[k] for k in df_all_dev['alpha_Pair_1_encoding'].values[i]]))
            print('    dec train inference      > {}'.format([inv_map[k] for k in dt_inf]))
            #print('    dec train inference prob > {}'.format([inf_out[1][j][i].max() for j in range((len(inf_out[1])))]))

            #if i>0: break

       # print ('Save Model')
       # builder = tf.saved_model.builder.SavedModelBuilder('d:\coding\seq2seq\model')
       # builder.add_meta_graph_and_variables(session, ['serve'])

        #builder.save()

    # List every Placeholder op in the session graph.
    ops = session.graph.get_operations()

    feed_ops = [op for op in ops if op.type=='Placeholder']

    print(feed_ops)
        #if n_chunks >0: 
         #   break        