In [1]:
#!/usr/bin/env python
# coding=utf-8

__author__ = "Xinpeng.Chen"

import os
import sys
import time
import matplotlib.pyplot as plt
import pickle
import numpy as np
import pandas as pd
import random

import h5py
import ipdb

import tensorflow as tf


# ------------------------------------------------------------------------------------------------------
# Initialization class
#  1. Pooling the visual features into a single dense feature
#  2. Then, build sentence LSTM, word LSTM
# ------------------------------------------------------------------------------------------------------
class RegionPooling_HierarchicalRNN():
    def __init__(self, n_words,
                       batch_size,
                       num_boxes,
                       feats_dim,
                       project_dim,
                       sentRNN_lstm_dim,
                       sentRNN_FC_dim,
                       wordRNN_lstm_dim,
                       S_max,
                       N_max,
                       word_embed_dim,
                       bias_init_vector=None):

        self.n_words = n_words
        self.batch_size = batch_size
        self.num_boxes = num_boxes # 50
        self.feats_dim = feats_dim # 4096
        self.project_dim = project_dim # 1024
        self.S_max = S_max # 6
        self.N_max = N_max # 50
        self.word_embed_dim = word_embed_dim # 1024

        self.sentRNN_lstm_dim = sentRNN_lstm_dim # 512 hidden size
        self.sentRNN_FC_dim = sentRNN_FC_dim # 1024 in fully connected layer
        self.wordRNN_lstm_dim = wordRNN_lstm_dim # 512 hidden size
	


        # word embedding, parameters of embedding
        # embedding shape: n_words x wordRNN_lstm_dim
        with tf.device('/cpu:0'):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, word_embed_dim], -0.1, 0.1), name='Wemb')
        print("Wemb output>>>>>>>>>>>>",self.Wemb,tf.shape(self.Wemb))
        #self.bemb = tf.Variable(tf.zeros([word_embed_dim]), name='bemb')

        # regionPooling_W shape: 4096 x 1024
        # regionPooling_b shape: 1024
        self.regionPooling_W = tf.Variable(tf.random_uniform([feats_dim, project_dim], -0.1, 0.1), name='regionPooling_W')
        self.regionPooling_b = tf.Variable(tf.zeros([project_dim]), name='regionPooling_b')

        # sentence LSTM
        self.sent_LSTM = tf.nn.rnn_cell.BasicLSTMCell(sentRNN_lstm_dim, state_is_tuple=True)

        # logistic classifier
        self.logistic_Theta_W = tf.Variable(tf.random_uniform([sentRNN_lstm_dim, 2], -0.1, 0.1), name='logistic_Theta_W')
        self.logistic_Theta_b = tf.Variable(tf.zeros(2), name='logistic_Theta_b')

        # fc1_W: 512 x 1024, fc1_b: 1024
        # fc2_W: 1024 x 1024, fc2_b: 1024
        self.fc1_W = tf.Variable(tf.random_uniform([sentRNN_lstm_dim, sentRNN_FC_dim], -0.1, 0.1), name='fc1_W')
        self.fc1_b = tf.Variable(tf.zeros(sentRNN_FC_dim), name='fc1_b')
        self.fc2_W = tf.Variable(tf.random_uniform([sentRNN_FC_dim, 1024], -0.1, 0.1), name='fc2_W')
        self.fc2_b = tf.Variable(tf.zeros(1024), name='fc2_b')
        def get_a_cell(lstm_size):
          lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
          return lstm
        # word LSTM
        self.word_LSTM = tf.nn.rnn_cell.BasicLSTMCell(wordRNN_lstm_dim, state_is_tuple=True)
        #self.word_LSTM = tf.nn.rnn_cell.MultiRNNCell([self.word_LSTM] * 2, state_is_tuple=True)
        self.word_LSTM = tf.nn.rnn_cell.MultiRNNCell([get_a_cell(wordRNN_lstm_dim) for i in range(2)], state_is_tuple=True)
        #self.word_LSTM2 = tf.nn.rnn_cell.BasicLSTMCell(wordRNN_lstm_dim, state_is_tuple=True)


        self.embed_word_W = tf.Variable(tf.random_uniform([wordRNN_lstm_dim, n_words], -0.1,0.1), name='embed_word_W')
        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
        else:
            self.embed_word_b = tf.Variable(tf.zeros([n_words]), name='embed_word_b')

    def build_model(self):
        # receive the feats in the current image
        # it's shape is 10 x 50 x 4096
        # tmp_feats: 500 x 4096
        feats = tf.placeholder(tf.float32, [self.batch_size, self.num_boxes, self.feats_dim])
        tmp_feats = tf.reshape(feats, [-1, self.feats_dim])

        # project_vec_all: 500 x 4096 * 4096 x 1024 --> 500 x 1024
        # project_vec: 10 x 1024
        project_vec_all = tf.matmul(tmp_feats, self.regionPooling_W) + self.regionPooling_b
        project_vec_all = tf.reshape(project_vec_all, [self.batch_size, 50, self.project_dim])
        project_vec = tf.reduce_max(project_vec_all, reduction_indices=1)

        # receive the [continue:0, stop:1] lists
        # example: [0, 0, 0, 0, 1, 1], it means this paragraph has five sentences
        num_distribution = tf.placeholder(tf.int32, [self.batch_size, self.S_max])

        # receive the ground truth words, which has been changed to idx use word2idx function
        captions = tf.placeholder(tf.int32, [self.batch_size, self.S_max, self.N_max+1])
        #print("Captions:>>>>>>>>>>>>>>>>>>>>>>>",captions)
        captions_masks = tf.placeholder(tf.float32, [self.batch_size, self.S_max, self.N_max+1])

        # ---------------------------------------------------------------------------------------------------------------------
        # The method which initialize the state, is refered from below sites:
        # 1. http://stackoverflow.com/questions/38241410/tensorflow-remember-lstm-state-for-next-batch-stateful-lstm/38417699
        # 2. https://www.tensorflow.org/api_docs/python/rnn_cell/classes_storing_split_rnncell_state#LSTMStateTuple
        # 3. https://medium.com/@erikhallstrm/using-the-tensorflow-lstm-api-3-7-5f2b97ca6b73#.u4w9z6h0h
        # ---------------------------------------------------------------------------------------------------------------------
        sent_state = self.sent_LSTM.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #word_state = self.word_LSTM.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #word_state1 = self.word_LSTM1.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #word_state2 = self.word_LSTM2.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #sent_state = tf.zeros([self.batch_size, self.sent_LSTM1.state_size])
        #word_state1 = tf.zeros([self.batch_size, self.word_LSTM1.state_size])
        #word_state2 = tf.zeros([self.batch_size, self.word_LSTM2.state_size])

        probs = []
        loss = 0.0
        loss_sent = 0.0
        loss_word = 0.0
        lambda_sent = 5.0
        lambda_word = 1.0

        print('Start build model:')
        #----------------------------------------------------------------------------------------------
        # Hierarchical RNN: sentence RNN and words RNN
        # The word RNN has the max number, N_max = 50, the number in the papar is 50
        #----------------------------------------------------------------------------------------------
        for i in range(0, self.S_max):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope('sent_LSTM',reuse=tf.AUTO_REUSE):
                sent_output, sent_state = self.sent_LSTM(project_vec, sent_state)

            with tf.name_scope('fc1'):
                hidden1 = tf.nn.relu( tf.matmul(sent_output, self.fc1_W) + self.fc1_b )
            with tf.name_scope('fc2'):
                sent_topic_vec = tf.nn.relu( tf.matmul(hidden1, self.fc2_W) + self.fc2_b )

            # sent_state is a tuple, sent_state = (c, h)
            # 'c': shape=(1, 512) dtype=float32, 'h': shape=(1, 512) dtype=float32
            # The loss here, I refer from the web which is very helpful for me:
            # 1. http://stackoverflow.com/questions/34240703/difference-between-tensorflow-tf-nn-softmax-and-tf-nn-softmax-cross-entropy-with
            # 2. http://stackoverflow.com/questions/35277898/tensorflow-for-binary-classification
            # 3. http://stackoverflow.com/questions/35226198/is-this-one-hot-encoding-in-tensorflow-fast-or-flawed-for-any-reason
            # 4. http://stackoverflow.com/questions/35198528/reshape-y-train-for-binary-text-classification-in-tensorflow
            sentRNN_logistic_mu = tf.nn.xw_plus_b( sent_output, self.logistic_Theta_W, self.logistic_Theta_b )
            sentRNN_label = tf.stack([ 1 - num_distribution[:, i], num_distribution[:, i] ])
            sentRNN_label = tf.transpose(sentRNN_label)
            sentRNN_loss = tf.nn.softmax_cross_entropy_with_logits(logits=sentRNN_logistic_mu, labels=sentRNN_label)
            sentRNN_loss = tf.reduce_sum(sentRNN_loss)/self.batch_size
            loss += sentRNN_loss * lambda_sent
            loss_sent += sentRNN_loss

            # the begining input of word_LSTM is topic vector, and DON'T compute the loss
            # This is follow the paper: Show and Tell
            #word_state = self.word_LSTM.zero_state(batch_size=self.batch_size, dtype=tf.float32)
            #with tf.variable_scope('word_LSTM'):
            #    word_output, word_state = self.word_LSTM(sent_topic_vec)
            topic = tf.nn.rnn_cell.LSTMStateTuple(sent_topic_vec[:, 0:512], sent_topic_vec[:, 512:])
            word_state = (topic, topic)
            for j in range(0, self.N_max):
                if j > 0:
                    tf.get_variable_scope().reuse_variables()

                with tf.device('/cpu:0'):
                    current_embed = tf.nn.embedding_lookup(self.Wemb, captions[:, i, j])

                with tf.variable_scope('word_LSTM',reuse=tf.AUTO_REUSE):
                    word_output, word_state = self.word_LSTM(current_embed, word_state)

                # How to make one-hot encoder, I refer from this excellent web:
                # http://stackoverflow.com/questions/33681517/tensorflow-one-hot-encoder
                labels = tf.reshape(captions[:, i, j+1], [-1, 1])
                #print("Labels and its shape +++++++++++++++",labels,tf.shape(labels))
                indices = tf.reshape(tf.range(0, self.batch_size, 1), [-1, 1])
                #print("Indices and its shape +++++++++++++++",indices,tf.shape(indices))
                concated = tf.concat([indices, labels],1)
                #print("Concated+++++++++++++++++++",concated)
                print("Success")
                onehot_labels = tf.sparse_to_dense(concated, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)

                # At each timestep the hidden state of the last LSTM layer is used to predict a distribution
                # over the words in the vocbulary
                logit_words = tf.nn.xw_plus_b(word_output[:], self.embed_word_W, self.embed_word_b)
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit_words, labels=onehot_labels)
                cross_entropy = cross_entropy * captions_masks[:, i, j]
                loss_wordRNN = tf.reduce_sum(cross_entropy) / self.batch_size
                loss += loss_wordRNN * lambda_word
                loss_word += loss_wordRNN

        return feats, num_distribution, captions, captions_masks, loss, loss_sent, loss_word

    def generate_model(self):
        # feats: 1 x 50 x 4096
        feats = tf.placeholder(tf.float32, [1, self.num_boxes, self.feats_dim])
        # tmp_feats: 50 x 4096
        tmp_feats = tf.reshape(feats, [-1, self.feats_dim])

        # project_vec_all: 50 x 4096 * 4096 x 1024 + 1024 --> 50 x 1024
        project_vec_all = tf.matmul(tmp_feats, self.regionPooling_W) + self.regionPooling_b
        project_vec_all = tf.reshape(project_vec_all, [1, 50, self.project_dim])
        project_vec = tf.reduce_max(project_vec_all, reduction_indices=1)

        # initialize the sent_LSTM state
        sent_state = self.sent_LSTM.zero_state(batch_size=1, dtype=tf.float32)

        # save the generated paragraph to list, here I named generated_sents
        generated_paragraph = []

        # pred
        pred_re = []

        # T_stop: run the sentence RNN forward until the stopping probability p_i (STOP) exceeds a threshold T_stop
        T_stop = tf.constant(0.5)

        # Start build the generation model
        print('Start build the generation model: ')

        # sentence RNN
        #word_state = self.word_LSTM.zero_state(batch_size=1, dtype=tf.float32)
        #with tf.variable_scope('word_LSTM'):
        #    word_output, word_state = self.word_LSTM(sent_topic_vec, word_state)
        for i in range(0, self.S_max):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            # sent_state:
            # LSTMStateTuple(c=<tf.Tensor 'sent_LSTM/BasicLSTMCell/add_2:0' shape=(1, 512) dtype=float32>,
            #                h=<tf.Tensor 'sent_LSTM/BasicLSTMCell/mul_2:0' shape=(1, 512) dtype=float32>)
            with tf.variable_scope('sent_LSTM',reuse=tf.AUTO_REUSE):
                sent_output, sent_state = self.sent_LSTM(project_vec, sent_state)

            # self.fc1_W: 512 x 1024, self.fc1_b: 1024
            # hidden1: 1 x 1024
            # sent_topic_vec: 1 x 1024
            with tf.name_scope('fc1'):
                hidden1 = tf.nn.relu( tf.matmul(sent_output, self.fc1_W) + self.fc1_b )
            with tf.name_scope('fc2'):
                sent_topic_vec = tf.nn.relu( tf.matmul(hidden1, self.fc2_W) + self.fc2_b )

            sentRNN_logistic_mu = tf.nn.xw_plus_b(sent_output, self.logistic_Theta_W, self.logistic_Theta_b)
            pred = tf.nn.softmax(sentRNN_logistic_mu)
            pred_re.append(pred)

            # save the generated sentence to list, named generated_sent
            generated_sent = []

            # initialize the word LSTM state
            #word_state = self.word_LSTM.zero_state(batch_size=1, dtype=tf.float32)
            #with tf.variable_scope('word_LSTM'):
            #    word_output, word_state = self.word_LSTM(sent_topic_vec, word_state)
            topic = tf.nn.rnn_cell.LSTMStateTuple(sent_topic_vec[:, 0:512], sent_topic_vec[:, 512:])
            word_state = (topic, topic)
            # word RNN, unrolled to N_max time steps
            for j in range(0, self.N_max):
                if j > 0:
                    tf.get_variable_scope().reuse_variables()

                if j == 0:
                    with tf.device('/cpu:0'):
                        # get word embedding of BOS (index = 0)
                        current_embed = tf.nn.embedding_lookup(self.Wemb, tf.zeros([1], dtype=tf.int64))

                with tf.variable_scope('word_LSTM',reuse=tf.AUTO_REUSE):
                    word_output, word_state = self.word_LSTM(current_embed, word_state)

                # word_state:
                # (
                #     LSTMStateTuple(c=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell0/BasicLSTMCell/add_2:0' shape=(1, 512) dtype=float32>,
                #                    h=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell0/BasicLSTMCell/mul_2:0' shape=(1, 512) dtype=float32>),
                #     LSTMStateTuple(c=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell1/BasicLSTMCell/add_2:0' shape=(1, 512) dtype=float32>,
                #                    h=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell1/BasicLSTMCell/mul_2:0' shape=(1, 512) dtype=float32>)
                # )
                logit_words = tf.nn.xw_plus_b(word_output, self.embed_word_W, self.embed_word_b)
                max_prob_index = tf.argmax(logit_words, 1)[0]
                generated_sent.append(max_prob_index)

                with tf.device('/cpu:0'):
                    current_embed = tf.nn.embedding_lookup(self.Wemb, max_prob_index)
                    current_embed = tf.expand_dims(current_embed, 0)

            generated_paragraph.append(generated_sent)

        return feats, generated_paragraph, pred_re, sent_topic_vec


# -----------------------------------------------------------------------------------------------------
# Preparing Functions
# -----------------------------------------------------------------------------------------------------
def preProBuildWordVocab(sentence_iterator, word_count_threshold=5):
    # borrowed this function from NeuralTalk
    print('preprocessing word counts and creating vocab based on word count threshold %d' % (word_count_threshold, ))

    word_counts = {}
    nsents = 0

    for sent in sentence_iterator:
        nsents += 1
        tmp_sent = sent.lower().split(' ')
        if '' in tmp_sent:
            tmp_sent.remove('')

        for w in tmp_sent:
           word_counts[w] = word_counts.get(w, 0) + 1

    vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
    print('filtered words from %d to %d' % (len(word_counts), len(vocab)))

    ixtoword = {}
    ixtoword[0] = '<bos>'
    ixtoword[1] = '<eos>'
    ixtoword[2] = '<pad>'
    ixtoword[3] = '<unk>'

    wordtoix = {}
    wordtoix['<bos>'] = 0
    wordtoix['<eos>'] = 1
    wordtoix['<pad>'] = 2
    wordtoix['<unk>'] = 3

    for idx, w in enumerate(vocab):
        wordtoix[w] = idx + 4
        ixtoword[idx+4] = w

    word_counts['<eos>'] = nsents
    word_counts['<bos>'] = nsents
    word_counts['<pad>'] = nsents
    word_counts['<unk>'] = nsents

    bias_init_vector = np.array([1.0 * word_counts[ ixtoword[i] ] for i in ixtoword])
    bias_init_vector /= np.sum(bias_init_vector) # normalize to frequencies
    bias_init_vector = np.log(bias_init_vector)
    bias_init_vector -= np.max(bias_init_vector) # shift to nice numeric range

    return wordtoix, ixtoword, bias_init_vector


#######################################################################################################
# Parameters Setting
#######################################################################################################
batch_size = 5 # Being support batch_size
num_boxes = 50 # number of Detected regions in each image
feats_dim = 4096 # feature dimensions of each regions
project_dim = 1024 # project the features to one vector, which is 1024 dimensions

sentRNN_lstm_dim = 512 # the sentence LSTM hidden units
sentRNN_FC_dim = 1024 # the fully connected units
wordRNN_lstm_dim = 512 # the word LSTM hidden units
word_embed_dim = 1024 # the learned embedding vectors for the words

S_max = 6
N_max = 50
T_stop = 0.5

n_epochs = 10
learning_rate = 0.0001


#######################################################################################################
# Word vocubulary and captions preprocessing stage
#######################################################################################################
img2paragraph = pickle.load(open('./img2paragraph', 'rb'))
all_sentences = []
for key, paragraph in img2paragraph.items():
    for each_sent in paragraph[1]:
        each_sent.replace(',', ' ,')
        all_sentences.append(each_sent)
word2idx, idx2word, bias_init_vector = preProBuildWordVocab(all_sentences, word_count_threshold=2)
np.save('./idx2word_batch', idx2word)

img2paragraph_modify = {}
for img_name, img_paragraph in img2paragraph.items():
    img_paragraph_1 = img_paragraph[1]

    # img_paragraph_1 is a list
    # it may contain the element: '' or ' ', like this:
    # [["a man is walking"], ["the dog is running"], [""], [" "]]
    # so, we should remove them ' ' and '' element
    if '' in img_paragraph_1:
        img_paragraph_1.remove('')
    if ' ' in paragraph[1]:
        img_paragraph_1.remove(' ')

    # the number sents in each paragraph
    # if the sents is bigger than S_max,
    # we force the number of sents to be S_max
    img_num_sents = len(img_paragraph_1)
    if img_num_sents > S_max:
        img_num_sents = S_max

    # if a paragraph has 4 sentences
    # then the img_num_distribution will be like this:
    # [0, 0, 0, 1, 1, 1]
    img_num_distribution = np.zeros([S_max], dtype=np.int32)
    img_num_distribution[img_num_sents-1:] = 1

    # we multiply the number 2, because the <pad> is encoded into 2
    img_captions_matrix = np.ones([S_max, N_max+1], dtype=np.int32) * 2 # zeros([6, 50])
    for idx, img_sent in enumerate(img_paragraph_1):
        # the number of sentences is img_num_sents
        if idx == img_num_sents:
            break

        # because we treat the ',' as a word
        img_sent = img_sent.replace(',', ' ,')

        # Because I have preprocess the paragraph_v1.json file in VScode before,
        # and I delete all the 2, 3, 4...bankspaces
        # so, actually, the 'elif' code will never run
        if img_sent[0] == ' ' and img_sent[1] != ' ':
            img_sent = img_sent[1:]
        elif img_sent[0] == ' ' and img_sent[1] == ' ' and img_sent[2] != ' ':
            img_sent = img_sent[2:]

        # Be careful the last part in a sentence, like this:
        # '...world.'
        # '...world. '
        if img_sent[-1] == '.':
            img_sent = img_sent[0:-1]
        elif img_sent[-1] == ' ' and img_sent[-2] == '.':
            img_sent = img_sent[0:-2]

        # Last, we add the <bos> and the <eos> in each sentences
        img_sent = '<bos> ' + img_sent + ' <eos>'

        # translate each word in a sentence into the unique number in word2idx dict
        # when we meet the word which is not in the word2idx dict, we use the mark: <unk>
        for idy, word in enumerate(img_sent.lower().split(' ')):
            # because the biggest number of words in a sentence is N_max, here is 50
            if idy == N_max:
                break

            if word in word2idx:
                img_captions_matrix[idx, idy] = word2idx[word]
            else:
                img_captions_matrix[idx, idy] = word2idx['<unk>']

    # Pay attention, the value type 'img_name' here is NUMBER, I change it to STRING type
    img2paragraph_modify[str(img_name)] = [img_num_distribution, img_captions_matrix]

with open('./img2paragraph_modify_batch', 'wb') as f:
    pickle.dump(img2paragraph_modify, f)


#######################################################################################################
# Train, validation and testing stage
#######################################################################################################
def train():
    ##############################################################################
    # some preparing work
    ##############################################################################
    model_path = './models_batch/'
    train_feats_path = './im2p_train_output.h5'
    train_output_file = h5py.File(train_feats_path, 'r')
    train_feats = train_output_file.get('feats')
    train_imgs_full_path_lists = open('./imgs_train_path.txt').read().splitlines()
    train_imgs_names = map(lambda x: os.path.basename(x).split('.')[0], train_imgs_full_path_lists)


    # Model Initialization:
    # n_words, batch_size, num_boxes, feats_dim, project_dim, sentRNN_lstm_dim, sentRNN_FC_dim, wordRNN_lstm_dim, S_max, N_max
    model = RegionPooling_HierarchicalRNN(n_words = len(word2idx),
                                          batch_size = batch_size,
                                          num_boxes = num_boxes,
                                          feats_dim = feats_dim,
                                          project_dim = project_dim,
                                          sentRNN_lstm_dim = sentRNN_lstm_dim,
                                          sentRNN_FC_dim = sentRNN_FC_dim,
                                          wordRNN_lstm_dim = wordRNN_lstm_dim,
                                          S_max = S_max,
                                          N_max = N_max,
                                          word_embed_dim = word_embed_dim,
                                          bias_init_vector = bias_init_vector)

    tf_feats, tf_num_distribution, tf_captions_matrix, tf_captions_masks, tf_loss, tf_loss_sent, tf_loss_word = model.build_model()
    sess = tf.InteractiveSession()

    saver = tf.train.Saver(max_to_keep=500, write_version=1)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    tf.global_variables_initializer().run()

    # when you want to train the model from the front model
    #new_saver = tf.train.Saver(max_to_keep=500)
    #new_saver = tf.train.import_meta_graph('./models_batch/model-92.meta')
    #new_saver.restore(sess, tf.train.latest_checkpoint('./models_batch/'))

    all_vars = tf.trainable_variables()

    # open a loss file to record the loss value
    loss_fd = open('loss_batch.txt', 'w')
    img2idx = {}
    for idx, img in enumerate(train_imgs_names):
        img2idx[img] = idx

    # plt draw the loss curve
    # refer from: http://stackoverflow.com/questions/11874767/real-time-plotting-in-while-loop-with-matplotlib
    loss_to_draw = []

    for epoch in range(0, n_epochs):
        loss_to_draw_epoch = []
        # disorganize the order
        random.shuffle(train_imgs_names)

        for start, end in zip(range(0, len(train_imgs_names), batch_size),
                              range(batch_size, len(train_imgs_names), batch_size)):

            start_time = time.time()

            img_name = train_imgs_names[start:end]
            current_feats_index = map(lambda x: img2idx[x], img_name)
            current_feats = np.asarray( map(lambda x: train_feats[x], current_feats_index) )

            current_num_distribution = np.asarray( map(lambda x: img2paragraph_modify[x][0], img_name) )
            current_captions_matrix = np.asarray( map(lambda x: img2paragraph_modify[x][1], img_name) )

            current_captions_masks = np.zeros( (current_captions_matrix.shape[0], current_captions_matrix.shape[1], current_captions_matrix.shape[2]) )/idx2word_batch
            # find the non-zero element
            nonzeros = np.array( map(lambda each_matrix: np.array( map(lambda x: (x != 2).sum() + 1, each_matrix ) ), current_captions_matrix ) )
            for i in range(batch_size):
                for ind, row in enumerate(current_captions_masks[i]):
                    row[:(nonzeros[i, ind]-1)] = 1

            # shape of current_feats: batch_size x 50 x 4096
            # shape of current_num_distribution: batch_size x 6
            # shape of current_captions_matrix: batch_size x 6 x 50
            _, loss_val, loss_sent, loss_word= sess.run(
                                [train_op, tf_loss, tf_loss_sent, tf_loss_word],
                                feed_dict={
                                           tf_feats: current_feats,
                                           tf_num_distribution: current_num_distribution,
                                           tf_captions_matrix: current_captions_matrix,
                                           tf_captions_masks: current_captions_masks
                                })

            # append loss to list in a epoch
            loss_to_draw_epoch.append(loss_val)

            # running information
            print('idx: ', start, ' Epoch: ', epoch, ' loss: ', loss_val, ' loss_sent: ', loss_sent, ' loss_word: ', loss_word, ' Time cost: ', str((time.time() - start_time)))
            loss_fd.write('epoch ' + str(epoch) + ' loss ' + str(loss_val))

        # draw loss curve every epoch
        loss_to_draw.append(np.mean(loss_to_draw_epoch))
        plt_save_dir = './loss_imgs'
        plt_save_img_name = str(epoch) + '.png'
        plt.plot(range(len(loss_to_draw)), loss_to_draw, color='g')
        plt.grid(True)
        plt.savefig(os.path.join(plt_save_dir, plt_save_img_name))

        if np.mod(epoch, 10) == 0:
            print("Epoch ", epoch, " is done. Saving the model ...")
            saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
    loss_fd.close()


def test():
    
    start_time = time.time()
    # change the model path according to your environment
    model_path = './model-250'

    # It's very important to use Pandas to Series this idx2word dict
    # After this operation, we can use list to extract the word at the same time
    idx2word = pd.Series(np.load('./data/idx2word_batch.npy').tolist())

    test_feats_path = './data/im2p_test_output.h5'
    test_output_file = h5py.File(test_feats_path, 'r')
    test_feats = test_output_file.get('feats')

    test_imgs_full_path_lists = open('./densecap/imgs_test_path.txt').read().splitlines()
    test_imgs_names = map(lambda x: os.path.basename(x).split('.')[0], test_imgs_full_path_lists)
    
    # n_words, batch_size, num_boxes, feats_dim, project_dim, sentRNN_lstm_dim, sentRNN_FC_dim, wordRNN_lstm_dim, S_max, N_max
    test_model = RegionPooling_HierarchicalRNN(n_words = len(word2idx),
                                               batch_size = batch_size,
                                               num_boxes = num_boxes,
                                               feats_dim = feats_dim,
                                               project_dim = project_dim,
                                               sentRNN_lstm_dim = sentRNN_lstm_dim,
                                               sentRNN_FC_dim = sentRNN_FC_dim,
                                               wordRNN_lstm_dim = wordRNN_lstm_dim,
                                               S_max = S_max,
                                               N_max = N_max,
                                               word_embed_dim = word_embed_dim,
                                               bias_init_vector = bias_init_vector)
    

    tf_feats, tf_generated_paragraph, tf_pred_re, tf_sent_topic_vectors = test_model.generate_model()
    sess = tf.InteractiveSession()
    #print("Before there >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n\n\n")

    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    img2idx = {}
    for idx, img in enumerate(test_imgs_names):
        img2idx[img] = idx
    print("IM2idx:::::::::::",img2idx)

    test_fd = open('HRNN_results.txt', 'w')
    for idx, img_name in enumerate(test_imgs_names):
        print(idx, img_name)
        test_fd.write(img_name + '\n')

        each_paragraph = []
        current_paragraph = ""

        current_feats_index = img2idx[img_name]
        current_feats = test_feats[current_feats_index]
        current_feats = np.reshape(current_feats, [1, 50, 4096])

        generated_paragraph_indexes, pred, sent_topic_vectors = sess.run(
                                                                         [tf_generated_paragraph, tf_pred_re, tf_sent_topic_vectors],
                                                                         feed_dict={
                                                                             tf_feats: current_feats
                                                                         })

        #generated_paragraph = idx2word[generated_paragraph_indexes]
        for sent_index in generated_paragraph_indexes:
            each_sent = []
            for word_index in sent_index:
                each_sent.append(idx2word[word_index])
            each_paragraph.append(each_sent)

        for idx, each_sent in enumerate(each_paragraph):
            # if the current sentence is the end sentence of the paragraph
            # According to the probability distribution:
            # CONTINUE: [1, 0]
            # STOP    : [0, 1]
            # So, if the first item of pred is less than the T_stop
            # the generation process is break
            if pred[idx][0][0] <= T_stop:
                break
            current_sent = ''
            for each_word in each_sent:
                current_sent += each_word + ' '
            current_sent = current_sent.replace('<eos> ', '')
            current_sent = current_sent.replace('<pad> ', '')
            current_sent = current_sent + '.'
            current_sent = current_sent.replace(' .', '.')
            current_sent = current_sent.replace(' ,', ',')
            current_paragraph +=current_sent
            if idx != len(each_paragraph) - 1:
                current_paragraph += ' '

        test_fd.write(current_paragraph + '\n')
    test_fd.close()
    print("Time cost: " + str(time.time()-start_time))




preprocessing word counts and creating vocab based on word count threshold 2
filtered words from 18418 to 9900


In [4]:
    #start by parts
    model_path = './models_batch/'
    train_feats_path = './im2p_val_output.h5'
    
    train_output_file = h5py.File(train_feats_path, 'r')
    #print("Train feats path--------------------------------------------->>>>>>>>",train_output_file)
    train_feats = train_output_file.get('feats')
    #print("Train feats->>>>>>>>>>>>>>>>",train_feats)
    train_imgs_full_path_lists = open('./imgs_val_path.txt').read().splitlines()
    #print("paths>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>",train_imgs_full_path_lists)
    train_imgs_names = map(lambda x: os.path.basename(x).split('.')[0], train_imgs_full_path_lists)
    print("Val images names",list(train_imgs_names))


    # Model Initialization:
    # n_words, batch_size, num_boxes, feats_dim, project_dim, sentRNN_lstm_dim, sentRNN_FC_dim, wordRNN_lstm_dim, S_max, N_max
    model = RegionPooling_HierarchicalRNN(n_words = len(word2idx),
                                          batch_size = batch_size,
                                          num_boxes = num_boxes,
                                          feats_dim = feats_dim,
                                          project_dim = project_dim,
                                          sentRNN_lstm_dim = sentRNN_lstm_dim,
                                          sentRNN_FC_dim = sentRNN_FC_dim,
                                          wordRNN_lstm_dim = wordRNN_lstm_dim,
                                          S_max = S_max,
                                          N_max = N_max,
                                          word_embed_dim = word_embed_dim,
                                          bias_init_vector = bias_init_vector)

    tf_feats, tf_num_distribution, tf_captions_matrix, tf_captions_masks, tf_loss, tf_loss_sent, tf_loss_word = model.build_model()
    sess = tf.InteractiveSession()
    
    
    model_path = './models_batch/'
    train_feats_path = './im2p_val_output.h5'
    #print("THE SESSION",)
    
    train_output_file = h5py.File(train_feats_path, 'r')
    #print("Train feats path--------------------------------------------->>>>>>>>",train_output_file)
    train_feats = train_output_file.get('feats')
    #print("Train feats->>>>>>>>>>>>>>>>",train_feats)
    train_imgs_full_path_lists = open('./imgs_val_path.txt').read().splitlines()
    #print("paths>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>",train_imgs_full_path_lists)
    train_imgs_names = map(lambda x: os.path.basename(x).split('.')[0], train_imgs_full_path_lists)
    #print("Train images names",list(train_imgs_names))
    
    #print("Train images names",list(train_imgs_names))
    

Val images names ['2385757', '2389264', '2407325', '2337141', '2345813', '2386694', '2368581', '2397809', '185', '2387911', '2343682', '2409165', '2357129', '498337', '2326621', '2320205', '2412782', '2352466', '2414097', '2381526', '2390981', '2375715', '2405096', '2402848', '2346948', '2387882', '2352243', '2337581', '2330919', '2380444', '2377611', '2354637', '2401996', '2391375', '2415510', '2400079', '2366081', '2360527', '2365490', '2336229', '2367475', '2373691', '2376356', '2368952', '2369653', '2355012', '2400892', '2369971', '2414408', '2394007', '2415978', '2405444', '2401822', '2387651', '2350622', '2318603', '2383962', '2377677', '2380510', '2392746', '2412229', '2392926', '2362383', '2398991', '2367805', '2360264', '2400991', '2354133', '2374619', '2413514', '2387844', '2349764', '2403572', '2372008', '2388111', '2366871', '2385053', '2400907', '2379868', '2389754', '2389615', '2365263', '2414350', '2409171', '4025', '2352062', '2378864', '2367278', '2319025', '2335091', 

In [3]:
sess.close()

In [5]:
    #saver = tf.train.Saver(max_to_keep=500, write_version=1)
    print("Learning Rate and Loss++++++++++++",(learning_rate,tf_loss))
    with tf.variable_scope('optimizer',reuse= tf.AUTO_REUSE):
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    print("Adam Done")
    tf.global_variables_initializer().run()
    print("After run")

Learning Rate and Loss++++++++++++ (0.0001, <tf.Tensor 'add_1224:0' shape=() dtype=float32>)
Adam Done
After run


In [6]:
all_vars = tf.trainable_variables()
print("Done",list(all_vars))

Done [<tf.Variable 'Wemb:0' shape=(9904, 1024) dtype=float32_ref>, <tf.Variable 'regionPooling_W:0' shape=(4096, 1024) dtype=float32_ref>, <tf.Variable 'regionPooling_b:0' shape=(1024,) dtype=float32_ref>, <tf.Variable 'logistic_Theta_W:0' shape=(512, 2) dtype=float32_ref>, <tf.Variable 'logistic_Theta_b:0' shape=(2,) dtype=float32_ref>, <tf.Variable 'fc1_W:0' shape=(512, 1024) dtype=float32_ref>, <tf.Variable 'fc1_b:0' shape=(1024,) dtype=float32_ref>, <tf.Variable 'fc2_W:0' shape=(1024, 1024) dtype=float32_ref>, <tf.Variable 'fc2_b:0' shape=(1024,) dtype=float32_ref>, <tf.Variable 'embed_word_W:0' shape=(512, 9904) dtype=float32_ref>, <tf.Variable 'embed_word_b:0' shape=(9904,) dtype=float32_ref>, <tf.Variable 'sent_LSTM/basic_lstm_cell/kernel:0' shape=(1536, 2048) dtype=float32_ref>, <tf.Variable 'sent_LSTM/basic_lstm_cell/bias:0' shape=(2048,) dtype=float32_ref>, <tf.Variable 'word_LSTM/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0' shape=(1536, 2048) dtype=float32_ref>, <tf.Varia

In [7]:
    model_path = './models_batch/'
    train_feats_path = './im2p_val_output.h5'
    
    train_output_file = h5py.File(train_feats_path, 'r')
    #print("Train feats path--------------------------------------------->>>>>>>>",train_output_file)
    train_feats = train_output_file.get('feats')
    #print("Train feats->>>>>>>>>>>>>>>>",train_feats)
    train_imgs_full_path_lists = open('./imgs_val_path.txt').read().splitlines()
    #print("paths>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>",train_imgs_full_path_lists)
    train_imgs_names = map(lambda x: os.path.basename(x).split('.')[0], train_imgs_full_path_lists)
    #print("Train images names",list(train_imgs_names))
    
    print("Train images names",list(train_imgs_names))

Train images names ['2385757', '2389264', '2407325', '2337141', '2345813', '2386694', '2368581', '2397809', '185', '2387911', '2343682', '2409165', '2357129', '498337', '2326621', '2320205', '2412782', '2352466', '2414097', '2381526', '2390981', '2375715', '2405096', '2402848', '2346948', '2387882', '2352243', '2337581', '2330919', '2380444', '2377611', '2354637', '2401996', '2391375', '2415510', '2400079', '2366081', '2360527', '2365490', '2336229', '2367475', '2373691', '2376356', '2368952', '2369653', '2355012', '2400892', '2369971', '2414408', '2394007', '2415978', '2405444', '2401822', '2387651', '2350622', '2318603', '2383962', '2377677', '2380510', '2392746', '2412229', '2392926', '2362383', '2398991', '2367805', '2360264', '2400991', '2354133', '2374619', '2413514', '2387844', '2349764', '2403572', '2372008', '2388111', '2366871', '2385053', '2400907', '2379868', '2389754', '2389615', '2365263', '2414350', '2409171', '4025', '2352062', '2378864', '2367278', '2319025', '2335091'

In [8]:
    model_path = './models_batch/'
    train_feats_path = './im2p_val_output.h5'
    
    train_output_file = h5py.File(train_feats_path, 'r')
    #print("Train feats path--------------------------------------------->>>>>>>>",train_output_file)
    train_feats = train_output_file.get('feats')
    #print("Train feats->>>>>>>>>>>>>>>>",train_feats)
    train_imgs_full_path_lists = open('./imgs_val_path.txt').read().splitlines()
    #print("paths>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>",train_imgs_full_path_lists)
    train_imgs_names = list(map(lambda x: os.path.basename(x).split('.')[0], train_imgs_full_path_lists))
    #print("Train images names",list(train_imgs_names))
    
    #print("Train images names",list(train_imgs_names))
    
#     loss_fd = open('loss_batch.txt', 'w')
    img2idx = {}
    print("train_imgs_names-------",train_imgs_names)
    for idx, img in enumerate(train_imgs_names):
        img2idx[img] = idx
    print("IMG@IDX",img2idx)

    # plt draw the loss curve
    # refer from: http://stackoverflow.com/questions/11874767/real-time-plotting-in-while-loop-with-matplotlib
    loss_to_draw = []

train_imgs_names------- ['2385757', '2389264', '2407325', '2337141', '2345813', '2386694', '2368581', '2397809', '185', '2387911', '2343682', '2409165', '2357129', '498337', '2326621', '2320205', '2412782', '2352466', '2414097', '2381526', '2390981', '2375715', '2405096', '2402848', '2346948', '2387882', '2352243', '2337581', '2330919', '2380444', '2377611', '2354637', '2401996', '2391375', '2415510', '2400079', '2366081', '2360527', '2365490', '2336229', '2367475', '2373691', '2376356', '2368952', '2369653', '2355012', '2400892', '2369971', '2414408', '2394007', '2415978', '2405444', '2401822', '2387651', '2350622', '2318603', '2383962', '2377677', '2380510', '2392746', '2412229', '2392926', '2362383', '2398991', '2367805', '2360264', '2400991', '2354133', '2374619', '2413514', '2387844', '2349764', '2403572', '2372008', '2388111', '2366871', '2385053', '2400907', '2379868', '2389754', '2389615', '2365263', '2414350', '2409171', '4025', '2352062', '2378864', '2367278', '2319025', '233

In [10]:
#sess = tf.InteractiveSession()
#tf.reset_default_graph()
saver = tf.train.import_meta_graph('./models_batch/model-100.meta')
saver.restore(sess, tf.train.latest_checkpoint('./models_batch/'))
for epoch in range(101, n_epochs):
        loss_to_draw_epoch = []
        #saver = tf.train.import_meta_graph('./models_batch/model-0.meta')
        #saver.restore(sess, tf.train.latest_checkpoint('./models_batch/'))
        # disorganize the order
        #print("train images",train_imgs_names,type(train_imgs_names))
        #train_imgs_names=list(train_imgs_names)
        random.shuffle(train_imgs_names)

        for start, end in zip(range(0, len(train_imgs_names), batch_size),
                              range(batch_size, len(train_imgs_names), batch_size)):
            loss_fd = open('loss_batch.txt', 'a')
            start_time = time.time()
            print("Start and end",(start,end))
            img_name = train_imgs_names[start:end]
            current_feats_index = map(lambda x: img2idx[x], img_name)
            current_feats = np.asarray( list(map(lambda x: train_feats[x], current_feats_index) ))


            current_num_distribution = np.asarray(list(map(lambda x: img2paragraph_modify[x][0], img_name)))
            current_captions_matrix = np.asarray(list(map(lambda x: img2paragraph_modify[x][1], img_name)))

            #print("Blah Blah",current_captions_matrix,type(current_captions_matrix))
            
            
            #print("Current captions matrix >>>>>>>>>>>>>>>>>>",current_captions_matrix,type(current_captions_matrix))
            #print("kwiojwejiowe",(current_captions_matrix.shape)[0])
            
            current_captions_masks = np.zeros( (current_captions_matrix.shape[0], current_captions_matrix.shape[1], current_captions_matrix.shape[2]) )
            #done by me current_captions_masks = np.zeros( (current_captions_matrix.shape[0], current_captions_matrix.shape[1], current_captions_matrix.shape[2]) )/idx2word_batch
            # find the non-zero element
            #done by me nonzeros = np.asarray(map(lambda each_matrix: np.asarray( map(lambda x: (x != 2).sum() + 1, each_matrix ) ), current_captions_matrix ) )
            nonzeros = np.array(list(map(lambda each_matrix: np.array( list(map(lambda x: (x != 2).sum() + 1, each_matrix ) )), current_captions_matrix ) ))

            #print("NONZEROS",nonzeros,nonzeros.shape)
            for i in range(batch_size):
                for ind, row in enumerate(current_captions_masks[i]):
                    row[:(nonzeros[i, ind]-1)] = 1

            # shape of current_feats: batch_size x 50 x 4096
            # shape of current_num_distribution: batch_size x 6
            # shape of current_captions_matrix: batch_size x 6 x 50
            #print("Done w the loop")
            #print("train op, tf loss ,tf_loss_sent,tf_loss_word",[train_op, tf_loss, tf_loss_sent, tf_loss_word])
            _, loss_val, loss_sent, loss_word= sess.run(
                                [train_op, tf_loss, tf_loss_sent, tf_loss_word],
                                feed_dict={
                                           tf_feats: current_feats,
                                           tf_num_distribution: current_num_distribution,
                                           tf_captions_matrix: current_captions_matrix,
                                           tf_captions_masks: current_captions_masks
                                })

            # append loss to list in a epoch
            loss_to_draw_epoch.append(loss_val)

            # running information
            print('idx: ', start, ' Epoch: ', epoch, ' loss: ', loss_val, ' loss_sent: ', loss_sent, ' loss_word: ', loss_word, ' Time cost: ', str((time.time() - start_time)))
            
            loss_fd.write('start: '+ str(start) +' end: ' + str(end) +' epoch ' + str(epoch) + ' loss ' + str(loss_val) + '\n')
            loss_fd.close()

        # draw loss curve every epoch
        loss_to_draw.append(np.mean(loss_to_draw_epoch))
        plt_save_dir = './loss_imgs'
        plt_save_img_name = str(epoch) + '.png'
        plt.plot(range(len(loss_to_draw)), loss_to_draw, color='g')
        plt.grid(True)
        plt.savefig(os.path.join(plt_save_dir, plt_save_img_name))

        if np.mod(epoch, 1) == 0:
            print("Epoch ", epoch, " is done. Saving the model ...")
            saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
#loss_fd.close()

sess.close()

TypeError: Cannot interpret feed_dict key as Tensor: The name 'save/Const:0' refers to a Tensor which does not exist. The operation, 'save/Const', does not exist in the graph.

In [1]:
#!/usr/bin/env python
# coding=utf-8

__author__ = "Xinpeng.Chen"

import os
import sys
import time
import matplotlib.pyplot as plt
import pickle
import numpy as np
import pandas as pd
import random

import h5py
import ipdb

import tensorflow as tf


# ------------------------------------------------------------------------------------------------------
# Initialization class
#  1. Pooling the visual features into a single dense feature
#  2. Then, build sentence LSTM, word LSTM
# ------------------------------------------------------------------------------------------------------
class RegionPooling_HierarchicalRNN():
    def __init__(self, n_words,
                       batch_size,
                       num_boxes,
                       feats_dim,
                       project_dim,
                       sentRNN_lstm_dim,
                       sentRNN_FC_dim,
                       wordRNN_lstm_dim,
                       S_max,
                       N_max,
                       word_embed_dim,
                       bias_init_vector=None):

        self.n_words = n_words
        self.batch_size = batch_size
        self.num_boxes = num_boxes # 50
        self.feats_dim = feats_dim # 4096
        self.project_dim = project_dim # 1024
        self.S_max = S_max # 6
        self.N_max = N_max # 50
        self.word_embed_dim = word_embed_dim # 1024

        self.sentRNN_lstm_dim = sentRNN_lstm_dim # 512 hidden size
        self.sentRNN_FC_dim = sentRNN_FC_dim # 1024 in fully connected layer
        self.wordRNN_lstm_dim = wordRNN_lstm_dim # 512 hidden size
	


        # word embedding, parameters of embedding
        # embedding shape: n_words x wordRNN_lstm_dim
        with tf.device('/cpu:0'):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, word_embed_dim], -0.1, 0.1), name='Wemb')
        print("Wemb output>>>>>>>>>>>>",self.Wemb,tf.shape(self.Wemb))
        #self.bemb = tf.Variable(tf.zeros([word_embed_dim]), name='bemb')

        # regionPooling_W shape: 4096 x 1024
        # regionPooling_b shape: 1024
        self.regionPooling_W = tf.Variable(tf.random_uniform([feats_dim, project_dim], -0.1, 0.1), name='regionPooling_W')
        self.regionPooling_b = tf.Variable(tf.zeros([project_dim]), name='regionPooling_b')

        # sentence LSTM
        self.sent_LSTM = tf.nn.rnn_cell.BasicLSTMCell(sentRNN_lstm_dim, state_is_tuple=True)

        # logistic classifier
        self.logistic_Theta_W = tf.Variable(tf.random_uniform([sentRNN_lstm_dim, 2], -0.1, 0.1), name='logistic_Theta_W')
        self.logistic_Theta_b = tf.Variable(tf.zeros(2), name='logistic_Theta_b')

        # fc1_W: 512 x 1024, fc1_b: 1024
        # fc2_W: 1024 x 1024, fc2_b: 1024
        self.fc1_W = tf.Variable(tf.random_uniform([sentRNN_lstm_dim, sentRNN_FC_dim], -0.1, 0.1), name='fc1_W')
        self.fc1_b = tf.Variable(tf.zeros(sentRNN_FC_dim), name='fc1_b')
        self.fc2_W = tf.Variable(tf.random_uniform([sentRNN_FC_dim, 1024], -0.1, 0.1), name='fc2_W')
        self.fc2_b = tf.Variable(tf.zeros(1024), name='fc2_b')
        def get_a_cell(lstm_size):
          lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
          return lstm
        # word LSTM
        self.word_LSTM = tf.nn.rnn_cell.BasicLSTMCell(wordRNN_lstm_dim, state_is_tuple=True)
        #self.word_LSTM = tf.nn.rnn_cell.MultiRNNCell([self.word_LSTM] * 2, state_is_tuple=True)
        self.word_LSTM = tf.nn.rnn_cell.MultiRNNCell([get_a_cell(wordRNN_lstm_dim) for i in range(2)], state_is_tuple=True)
        #self.word_LSTM2 = tf.nn.rnn_cell.BasicLSTMCell(wordRNN_lstm_dim, state_is_tuple=True)


        self.embed_word_W = tf.Variable(tf.random_uniform([wordRNN_lstm_dim, n_words], -0.1,0.1), name='embed_word_W')
        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
        else:
            self.embed_word_b = tf.Variable(tf.zeros([n_words]), name='embed_word_b')

    def build_model(self):
        # receive the feats in the current image
        # it's shape is 10 x 50 x 4096
        # tmp_feats: 500 x 4096
        feats = tf.placeholder(tf.float32, [self.batch_size, self.num_boxes, self.feats_dim])
        tmp_feats = tf.reshape(feats, [-1, self.feats_dim])

        # project_vec_all: 500 x 4096 * 4096 x 1024 --> 500 x 1024
        # project_vec: 10 x 1024
        project_vec_all = tf.matmul(tmp_feats, self.regionPooling_W) + self.regionPooling_b
        project_vec_all = tf.reshape(project_vec_all, [self.batch_size, 50, self.project_dim])
        project_vec = tf.reduce_max(project_vec_all, reduction_indices=1)

        # receive the [continue:0, stop:1] lists
        # example: [0, 0, 0, 0, 1, 1], it means this paragraph has five sentences
        num_distribution = tf.placeholder(tf.int32, [self.batch_size, self.S_max])

        # receive the ground truth words, which has been changed to idx use word2idx function
        captions = tf.placeholder(tf.int32, [self.batch_size, self.S_max, self.N_max+1])
        #print("Captions:>>>>>>>>>>>>>>>>>>>>>>>",captions)
        captions_masks = tf.placeholder(tf.float32, [self.batch_size, self.S_max, self.N_max+1])

        # ---------------------------------------------------------------------------------------------------------------------
        # The method which initialize the state, is refered from below sites:
        # 1. http://stackoverflow.com/questions/38241410/tensorflow-remember-lstm-state-for-next-batch-stateful-lstm/38417699
        # 2. https://www.tensorflow.org/api_docs/python/rnn_cell/classes_storing_split_rnncell_state#LSTMStateTuple
        # 3. https://medium.com/@erikhallstrm/using-the-tensorflow-lstm-api-3-7-5f2b97ca6b73#.u4w9z6h0h
        # ---------------------------------------------------------------------------------------------------------------------
        sent_state = self.sent_LSTM.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #word_state = self.word_LSTM.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #word_state1 = self.word_LSTM1.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #word_state2 = self.word_LSTM2.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #sent_state = tf.zeros([self.batch_size, self.sent_LSTM1.state_size])
        #word_state1 = tf.zeros([self.batch_size, self.word_LSTM1.state_size])
        #word_state2 = tf.zeros([self.batch_size, self.word_LSTM2.state_size])

        probs = []
        loss = 0.0
        loss_sent = 0.0
        loss_word = 0.0
        lambda_sent = 5.0
        lambda_word = 1.0

        print('Start build model:')
        #----------------------------------------------------------------------------------------------
        # Hierarchical RNN: sentence RNN and words RNN
        # The word RNN has the max number, N_max = 50, the number in the papar is 50
        #----------------------------------------------------------------------------------------------
        for i in range(0, self.S_max):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope('sent_LSTM',reuse=tf.AUTO_REUSE):
                sent_output, sent_state = self.sent_LSTM(project_vec, sent_state)

            with tf.name_scope('fc1'):
                hidden1 = tf.nn.relu( tf.matmul(sent_output, self.fc1_W) + self.fc1_b )
            with tf.name_scope('fc2'):
                sent_topic_vec = tf.nn.relu( tf.matmul(hidden1, self.fc2_W) + self.fc2_b )

            # sent_state is a tuple, sent_state = (c, h)
            # 'c': shape=(1, 512) dtype=float32, 'h': shape=(1, 512) dtype=float32
            # The loss here, I refer from the web which is very helpful for me:
            # 1. http://stackoverflow.com/questions/34240703/difference-between-tensorflow-tf-nn-softmax-and-tf-nn-softmax-cross-entropy-with
            # 2. http://stackoverflow.com/questions/35277898/tensorflow-for-binary-classification
            # 3. http://stackoverflow.com/questions/35226198/is-this-one-hot-encoding-in-tensorflow-fast-or-flawed-for-any-reason
            # 4. http://stackoverflow.com/questions/35198528/reshape-y-train-for-binary-text-classification-in-tensorflow
            sentRNN_logistic_mu = tf.nn.xw_plus_b( sent_output, self.logistic_Theta_W, self.logistic_Theta_b )
            sentRNN_label = tf.stack([ 1 - num_distribution[:, i], num_distribution[:, i] ])
            sentRNN_label = tf.transpose(sentRNN_label)
            sentRNN_loss = tf.nn.softmax_cross_entropy_with_logits(logits=sentRNN_logistic_mu, labels=sentRNN_label)
            sentRNN_loss = tf.reduce_sum(sentRNN_loss)/self.batch_size
            loss += sentRNN_loss * lambda_sent
            loss_sent += sentRNN_loss

            # the begining input of word_LSTM is topic vector, and DON'T compute the loss
            # This is follow the paper: Show and Tell
            #word_state = self.word_LSTM.zero_state(batch_size=self.batch_size, dtype=tf.float32)
            #with tf.variable_scope('word_LSTM'):
            #    word_output, word_state = self.word_LSTM(sent_topic_vec)
            topic = tf.nn.rnn_cell.LSTMStateTuple(sent_topic_vec[:, 0:512], sent_topic_vec[:, 512:])
            word_state = (topic, topic)
            for j in range(0, self.N_max):
                if j > 0:
                    tf.get_variable_scope().reuse_variables()

                with tf.device('/cpu:0'):
                    current_embed = tf.nn.embedding_lookup(self.Wemb, captions[:, i, j])

                with tf.variable_scope('word_LSTM',reuse=tf.AUTO_REUSE):
                    word_output, word_state = self.word_LSTM(current_embed, word_state)

                # How to make one-hot encoder, I refer from this excellent web:
                # http://stackoverflow.com/questions/33681517/tensorflow-one-hot-encoder
                labels = tf.reshape(captions[:, i, j+1], [-1, 1])
                #print("Labels and its shape +++++++++++++++",labels,tf.shape(labels))
                indices = tf.reshape(tf.range(0, self.batch_size, 1), [-1, 1])
                #print("Indices and its shape +++++++++++++++",indices,tf.shape(indices))
                concated = tf.concat([indices, labels],1)
                #print("Concated+++++++++++++++++++",concated)
                print("Success")
                onehot_labels = tf.sparse_to_dense(concated, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)

                # At each timestep the hidden state of the last LSTM layer is used to predict a distribution
                # over the words in the vocbulary
                logit_words = tf.nn.xw_plus_b(word_output[:], self.embed_word_W, self.embed_word_b)
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit_words, labels=onehot_labels)
                cross_entropy = cross_entropy * captions_masks[:, i, j]
                loss_wordRNN = tf.reduce_sum(cross_entropy) / self.batch_size
                loss += loss_wordRNN * lambda_word
                loss_word += loss_wordRNN

        return feats, num_distribution, captions, captions_masks, loss, loss_sent, loss_word

    def generate_model(self):
        # feats: 1 x 50 x 4096
        feats = tf.placeholder(tf.float32, [1, self.num_boxes, self.feats_dim])
        # tmp_feats: 50 x 4096
        tmp_feats = tf.reshape(feats, [-1, self.feats_dim])

        # project_vec_all: 50 x 4096 * 4096 x 1024 + 1024 --> 50 x 1024
        project_vec_all = tf.matmul(tmp_feats, self.regionPooling_W) + self.regionPooling_b
        project_vec_all = tf.reshape(project_vec_all, [1, 50, self.project_dim])
        project_vec = tf.reduce_max(project_vec_all, reduction_indices=1)

        # initialize the sent_LSTM state
        sent_state = self.sent_LSTM.zero_state(batch_size=1, dtype=tf.float32)

        # save the generated paragraph to list, here I named generated_sents
        generated_paragraph = []

        # pred
        pred_re = []

        # T_stop: run the sentence RNN forward until the stopping probability p_i (STOP) exceeds a threshold T_stop
        T_stop = tf.constant(0.5)

        # Start build the generation model
        print('Start build the generation model: ')

        # sentence RNN
        #word_state = self.word_LSTM.zero_state(batch_size=1, dtype=tf.float32)
        #with tf.variable_scope('word_LSTM'):
        #    word_output, word_state = self.word_LSTM(sent_topic_vec, word_state)
        for i in range(0, self.S_max):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            # sent_state:
            # LSTMStateTuple(c=<tf.Tensor 'sent_LSTM/BasicLSTMCell/add_2:0' shape=(1, 512) dtype=float32>,
            #                h=<tf.Tensor 'sent_LSTM/BasicLSTMCell/mul_2:0' shape=(1, 512) dtype=float32>)
            with tf.variable_scope('sent_LSTM',reuse=tf.AUTO_REUSE):
                sent_output, sent_state = self.sent_LSTM(project_vec, sent_state)

            # self.fc1_W: 512 x 1024, self.fc1_b: 1024
            # hidden1: 1 x 1024
            # sent_topic_vec: 1 x 1024
            with tf.name_scope('fc1'):
                hidden1 = tf.nn.relu( tf.matmul(sent_output, self.fc1_W) + self.fc1_b )
            with tf.name_scope('fc2'):
                sent_topic_vec = tf.nn.relu( tf.matmul(hidden1, self.fc2_W) + self.fc2_b )

            sentRNN_logistic_mu = tf.nn.xw_plus_b(sent_output, self.logistic_Theta_W, self.logistic_Theta_b)
            pred = tf.nn.softmax(sentRNN_logistic_mu)
            pred_re.append(pred)

            # save the generated sentence to list, named generated_sent
            generated_sent = []

            # initialize the word LSTM state
            #word_state = self.word_LSTM.zero_state(batch_size=1, dtype=tf.float32)
            #with tf.variable_scope('word_LSTM'):
            #    word_output, word_state = self.word_LSTM(sent_topic_vec, word_state)
            topic = tf.nn.rnn_cell.LSTMStateTuple(sent_topic_vec[:, 0:512], sent_topic_vec[:, 512:])
            word_state = (topic, topic)
            # word RNN, unrolled to N_max time steps
            for j in range(0, self.N_max):
                if j > 0:
                    tf.get_variable_scope().reuse_variables()

                if j == 0:
                    with tf.device('/cpu:0'):
                        # get word embedding of BOS (index = 0)
                        current_embed = tf.nn.embedding_lookup(self.Wemb, tf.zeros([1], dtype=tf.int64))

                with tf.variable_scope('word_LSTM',reuse=tf.AUTO_REUSE):
                    word_output, word_state = self.word_LSTM(current_embed, word_state)

                # word_state:
                # (
                #     LSTMStateTuple(c=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell0/BasicLSTMCell/add_2:0' shape=(1, 512) dtype=float32>,
                #                    h=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell0/BasicLSTMCell/mul_2:0' shape=(1, 512) dtype=float32>),
                #     LSTMStateTuple(c=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell1/BasicLSTMCell/add_2:0' shape=(1, 512) dtype=float32>,
                #                    h=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell1/BasicLSTMCell/mul_2:0' shape=(1, 512) dtype=float32>)
                # )
                logit_words = tf.nn.xw_plus_b(word_output, self.embed_word_W, self.embed_word_b)
                max_prob_index = tf.argmax(logit_words, 1)[0]
                generated_sent.append(max_prob_index)

                with tf.device('/cpu:0'):
                    current_embed = tf.nn.embedding_lookup(self.Wemb, max_prob_index)
                    current_embed = tf.expand_dims(current_embed, 0)

            generated_paragraph.append(generated_sent)

        return feats, generated_paragraph, pred_re, sent_topic_vec


# -----------------------------------------------------------------------------------------------------
# Preparing Functions
# -----------------------------------------------------------------------------------------------------
def preProBuildWordVocab(sentence_iterator, word_count_threshold=5):
    # borrowed this function from NeuralTalk
    print('preprocessing word counts and creating vocab based on word count threshold %d' % (word_count_threshold, ))

    word_counts = {}
    nsents = 0

    for sent in sentence_iterator:
        nsents += 1
        tmp_sent = sent.lower().split(' ')
        if '' in tmp_sent:
            tmp_sent.remove('')

        for w in tmp_sent:
           word_counts[w] = word_counts.get(w, 0) + 1

    vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
    print('filtered words from %d to %d' % (len(word_counts), len(vocab)))

    ixtoword = {}
    ixtoword[0] = '<bos>'
    ixtoword[1] = '<eos>'
    ixtoword[2] = '<pad>'
    ixtoword[3] = '<unk>'

    wordtoix = {}
    wordtoix['<bos>'] = 0
    wordtoix['<eos>'] = 1
    wordtoix['<pad>'] = 2
    wordtoix['<unk>'] = 3

    for idx, w in enumerate(vocab):
        wordtoix[w] = idx + 4
        ixtoword[idx+4] = w

    word_counts['<eos>'] = nsents
    word_counts['<bos>'] = nsents
    word_counts['<pad>'] = nsents
    word_counts['<unk>'] = nsents

    bias_init_vector = np.array([1.0 * word_counts[ ixtoword[i] ] for i in ixtoword])
    bias_init_vector /= np.sum(bias_init_vector) # normalize to frequencies
    bias_init_vector = np.log(bias_init_vector)
    bias_init_vector -= np.max(bias_init_vector) # shift to nice numeric range

    return wordtoix, ixtoword, bias_init_vector


#######################################################################################################
# Parameters Setting
#######################################################################################################
batch_size = 5 # Being support batch_size
num_boxes = 50 # number of Detected regions in each image
feats_dim = 4096 # feature dimensions of each regions
project_dim = 1024 # project the features to one vector, which is 1024 dimensions

sentRNN_lstm_dim = 512 # the sentence LSTM hidden units
sentRNN_FC_dim = 1024 # the fully connected units
wordRNN_lstm_dim = 512 # the word LSTM hidden units
word_embed_dim = 1024 # the learned embedding vectors for the words

S_max = 6
N_max = 50
T_stop = 0.5

n_epochs = 500
learning_rate = 0.0001


#######################################################################################################
# Word vocubulary and captions preprocessing stage
#######################################################################################################
img2paragraph = pickle.load(open('./img2paragraph', 'rb'))
all_sentences = []
for key, paragraph in img2paragraph.items():
    for each_sent in paragraph[1]:
        each_sent.replace(',', ' ,')
        all_sentences.append(each_sent)
word2idx, idx2word, bias_init_vector = preProBuildWordVocab(all_sentences, word_count_threshold=2)
np.save('./idx2word_batch', idx2word)

img2paragraph_modify = {}
for img_name, img_paragraph in img2paragraph.items():
    img_paragraph_1 = img_paragraph[1]

    # img_paragraph_1 is a list
    # it may contain the element: '' or ' ', like this:
    # [["a man is walking"], ["the dog is running"], [""], [" "]]
    # so, we should remove them ' ' and '' element
    if '' in img_paragraph_1:
        img_paragraph_1.remove('')
    if ' ' in paragraph[1]:
        img_paragraph_1.remove(' ')

    # the number sents in each paragraph
    # if the sents is bigger than S_max,
    # we force the number of sents to be S_max
    img_num_sents = len(img_paragraph_1)
    if img_num_sents > S_max:
        img_num_sents = S_max

    # if a paragraph has 4 sentences
    # then the img_num_distribution will be like this:
    # [0, 0, 0, 1, 1, 1]
    img_num_distribution = np.zeros([S_max], dtype=np.int32)
    img_num_distribution[img_num_sents-1:] = 1

    # we multiply the number 2, because the <pad> is encoded into 2
    img_captions_matrix = np.ones([S_max, N_max+1], dtype=np.int32) * 2 # zeros([6, 50])
    for idx, img_sent in enumerate(img_paragraph_1):
        # the number of sentences is img_num_sents
        if idx == img_num_sents:
            break

        # because we treat the ',' as a word
        img_sent = img_sent.replace(',', ' ,')

        # Because I have preprocess the paragraph_v1.json file in VScode before,
        # and I delete all the 2, 3, 4...bankspaces
        # so, actually, the 'elif' code will never run
        if img_sent[0] == ' ' and img_sent[1] != ' ':
            img_sent = img_sent[1:]
        elif img_sent[0] == ' ' and img_sent[1] == ' ' and img_sent[2] != ' ':
            img_sent = img_sent[2:]

        # Be careful the last part in a sentence, like this:
        # '...world.'
        # '...world. '
        if img_sent[-1] == '.':
            img_sent = img_sent[0:-1]
        elif img_sent[-1] == ' ' and img_sent[-2] == '.':
            img_sent = img_sent[0:-2]

        # Last, we add the <bos> and the <eos> in each sentences
        img_sent = '<bos> ' + img_sent + ' <eos>'

        # translate each word in a sentence into the unique number in word2idx dict
        # when we meet the word which is not in the word2idx dict, we use the mark: <unk>
        for idy, word in enumerate(img_sent.lower().split(' ')):
            # because the biggest number of words in a sentence is N_max, here is 50
            if idy == N_max:
                break

            if word in word2idx:
                img_captions_matrix[idx, idy] = word2idx[word]
            else:
                img_captions_matrix[idx, idy] = word2idx['<unk>']

    # Pay attention, the value type 'img_name' here is NUMBER, I change it to STRING type
    img2paragraph_modify[str(img_name)] = [img_num_distribution, img_captions_matrix]

with open('./img2paragraph_modify_batch', 'wb') as f:
    pickle.dump(img2paragraph_modify, f)


#######################################################################################################
# Train, validation and testing stage
#######################################################################################################
def train():
    ##############################################################################
    # some preparing work
    ##############################################################################
    model_path = './models_batch/'
    train_feats_path = './im2p_val_output.h5'
    train_output_file = h5py.File(train_feats_path, 'r')
    train_feats = train_output_file.get('feats')
    train_imgs_full_path_lists = open('./imgs_val_path.txt').read().splitlines()
    train_imgs_names = list(map(lambda x: os.path.basename(x).split('.')[0], train_imgs_full_path_lists))
    print("Train Images Names",train_imgs_names)

    # Model Initialization:
    # n_words, batch_size, num_boxes, feats_dim, project_dim, sentRNN_lstm_dim, sentRNN_FC_dim, wordRNN_lstm_dim, S_max, N_max
    model = RegionPooling_HierarchicalRNN(n_words = len(word2idx),
                                          batch_size = batch_size,
                                          num_boxes = num_boxes,
                                          feats_dim = feats_dim,
                                          project_dim = project_dim,
                                          sentRNN_lstm_dim = sentRNN_lstm_dim,
                                          sentRNN_FC_dim = sentRNN_FC_dim,
                                          wordRNN_lstm_dim = wordRNN_lstm_dim,
                                          S_max = S_max,
                                          N_max = N_max,
                                          word_embed_dim = word_embed_dim,
                                          bias_init_vector = bias_init_vector)

    tf_feats, tf_num_distribution, tf_captions_matrix, tf_captions_masks, tf_loss, tf_loss_sent, tf_loss_word = model.build_model()
    sess = tf.Session()

    saver = tf.train.Saver(max_to_keep=500, write_version=1)
    with tf.variable_scope('optimizer',reuse= tf.AUTO_REUSE):
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    tf.global_variables_initializer().run(session=sess)
    print("Done w session")

    # when you want to train the model from the front model
    #new_saver = tf.train.Saver(max_to_keep=500)
    #new_saver = tf.train.import_meta_graph('./models_batch/model-92.meta')
    #new_saver.restore(sess, tf.train.latest_checkpoint('./models_batch/'))

    all_vars = tf.trainable_variables()

    # open a loss file to record the loss value
    #loss_fd = open('loss_batch.txt', 'a')
    img2idx = {}
    for idx, img in enumerate(train_imgs_names):
        img2idx[img] = idx
    print("Img2idx",img2idx)

    # plt draw the loss curve
    # refer from: http://stackoverflow.com/questions/11874767/real-time-plotting-in-while-loop-with-matplotlib
    loss_to_draw = []
    print("Before epoch loop->>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
    saver = tf.train.import_meta_graph('./models_batch/model-1.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./models_batch/'))
    for epoch in range(2, n_epochs):
        loss_to_draw_epoch = []
        # disorganize the order
        random.shuffle(train_imgs_names)

        for start, end in zip(range(0, len(train_imgs_names), batch_size),
                              range(batch_size, len(train_imgs_names), batch_size)):
            loss_fd = open('loss_batch.txt', 'a')

            start_time = time.time()
            print("Start and end",(start,end))
            img_name = train_imgs_names[start:end]
            current_feats_index = map(lambda x: img2idx[x], img_name)
            current_feats = np.asarray( list(map(lambda x: train_feats[x], current_feats_index) ))

            current_num_distribution = np.asarray( list(map(lambda x: img2paragraph_modify[x][0], img_name) ))
            current_captions_matrix = np.asarray( list(map(lambda x: img2paragraph_modify[x][1], img_name) ))

            current_captions_masks = np.zeros( (current_captions_matrix.shape[0], current_captions_matrix.shape[1], current_captions_matrix.shape[2]) )
            # find the non-zero element
            nonzeros = np.array( list(map(lambda each_matrix: np.array( list(map(lambda x: (x != 2).sum() + 1, each_matrix ) )), current_captions_matrix ) ))
            for i in range(batch_size):
                for ind, row in enumerate(current_captions_masks[i]):
                    row[:(nonzeros[i, ind]-1)] = 1

            # shape of current_feats: batch_size x 50 x 4096
            # shape of current_num_distribution: batch_size x 6
            # shape of current_captions_matrix: batch_size x 6 x 50
            _, loss_val, loss_sent, loss_word= sess.run(
                                [train_op, tf_loss, tf_loss_sent, tf_loss_word],
                                feed_dict={
                                           tf_feats: current_feats,
                                           tf_num_distribution: current_num_distribution,
                                           tf_captions_matrix: current_captions_matrix,
                                           tf_captions_masks: current_captions_masks
                                })

            # append loss to list in a epoch
            loss_to_draw_epoch.append(loss_val)

            # running information
            print('idx: ', start, ' Epoch: ', epoch, ' loss: ', loss_val, ' loss_sent: ', loss_sent, ' loss_word: ', loss_word, ' Time cost: ', str((time.time() - start_time)))
            loss_fd.write('start: '+ str(start) +' end: ' + str(end) +' epoch ' + str(epoch) + ' loss ' + str(loss_val) + '\n')
            loss_fd.close()
        # draw loss curve every epoch
        loss_to_draw.append(np.mean(loss_to_draw_epoch))
        plt_save_dir = './loss_imgs'
        plt_save_img_name = str(epoch) + '.png'
        plt.plot(range(len(loss_to_draw)), loss_to_draw, color='g')
        plt.grid(True)
        plt.savefig(os.path.join(plt_save_dir, plt_save_img_name))

        if np.mod(epoch, 1) == 0:
            print("Epoch ", epoch, " is done. Saving the model ...")
            saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
    #loss_fd.close()


def test():
    
    start_time = time.time()
    # change the model path according to your environment
    model_path = './model-250'

    # It's very important to use Pandas to Series this idx2word dict
    # After this operation, we can use list to extract the word at the same time
    idx2word = pd.Series(np.load('./data/idx2word_batch.npy').tolist())

    test_feats_path = './data/im2p_test_output.h5'
    test_output_file = h5py.File(test_feats_path, 'r')
    test_feats = test_output_file.get('feats')

    test_imgs_full_path_lists = open('./densecap/imgs_test_path.txt').read().splitlines()
    test_imgs_names = map(lambda x: os.path.basename(x).split('.')[0], test_imgs_full_path_lists)
    
    # n_words, batch_size, num_boxes, feats_dim, project_dim, sentRNN_lstm_dim, sentRNN_FC_dim, wordRNN_lstm_dim, S_max, N_max
    test_model = RegionPooling_HierarchicalRNN(n_words = len(word2idx),
                                               batch_size = batch_size,
                                               num_boxes = num_boxes,
                                               feats_dim = feats_dim,
                                               project_dim = project_dim,
                                               sentRNN_lstm_dim = sentRNN_lstm_dim,
                                               sentRNN_FC_dim = sentRNN_FC_dim,
                                               wordRNN_lstm_dim = wordRNN_lstm_dim,
                                               S_max = S_max,
                                               N_max = N_max,
                                               word_embed_dim = word_embed_dim,
                                               bias_init_vector = bias_init_vector)
    

    tf_feats, tf_generated_paragraph, tf_pred_re, tf_sent_topic_vectors = test_model.generate_model()
    sess = tf.InteractiveSession()
    #print("Before there >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n\n\n")

    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    img2idx = {}
    for idx, img in enumerate(test_imgs_names):
        img2idx[img] = idx
    print("IM2idx:::::::::::",img2idx)

    test_fd = open('HRNN_results.txt', 'w')
    for idx, img_name in enumerate(test_imgs_names):
        print(idx, img_name)
        test_fd.write(img_name + '\n')

        each_paragraph = []
        current_paragraph = ""

        current_feats_index = img2idx[img_name]
        current_feats = test_feats[current_feats_index]
        current_feats = np.reshape(current_feats, [1, 50, 4096])

        generated_paragraph_indexes, pred, sent_topic_vectors = sess.run(
                                                                         [tf_generated_paragraph, tf_pred_re, tf_sent_topic_vectors],
                                                                         feed_dict={
                                                                             tf_feats: current_feats
                                                                         })

        #generated_paragraph = idx2word[generated_paragraph_indexes]
        for sent_index in generated_paragraph_indexes:
            each_sent = []
            for word_index in sent_index:
                each_sent.append(idx2word[word_index])
            each_paragraph.append(each_sent)

        for idx, each_sent in enumerate(each_paragraph):
            # if the current sentence is the end sentence of the paragraph
            # According to the probability distribution:
            # CONTINUE: [1, 0]
            # STOP    : [0, 1]
            # So, if the first item of pred is less than the T_stop
            # the generation process is break
            if pred[idx][0][0] <= T_stop:
                break
            current_sent = ''
            for each_word in each_sent:
                current_sent += each_word + ' '
            current_sent = current_sent.replace('<eos> ', '')
            current_sent = current_sent.replace('<pad> ', '')
            current_sent = current_sent + '.'
            current_sent = current_sent.replace(' .', '.')
            current_sent = current_sent.replace(' ,', ',')
            current_paragraph +=current_sent
            if idx != len(each_paragraph) - 1:
                current_paragraph += ' '

        test_fd.write(current_paragraph + '\n')
    test_fd.close()
    print("Time cost: " + str(time.time()-start_time))

train()


preprocessing word counts and creating vocab based on word count threshold 2
filtered words from 18418 to 9900
Train Images Names ['2385757', '2389264', '2407325', '2337141', '2345813', '2386694', '2368581', '2397809', '185', '2387911', '2343682', '2409165', '2357129', '498337', '2326621', '2320205', '2412782', '2352466', '2414097', '2381526', '2390981', '2375715', '2405096', '2402848', '2346948', '2387882', '2352243', '2337581', '2330919', '2380444', '2377611', '2354637', '2401996', '2391375', '2415510', '2400079', '2366081', '2360527', '2365490', '2336229', '2367475', '2373691', '2376356', '2368952', '2369653', '2355012', '2400892', '2369971', '2414408', '2394007', '2415978', '2405444', '2401822', '2387651', '2350622', '2318603', '2383962', '2377677', '2380510', '2392746', '2412229', '2392926', '2362383', '2398991', '2367805', '2360264', '2400991', '2354133', '2374619', '2413514', '2387844', '2349764', '2403572', '2372008', '2388111', '2366871', '2385053', '2400907', '2379868', '2389

W0711 14:30:03.287303 140399233443584 deprecation.py:323] From <ipython-input-1-ddcb88721579>:68: BasicLSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
W0711 14:30:03.311513 140399233443584 deprecation.py:323] From <ipython-input-1-ddcb88721579>:86: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
W0711 14:30:03.426924 140399233443584 deprecation.py:506] From /home/student/anaconda3/envs/CPU/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future versio

Wemb output>>>>>>>>>>>> <tf.Variable 'Wemb:0' shape=(9904, 1024) dtype=float32_ref> Tensor("Shape:0", shape=(2,), dtype=int32)
Start build model:


W0711 14:30:03.778566 140399233443584 deprecation.py:323] From <ipython-input-1-ddcb88721579>:166: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

W0711 14:30:04.040371 140399233443584 deprecation.py:323] From <ipython-input-1-ddcb88721579>:197: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.


Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success


W0711 14:32:38.357663 140399233443584 deprecation.py:323] From /home/student/anaconda3/envs/CPU/lib/python3.7/site-packages/tensorflow/python/training/saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


Start and end (0, 5)
idx:  0  Epoch:  2  loss:  270.91507  loss_sent:  2.7077875  loss_word:  257.37616  Time cost:  125.41162586212158
Start and end (5, 10)
idx:  5  Epoch:  2  loss:  260.9162  loss_sent:  2.1245575  loss_word:  250.2934  Time cost:  74.912433385849
Start and end (10, 15)
idx:  10  Epoch:  2  loss:  210.05959  loss_sent:  1.9769504  loss_word:  200.17484  Time cost:  9.07091999053955
Start and end (15, 20)
idx:  15  Epoch:  2  loss:  238.28853  loss_sent:  4.0089884  loss_word:  218.24356  Time cost:  7.139372110366821
Start and end (20, 25)
idx:  20  Epoch:  2  loss:  229.59947  loss_sent:  2.3953362  loss_word:  217.62276  Time cost:  7.055136680603027
Start and end (25, 30)
idx:  25  Epoch:  2  loss:  265.02457  loss_sent:  2.1991463  loss_word:  254.02881  Time cost:  6.837973356246948
Start and end (30, 35)
idx:  30  Epoch:  2  loss:  230.43161  loss_sent:  1.974234  loss_word:  220.56049  Time cost:  6.759197950363159
Start and end (35, 40)
idx:  35  Epoch:  2  

KeyboardInterrupt: 

In [1]:
#WITH SCOPE FOR SESSION
#!/usr/bin/env python
# coding=utf-8

__author__ = "Xinpeng.Chen"

import os
import sys
import time
import matplotlib.pyplot as plt
import pickle
import numpy as np
import pandas as pd
import random

import h5py
import ipdb

import tensorflow as tf


# ------------------------------------------------------------------------------------------------------
# Initialization class
#  1. Pooling the visual features into a single dense feature
#  2. Then, build sentence LSTM, word LSTM
# ------------------------------------------------------------------------------------------------------
class RegionPooling_HierarchicalRNN():
    def __init__(self, n_words,
                       batch_size,
                       num_boxes,
                       feats_dim,
                       project_dim,
                       sentRNN_lstm_dim,
                       sentRNN_FC_dim,
                       wordRNN_lstm_dim,
                       S_max,
                       N_max,
                       word_embed_dim,
                       bias_init_vector=None):

        self.n_words = n_words
        self.batch_size = batch_size
        self.num_boxes = num_boxes # 50
        self.feats_dim = feats_dim # 4096
        self.project_dim = project_dim # 1024
        self.S_max = S_max # 6
        self.N_max = N_max # 50
        self.word_embed_dim = word_embed_dim # 1024

        self.sentRNN_lstm_dim = sentRNN_lstm_dim # 512 hidden size
        self.sentRNN_FC_dim = sentRNN_FC_dim # 1024 in fully connected layer
        self.wordRNN_lstm_dim = wordRNN_lstm_dim # 512 hidden size
	


        # word embedding, parameters of embedding
        # embedding shape: n_words x wordRNN_lstm_dim
        with tf.device('/cpu:0'):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, word_embed_dim], -0.1, 0.1), name='Wemb')
        print("Wemb output>>>>>>>>>>>>",self.Wemb,tf.shape(self.Wemb))
        #self.bemb = tf.Variable(tf.zeros([word_embed_dim]), name='bemb')

        # regionPooling_W shape: 4096 x 1024
        # regionPooling_b shape: 1024
        self.regionPooling_W = tf.Variable(tf.random_uniform([feats_dim, project_dim], -0.1, 0.1), name='regionPooling_W')
        self.regionPooling_b = tf.Variable(tf.zeros([project_dim]), name='regionPooling_b')

        # sentence LSTM
        self.sent_LSTM = tf.nn.rnn_cell.BasicLSTMCell(sentRNN_lstm_dim, state_is_tuple=True)

        # logistic classifier
        self.logistic_Theta_W = tf.Variable(tf.random_uniform([sentRNN_lstm_dim, 2], -0.1, 0.1), name='logistic_Theta_W')
        self.logistic_Theta_b = tf.Variable(tf.zeros(2), name='logistic_Theta_b')

        # fc1_W: 512 x 1024, fc1_b: 1024
        # fc2_W: 1024 x 1024, fc2_b: 1024
        self.fc1_W = tf.Variable(tf.random_uniform([sentRNN_lstm_dim, sentRNN_FC_dim], -0.1, 0.1), name='fc1_W')
        self.fc1_b = tf.Variable(tf.zeros(sentRNN_FC_dim), name='fc1_b')
        self.fc2_W = tf.Variable(tf.random_uniform([sentRNN_FC_dim, 1024], -0.1, 0.1), name='fc2_W')
        self.fc2_b = tf.Variable(tf.zeros(1024), name='fc2_b')
        def get_a_cell(lstm_size):
          lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
          return lstm
        # word LSTM
        self.word_LSTM = tf.nn.rnn_cell.BasicLSTMCell(wordRNN_lstm_dim, state_is_tuple=True)
        #self.word_LSTM = tf.nn.rnn_cell.MultiRNNCell([self.word_LSTM] * 2, state_is_tuple=True)
        self.word_LSTM = tf.nn.rnn_cell.MultiRNNCell([get_a_cell(wordRNN_lstm_dim) for i in range(2)], state_is_tuple=True)
        #self.word_LSTM2 = tf.nn.rnn_cell.BasicLSTMCell(wordRNN_lstm_dim, state_is_tuple=True)


        self.embed_word_W = tf.Variable(tf.random_uniform([wordRNN_lstm_dim, n_words], -0.1,0.1), name='embed_word_W')
        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
        else:
            self.embed_word_b = tf.Variable(tf.zeros([n_words]), name='embed_word_b')

    def build_model(self):
        # receive the feats in the current image
        # it's shape is 10 x 50 x 4096
        # tmp_feats: 500 x 4096
        feats = tf.placeholder(tf.float32, [self.batch_size, self.num_boxes, self.feats_dim])
        tmp_feats = tf.reshape(feats, [-1, self.feats_dim])

        # project_vec_all: 500 x 4096 * 4096 x 1024 --> 500 x 1024
        # project_vec: 10 x 1024
        project_vec_all = tf.matmul(tmp_feats, self.regionPooling_W) + self.regionPooling_b
        project_vec_all = tf.reshape(project_vec_all, [self.batch_size, 50, self.project_dim])
        project_vec = tf.reduce_max(project_vec_all, reduction_indices=1)

        # receive the [continue:0, stop:1] lists
        # example: [0, 0, 0, 0, 1, 1], it means this paragraph has five sentences
        num_distribution = tf.placeholder(tf.int32, [self.batch_size, self.S_max])

        # receive the ground truth words, which has been changed to idx use word2idx function
        captions = tf.placeholder(tf.int32, [self.batch_size, self.S_max, self.N_max+1])
        #print("Captions:>>>>>>>>>>>>>>>>>>>>>>>",captions)
        captions_masks = tf.placeholder(tf.float32, [self.batch_size, self.S_max, self.N_max+1])

        # ---------------------------------------------------------------------------------------------------------------------
        # The method which initialize the state, is refered from below sites:
        # 1. http://stackoverflow.com/questions/38241410/tensorflow-remember-lstm-state-for-next-batch-stateful-lstm/38417699
        # 2. https://www.tensorflow.org/api_docs/python/rnn_cell/classes_storing_split_rnncell_state#LSTMStateTuple
        # 3. https://medium.com/@erikhallstrm/using-the-tensorflow-lstm-api-3-7-5f2b97ca6b73#.u4w9z6h0h
        # ---------------------------------------------------------------------------------------------------------------------
        sent_state = self.sent_LSTM.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #word_state = self.word_LSTM.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #word_state1 = self.word_LSTM1.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #word_state2 = self.word_LSTM2.zero_state(batch_size=self.batch_size, dtype=tf.float32)
        #sent_state = tf.zeros([self.batch_size, self.sent_LSTM1.state_size])
        #word_state1 = tf.zeros([self.batch_size, self.word_LSTM1.state_size])
        #word_state2 = tf.zeros([self.batch_size, self.word_LSTM2.state_size])

        probs = []
        loss = 0.0
        loss_sent = 0.0
        loss_word = 0.0
        lambda_sent = 5.0
        lambda_word = 1.0

        print('Start build model:')
        #----------------------------------------------------------------------------------------------
        # Hierarchical RNN: sentence RNN and words RNN
        # The word RNN has the max number, N_max = 50, the number in the papar is 50
        #----------------------------------------------------------------------------------------------
        for i in range(0, self.S_max):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope('sent_LSTM',reuse=tf.AUTO_REUSE):
                sent_output, sent_state = self.sent_LSTM(project_vec, sent_state)

            with tf.name_scope('fc1'):
                hidden1 = tf.nn.relu( tf.matmul(sent_output, self.fc1_W) + self.fc1_b )
            with tf.name_scope('fc2'):
                sent_topic_vec = tf.nn.relu( tf.matmul(hidden1, self.fc2_W) + self.fc2_b )

            # sent_state is a tuple, sent_state = (c, h)
            # 'c': shape=(1, 512) dtype=float32, 'h': shape=(1, 512) dtype=float32
            # The loss here, I refer from the web which is very helpful for me:
            # 1. http://stackoverflow.com/questions/34240703/difference-between-tensorflow-tf-nn-softmax-and-tf-nn-softmax-cross-entropy-with
            # 2. http://stackoverflow.com/questions/35277898/tensorflow-for-binary-classification
            # 3. http://stackoverflow.com/questions/35226198/is-this-one-hot-encoding-in-tensorflow-fast-or-flawed-for-any-reason
            # 4. http://stackoverflow.com/questions/35198528/reshape-y-train-for-binary-text-classification-in-tensorflow
            sentRNN_logistic_mu = tf.nn.xw_plus_b( sent_output, self.logistic_Theta_W, self.logistic_Theta_b )
            sentRNN_label = tf.stack([ 1 - num_distribution[:, i], num_distribution[:, i] ])
            sentRNN_label = tf.transpose(sentRNN_label)
            sentRNN_loss = tf.nn.softmax_cross_entropy_with_logits(logits=sentRNN_logistic_mu, labels=sentRNN_label)
            sentRNN_loss = tf.reduce_sum(sentRNN_loss)/self.batch_size
            loss += sentRNN_loss * lambda_sent
            loss_sent += sentRNN_loss

            # the begining input of word_LSTM is topic vector, and DON'T compute the loss
            # This is follow the paper: Show and Tell
            #word_state = self.word_LSTM.zero_state(batch_size=self.batch_size, dtype=tf.float32)
            #with tf.variable_scope('word_LSTM'):
            #    word_output, word_state = self.word_LSTM(sent_topic_vec)
            topic = tf.nn.rnn_cell.LSTMStateTuple(sent_topic_vec[:, 0:512], sent_topic_vec[:, 512:])
            word_state = (topic, topic)
            for j in range(0, self.N_max):
                if j > 0:
                    tf.get_variable_scope().reuse_variables()

                with tf.device('/cpu:0'):
                    current_embed = tf.nn.embedding_lookup(self.Wemb, captions[:, i, j])

                with tf.variable_scope('word_LSTM',reuse=tf.AUTO_REUSE):
                    word_output, word_state = self.word_LSTM(current_embed, word_state)

                # How to make one-hot encoder, I refer from this excellent web:
                # http://stackoverflow.com/questions/33681517/tensorflow-one-hot-encoder
                labels = tf.reshape(captions[:, i, j+1], [-1, 1])
                #print("Labels and its shape +++++++++++++++",labels,tf.shape(labels))
                indices = tf.reshape(tf.range(0, self.batch_size, 1), [-1, 1])
                #print("Indices and its shape +++++++++++++++",indices,tf.shape(indices))
                concated = tf.concat([indices, labels],1)
                #print("Concated+++++++++++++++++++",concated)
                print("Success")
                onehot_labels = tf.sparse_to_dense(concated, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)

                # At each timestep the hidden state of the last LSTM layer is used to predict a distribution
                # over the words in the vocbulary
                logit_words = tf.nn.xw_plus_b(word_output[:], self.embed_word_W, self.embed_word_b)
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit_words, labels=onehot_labels)
                cross_entropy = cross_entropy * captions_masks[:, i, j]
                loss_wordRNN = tf.reduce_sum(cross_entropy) / self.batch_size
                loss += loss_wordRNN * lambda_word
                loss_word += loss_wordRNN

        return feats, num_distribution, captions, captions_masks, loss, loss_sent, loss_word

    def generate_model(self):
        # feats: 1 x 50 x 4096
        feats = tf.placeholder(tf.float32, [1, self.num_boxes, self.feats_dim])
        # tmp_feats: 50 x 4096
        tmp_feats = tf.reshape(feats, [-1, self.feats_dim])

        # project_vec_all: 50 x 4096 * 4096 x 1024 + 1024 --> 50 x 1024
        project_vec_all = tf.matmul(tmp_feats, self.regionPooling_W) + self.regionPooling_b
        project_vec_all = tf.reshape(project_vec_all, [1, 50, self.project_dim])
        project_vec = tf.reduce_max(project_vec_all, reduction_indices=1)

        # initialize the sent_LSTM state
        sent_state = self.sent_LSTM.zero_state(batch_size=1, dtype=tf.float32)

        # save the generated paragraph to list, here I named generated_sents
        generated_paragraph = []

        # pred
        pred_re = []

        # T_stop: run the sentence RNN forward until the stopping probability p_i (STOP) exceeds a threshold T_stop
        T_stop = tf.constant(0.5)

        # Start build the generation model
        print('Start build the generation model: ')

        # sentence RNN
        #word_state = self.word_LSTM.zero_state(batch_size=1, dtype=tf.float32)
        #with tf.variable_scope('word_LSTM'):
        #    word_output, word_state = self.word_LSTM(sent_topic_vec, word_state)
        for i in range(0, self.S_max):
            if i > 0:
                tf.get_variable_scope().reuse_variables()

            # sent_state:
            # LSTMStateTuple(c=<tf.Tensor 'sent_LSTM/BasicLSTMCell/add_2:0' shape=(1, 512) dtype=float32>,
            #                h=<tf.Tensor 'sent_LSTM/BasicLSTMCell/mul_2:0' shape=(1, 512) dtype=float32>)
            with tf.variable_scope('sent_LSTM',reuse=tf.AUTO_REUSE):
                sent_output, sent_state = self.sent_LSTM(project_vec, sent_state)

            # self.fc1_W: 512 x 1024, self.fc1_b: 1024
            # hidden1: 1 x 1024
            # sent_topic_vec: 1 x 1024
            with tf.name_scope('fc1'):
                hidden1 = tf.nn.relu( tf.matmul(sent_output, self.fc1_W) + self.fc1_b )
            with tf.name_scope('fc2'):
                sent_topic_vec = tf.nn.relu( tf.matmul(hidden1, self.fc2_W) + self.fc2_b )

            sentRNN_logistic_mu = tf.nn.xw_plus_b(sent_output, self.logistic_Theta_W, self.logistic_Theta_b)
            pred = tf.nn.softmax(sentRNN_logistic_mu)
            pred_re.append(pred)

            # save the generated sentence to list, named generated_sent
            generated_sent = []

            # initialize the word LSTM state
            #word_state = self.word_LSTM.zero_state(batch_size=1, dtype=tf.float32)
            #with tf.variable_scope('word_LSTM'):
            #    word_output, word_state = self.word_LSTM(sent_topic_vec, word_state)
            topic = tf.nn.rnn_cell.LSTMStateTuple(sent_topic_vec[:, 0:512], sent_topic_vec[:, 512:])
            word_state = (topic, topic)
            # word RNN, unrolled to N_max time steps
            for j in range(0, self.N_max):
                if j > 0:
                    tf.get_variable_scope().reuse_variables()

                if j == 0:
                    with tf.device('/cpu:0'):
                        # get word embedding of BOS (index = 0)
                        current_embed = tf.nn.embedding_lookup(self.Wemb, tf.zeros([1], dtype=tf.int64))

                with tf.variable_scope('word_LSTM',reuse=tf.AUTO_REUSE):
                    word_output, word_state = self.word_LSTM(current_embed, word_state)

                # word_state:
                # (
                #     LSTMStateTuple(c=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell0/BasicLSTMCell/add_2:0' shape=(1, 512) dtype=float32>,
                #                    h=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell0/BasicLSTMCell/mul_2:0' shape=(1, 512) dtype=float32>),
                #     LSTMStateTuple(c=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell1/BasicLSTMCell/add_2:0' shape=(1, 512) dtype=float32>,
                #                    h=<tf.Tensor 'word_LSTM_152/MultiRNNCell/Cell1/BasicLSTMCell/mul_2:0' shape=(1, 512) dtype=float32>)
                # )
                logit_words = tf.nn.xw_plus_b(word_output, self.embed_word_W, self.embed_word_b)
                max_prob_index = tf.argmax(logit_words, 1)[0]
                generated_sent.append(max_prob_index)

                with tf.device('/cpu:0'):
                    current_embed = tf.nn.embedding_lookup(self.Wemb, max_prob_index)
                    current_embed = tf.expand_dims(current_embed, 0)

            generated_paragraph.append(generated_sent)

        return feats, generated_paragraph, pred_re, sent_topic_vec


# -----------------------------------------------------------------------------------------------------
# Preparing Functions
# -----------------------------------------------------------------------------------------------------
def preProBuildWordVocab(sentence_iterator, word_count_threshold=5):
    # borrowed this function from NeuralTalk
    print('preprocessing word counts and creating vocab based on word count threshold %d' % (word_count_threshold, ))

    word_counts = {}
    nsents = 0

    for sent in sentence_iterator:
        nsents += 1
        tmp_sent = sent.lower().split(' ')
        if '' in tmp_sent:
            tmp_sent.remove('')

        for w in tmp_sent:
           word_counts[w] = word_counts.get(w, 0) + 1

    vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
    print('filtered words from %d to %d' % (len(word_counts), len(vocab)))

    ixtoword = {}
    ixtoword[0] = '<bos>'
    ixtoword[1] = '<eos>'
    ixtoword[2] = '<pad>'
    ixtoword[3] = '<unk>'

    wordtoix = {}
    wordtoix['<bos>'] = 0
    wordtoix['<eos>'] = 1
    wordtoix['<pad>'] = 2
    wordtoix['<unk>'] = 3

    for idx, w in enumerate(vocab):
        wordtoix[w] = idx + 4
        ixtoword[idx+4] = w

    word_counts['<eos>'] = nsents
    word_counts['<bos>'] = nsents
    word_counts['<pad>'] = nsents
    word_counts['<unk>'] = nsents

    bias_init_vector = np.array([1.0 * word_counts[ ixtoword[i] ] for i in ixtoword])
    bias_init_vector /= np.sum(bias_init_vector) # normalize to frequencies
    bias_init_vector = np.log(bias_init_vector)
    bias_init_vector -= np.max(bias_init_vector) # shift to nice numeric range

    return wordtoix, ixtoword, bias_init_vector


#######################################################################################################
# Parameters Setting
#######################################################################################################
batch_size = 5 # Being support batch_size
num_boxes = 50 # number of Detected regions in each image
feats_dim = 4096 # feature dimensions of each regions
project_dim = 1024 # project the features to one vector, which is 1024 dimensions

sentRNN_lstm_dim = 512 # the sentence LSTM hidden units
sentRNN_FC_dim = 1024 # the fully connected units
wordRNN_lstm_dim = 512 # the word LSTM hidden units
word_embed_dim = 1024 # the learned embedding vectors for the words

S_max = 6
N_max = 50
T_stop = 0.5

n_epochs = 500
learning_rate = 0.0001


#######################################################################################################
# Word vocubulary and captions preprocessing stage
#######################################################################################################
img2paragraph = pickle.load(open('./img2paragraph', 'rb'))
all_sentences = []
for key, paragraph in img2paragraph.items():
    for each_sent in paragraph[1]:
        each_sent.replace(',', ' ,')
        all_sentences.append(each_sent)
word2idx, idx2word, bias_init_vector = preProBuildWordVocab(all_sentences, word_count_threshold=2)
np.save('./idx2word_batch', idx2word)

img2paragraph_modify = {}
for img_name, img_paragraph in img2paragraph.items():
    img_paragraph_1 = img_paragraph[1]

    # img_paragraph_1 is a list
    # it may contain the element: '' or ' ', like this:
    # [["a man is walking"], ["the dog is running"], [""], [" "]]
    # so, we should remove them ' ' and '' element
    if '' in img_paragraph_1:
        img_paragraph_1.remove('')
    if ' ' in paragraph[1]:
        img_paragraph_1.remove(' ')

    # the number sents in each paragraph
    # if the sents is bigger than S_max,
    # we force the number of sents to be S_max
    img_num_sents = len(img_paragraph_1)
    if img_num_sents > S_max:
        img_num_sents = S_max

    # if a paragraph has 4 sentences
    # then the img_num_distribution will be like this:
    # [0, 0, 0, 1, 1, 1]
    img_num_distribution = np.zeros([S_max], dtype=np.int32)
    img_num_distribution[img_num_sents-1:] = 1

    # we multiply the number 2, because the <pad> is encoded into 2
    img_captions_matrix = np.ones([S_max, N_max+1], dtype=np.int32) * 2 # zeros([6, 50])
    for idx, img_sent in enumerate(img_paragraph_1):
        # the number of sentences is img_num_sents
        if idx == img_num_sents:
            break

        # because we treat the ',' as a word
        img_sent = img_sent.replace(',', ' ,')

        # Because I have preprocess the paragraph_v1.json file in VScode before,
        # and I delete all the 2, 3, 4...bankspaces
        # so, actually, the 'elif' code will never run
        if img_sent[0] == ' ' and img_sent[1] != ' ':
            img_sent = img_sent[1:]
        elif img_sent[0] == ' ' and img_sent[1] == ' ' and img_sent[2] != ' ':
            img_sent = img_sent[2:]

        # Be careful the last part in a sentence, like this:
        # '...world.'
        # '...world. '
        if img_sent[-1] == '.':
            img_sent = img_sent[0:-1]
        elif img_sent[-1] == ' ' and img_sent[-2] == '.':
            img_sent = img_sent[0:-2]

        # Last, we add the <bos> and the <eos> in each sentences
        img_sent = '<bos> ' + img_sent + ' <eos>'

        # translate each word in a sentence into the unique number in word2idx dict
        # when we meet the word which is not in the word2idx dict, we use the mark: <unk>
        for idy, word in enumerate(img_sent.lower().split(' ')):
            # because the biggest number of words in a sentence is N_max, here is 50
            if idy == N_max:
                break

            if word in word2idx:
                img_captions_matrix[idx, idy] = word2idx[word]
            else:
                img_captions_matrix[idx, idy] = word2idx['<unk>']

    # Pay attention, the value type 'img_name' here is NUMBER, I change it to STRING type
    img2paragraph_modify[str(img_name)] = [img_num_distribution, img_captions_matrix]

with open('./img2paragraph_modify_batch', 'wb') as f:
    pickle.dump(img2paragraph_modify, f)


#######################################################################################################
# Train, validation and testing stage
#######################################################################################################
def train():
    ##############################################################################
    # some preparing work
    ##############################################################################
    model_path = './models_batch/'
    train_feats_path = './im2p_val_output.h5'
    train_output_file = h5py.File(train_feats_path, 'r')
    train_feats = train_output_file.get('feats')
    train_imgs_full_path_lists = open('./imgs_val_path.txt').read().splitlines()
    train_imgs_names = list(map(lambda x: os.path.basename(x).split('.')[0], train_imgs_full_path_lists))
    print("Train Images Names",train_imgs_names)

    # Model Initialization:
    # n_words, batch_size, num_boxes, feats_dim, project_dim, sentRNN_lstm_dim, sentRNN_FC_dim, wordRNN_lstm_dim, S_max, N_max
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        model = RegionPooling_HierarchicalRNN(n_words = len(word2idx),
                                              batch_size = batch_size,
                                              num_boxes = num_boxes,
                                              feats_dim = feats_dim,
                                              project_dim = project_dim,
                                              sentRNN_lstm_dim = sentRNN_lstm_dim,
                                              sentRNN_FC_dim = sentRNN_FC_dim,
                                              wordRNN_lstm_dim = wordRNN_lstm_dim,
                                              S_max = S_max,
                                              N_max = N_max,
                                              word_embed_dim = word_embed_dim,
                                              bias_init_vector = bias_init_vector)

        tf_feats, tf_num_distribution, tf_captions_matrix, tf_captions_masks, tf_loss, tf_loss_sent, tf_loss_word = model.build_model()
    #sess = tf.Session()
    with tf.Session() as sess:
        saver = tf.train.Saver(max_to_keep=500, write_version=1)
        with tf.variable_scope('optimizer',reuse= tf.AUTO_REUSE):
            train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
        tf.global_variables_initializer().run(session=sess)
        print("Done w session")

        # when you want to train the model from the front model
        #new_saver = tf.train.Saver(max_to_keep=500)
        #new_saver = tf.train.import_meta_graph('./models_batch/model-92.meta')
        #new_saver.restore(sess, tf.train.latest_checkpoint('./models_batch/'))

        all_vars = tf.trainable_variables()

        # open a loss file to record the loss value
        #loss_fd = open('loss_batch.txt', 'a')
        img2idx = {}
        for idx, img in enumerate(train_imgs_names):
            img2idx[img] = idx
        print("Img2idx",img2idx)

        # plt draw the loss curve
        # refer from: http://stackoverflow.com/questions/11874767/real-time-plotting-in-while-loop-with-matplotlib
        
        loss_to_draw = [315.90247,265.50665,243.4754,229.64827,219.55573,211.94145,204.5995,197.81664,192.10925,186.46211,180.70108,175.55612,170.3046,165.09123,160.21469,160.2537961546135,154.08637935852528,149.32272485827923,144.4875265452814,139.37248678689718,134.57183133995056,129.8052043039155,125.52713392114163,120.7494116071415,115.9398527387333,111.30544829552889,106.93300072288037,102.37911339288473,97.98938349818707,93.8994490746212,89.69688967054606,84.13689,79.04315,75.386566,71.634445,67.22706,63.297745,59.791893,58.797394,54.47476,50.78446,47.608204,44.824448,42.131664,39.22608,36.267944,33.43973,30.94212,29.161013,27.789131,25.733274,23.194603,21.0445,19.291529,18.102518,17.157877,17.008978,14.93577,13.9712105,12.671065,11.739764,10.853795,10.301606,9.915598,9.00906,8.191635,7.7371345,7.485827,7.1387715,7.2393036,7.0062246,6.03907,5.117557,4.7625046,4.913513,4.82957,5.0153527,4.8877826,4.4756083,3.8244674,7.6418276,7.2976747,6.7312045,6.5270514,6.6875095,5.9723153,4.704945,4.7736664,5.8504386,5.804864,4.7677197,3.7789633,3.6227777,3.7564394,4.2505994,4.3439636,3.6212943,3.0515492,3.0061777,3.1802363,3.4463785,3.1947653,2.5623198,3.1410942,3.497574,3.2585106,2.8631153,315.3198,264.98257,243.43536,230.01776,219.96605,211.50975]  #Have to hard code here once it crashes
        print("Before epoch loop->>>>>>>>>>>>>>>>>>>>>>>>>>>>> the len is",len(loss_to_draw))
        try:
            saver.restore(sess, './models_batch/model-112')
            print("pretrained model loaded successfully")
        except:
            print("fail to load pretrained model")
            pass
        
        
        for epoch in range(113, n_epochs):
            loss_to_draw_epoch = []
            # disorganize the order
            random.shuffle(train_imgs_names)

            for start, end in zip(range(0, len(train_imgs_names), batch_size),
                                  range(batch_size, len(train_imgs_names), batch_size)):
                loss_fd = open('loss_batch.txt', 'a')

                start_time = time.time()
                print("Start and end",(start,end))
                img_name = train_imgs_names[start:end]
                current_feats_index = map(lambda x: img2idx[x], img_name)
                current_feats = np.asarray( list(map(lambda x: train_feats[x], current_feats_index) ))

                current_num_distribution = np.asarray( list(map(lambda x: img2paragraph_modify[x][0], img_name) ))
                current_captions_matrix = np.asarray( list(map(lambda x: img2paragraph_modify[x][1], img_name) ))

                current_captions_masks = np.zeros( (current_captions_matrix.shape[0], current_captions_matrix.shape[1], current_captions_matrix.shape[2]) )
                # find the non-zero element
                nonzeros = np.array( list(map(lambda each_matrix: np.array( list(map(lambda x: (x != 2).sum() + 1, each_matrix ) )), current_captions_matrix ) ))
                for i in range(batch_size):
                    for ind, row in enumerate(current_captions_masks[i]):
                        row[:(nonzeros[i, ind]-1)] = 1

                # shape of current_feats: batch_size x 50 x 4096
                # shape of current_num_distribution: batch_size x 6
                # shape of current_captions_matrix: batch_size x 6 x 50
                _, loss_val, loss_sent, loss_word= sess.run(
                                    [train_op, tf_loss, tf_loss_sent, tf_loss_word],
                                    feed_dict={
                                               tf_feats: current_feats,
                                               tf_num_distribution: current_num_distribution,
                                               tf_captions_matrix: current_captions_matrix,
                                               tf_captions_masks: current_captions_masks
                                    })

                # append loss to list in a epoch
                loss_to_draw_epoch.append(loss_val)

                # running information
                print('idx: ', start, ' Epoch: ', epoch, ' loss: ', loss_val, ' loss_sent: ', loss_sent, ' loss_word: ', loss_word, ' Time cost: ', str((time.time() - start_time)))
                loss_fd.write('start: '+ str(start) +' end: ' + str(end) +' epoch ' + str(epoch) + ' loss ' + str(loss_val) + '\n')
                loss_fd.close()
            # draw loss curve every epoch
            loss_to_draw.append(np.mean(loss_to_draw_epoch)) #This is what weve to write in a temp file once it is about to crash
            plt_save_dir = './loss_imgs'
            plt_save_img_name = str(epoch) + '.png'
            plt.plot(range(len(loss_to_draw)), loss_to_draw, color='g')
            plt.grid(True)
            plt.savefig(os.path.join(plt_save_dir, plt_save_img_name))
            MyFile=open('temporary.txt','a')
            if np.mod(epoch, 1) == 0:
                #the list loss_to_draw needs to be saved in a temp file
                l=loss_to_draw
                MyList=map(lambda x:str(x)+',', l)
                MyFile.writelines(MyList)
                MyFile.write('\t'+'<><><><><><><><><><><><><>'+'\n')
                MyFile.close()
                print("Successfully Written to temporary")
                print("Epoch ", epoch, " is done. Saving the model ...")
                saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
        #loss_fd.close()
315.90247,265.50665,243.4754,229.64827,219.55573,211.94145,204.5995,197.81664,192.10925,186.46211,180.70108,175.55612,170.3046,165.09123,160.21469,160.2537961546135,154.08637935852528,149.32272485827923,144.4875265452814,139.37248678689718,134.57183133995056,129.8052043039155,125.52713392114163,120.7494116071415,115.9398527387333,111.30544829552889,106.93300072288037,102.37911339288473,97.98938349818707,93.8994490746212,89.69688967054606,84.13689,79.04315,75.386566,71.634445,67.22706,63.297745,59.791893,58.797394,54.47476,50.78446,47.608204,44.824448,42.131664,39.22608,36.267944,33.43973,30.94212,29.161013,27.789131,25.733274,23.194603,21.0445,19.291529,18.102518,17.157877,17.008978,14.93577,13.9712105,12.671065,11.739764,10.853795,10.301606,9.915598,9.00906,8.191635,7.7371345,7.485827,7.1387715,7.2393036,7.0062246,6.03907,5.117557,4.7625046,4.913513,4.82957,5.0153527,4.8877826,4.4756083,3.8244674,7.6418276,7.2976747,6.7312045,6.5270514,6.6875095,5.9723153,4.704945,4.7736664,5.8504386,5.804864,4.7677197,3.7789633,3.6227777,3.7564394,4.2505994,4.3439636,3.6212943,3.0515492,3.0061777,3.1802363,3.4463785,3.1947653,2.5623198,3.1410942,3.497574,3.2585106,2.8631153

def test():
    
    start_time = time.time()
    # change the model path according to your environment
    model_path = './model-250'

    # It's very important to use Pandas to Series this idx2word dict
    # After this operation, we can use list to extract the word at the same time
    idx2word = pd.Series(np.load('./data/idx2word_batch.npy').tolist())

    test_feats_path = './data/im2p_test_output.h5'
    test_output_file = h5py.File(test_feats_path, 'r')
    test_feats = test_output_file.get('feats')

    test_imgs_full_path_lists = open('./densecap/imgs_test_path.txt').read().splitlines()
    test_imgs_names = map(lambda x: os.path.basename(x).split('.')[0], test_imgs_full_path_lists)
    
    # n_words, batch_size, num_boxes, feats_dim, project_dim, sentRNN_lstm_dim, sentRNN_FC_dim, wordRNN_lstm_dim, S_max, N_max
    test_model = RegionPooling_HierarchicalRNN(n_words = len(word2idx),
                                               batch_size = batch_size,
                                               num_boxes = num_boxes,
                                               feats_dim = feats_dim,
                                               project_dim = project_dim,
                                               sentRNN_lstm_dim = sentRNN_lstm_dim,
                                               sentRNN_FC_dim = sentRNN_FC_dim,
                                               wordRNN_lstm_dim = wordRNN_lstm_dim,
                                               S_max = S_max,
                                               N_max = N_max,
                                               word_embed_dim = word_embed_dim,
                                               bias_init_vector = bias_init_vector)
    

    tf_feats, tf_generated_paragraph, tf_pred_re, tf_sent_topic_vectors = test_model.generate_model()
    sess = tf.InteractiveSession()
    #print("Before there >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n\n\n")

    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    img2idx = {}
    for idx, img in enumerate(test_imgs_names):
        img2idx[img] = idx
    print("IM2idx:::::::::::",img2idx)

    test_fd = open('HRNN_results.txt', 'w')
    for idx, img_name in enumerate(test_imgs_names):
        print(idx, img_name)
        test_fd.write(img_name + '\n')

        each_paragraph = []
        current_paragraph = ""

        current_feats_index = img2idx[img_name]
        current_feats = test_feats[current_feats_index]
        current_feats = np.reshape(current_feats, [1, 50, 4096])

        generated_paragraph_indexes, pred, sent_topic_vectors = sess.run(
                                                                         [tf_generated_paragraph, tf_pred_re, tf_sent_topic_vectors],
                                                                         feed_dict={
                                                                             tf_feats: current_feats
                                                                         })

        #generated_paragraph = idx2word[generated_paragraph_indexes]
        for sent_index in generated_paragraph_indexes:
            each_sent = []
            for word_index in sent_index:
                each_sent.append(idx2word[word_index])
            each_paragraph.append(each_sent)

        for idx, each_sent in enumerate(each_paragraph):
            # if the current sentence is the end sentence of the paragraph
            # According to the probability distribution:
            # CONTINUE: [1, 0]
            # STOP    : [0, 1]
            # So, if the first item of pred is less than the T_stop
            # the generation process is break
            if pred[idx][0][0] <= T_stop:
                break
            current_sent = ''
            for each_word in each_sent:
                current_sent += each_word + ' '
            current_sent = current_sent.replace('<eos> ', '')
            current_sent = current_sent.replace('<pad> ', '')
            current_sent = current_sent + '.'
            current_sent = current_sent.replace(' .', '.')
            current_sent = current_sent.replace(' ,', ',')
            current_paragraph +=current_sent
            if idx != len(each_paragraph) - 1:
                current_paragraph += ' '

        test_fd.write(current_paragraph + '\n')
    test_fd.close()
    print("Time cost: " + str(time.time()-start_time))

train()


preprocessing word counts and creating vocab based on word count threshold 2
filtered words from 18418 to 9900
Train Images Names ['2336489', '2386636', '2317584', '2349895', '2348349', '2393268', '2338512', '2319338', '2318897', '2382861', '2359450', '2393810', '2379103', '2400913', '2381116', '2379607', '2372836', '1592156', '2388737', '2316733', '2328394', '2391668', '2341747', '2319741', '2355380', '2353667', '2316710', '2393529', '1160254', '1159529', '2392926', '2399813', '2335023', '2400892', '2367765', '2375715', '2395072', '2349745', '2384897', '2406617', '2373936', '2318431', '2410717', '2353681', '2414772', '2346311', '2369025', '2367716', '2342700', '2355768', '2361773', '2352565', '2356407', '2393837', '2351900', '2378739', '2394493', '2334825', '2346609', '2355839', '2406971', '2327224', '2407039', '2377971', '2367554', '2368478', '2327958', '2366303', '2360963', '2414977', '2416656', '2378809', '2407520', '2366857', '2374646', '2327849', '2376953', '2373691', '1143', '23

W0724 09:10:24.178529 140197742454528 deprecation.py:323] From <ipython-input-1-0434d5425a35>:69: BasicLSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
W0724 09:10:24.230486 140197742454528 deprecation.py:323] From <ipython-input-1-0434d5425a35>:87: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.


Wemb output>>>>>>>>>>>> <tf.Variable 'Wemb:0' shape=(9904, 1024) dtype=float32_ref> Tensor("Shape:0", shape=(2,), dtype=int32)


W0724 09:10:24.759396 140197742454528 deprecation.py:506] From /home/student/anaconda3/envs/CPU/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0724 09:10:24.765649 140197742454528 deprecation.py:506] From /home/student/anaconda3/envs/CPU/lib/python3.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py:738: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Start build model:


W0724 09:10:25.021539 140197742454528 deprecation.py:323] From <ipython-input-1-0434d5425a35>:167: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

W0724 09:10:25.306723 140197742454528 deprecation.py:323] From <ipython-input-1-0434d5425a35>:198: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.


Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success


W0724 09:11:05.881547 140197742454528 deprecation.py:323] From /home/student/anaconda3/envs/CPU/lib/python3.7/site-packages/tensorflow/python/training/saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


Done w session
Img2idx {'2336489': 0, '2386636': 1, '2317584': 2, '2349895': 3, '2348349': 4, '2393268': 5, '2338512': 6, '2319338': 7, '2318897': 8, '2382861': 9, '2359450': 10, '2393810': 11, '2379103': 12, '2400913': 13, '2381116': 14, '2379607': 15, '2372836': 16, '1592156': 17, '2388737': 18, '2316733': 19, '2328394': 20, '2391668': 21, '2341747': 22, '2319741': 23, '2355380': 24, '2353667': 25, '2316710': 26, '2393529': 27, '1160254': 28, '1159529': 29, '2392926': 30, '2399813': 31, '2335023': 32, '2400892': 33, '2367765': 34, '2375715': 35, '2395072': 36, '2349745': 37, '2384897': 38, '2406617': 39, '2373936': 40, '2318431': 41, '2410717': 42, '2353681': 43, '2414772': 44, '2346311': 45, '2369025': 46, '2367716': 47, '2342700': 48, '2355768': 49, '2361773': 50, '2352565': 51, '2356407': 52, '2393837': 53, '2351900': 54, '2378739': 55, '2394493': 56, '2334825': 57, '2346609': 58, '2355839': 59, '2406971': 60, '2327224': 61, '2407039': 62, '2377971': 63, '2367554': 64, '2368478': 

pretrained model loaded successfully
Start and end (0, 5)
idx:  0  Epoch:  113  loss:  380.99615  loss_sent:  3.8504834  loss_word:  361.7437  Time cost:  87.46882843971252
Start and end (5, 10)
idx:  5  Epoch:  113  loss:  339.97424  loss_sent:  3.4248228  loss_word:  322.85013  Time cost:  7.107832908630371
Start and end (10, 15)
idx:  10  Epoch:  113  loss:  416.8274  loss_sent:  3.8823729  loss_word:  397.4155  Time cost:  6.906096935272217
Start and end (15, 20)
idx:  15  Epoch:  113  loss:  416.82837  loss_sent:  4.090528  loss_word:  396.3757  Time cost:  6.872644424438477
Start and end (20, 25)
idx:  20  Epoch:  113  loss:  376.28152  loss_sent:  3.0152397  loss_word:  361.20535  Time cost:  7.041353225708008
Start and end (25, 30)
idx:  25  Epoch:  113  loss:  377.1209  loss_sent:  2.7390094  loss_word:  363.42587  Time cost:  8.141240358352661
Start and end (30, 35)
idx:  30  Epoch:  113  loss:  361.34146  loss_sent:  4.139288  loss_word:  340.64502  Time cost:  7.06324028968

idx:  290  Epoch:  113  loss:  298.66086  loss_sent:  3.0648222  loss_word:  283.33676  Time cost:  6.631594181060791
Start and end (295, 300)
idx:  295  Epoch:  113  loss:  343.85715  loss_sent:  2.7962568  loss_word:  329.87585  Time cost:  6.839798927307129
Start and end (300, 305)
idx:  300  Epoch:  113  loss:  330.5538  loss_sent:  3.1283553  loss_word:  314.91202  Time cost:  6.768357515335083
Start and end (305, 310)
idx:  305  Epoch:  113  loss:  291.88242  loss_sent:  3.714604  loss_word:  273.3094  Time cost:  6.733511447906494
Start and end (310, 315)
idx:  310  Epoch:  113  loss:  316.52112  loss_sent:  3.52972  loss_word:  298.8725  Time cost:  6.789605379104614
Start and end (315, 320)
idx:  315  Epoch:  113  loss:  352.06332  loss_sent:  3.2165115  loss_word:  335.9808  Time cost:  6.6825456619262695
Start and end (320, 325)
idx:  320  Epoch:  113  loss:  286.21902  loss_sent:  2.4085052  loss_word:  274.17648  Time cost:  6.680880308151245
Start and end (325, 330)
idx: 

idx:  580  Epoch:  113  loss:  213.06422  loss_sent:  2.7688742  loss_word:  199.21986  Time cost:  6.933086395263672
Start and end (585, 590)
idx:  585  Epoch:  113  loss:  403.0985  loss_sent:  2.7083018  loss_word:  389.557  Time cost:  6.746628284454346
Start and end (590, 595)
idx:  590  Epoch:  113  loss:  366.28464  loss_sent:  4.066497  loss_word:  345.95215  Time cost:  6.899180173873901
Start and end (595, 600)
idx:  595  Epoch:  113  loss:  316.60895  loss_sent:  3.0060163  loss_word:  301.57886  Time cost:  6.690722703933716
Start and end (600, 605)
idx:  600  Epoch:  113  loss:  346.00534  loss_sent:  2.8974984  loss_word:  331.51788  Time cost:  6.911505699157715
Start and end (605, 610)
idx:  605  Epoch:  113  loss:  263.2942  loss_sent:  2.4926753  loss_word:  250.83083  Time cost:  6.740240097045898
Start and end (610, 615)
idx:  610  Epoch:  113  loss:  288.40073  loss_sent:  2.8011975  loss_word:  274.3947  Time cost:  6.767743825912476
Start and end (615, 620)
idx: 

idx:  870  Epoch:  113  loss:  189.66171  loss_sent:  3.2696757  loss_word:  173.31334  Time cost:  6.795580625534058
Start and end (875, 880)
idx:  875  Epoch:  113  loss:  311.24802  loss_sent:  3.186697  loss_word:  295.3145  Time cost:  6.703605890274048
Start and end (880, 885)
idx:  880  Epoch:  113  loss:  315.97586  loss_sent:  2.7535682  loss_word:  302.20798  Time cost:  6.669618606567383
Start and end (885, 890)
idx:  885  Epoch:  113  loss:  262.65137  loss_sent:  2.7935047  loss_word:  248.68384  Time cost:  6.908123254776001
Start and end (890, 895)
idx:  890  Epoch:  113  loss:  268.44092  loss_sent:  2.7285655  loss_word:  254.79807  Time cost:  6.729360818862915
Start and end (895, 900)
idx:  895  Epoch:  113  loss:  365.2565  loss_sent:  2.8046691  loss_word:  351.23312  Time cost:  6.982182264328003
Start and end (900, 905)
idx:  900  Epoch:  113  loss:  297.1969  loss_sent:  2.9048443  loss_word:  282.67264  Time cost:  6.756896734237671
Start and end (905, 910)
idx

idx:  1155  Epoch:  113  loss:  276.33328  loss_sent:  2.846242  loss_word:  262.10202  Time cost:  6.858116388320923
Start and end (1160, 1165)
idx:  1160  Epoch:  113  loss:  228.20634  loss_sent:  2.1631074  loss_word:  217.39081  Time cost:  6.741997718811035
Start and end (1165, 1170)
idx:  1165  Epoch:  113  loss:  308.0768  loss_sent:  2.744841  loss_word:  294.35266  Time cost:  6.995177745819092
Start and end (1170, 1175)
idx:  1170  Epoch:  113  loss:  181.55412  loss_sent:  2.3293254  loss_word:  169.9075  Time cost:  6.749823570251465
Start and end (1175, 1180)
idx:  1175  Epoch:  113  loss:  291.26205  loss_sent:  3.21319  loss_word:  275.19604  Time cost:  6.841775178909302
Start and end (1180, 1185)
idx:  1180  Epoch:  113  loss:  231.47902  loss_sent:  3.7367435  loss_word:  212.79532  Time cost:  6.707117080688477
Start and end (1185, 1190)
idx:  1185  Epoch:  113  loss:  262.55984  loss_sent:  3.4075818  loss_word:  245.52197  Time cost:  6.6757447719573975
Start and 

idx:  1440  Epoch:  113  loss:  201.33543  loss_sent:  3.064217  loss_word:  186.01436  Time cost:  6.723113775253296
Start and end (1445, 1450)
idx:  1445  Epoch:  113  loss:  316.1602  loss_sent:  3.1949031  loss_word:  300.1857  Time cost:  6.882862329483032
Start and end (1450, 1455)
idx:  1450  Epoch:  113  loss:  214.00539  loss_sent:  3.8781703  loss_word:  194.61452  Time cost:  6.664955139160156
Start and end (1455, 1460)
idx:  1455  Epoch:  113  loss:  297.05005  loss_sent:  2.6295288  loss_word:  283.9024  Time cost:  6.753477096557617
Start and end (1460, 1465)
idx:  1460  Epoch:  113  loss:  339.5487  loss_sent:  2.4883032  loss_word:  327.10718  Time cost:  6.837130546569824
Start and end (1465, 1470)
idx:  1465  Epoch:  113  loss:  242.75093  loss_sent:  3.0105286  loss_word:  227.69829  Time cost:  6.706668853759766
Start and end (1470, 1475)
idx:  1470  Epoch:  113  loss:  229.65067  loss_sent:  2.6336472  loss_word:  216.48245  Time cost:  6.8047401905059814
Start and

idx:  1725  Epoch:  113  loss:  310.5453  loss_sent:  3.0935612  loss_word:  295.07748  Time cost:  6.704421758651733
Start and end (1730, 1735)
idx:  1730  Epoch:  113  loss:  275.82938  loss_sent:  2.669046  loss_word:  262.4842  Time cost:  6.845362901687622
Start and end (1735, 1740)
idx:  1735  Epoch:  113  loss:  326.9716  loss_sent:  2.1787963  loss_word:  316.07758  Time cost:  6.719499588012695
Start and end (1740, 1745)
idx:  1740  Epoch:  113  loss:  263.9364  loss_sent:  3.4668179  loss_word:  246.60236  Time cost:  6.920797109603882
Start and end (1745, 1750)
idx:  1745  Epoch:  113  loss:  318.79584  loss_sent:  2.5267658  loss_word:  306.162  Time cost:  6.642021894454956
Start and end (1750, 1755)
idx:  1750  Epoch:  113  loss:  281.68723  loss_sent:  2.8715205  loss_word:  267.32962  Time cost:  6.691251754760742
Start and end (1755, 1760)
idx:  1755  Epoch:  113  loss:  285.11084  loss_sent:  2.3711047  loss_word:  273.2553  Time cost:  6.784493923187256
Start and end

idx:  2010  Epoch:  113  loss:  296.92255  loss_sent:  2.1798244  loss_word:  286.0234  Time cost:  6.893157482147217
Start and end (2015, 2020)
idx:  2015  Epoch:  113  loss:  334.26923  loss_sent:  3.6156576  loss_word:  316.19092  Time cost:  6.9587929248809814
Start and end (2020, 2025)
idx:  2020  Epoch:  113  loss:  175.40648  loss_sent:  2.6432881  loss_word:  162.19005  Time cost:  6.781388521194458
Start and end (2025, 2030)
idx:  2025  Epoch:  113  loss:  304.90347  loss_sent:  2.6047318  loss_word:  291.87985  Time cost:  6.81556248664856
Start and end (2030, 2035)
idx:  2030  Epoch:  113  loss:  292.72696  loss_sent:  2.522393  loss_word:  280.11505  Time cost:  6.7430219650268555
Start and end (2035, 2040)
idx:  2035  Epoch:  113  loss:  280.09937  loss_sent:  3.7900312  loss_word:  261.1492  Time cost:  6.873278379440308
Start and end (2040, 2045)
idx:  2040  Epoch:  113  loss:  335.56116  loss_sent:  4.172643  loss_word:  314.6979  Time cost:  6.6705427169799805
Start an

idx:  2295  Epoch:  113  loss:  229.55324  loss_sent:  2.1903324  loss_word:  218.6016  Time cost:  6.8650548458099365
Start and end (2300, 2305)
idx:  2300  Epoch:  113  loss:  263.84354  loss_sent:  2.1843386  loss_word:  252.92186  Time cost:  6.957227468490601
Start and end (2305, 2310)
idx:  2305  Epoch:  113  loss:  251.69379  loss_sent:  2.9693165  loss_word:  236.84721  Time cost:  7.066862344741821
Start and end (2310, 2315)
idx:  2310  Epoch:  113  loss:  318.12598  loss_sent:  2.467599  loss_word:  305.788  Time cost:  7.228207349777222
Start and end (2315, 2320)
idx:  2315  Epoch:  113  loss:  398.8962  loss_sent:  3.1844988  loss_word:  382.97372  Time cost:  7.4034583568573
Start and end (2320, 2325)
idx:  2320  Epoch:  113  loss:  284.66437  loss_sent:  3.6368477  loss_word:  266.4801  Time cost:  7.113168239593506
Start and end (2325, 2330)
idx:  2325  Epoch:  113  loss:  262.19263  loss_sent:  2.452576  loss_word:  249.92972  Time cost:  6.988577842712402
Start and end

W0724 10:09:09.613305 140197742454528 saver.py:1134] *******************************************************
W0724 10:09:09.613727 140197742454528 saver.py:1135] TensorFlow's V1 checkpoint format has been deprecated.
W0724 10:09:09.614110 140197742454528 saver.py:1136] Consider switching to the more efficient V2 format:
W0724 10:09:09.614464 140197742454528 saver.py:1137]    `tf.train.Saver(write_version=tf.train.SaverDef.V2)`
W0724 10:09:09.614828 140197742454528 saver.py:1138] now on by default.
W0724 10:09:09.615251 140197742454528 saver.py:1139] *******************************************************


idx:  2480  Epoch:  113  loss:  194.07774  loss_sent:  3.0841806  loss_word:  178.65685  Time cost:  6.697955131530762
Successfully Written to temporary
Epoch  113  is done. Saving the model ...
Start and end (0, 5)
idx:  0  Epoch:  114  loss:  263.93954  loss_sent:  2.1420796  loss_word:  253.22916  Time cost:  6.704392433166504
Start and end (5, 10)
idx:  5  Epoch:  114  loss:  278.46738  loss_sent:  2.3006606  loss_word:  266.96408  Time cost:  6.586405038833618
Start and end (10, 15)
idx:  10  Epoch:  114  loss:  273.84973  loss_sent:  2.351813  loss_word:  262.0906  Time cost:  6.475858449935913
Start and end (15, 20)
idx:  15  Epoch:  114  loss:  239.8149  loss_sent:  2.3208385  loss_word:  228.2107  Time cost:  6.626615762710571
Start and end (20, 25)
idx:  20  Epoch:  114  loss:  243.64311  loss_sent:  3.0705922  loss_word:  228.29016  Time cost:  6.4946980476379395
Start and end (25, 30)
idx:  25  Epoch:  114  loss:  269.88037  loss_sent:  2.7102666  loss_word:  256.3291  Time

idx:  285  Epoch:  114  loss:  197.1803  loss_sent:  2.495774  loss_word:  184.70142  Time cost:  6.5290350914001465
Start and end (290, 295)
idx:  290  Epoch:  114  loss:  234.1469  loss_sent:  3.3449595  loss_word:  217.4221  Time cost:  6.5470569133758545
Start and end (295, 300)
idx:  295  Epoch:  114  loss:  299.0423  loss_sent:  2.529513  loss_word:  286.39468  Time cost:  6.53415584564209
Start and end (300, 305)
idx:  300  Epoch:  114  loss:  301.73077  loss_sent:  1.9413693  loss_word:  292.02396  Time cost:  6.576545715332031
Start and end (305, 310)
idx:  305  Epoch:  114  loss:  184.5377  loss_sent:  2.8845356  loss_word:  170.11502  Time cost:  6.56409478187561
Start and end (310, 315)
idx:  310  Epoch:  114  loss:  343.08585  loss_sent:  1.7202566  loss_word:  334.48456  Time cost:  6.572478771209717
Start and end (315, 320)
idx:  315  Epoch:  114  loss:  281.5258  loss_sent:  2.2845075  loss_word:  270.10327  Time cost:  6.532787084579468
Start and end (320, 325)
idx:  3

idx:  575  Epoch:  114  loss:  268.87256  loss_sent:  2.0021124  loss_word:  258.86197  Time cost:  6.51788067817688
Start and end (580, 585)
idx:  580  Epoch:  114  loss:  263.12943  loss_sent:  2.4230154  loss_word:  251.01437  Time cost:  6.54920506477356
Start and end (585, 590)
idx:  585  Epoch:  114  loss:  274.0552  loss_sent:  2.0856848  loss_word:  263.62677  Time cost:  6.501956939697266
Start and end (590, 595)
idx:  590  Epoch:  114  loss:  216.9658  loss_sent:  2.1345117  loss_word:  206.29324  Time cost:  6.532183647155762
Start and end (595, 600)
idx:  595  Epoch:  114  loss:  249.91379  loss_sent:  2.2390366  loss_word:  238.7186  Time cost:  6.548753023147583
Start and end (600, 605)
idx:  600  Epoch:  114  loss:  271.1223  loss_sent:  2.6808283  loss_word:  257.71823  Time cost:  6.584240913391113
Start and end (605, 610)
idx:  605  Epoch:  114  loss:  230.58629  loss_sent:  2.5169287  loss_word:  218.00166  Time cost:  6.432028532028198
Start and end (610, 615)
idx: 

idx:  865  Epoch:  114  loss:  266.794  loss_sent:  2.092166  loss_word:  256.33316  Time cost:  6.466049432754517
Start and end (870, 875)
idx:  870  Epoch:  114  loss:  325.29248  loss_sent:  2.1743062  loss_word:  314.42096  Time cost:  6.581722021102905
Start and end (875, 880)
idx:  875  Epoch:  114  loss:  283.01608  loss_sent:  1.9532193  loss_word:  273.24997  Time cost:  6.553986310958862
Start and end (880, 885)
idx:  880  Epoch:  114  loss:  222.43887  loss_sent:  2.351213  loss_word:  210.68282  Time cost:  6.5137619972229
Start and end (885, 890)
idx:  885  Epoch:  114  loss:  331.18826  loss_sent:  2.2377372  loss_word:  319.99957  Time cost:  6.53497314453125
Start and end (890, 895)
idx:  890  Epoch:  114  loss:  211.98907  loss_sent:  2.9278407  loss_word:  197.34987  Time cost:  6.46548056602478
Start and end (895, 900)
idx:  895  Epoch:  114  loss:  348.43112  loss_sent:  2.1320174  loss_word:  337.77106  Time cost:  6.49746561050415
Start and end (900, 905)
idx:  90

idx:  1150  Epoch:  114  loss:  262.62073  loss_sent:  2.2934241  loss_word:  251.15364  Time cost:  6.529861927032471
Start and end (1155, 1160)
idx:  1155  Epoch:  114  loss:  337.6861  loss_sent:  2.56524  loss_word:  324.8598  Time cost:  6.5192718505859375
Start and end (1160, 1165)
idx:  1160  Epoch:  114  loss:  261.55286  loss_sent:  2.3616486  loss_word:  249.74458  Time cost:  6.536913871765137
Start and end (1165, 1170)
idx:  1165  Epoch:  114  loss:  208.38551  loss_sent:  2.6495814  loss_word:  195.13759  Time cost:  6.503604173660278
Start and end (1170, 1175)
idx:  1170  Epoch:  114  loss:  299.9939  loss_sent:  2.467617  loss_word:  287.6558  Time cost:  6.453360557556152
Start and end (1175, 1180)
idx:  1175  Epoch:  114  loss:  191.71312  loss_sent:  2.697415  loss_word:  178.22603  Time cost:  6.482064962387085
Start and end (1180, 1185)
idx:  1180  Epoch:  114  loss:  242.46872  loss_sent:  2.7831693  loss_word:  228.55287  Time cost:  6.427148103713989
Start and en

idx:  1435  Epoch:  114  loss:  242.57356  loss_sent:  2.8444788  loss_word:  228.35118  Time cost:  6.546400547027588
Start and end (1440, 1445)
idx:  1440  Epoch:  114  loss:  219.22083  loss_sent:  2.0856526  loss_word:  208.79254  Time cost:  6.458518743515015
Start and end (1445, 1450)
idx:  1445  Epoch:  114  loss:  277.6162  loss_sent:  2.5422952  loss_word:  264.90472  Time cost:  6.443972826004028
Start and end (1450, 1455)
idx:  1450  Epoch:  114  loss:  222.65004  loss_sent:  2.5101602  loss_word:  210.09921  Time cost:  6.51537823677063
Start and end (1455, 1460)
idx:  1455  Epoch:  114  loss:  219.38957  loss_sent:  2.6351683  loss_word:  206.21375  Time cost:  6.518052816390991
Start and end (1460, 1465)
idx:  1460  Epoch:  114  loss:  247.43489  loss_sent:  2.2211633  loss_word:  236.32907  Time cost:  6.404594421386719
Start and end (1465, 1470)
idx:  1465  Epoch:  114  loss:  226.21078  loss_sent:  2.1860561  loss_word:  215.2805  Time cost:  6.534444808959961
Start an

idx:  1720  Epoch:  114  loss:  184.20197  loss_sent:  2.5053036  loss_word:  171.67546  Time cost:  6.426969051361084
Start and end (1725, 1730)
idx:  1725  Epoch:  114  loss:  217.86493  loss_sent:  2.80241  loss_word:  203.85287  Time cost:  6.5381011962890625
Start and end (1730, 1735)
idx:  1730  Epoch:  114  loss:  279.04123  loss_sent:  2.4673183  loss_word:  266.70465  Time cost:  6.487589359283447
Start and end (1735, 1740)
idx:  1735  Epoch:  114  loss:  223.6926  loss_sent:  2.2132711  loss_word:  212.62624  Time cost:  6.472128868103027
Start and end (1740, 1745)
idx:  1740  Epoch:  114  loss:  264.97784  loss_sent:  2.2680159  loss_word:  253.63777  Time cost:  6.5431437492370605
Start and end (1745, 1750)
idx:  1745  Epoch:  114  loss:  304.29984  loss_sent:  3.1744597  loss_word:  288.4275  Time cost:  6.464349269866943
Start and end (1750, 1755)
idx:  1750  Epoch:  114  loss:  207.41975  loss_sent:  2.9163618  loss_word:  192.83795  Time cost:  6.548246622085571
Start a

idx:  2005  Epoch:  114  loss:  165.74709  loss_sent:  1.768556  loss_word:  156.9043  Time cost:  6.668513774871826
Start and end (2010, 2015)
idx:  2010  Epoch:  114  loss:  186.68106  loss_sent:  2.3915  loss_word:  174.72354  Time cost:  6.562763929367065
Start and end (2015, 2020)
idx:  2015  Epoch:  114  loss:  245.80042  loss_sent:  1.9716413  loss_word:  235.9422  Time cost:  6.631821393966675
Start and end (2020, 2025)
idx:  2020  Epoch:  114  loss:  206.98773  loss_sent:  2.2360296  loss_word:  195.80759  Time cost:  6.657048463821411
Start and end (2025, 2030)
idx:  2025  Epoch:  114  loss:  365.096  loss_sent:  2.5588017  loss_word:  352.30203  Time cost:  6.740338325500488
Start and end (2030, 2035)
idx:  2030  Epoch:  114  loss:  255.1575  loss_sent:  2.1449986  loss_word:  244.43254  Time cost:  7.778806686401367
Start and end (2035, 2040)
idx:  2035  Epoch:  114  loss:  216.9164  loss_sent:  2.3050852  loss_word:  205.39095  Time cost:  6.970146656036377
Start and end (

idx:  2290  Epoch:  114  loss:  230.2557  loss_sent:  2.71044  loss_word:  216.7035  Time cost:  6.685961484909058
Start and end (2295, 2300)
idx:  2295  Epoch:  114  loss:  234.54071  loss_sent:  2.6185203  loss_word:  221.44814  Time cost:  6.870643854141235
Start and end (2300, 2305)
idx:  2300  Epoch:  114  loss:  242.9182  loss_sent:  2.0337775  loss_word:  232.74933  Time cost:  6.708220958709717
Start and end (2305, 2310)
idx:  2305  Epoch:  114  loss:  187.96138  loss_sent:  2.4506953  loss_word:  175.7079  Time cost:  7.123295545578003
Start and end (2310, 2315)
idx:  2310  Epoch:  114  loss:  265.97354  loss_sent:  2.5586398  loss_word:  253.18044  Time cost:  7.031448602676392
Start and end (2315, 2320)
idx:  2315  Epoch:  114  loss:  265.67853  loss_sent:  2.0783768  loss_word:  255.28664  Time cost:  7.296682119369507
Start and end (2320, 2325)
idx:  2320  Epoch:  114  loss:  286.40198  loss_sent:  1.6958834  loss_word:  277.92258  Time cost:  6.808310270309448
Start and e

W0724 11:04:20.977010 140197742454528 saver.py:1134] *******************************************************
W0724 11:04:20.977496 140197742454528 saver.py:1135] TensorFlow's V1 checkpoint format has been deprecated.
W0724 11:04:20.978115 140197742454528 saver.py:1136] Consider switching to the more efficient V2 format:
W0724 11:04:20.978400 140197742454528 saver.py:1137]    `tf.train.Saver(write_version=tf.train.SaverDef.V2)`
W0724 11:04:20.978742 140197742454528 saver.py:1138] now on by default.
W0724 11:04:20.979074 140197742454528 saver.py:1139] *******************************************************


idx:  2480  Epoch:  114  loss:  193.55753  loss_sent:  3.17797  loss_word:  177.6677  Time cost:  6.714325189590454
Successfully Written to temporary
Epoch  114  is done. Saving the model ...
Start and end (0, 5)
idx:  0  Epoch:  115  loss:  264.27545  loss_sent:  1.8729893  loss_word:  254.91055  Time cost:  6.634373664855957
Start and end (5, 10)
idx:  5  Epoch:  115  loss:  316.0386  loss_sent:  1.6690073  loss_word:  307.69357  Time cost:  6.540422201156616
Start and end (10, 15)
idx:  10  Epoch:  115  loss:  215.32098  loss_sent:  2.0326111  loss_word:  205.15793  Time cost:  6.662522792816162
Start and end (15, 20)
idx:  15  Epoch:  115  loss:  302.78003  loss_sent:  1.5698578  loss_word:  294.93073  Time cost:  6.666482210159302
Start and end (20, 25)
idx:  20  Epoch:  115  loss:  192.62936  loss_sent:  1.8083738  loss_word:  183.5875  Time cost:  6.59878134727478
Start and end (25, 30)
idx:  25  Epoch:  115  loss:  226.66733  loss_sent:  2.0441759  loss_word:  216.44643  Time c

idx:  285  Epoch:  115  loss:  227.10129  loss_sent:  1.4031343  loss_word:  220.08563  Time cost:  6.544416904449463
Start and end (290, 295)
idx:  290  Epoch:  115  loss:  199.70818  loss_sent:  2.322212  loss_word:  188.0971  Time cost:  6.550437688827515
Start and end (295, 300)
idx:  295  Epoch:  115  loss:  225.34775  loss_sent:  1.97396  loss_word:  215.47795  Time cost:  6.54757022857666
Start and end (300, 305)
idx:  300  Epoch:  115  loss:  226.48012  loss_sent:  1.9856952  loss_word:  216.55164  Time cost:  6.5871922969818115
Start and end (305, 310)
idx:  305  Epoch:  115  loss:  190.08072  loss_sent:  1.5730655  loss_word:  182.21541  Time cost:  6.548961877822876
Start and end (310, 315)
idx:  310  Epoch:  115  loss:  210.22987  loss_sent:  2.1646614  loss_word:  199.40656  Time cost:  6.551495313644409
Start and end (315, 320)
idx:  315  Epoch:  115  loss:  313.97433  loss_sent:  2.5659733  loss_word:  301.14447  Time cost:  6.514719247817993
Start and end (320, 325)
idx

idx:  575  Epoch:  115  loss:  242.19508  loss_sent:  1.8207023  loss_word:  233.09157  Time cost:  6.610602378845215
Start and end (580, 585)
idx:  580  Epoch:  115  loss:  294.96393  loss_sent:  1.8543894  loss_word:  285.692  Time cost:  6.532600164413452
Start and end (585, 590)
idx:  585  Epoch:  115  loss:  232.20868  loss_sent:  2.1417499  loss_word:  221.49991  Time cost:  6.608145713806152
Start and end (590, 595)
idx:  590  Epoch:  115  loss:  228.55351  loss_sent:  3.4392667  loss_word:  211.35718  Time cost:  6.562000036239624
Start and end (595, 600)
idx:  595  Epoch:  115  loss:  259.59753  loss_sent:  1.8479135  loss_word:  250.35796  Time cost:  6.580857276916504
Start and end (600, 605)
idx:  600  Epoch:  115  loss:  211.09381  loss_sent:  2.0770216  loss_word:  200.70871  Time cost:  6.557347059249878
Start and end (605, 610)
idx:  605  Epoch:  115  loss:  280.23077  loss_sent:  1.9085093  loss_word:  270.68823  Time cost:  6.489459276199341
Start and end (610, 615)
i

idx:  865  Epoch:  115  loss:  213.87404  loss_sent:  1.6006912  loss_word:  205.87059  Time cost:  6.573620080947876
Start and end (870, 875)
idx:  870  Epoch:  115  loss:  194.88557  loss_sent:  1.8284088  loss_word:  185.7435  Time cost:  6.560470819473267
Start and end (875, 880)
idx:  875  Epoch:  115  loss:  203.58351  loss_sent:  1.7108375  loss_word:  195.02933  Time cost:  6.61773157119751
Start and end (880, 885)
idx:  880  Epoch:  115  loss:  228.98384  loss_sent:  1.8646425  loss_word:  219.66063  Time cost:  6.540525436401367
Start and end (885, 890)
idx:  885  Epoch:  115  loss:  178.51343  loss_sent:  1.3977183  loss_word:  171.52484  Time cost:  6.562628984451294
Start and end (890, 895)
idx:  890  Epoch:  115  loss:  241.43037  loss_sent:  2.4066086  loss_word:  229.39732  Time cost:  6.578070163726807
Start and end (895, 900)
idx:  895  Epoch:  115  loss:  237.60583  loss_sent:  1.4297636  loss_word:  230.45702  Time cost:  6.531609296798706
Start and end (900, 905)
i

idx:  1150  Epoch:  115  loss:  241.41997  loss_sent:  1.5853776  loss_word:  233.49309  Time cost:  6.558082818984985
Start and end (1155, 1160)
idx:  1155  Epoch:  115  loss:  335.17902  loss_sent:  1.76161  loss_word:  326.37097  Time cost:  6.557438135147095
Start and end (1160, 1165)
idx:  1160  Epoch:  115  loss:  247.49905  loss_sent:  1.9415225  loss_word:  237.79144  Time cost:  6.608539342880249
Start and end (1165, 1170)
idx:  1165  Epoch:  115  loss:  228.19238  loss_sent:  2.186231  loss_word:  217.26125  Time cost:  6.610917091369629
Start and end (1170, 1175)
idx:  1170  Epoch:  115  loss:  343.43307  loss_sent:  1.8384043  loss_word:  334.24106  Time cost:  6.609920978546143
Start and end (1175, 1180)
idx:  1175  Epoch:  115  loss:  150.5342  loss_sent:  1.4695367  loss_word:  143.1865  Time cost:  6.611990928649902
Start and end (1180, 1185)
idx:  1180  Epoch:  115  loss:  206.59203  loss_sent:  1.7988496  loss_word:  197.59776  Time cost:  6.481385707855225
Start and 

idx:  1435  Epoch:  115  loss:  223.71596  loss_sent:  3.4284072  loss_word:  206.57393  Time cost:  6.543074607849121
Start and end (1440, 1445)
idx:  1440  Epoch:  115  loss:  223.51949  loss_sent:  1.7686872  loss_word:  214.67606  Time cost:  6.5240864753723145
Start and end (1445, 1450)
idx:  1445  Epoch:  115  loss:  251.88718  loss_sent:  1.6569717  loss_word:  243.6023  Time cost:  6.531020879745483
Start and end (1450, 1455)
idx:  1450  Epoch:  115  loss:  202.00087  loss_sent:  1.3510231  loss_word:  195.24576  Time cost:  6.475329875946045
Start and end (1455, 1460)
idx:  1455  Epoch:  115  loss:  209.91248  loss_sent:  1.7041551  loss_word:  201.3917  Time cost:  6.44689154624939
Start and end (1460, 1465)
idx:  1460  Epoch:  115  loss:  251.84239  loss_sent:  1.9209813  loss_word:  242.23749  Time cost:  6.525359392166138
Start and end (1465, 1470)
idx:  1465  Epoch:  115  loss:  284.86087  loss_sent:  1.7404497  loss_word:  276.15863  Time cost:  6.439506530761719
Start a

idx:  1720  Epoch:  115  loss:  235.7444  loss_sent:  2.37408  loss_word:  223.87398  Time cost:  6.526697397232056
Start and end (1725, 1730)
idx:  1725  Epoch:  115  loss:  296.78937  loss_sent:  2.088348  loss_word:  286.34766  Time cost:  6.57950496673584
Start and end (1730, 1735)
idx:  1730  Epoch:  115  loss:  252.71794  loss_sent:  1.6555738  loss_word:  244.44008  Time cost:  6.545632839202881
Start and end (1735, 1740)
idx:  1735  Epoch:  115  loss:  241.51262  loss_sent:  2.200041  loss_word:  230.51239  Time cost:  6.518685579299927
Start and end (1740, 1745)
idx:  1740  Epoch:  115  loss:  202.64272  loss_sent:  2.0387526  loss_word:  192.44894  Time cost:  6.5222368240356445
Start and end (1745, 1750)
idx:  1745  Epoch:  115  loss:  264.62445  loss_sent:  1.9806218  loss_word:  254.72136  Time cost:  6.586141109466553
Start and end (1750, 1755)
idx:  1750  Epoch:  115  loss:  238.78598  loss_sent:  1.5175191  loss_word:  231.19838  Time cost:  6.536270618438721
Start and 

idx:  2005  Epoch:  115  loss:  218.5558  loss_sent:  2.0246706  loss_word:  208.43242  Time cost:  6.446069955825806
Start and end (2010, 2015)
idx:  2010  Epoch:  115  loss:  251.82864  loss_sent:  1.9265916  loss_word:  242.19571  Time cost:  6.560038328170776
Start and end (2015, 2020)
idx:  2015  Epoch:  115  loss:  214.91997  loss_sent:  1.568485  loss_word:  207.07753  Time cost:  6.5617899894714355
Start and end (2020, 2025)
idx:  2020  Epoch:  115  loss:  220.8105  loss_sent:  1.84816  loss_word:  211.56972  Time cost:  6.567040920257568
Start and end (2025, 2030)
idx:  2025  Epoch:  115  loss:  301.48276  loss_sent:  1.3959672  loss_word:  294.50287  Time cost:  6.5800440311431885
Start and end (2030, 2035)
idx:  2030  Epoch:  115  loss:  201.01833  loss_sent:  2.3664014  loss_word:  189.1863  Time cost:  6.528951168060303
Start and end (2035, 2040)
idx:  2035  Epoch:  115  loss:  242.8575  loss_sent:  1.751348  loss_word:  234.10077  Time cost:  6.444778919219971
Start and e

idx:  2290  Epoch:  115  loss:  286.65564  loss_sent:  2.1584187  loss_word:  275.86353  Time cost:  7.011322975158691
Start and end (2295, 2300)
idx:  2295  Epoch:  115  loss:  214.82559  loss_sent:  1.9687979  loss_word:  204.98161  Time cost:  6.668293476104736
Start and end (2300, 2305)
idx:  2300  Epoch:  115  loss:  229.82474  loss_sent:  2.5597181  loss_word:  217.02614  Time cost:  6.835778474807739
Start and end (2305, 2310)
idx:  2305  Epoch:  115  loss:  278.28897  loss_sent:  1.5750645  loss_word:  270.41367  Time cost:  6.6488142013549805
Start and end (2310, 2315)
idx:  2310  Epoch:  115  loss:  257.2102  loss_sent:  2.1851792  loss_word:  246.28429  Time cost:  6.523748159408569
Start and end (2315, 2320)
idx:  2315  Epoch:  115  loss:  284.06726  loss_sent:  1.8730042  loss_word:  274.7022  Time cost:  6.595515966415405
Start and end (2320, 2325)
idx:  2320  Epoch:  115  loss:  244.36913  loss_sent:  1.9575689  loss_word:  234.58128  Time cost:  6.668626070022583
Start 

W0724 11:58:59.870782 140197742454528 saver.py:1134] *******************************************************
W0724 11:58:59.871258 140197742454528 saver.py:1135] TensorFlow's V1 checkpoint format has been deprecated.
W0724 11:58:59.871570 140197742454528 saver.py:1136] Consider switching to the more efficient V2 format:
W0724 11:58:59.871879 140197742454528 saver.py:1137]    `tf.train.Saver(write_version=tf.train.SaverDef.V2)`
W0724 11:58:59.872207 140197742454528 saver.py:1138] now on by default.
W0724 11:58:59.872504 140197742454528 saver.py:1139] *******************************************************


idx:  2480  Epoch:  115  loss:  274.12943  loss_sent:  3.4450078  loss_word:  256.9043  Time cost:  6.557159900665283
Successfully Written to temporary
Epoch  115  is done. Saving the model ...
Start and end (0, 5)
idx:  0  Epoch:  116  loss:  214.57721  loss_sent:  1.4643154  loss_word:  207.25565  Time cost:  6.5497119426727295
Start and end (5, 10)
idx:  5  Epoch:  116  loss:  226.42267  loss_sent:  1.5222309  loss_word:  218.81151  Time cost:  6.793993949890137
Start and end (10, 15)
idx:  10  Epoch:  116  loss:  236.79851  loss_sent:  1.5173012  loss_word:  229.21199  Time cost:  6.813993453979492
Start and end (15, 20)
idx:  15  Epoch:  116  loss:  248.33698  loss_sent:  1.5474281  loss_word:  240.59984  Time cost:  6.807343244552612
Start and end (20, 25)
idx:  20  Epoch:  116  loss:  276.98434  loss_sent:  1.3671223  loss_word:  270.1487  Time cost:  6.736278772354126
Start and end (25, 30)
idx:  25  Epoch:  116  loss:  165.45421  loss_sent:  2.9490829  loss_word:  150.70883  T

idx:  285  Epoch:  116  loss:  256.75812  loss_sent:  1.4917855  loss_word:  249.29918  Time cost:  6.836450815200806
Start and end (290, 295)
idx:  290  Epoch:  116  loss:  235.51767  loss_sent:  1.3905865  loss_word:  228.56473  Time cost:  6.8856635093688965
Start and end (295, 300)
idx:  295  Epoch:  116  loss:  296.29495  loss_sent:  1.5092947  loss_word:  288.7485  Time cost:  6.780900716781616
Start and end (300, 305)
idx:  300  Epoch:  116  loss:  285.37378  loss_sent:  1.2355509  loss_word:  279.196  Time cost:  6.6342785358428955
Start and end (305, 310)
idx:  305  Epoch:  116  loss:  310.14587  loss_sent:  1.8777735  loss_word:  300.757  Time cost:  6.624628305435181
Start and end (310, 315)
idx:  310  Epoch:  116  loss:  204.18013  loss_sent:  1.4008405  loss_word:  197.17592  Time cost:  6.641436815261841
Start and end (315, 320)
idx:  315  Epoch:  116  loss:  274.6104  loss_sent:  1.4802868  loss_word:  267.20898  Time cost:  6.650635480880737
Start and end (320, 325)
idx

idx:  575  Epoch:  116  loss:  257.10095  loss_sent:  1.2532676  loss_word:  250.83458  Time cost:  6.732599496841431
Start and end (580, 585)
idx:  580  Epoch:  116  loss:  306.0624  loss_sent:  1.7369485  loss_word:  297.37766  Time cost:  6.686900854110718
Start and end (585, 590)
idx:  585  Epoch:  116  loss:  261.5146  loss_sent:  1.2504791  loss_word:  255.26222  Time cost:  6.701721429824829
Start and end (590, 595)
idx:  590  Epoch:  116  loss:  213.55055  loss_sent:  1.3869649  loss_word:  206.61574  Time cost:  6.690624237060547
Start and end (595, 600)
idx:  595  Epoch:  116  loss:  160.78584  loss_sent:  1.4998825  loss_word:  153.28645  Time cost:  6.725364923477173
Start and end (600, 605)
idx:  600  Epoch:  116  loss:  218.67072  loss_sent:  1.2048901  loss_word:  212.64627  Time cost:  7.337717294692993
Start and end (605, 610)
idx:  605  Epoch:  116  loss:  247.7607  loss_sent:  1.4544078  loss_word:  240.48866  Time cost:  6.927732229232788
Start and end (610, 615)
id

idx:  865  Epoch:  116  loss:  238.18073  loss_sent:  1.6185447  loss_word:  230.08801  Time cost:  6.631990194320679
Start and end (870, 875)
idx:  870  Epoch:  116  loss:  248.19334  loss_sent:  1.9669595  loss_word:  238.35857  Time cost:  6.60932993888855
Start and end (875, 880)
idx:  875  Epoch:  116  loss:  188.81473  loss_sent:  1.04707  loss_word:  183.57938  Time cost:  6.625004291534424
Start and end (880, 885)
idx:  880  Epoch:  116  loss:  186.78175  loss_sent:  1.8117179  loss_word:  177.72316  Time cost:  6.567403078079224
Start and end (885, 890)
idx:  885  Epoch:  116  loss:  263.43686  loss_sent:  2.1137187  loss_word:  252.86833  Time cost:  6.707356214523315
Start and end (890, 895)
idx:  890  Epoch:  116  loss:  250.62712  loss_sent:  1.2053692  loss_word:  244.60023  Time cost:  6.8424506187438965
Start and end (895, 900)
idx:  895  Epoch:  116  loss:  191.50798  loss_sent:  1.3474648  loss_word:  184.77068  Time cost:  6.56720495223999
Start and end (900, 905)
id

idx:  1150  Epoch:  116  loss:  170.81062  loss_sent:  1.1400726  loss_word:  165.11026  Time cost:  6.846503257751465
Start and end (1155, 1160)
idx:  1155  Epoch:  116  loss:  195.16835  loss_sent:  1.4004796  loss_word:  188.16594  Time cost:  6.690985441207886
Start and end (1160, 1165)
idx:  1160  Epoch:  116  loss:  205.12071  loss_sent:  1.6420488  loss_word:  196.91048  Time cost:  6.809703826904297
Start and end (1165, 1170)
idx:  1165  Epoch:  116  loss:  184.79576  loss_sent:  1.7277169  loss_word:  176.15717  Time cost:  6.754482984542847
Start and end (1170, 1175)
idx:  1170  Epoch:  116  loss:  224.18658  loss_sent:  1.823055  loss_word:  215.0713  Time cost:  6.888874769210815
Start and end (1175, 1180)
idx:  1175  Epoch:  116  loss:  175.02159  loss_sent:  1.4687452  loss_word:  167.67786  Time cost:  6.602054119110107
Start and end (1180, 1185)
idx:  1180  Epoch:  116  loss:  203.1011  loss_sent:  1.573884  loss_word:  195.23167  Time cost:  6.970076322555542
Start and

idx:  1435  Epoch:  116  loss:  247.74803  loss_sent:  1.1351839  loss_word:  242.07213  Time cost:  6.591799020767212
Start and end (1440, 1445)
idx:  1440  Epoch:  116  loss:  347.86484  loss_sent:  1.97542  loss_word:  337.98776  Time cost:  6.598008155822754
Start and end (1445, 1450)
idx:  1445  Epoch:  116  loss:  256.5285  loss_sent:  2.0402637  loss_word:  246.3272  Time cost:  6.5604774951934814
Start and end (1450, 1455)
idx:  1450  Epoch:  116  loss:  218.16039  loss_sent:  2.1161335  loss_word:  207.57974  Time cost:  6.600542068481445
Start and end (1455, 1460)
idx:  1455  Epoch:  116  loss:  309.77814  loss_sent:  1.6951352  loss_word:  301.30237  Time cost:  6.607466697692871
Start and end (1460, 1465)
idx:  1460  Epoch:  116  loss:  271.83838  loss_sent:  1.6561826  loss_word:  263.55743  Time cost:  6.58764386177063
Start and end (1465, 1470)
idx:  1465  Epoch:  116  loss:  229.43634  loss_sent:  1.6139278  loss_word:  221.36671  Time cost:  6.672659397125244
Start and

idx:  1720  Epoch:  116  loss:  260.91483  loss_sent:  1.2647294  loss_word:  254.59122  Time cost:  6.5714874267578125
Start and end (1725, 1730)
idx:  1725  Epoch:  116  loss:  153.15689  loss_sent:  1.4084318  loss_word:  146.11472  Time cost:  6.603158473968506
Start and end (1730, 1735)
idx:  1730  Epoch:  116  loss:  303.88687  loss_sent:  1.9322457  loss_word:  294.22562  Time cost:  6.533473491668701
Start and end (1735, 1740)
idx:  1735  Epoch:  116  loss:  197.74818  loss_sent:  2.015944  loss_word:  187.66849  Time cost:  6.69771146774292
Start and end (1740, 1745)
idx:  1740  Epoch:  116  loss:  261.70065  loss_sent:  1.5242091  loss_word:  254.07964  Time cost:  6.621786832809448
Start and end (1745, 1750)
idx:  1745  Epoch:  116  loss:  176.10977  loss_sent:  1.3488815  loss_word:  169.36536  Time cost:  6.638092041015625
Start and end (1750, 1755)
idx:  1750  Epoch:  116  loss:  290.11472  loss_sent:  1.1757708  loss_word:  284.23584  Time cost:  6.485036134719849
Start 

idx:  2005  Epoch:  116  loss:  174.81937  loss_sent:  1.1781232  loss_word:  168.92874  Time cost:  6.6687092781066895
Start and end (2010, 2015)
idx:  2010  Epoch:  116  loss:  156.54439  loss_sent:  1.7322464  loss_word:  147.88313  Time cost:  6.684261798858643
Start and end (2015, 2020)
idx:  2015  Epoch:  116  loss:  311.5389  loss_sent:  2.0236783  loss_word:  301.4205  Time cost:  6.641899824142456
Start and end (2020, 2025)
idx:  2020  Epoch:  116  loss:  252.16397  loss_sent:  2.2485445  loss_word:  240.92126  Time cost:  6.538182973861694
Start and end (2025, 2030)
idx:  2025  Epoch:  116  loss:  219.28587  loss_sent:  1.4650816  loss_word:  211.96045  Time cost:  6.5297887325286865
Start and end (2030, 2035)
idx:  2030  Epoch:  116  loss:  148.0013  loss_sent:  1.6819333  loss_word:  139.59163  Time cost:  6.582320928573608
Start and end (2035, 2040)
idx:  2035  Epoch:  116  loss:  219.84068  loss_sent:  1.2117935  loss_word:  213.78174  Time cost:  6.700220823287964
Start 

idx:  2290  Epoch:  116  loss:  272.47754  loss_sent:  1.2879571  loss_word:  266.03778  Time cost:  6.537327527999878
Start and end (2295, 2300)
idx:  2295  Epoch:  116  loss:  181.50351  loss_sent:  1.5086969  loss_word:  173.96002  Time cost:  6.617474555969238
Start and end (2300, 2305)
idx:  2300  Epoch:  116  loss:  178.2933  loss_sent:  2.01723  loss_word:  168.20715  Time cost:  6.513428449630737
Start and end (2305, 2310)
idx:  2305  Epoch:  116  loss:  196.03473  loss_sent:  1.5574759  loss_word:  188.24736  Time cost:  6.675695419311523
Start and end (2310, 2315)
idx:  2310  Epoch:  116  loss:  226.78693  loss_sent:  1.2810957  loss_word:  220.38147  Time cost:  6.700114727020264
Start and end (2315, 2320)
idx:  2315  Epoch:  116  loss:  197.16026  loss_sent:  1.3686907  loss_word:  190.3168  Time cost:  6.713260650634766
Start and end (2320, 2325)
idx:  2320  Epoch:  116  loss:  155.27032  loss_sent:  1.4687889  loss_word:  147.92638  Time cost:  6.697458982467651
Start and

W0724 12:54:25.701179 140197742454528 saver.py:1134] *******************************************************
W0724 12:54:25.701931 140197742454528 saver.py:1135] TensorFlow's V1 checkpoint format has been deprecated.
W0724 12:54:25.703800 140197742454528 saver.py:1136] Consider switching to the more efficient V2 format:
W0724 12:54:25.704243 140197742454528 saver.py:1137]    `tf.train.Saver(write_version=tf.train.SaverDef.V2)`
W0724 12:54:25.705804 140197742454528 saver.py:1138] now on by default.
W0724 12:54:25.706351 140197742454528 saver.py:1139] *******************************************************


Successfully Written to temporary
Epoch  116  is done. Saving the model ...
Start and end (0, 5)
idx:  0  Epoch:  117  loss:  306.0431  loss_sent:  2.2035294  loss_word:  295.02545  Time cost:  6.699774265289307
Start and end (5, 10)
idx:  5  Epoch:  117  loss:  195.7753  loss_sent:  2.1817558  loss_word:  184.86652  Time cost:  6.577568769454956
Start and end (10, 15)
idx:  10  Epoch:  117  loss:  254.49966  loss_sent:  1.750996  loss_word:  245.74469  Time cost:  6.66348123550415
Start and end (15, 20)
idx:  15  Epoch:  117  loss:  252.05351  loss_sent:  1.5523162  loss_word:  244.29192  Time cost:  6.678164482116699
Start and end (20, 25)
idx:  20  Epoch:  117  loss:  270.15604  loss_sent:  1.5204961  loss_word:  262.55353  Time cost:  6.657594442367554
Start and end (25, 30)
idx:  25  Epoch:  117  loss:  266.9403  loss_sent:  1.2484947  loss_word:  260.69788  Time cost:  6.559634447097778
Start and end (30, 35)
idx:  30  Epoch:  117  loss:  144.37234  loss_sent:  0.9423062  loss_wo

idx:  290  Epoch:  117  loss:  141.43349  loss_sent:  1.2407072  loss_word:  135.22993  Time cost:  6.52098274230957
Start and end (295, 300)
idx:  295  Epoch:  117  loss:  209.18544  loss_sent:  1.1002694  loss_word:  203.68408  Time cost:  6.543124437332153
Start and end (300, 305)
idx:  300  Epoch:  117  loss:  143.52861  loss_sent:  0.9501236  loss_word:  138.77802  Time cost:  6.546809196472168
Start and end (305, 310)
idx:  305  Epoch:  117  loss:  183.43944  loss_sent:  1.4701047  loss_word:  176.08891  Time cost:  6.514309883117676
Start and end (310, 315)
idx:  310  Epoch:  117  loss:  207.7017  loss_sent:  1.2908405  loss_word:  201.24748  Time cost:  6.60737419128418
Start and end (315, 320)
idx:  315  Epoch:  117  loss:  104.48891  loss_sent:  1.2189769  loss_word:  98.394035  Time cost:  6.553439140319824
Start and end (320, 325)
idx:  320  Epoch:  117  loss:  220.7632  loss_sent:  1.0452548  loss_word:  215.53693  Time cost:  6.708653450012207
Start and end (325, 330)
idx

idx:  580  Epoch:  117  loss:  284.6357  loss_sent:  1.0177848  loss_word:  279.54678  Time cost:  6.750156879425049
Start and end (585, 590)
idx:  585  Epoch:  117  loss:  271.42517  loss_sent:  1.4857883  loss_word:  263.99622  Time cost:  6.945807218551636
Start and end (590, 595)
idx:  590  Epoch:  117  loss:  215.48586  loss_sent:  1.1522871  loss_word:  209.7244  Time cost:  6.81252384185791
Start and end (595, 600)
idx:  595  Epoch:  117  loss:  214.94214  loss_sent:  1.2504609  loss_word:  208.68983  Time cost:  6.684616327285767
Start and end (600, 605)
idx:  600  Epoch:  117  loss:  252.13863  loss_sent:  1.2400454  loss_word:  245.9384  Time cost:  6.706771612167358
Start and end (605, 610)
idx:  605  Epoch:  117  loss:  172.0123  loss_sent:  1.0169923  loss_word:  166.92737  Time cost:  6.979783535003662
Start and end (610, 615)
idx:  610  Epoch:  117  loss:  285.20636  loss_sent:  1.1862113  loss_word:  279.27527  Time cost:  7.063607692718506
Start and end (615, 620)
idx:

KeyboardInterrupt: 