In [1]:
import numpy as np
import os
import tensorflow as tf
import random
import re
from time import sleep
import math
import datetime
import time
import pickle
import layers
import functions
import decoder
import encoder
from dataset import PubMed_Dataset
import data_batcher
import models
import summarizer
import data_utils
from sys import stdout
from vocab import Vocab_Lookup
from rouge import Rouge

In [3]:
# All artefacts (logs, weights, hyperparameter pickles, cached data) live under ./PubMed,
# resolved relative to the directory the notebook is launched from.
meta_dir = os.path.join(os.getcwd(), 'PubMed')
log_dir = os.path.join(meta_dir, 'logs')
weights_dir = os.path.join(meta_dir, 'weights')
params_dir = os.path.join(meta_dir, 'params')
data_dir = os.path.join(meta_dir, 'data_cache')

# NOTE(review): pickle.load executes arbitrary code from the file; only load pickles produced by
# this project's own training runs.

# Pretrained coherence classifier: checkpoint prefix plus the hyperparameters it was built with.
coherence_ckpt_file = os.path.join(weights_dir, 'RNN_Classifier_checkpoints_20171117_114517',
                                   'RNN_Classifier_weights_epoch_0_itr_38000')
with open(os.path.join(params_dir, 'RNN_Classifier_params_20171117_114517.pickle'), 'rb') as params_file:
    coherence_params = pickle.load(params_file)

# Pretrained summarizer: checkpoint prefix plus the hyperparameters it was built with.
summarize_ckpt_file = os.path.join(weights_dir, 'Seq2Seq_Basic_Attn_checkpoints_20171116_151612',
                                   'Seq2Seq_Basic_Attn_weights_epoch_1_itr_130000')
with open(os.path.join(params_dir, 'Seq2Seq_Basic_Attn_params_20171116_151612.pickle'), 'rb') as params_file:
    summarize_params = pickle.load(params_file)

In [5]:
# Vocabulary lookup object (presumably a pickled Vocab_Lookup built over 30000 words — confirm);
# opened via a context manager so the file handle is closed once unpickling finishes.
with open(os.path.join(meta_dir, "vocab_lookup_30000.pickle"), "rb") as vocab_file:
    vocab_lookup = pickle.load(vocab_file)

In [6]:
# Sort cached partition files into train / val / test lists by substring match on the filename.
# os.listdir returns entries in arbitrary order, so the listing is sorted to make the partition
# order (and therefore the order training data is consumed in) reproducible across runs.
train_files = []
val_files = []
test_files = []
for filename in sorted(os.listdir(data_dir)):
    partition_path = os.path.join(data_dir, filename)
    # The first matching keyword wins, in the order train -> val -> test.
    if 'train' in filename:
        train_files.append(partition_path)
    elif 'val' in filename:
        val_files.append(partition_path)
    elif 'test' in filename:
        test_files.append(partition_path)

In [7]:
def data_partition_loader(partition_files):
    """Lazily yield the unpickled contents of each partition file, in list order.

    Args:
        partition_files: sequence of paths to pickle files.

    Yields:
        The object stored in each pickle file, one file per ``next()`` call.

    The generator finishes normally (``StopIteration``) once every file has been
    yielded, instead of running off the end of the list with an ``IndexError``.
    Each file is closed as soon as it has been read.
    """
    for partition_file in partition_files:
        # NOTE(review): pickle.load runs arbitrary code from the file; only feed trusted caches.
        with open(partition_file, 'rb') as handle:
            partition = pickle.load(handle)
        yield partition

In [8]:
# One lazy partition loader per split, created in train / val / test order.
train_partition_loader, val_partition_loader, test_partition_loader = (
    data_partition_loader(split_files) for split_files in (train_files, val_files, test_files)
)

# Load the first partition of every split up front so the batchers can be built right away.
train_data, val_data, test_data = (
    next(split_loader)
    for split_loader in (train_partition_loader, val_partition_loader, test_partition_loader)
)

In [9]:
batch_size = 128

# Full-size batchers over the first partition of each split.
train_batcher, val_batcher, test_batcher = (
    data_batcher.Data_Batcher(split_data, batch_size)
    for split_data in (train_data, val_data, test_data)
)
# Single-example batcher over the validation partition, used for qualitative sample dumps.
deploy_batcher = data_batcher.Data_Batcher(val_data, 1)

In [10]:
# --- Architecture: read from the pretrained summarizer's saved hyperparameters ---
d_pad_len, s_pad_len = summarize_params['max_enc_len'], summarize_params['max_dec_len']
embd_dim, hidden_size, n_layers = (
    summarize_params[name] for name in ('embedding_dim', 'hidden_size', 'n_layers')
)
bidirectional, shared_embeddings, weight_tying = (
    summarize_params[name] for name in ('bidirectional', 'shared_embeddings', 'weight_tying')
)
vocab_size = vocab_lookup.num_words
pretrained_embeddings = None

# --- Regularisation / teacher-forcing schedule ---
dropout_keep_prob = 0.8
teacher_forcing_ratios = [1.0]
teacher_forcing_steps = [1]

# --- Loop cadence (all measured in training iterations) ---
n_iters = 200000
display_interval = 100
val_interval = 1000
deploy_interval = 1000

# --- Optimisation and runtime ---
lr = 0.0001
DEVICE = 1
USE_CUDA = True
DEBUG_MODE = False

In [28]:
from nltk.tokenize import ToktokTokenizer
# Module-level tokenizer instance; convert_text_to_ids passes it to data_utils.tokenize_sentences.
toktok = ToktokTokenizer()
# Module-level ROUGE scorer; calc_rouge_reward and the training-loop metrics call rouge.get_scores.
rouge = Rouge()

def convert_text_to_ids(text, word2id_dict, pad_len):
    """Tokenize `text` and map it to exactly `pad_len` vocabulary ids.

    Args:
        text: raw text, tokenized with data_utils.tokenize_sentences and the shared `toktok` tokenizer.
        word2id_dict: mapping from token to id; must contain 'UNK' whenever an unknown
            token occurs and 'PAD' whenever padding is needed.
        pad_len: length of the returned id list.

    Returns:
        A list of `pad_len` ids: unknown tokens become the 'UNK' id, short inputs are
        right-padded with the 'PAD' id and long inputs are truncated.
    """
    tokenized = data_utils.tokenize_sentences(text, tokenizer=toktok)
    ids = []
    for word in tokenized:
        try:
            idx = word2id_dict[word]
        except KeyError:
            # Out-of-vocabulary token.
            idx = word2id_dict['UNK']
        ids.append(idx)
    if len(ids) < pad_len:
        # Pad from the mapping that was passed in (the previous code referenced an undefined global).
        ids += [word2id_dict['PAD']] * (pad_len - len(ids))
    elif len(ids) > pad_len:
        ids = ids[:pad_len]
    return ids

def truncate_at_eos(string):
    """Cut a space-delimited token string right after its first 'EOS' token.

    The 'EOS' token itself is kept. A string without an 'EOS' token is returned unchanged.
    """
    tokens = string.split(' ')
    if 'EOS' in tokens:
        tokens = tokens[:tokens.index('EOS') + 1]
    return ' '.join(tokens)

def ids_to_text(ids, vocab_lookup):
    """Decode a batch of id sequences into space-joined strings cut at the first 'EOS'.

    Args:
        ids: iterable of id sequences.
        vocab_lookup: object exposing convert_id2word(id) -> token.

    Returns:
        One string per input sequence, truncated with truncate_at_eos.
    """
    decoded = []
    for tokens in ids:
        words = [vocab_lookup.convert_id2word(idx) for idx in tokens]
        decoded.append(truncate_at_eos(' '.join(words)))
    return decoded

def text_to_ids(texts, vocab_lookup, pad_len):
    """Encode space-delimited texts as id sequences of length `pad_len`.

    Each text is split on single spaces, mapped through vocab_lookup.convert_word2id and
    given a trailing 'EOS' id. Sequences shorter than `pad_len` are right-padded with the
    'PAD' id; longer ones keep their first `pad_len - 1` ids followed by a final 'EOS' id.

    Returns:
        A list with one id list per input text.
    """
    encoded = []
    for text in texts:
        seq = [vocab_lookup.convert_word2id(word) for word in text.split(' ')]
        seq.append(vocab_lookup.convert_word2id('EOS'))
        n_tokens = len(seq)
        if n_tokens < pad_len:
            seq.extend(vocab_lookup.convert_word2id('PAD') for _ in range(pad_len - n_tokens))
        elif n_tokens > pad_len:
            # Keep the sequence terminated even after truncation.
            seq = seq[:pad_len - 1] + [vocab_lookup.convert_word2id('EOS')]
        encoded.append(seq)
    return encoded

def get_text_length(texts):
    """Return, for each text, the number of pieces produced by splitting on single spaces."""
    # Splitting on an explicit separator always yields (separator count + 1) pieces.
    return [text.count(' ') + 1 for text in texts]

def calc_rouge_reward(summaries, target_texts):
    """Return an array holding the ROUGE-L F score of each summary against its target.

    Scores come from the module-level `rouge` scorer; one entry per (summary, target) pair.
    """
    f_scores = []
    for pair_scores in rouge.get_scores(summaries, target_texts):
        f_scores.append(pair_scores['rouge-l']['f'])
    return np.array(f_scores)

def calc_unk_reward(summaries):
    """Return, per summary, one minus the fraction of its space-split tokens that equal 'UNK'."""
    token_lists = [summary.split(' ') for summary in summaries]
    unk_counts = np.array([tokens.count('UNK') for tokens in token_lists])
    lengths = np.array([len(tokens) for tokens in token_lists])
    return 1 - unk_counts/lengths

def calc_length_reward(summaries, max_len):
    """Return, per summary, its space-split token count divided by `max_len`."""
    token_counts = np.array([summary.count(' ') + 1 for summary in summaries])
    return token_counts/max_len

def calc_lm_reward(summaries, pad_len):
    """Score summaries with a language model and return the fetched `normalized_probs`.

    NOTE(review): `language_net`, `language_sess` and `normalized_probs` are not defined in the
    visible part of this notebook; confirm they exist before re-enabling this reward.
    """
    encoded = text_to_ids(summaries, vocab_lookup, pad_len)
    token_counts = [len(summary.split(' ')) for summary in summaries]
    lm_feed = {
        language_net.enc_inputs : encoded,
        language_net.enc_lens : token_counts,
        language_net.seed_length : len(encoded[0]),
        language_net.dropout_keep_prob : 1.0,
    }
    return language_sess.run(normalized_probs, feed_dict=lm_feed)

def calc_coherence_reward(summaries, pad_len):
    """Run the coherence classifier over the summaries and return its squeezed scores.

    Summaries are re-encoded with text_to_ids (padded/truncated to `pad_len`) and fed to
    `coherence_net` with dropout disabled; lengths are the space-split token counts of the
    raw summary strings.
    """
    encoded = text_to_ids(summaries, vocab_lookup, pad_len)
    token_counts = [len(summary.split(' ')) for summary in summaries]
    coherence_feed = {
        coherence_net.inputs : encoded,
        coherence_net.input_lens : token_counts,
        coherence_net.dropout_keep_prob : 1.0,
    }
    scores = coherence_sess.run(coherence_score, feed_dict=coherence_feed)
    return np.squeeze(scores)

def calculate_reward(summary_ids, target_ids, unk_weight=0.1, length_weight=0.0, rouge_weight=0.7, lm_weight=0.0,
                     coherence_weight=0.2):
    """Combine the per-example reward terms for a batch of generated summaries.

    Args:
        summary_ids: batch of generated id sequences.
        target_ids: batch of reference id sequences.
        unk_weight, rouge_weight, coherence_weight: weights of the three active terms.
        length_weight, lm_weight: accepted for interface compatibility; the length and
            language-model terms are currently disabled, so these weights are not used.

    Returns:
        float32 array: unk_weight*unk + rouge_weight*rouge_l_f + coherence_weight*coherence.
    """
    summaries = ids_to_text(summary_ids, vocab_lookup)
    targets = ids_to_text(target_ids, vocab_lookup)

    rouge_term = rouge_weight * calc_rouge_reward(summaries, targets)
    coherence_term = coherence_weight * calc_coherence_reward(summaries, s_pad_len)
    unk_term = unk_weight * calc_unk_reward(summaries)

    combined = unk_term + rouge_term + coherence_term
    return combined.astype(np.float32)

def get_seq_len_wrapper(ids):
    """Return an int32 array with the token count of each sequence after cutting at 'EOS'.

    The ids are decoded with ids_to_text (which truncates after the first 'EOS') and the
    space-split pieces of each decoded string are counted.
    """
    lengths = [text.count(' ') + 1 for text in ids_to_text(ids, vocab_lookup)]
    return np.array(lengths, dtype=np.int32)

In [29]:
# Device selection.
# CUDA_VISIBLE_DEVICES (set below) hides every GPU except physical device DEVICE, and TensorFlow
# renumbers the visible devices from 0 — so the graph must target '/gpu:0', not '/gpu:DEVICE'.
# Likewise a default TensorFlow setup exposes a single CPU device, '/cpu:0'.
# (Previously a non-existent '/gpu:1' / '/cpu:1' was requested and only worked because the
# sessions are created with allow_soft_placement=True.)
device_name = '/gpu:0' if USE_CUDA else '/cpu:0'

if USE_CUDA:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(DEVICE)

# Three independent graphs: the frozen baseline summarizer, the summarizer being fine-tuned,
# and the coherence classifier used inside the reward.
tf.reset_default_graph()
baseline_graph = tf.Graph()
summarize_graph = tf.Graph()
coherence_graph = tf.Graph()

with coherence_graph.as_default(), tf.device(device_name):
    coherence_net = models.RNN_Classifier(1, coherence_params['vocab_size'], s_pad_len, 
                                          embedding_dim=coherence_params['embedding_dim'], 
                                          hidden_size=coherence_params['hidden_size'], 
                                          n_layers=coherence_params['n_layers'], 
                                          bidirectional=coherence_params['bidirectional'], 
                                          pretrained_embeddings=None, trainable_embeddings=True)
    # Captured right after construction so the restore Saver covers exactly the model's weights.
    coherence_vars = tf.trainable_variables()
    # Hard 0/1 coherence decision: sigmoid probability thresholded at 0.5.
    coherence_score = tf.nn.sigmoid(coherence_net.logits)
    coherence_score = tf.to_float(coherence_score >= 0.5)
    coherence_init = tf.global_variables_initializer()
    functions.count_params(tf.trainable_variables())
    for var in tf.trainable_variables(): print(var)

with baseline_graph.as_default(), tf.device(device_name):
    baseline_net = models.Seq2Seq_Basic_Attn(vocab_size, d_pad_len, s_pad_len, embedding_dim=embd_dim, 
                                              hidden_size=hidden_size, n_layers=n_layers, bidirectional=bidirectional, 
                                              pretrained_embeddings=pretrained_embeddings, trainable_embeddings=True, 
                                              shared_embeddings=shared_embeddings, weight_tying=weight_tying)
    baseline_vars = tf.trainable_variables()
    baseline_softmax = tf.nn.softmax(baseline_net.logits)
    # Arg-max over axis 2 (the axis the softmax normalises over) gives the baseline's predicted ids.
    baseline_predictions = tf.to_int32(tf.argmax(baseline_softmax, axis=2))
    
    baseline_init = tf.global_variables_initializer()
    functions.count_params(tf.trainable_variables())
    for var in tf.trainable_variables(): print(var)
    
with summarize_graph.as_default(), tf.device(device_name):
    summarize_net = models.Seq2Seq_Basic_Attn(vocab_size, d_pad_len, s_pad_len, embedding_dim=embd_dim, 
                                              hidden_size=hidden_size, n_layers=n_layers, bidirectional=bidirectional, 
                                              pretrained_embeddings=pretrained_embeddings, trainable_embeddings=True, 
                                              shared_embeddings=shared_embeddings, weight_tying=weight_tying)
    # Captured before the optimizer is created so it only contains the model's own weights.
    summarize_vars = tf.trainable_variables()
    summarize_logits = summarize_net.logits
    summarize_predictions = summarize_net.generated_words
    summarize_softmax = tf.nn.softmax(summarize_logits)

    # Probability the model assigned to each generated word: flatten the softmax tensor and
    # gather element (row * last_dim + word_id), where row enumerates the first two axes.
    indices = tf.reshape(summarize_predictions, [-1])
    indices = indices + tf.range(tf.shape(summarize_softmax)[0]*tf.shape(summarize_softmax)[1])*tf.shape(summarize_softmax)[2]
    action_probs = tf.reshape(tf.gather(tf.reshape(summarize_softmax, [-1]), indices), [-1, s_pad_len])
    
    # action_probs is already reshaped to [-1, s_pad_len]; the former tf.squeeze only discarded the
    # static shape and would have collapsed the batch axis for a batch of one.
    log_probs = tf.log(action_probs)
    # Only positions up to (and including) the first EOS contribute to the sequence log-probability.
    generated_seq_lens = tf.py_func(get_seq_len_wrapper, [summarize_predictions], tf.int32)
    generated_seq_lens = tf.reshape(generated_seq_lens, [-1])
    mask = tf.sequence_mask(generated_seq_lens, maxlen=s_pad_len)
    masked_log_probs = tf.to_float(mask)*log_probs
    seq_probs = tf.reduce_sum(masked_log_probs, axis=1)
    
    targets = tf.placeholder(tf.int32, shape=[None, s_pad_len], name='targets')
    # Reward of the generated sequences, computed in Python (no gradient flows through py_func).
    reward = tf.py_func(calculate_reward, [summarize_predictions, targets], tf.float32)
    baseline_reward = tf.placeholder(tf.float32, shape=[None], name='baseline_reward')
    
    # Policy-gradient loss with a fed baseline: minimising sum(log p) * (baseline - reward)
    # raises the likelihood of sequences whose reward exceeds the baseline.
    reward_loss = seq_probs*(baseline_reward - reward)
    loss = tf.reduce_mean(reward_loss)

    # Clip the global gradient norm to 1; gradient_norm reports the norm after clipping.
    gradients, _ = tf.clip_by_global_norm(tf.gradients(loss, summarize_vars), 1) 
    gradient_norm = tf.global_norm(gradients)
    opt_func = tf.train.AdamOptimizer(learning_rate=lr)
    optimizer = opt_func.apply_gradients(zip(gradients, summarize_vars)) 
    
    # Created last so it also initialises the optimizer's slot variables.
    summarize_init = tf.global_variables_initializer()
    functions.count_params(tf.trainable_variables())
    for var in tf.trainable_variables(): print(var)

# Trainable Parameters: 4456385
<tf.Variable 'embeddings_layer/embeddings:0' shape=(30000, 100) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_0/gru_cell/gates/kernel:0' shape=(356, 512) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_0/gru_cell/gates/bias:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_0/gru_cell/candidate/kernel:0' shape=(356, 256) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_0/gru_cell/candidate/bias:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_1/gru_cell/gates/kernel:0' shape=(512, 512) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_1/gru_cell/gates/bias:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_1/gru_cell/candidate/kernel:0' shape=(512, 256) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_1/gru_cell/candidate/bias:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_2/gru_cell/

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


# Trainable Parameters: 8427092
<tf.Variable 'embeddings_layer/embeddings:0' shape=(30000, 100) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_0/gru_cell/gates/kernel:0' shape=(612, 1024) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_0/gru_cell/gates/bias:0' shape=(1024,) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_0/gru_cell/candidate/kernel:0' shape=(612, 512) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_0/gru_cell/candidate/bias:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_1/gru_cell/gates/kernel:0' shape=(1024, 1024) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_1/gru_cell/gates/bias:0' shape=(1024,) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_1/gru_cell/candidate/kernel:0' shape=(1024, 512) dtype=float32_ref>
<tf.Variable 'encoder/multi_rnn_cell/cell_1/gru_cell/candidate/bias:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'attention/attn_W:0' shape=(512, 5

In [None]:
# ---------------------------------------------------------------------------------------------
# Policy-gradient fine-tuning of the summarizer, using the reward of its own greedy decode as
# the baseline. Every iteration: greedy decode -> reward of the greedy output -> sampled decode
# + gradient step with the greedy reward fed as baseline. Validation and a qualitative sample
# dump run at fixed intervals.
# ---------------------------------------------------------------------------------------------

# Snapshot of the model's plain-Python hyperparameters for the log header: public attributes
# whose values are neither callables nor TensorFlow objects.
# (The callable check is applied to the value; applied to the key it could never filter anything.)
params = {key : value for key, value in summarize_net.__dict__.items()
          if not key.startswith('_') and not callable(value) and str(type(value)).find('tensorflow') == -1}
model_name = summarize_net.__class__.__name__ + '_RL'

if not DEBUG_MODE:
    timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
    log_file = os.path.join(log_dir, '{}_train_log_{}.txt'.format(model_name, timestamp))
    # Report the learning rate and batch size actually in effect rather than hard-coded figures.
    log_description = ('0.1 unk/0.7 rouge-l-f/0.2 coherence reward, {} lr, {} batch size, params: {}\n'
                       .format(lr, batch_size, params))
    # Create (or truncate) the log file before anything is written through functions.write_to_log.
    with open(log_file, 'w'):
        pass
    functions.write_to_log(log_description, log_file)

baseline_sess = tf.InteractiveSession(graph=baseline_graph, config=tf.ConfigProto(allow_soft_placement=True))
summarize_sess = tf.InteractiveSession(graph=summarize_graph, config=tf.ConfigProto(allow_soft_placement=True)) 
coherence_sess = tf.InteractiveSession(graph=coherence_graph, config=tf.ConfigProto(allow_soft_placement=True))

summarize_sess.run(summarize_init)
if not DEBUG_MODE:
    # Build the checkpoint Saver explicitly on the summarizer graph; an unscoped Saver is built on
    # whatever graph is the ambient default, which is not summarize_graph at this point.
    with summarize_graph.as_default():
        saver = tf.train.Saver(max_to_keep=100)
    
baseline_sess.run(baseline_init)
coherence_sess.run(coherence_init)

# Restore pretrained weights. Both summarizers start from the same checkpoint; only the
# `summarize` copy is updated afterwards.
with baseline_graph.as_default():
    baseline_saver = tf.train.Saver(var_list=baseline_vars)
baseline_saver.restore(baseline_sess, summarize_ckpt_file)

with summarize_graph.as_default():
    summarize_saver = tf.train.Saver(var_list=summarize_vars)
summarize_saver.restore(summarize_sess, summarize_ckpt_file)

with coherence_graph.as_default():
    coherence_saver = tf.train.Saver(var_list=coherence_vars)
coherence_saver.restore(coherence_sess, coherence_ckpt_file)


def _next_partition(loader, files):
    """Return (data, loader, wrapped): the next partition, restarting the loader when exhausted."""
    try:
        return next(loader), loader, False
    except (StopIteration, IndexError):
        # The loader signals exhaustion by ending or by indexing past its file list.
        loader = data_partition_loader(files)
        return next(loader), loader, True


def _unpack_batch(examples):
    """Split a batch into (encoder inputs, encoder lengths, dummy decoder inputs, target ids)."""
    inputs = [example.source_ids for example in examples]
    input_lens = [example.source_len for example in examples]
    target_ids = [example.target_ids for example in examples]
    # Teacher forcing is disabled everywhere below; the zero decoder inputs are presumably only
    # there to satisfy the placeholder — confirm against the model.
    dummy_dec_inputs = np.zeros_like(target_ids, dtype=int)
    return inputs, input_lens, dummy_dec_inputs, target_ids


def _decode_feed(net, inputs, dummy_dec_inputs, input_lens, sample):
    """Feed dict for an inference-mode decode of `net` (no dropout, no teacher forcing)."""
    return {net.enc_inputs : inputs,
            net.dec_inputs : dummy_dec_inputs,
            net.enc_lens : input_lens,
            net.dropout_keep_prob : 1.0,
            net.teacher_forcing : False,
            net.teacher_forcing_mask : [1 for _ in range(s_pad_len)],
            net.is_training : False,
            net.sample_decoding : sample}


def _baseline_decode(inputs, dummy_dec_inputs, input_lens):
    """Predicted ids of the frozen baseline summarizer (sample_decoding off)."""
    feed = _decode_feed(baseline_net, inputs, dummy_dec_inputs, input_lens, sample=False)
    return baseline_sess.run(baseline_predictions, feed_dict=feed)


def _greedy_decode(inputs, dummy_dec_inputs, input_lens, target_ids):
    """Decode with sample_decoding off on the model being trained; return (ids, reward)."""
    feed = _decode_feed(summarize_net, inputs, dummy_dec_inputs, input_lens, sample=False)
    feed[targets] = target_ids
    ids = summarize_sess.run(summarize_predictions, feed_dict=feed)
    return ids, calculate_reward(ids, target_ids)


def _sampling_feed(inputs, dummy_dec_inputs, input_lens, target_ids, greedy_reward):
    """Feed dict for a sampled decode, with the greedy reward fed as the baseline."""
    feed = _decode_feed(summarize_net, inputs, dummy_dec_inputs, input_lens, sample=True)
    feed[targets] = target_ids
    feed[baseline_reward] = greedy_reward
    return feed


def _mean_rouge_recall(hyp_ids, ref_ids):
    """Batch-mean (ROUGE-1, ROUGE-2, ROUGE-L) recall of decoded hypotheses against references."""
    scores = [(score['rouge-1']['r'], score['rouge-2']['r'], score['rouge-l']['r']) for score
              in rouge.get_scores(ids_to_text(hyp_ids, vocab_lookup), ids_to_text(ref_ids, vocab_lookup))]
    return np.mean(np.array(scores), axis=0)


best_val_reward = 0.0
epoch = 0
for itr in range(1, n_iters+1):
    examples, ep = train_batcher.next_batch()
    if ep == 1:
        # Current partition is used up: move to the next one; wrapping around the file list
        # counts as one epoch.
        train_data, train_partition_loader, wrapped = _next_partition(train_partition_loader, train_files)
        if wrapped:
            epoch += 1
        train_batcher = data_batcher.Data_Batcher(train_data, batch_size)
        examples, ep = train_batcher.next_batch()
    
    inputs, input_lens, dummy_dec_inputs, target_ids = _unpack_batch(examples)

    # The baseline model's output is only reported during validation / deploy, so it is no
    # longer decoded on every training step.

    # greedy decoding: its reward is the baseline for the sampled rollout
    greedy_ids, greedy_reward = _greedy_decode(inputs, dummy_dec_inputs, input_lens, target_ids)
    
    # run REINFORCE optimization on a sampled decode
    feed_dict = _sampling_feed(inputs, dummy_dec_inputs, input_lens, target_ids, greedy_reward)
    rl_loss, sampled_reward, grad_norm, _ = summarize_sess.run([loss, reward, gradient_norm, optimizer], feed_dict=feed_dict)
    
    if itr % display_interval == 0 or itr == 1:
        # ROUGE is only needed for the progress line, so compute it only when one is printed.
        rouge_1, rouge_2, rouge_L = tuple(_mean_rouge_recall(greedy_ids, target_ids))
        log_string = ('[%d, %5d] loss: %.3f, greedy reward: %.3f, sampled reward: %.3f, rouge-1: %.3f, rouge-2: %.3f, '
                      'rouge-L: %.3f, grad_norm: %.3f' % (epoch, itr, rl_loss, np.mean(greedy_reward),
                                                          np.mean(sampled_reward), rouge_1, rouge_2, rouge_L,
                                                          grad_norm))

        if not DEBUG_MODE:
            functions.write_to_log(log_string, log_file)
        print(log_string)
    
    if itr % val_interval == 0:
        # Running means over validation batches (no optimizer op is run here).
        val_loss, val_greedy_reward, val_sampled_reward = 0.0, 0.0, 0.0
        baseline_rouge_scores, model_rouge_scores = [], []
        for i in range(int(len(val_batcher.data)/val_batcher.batch_size)):
            examples, ep = val_batcher.next_batch()
            if ep == 1:
                val_data, val_partition_loader, _ = _next_partition(val_partition_loader, val_files)
                val_batcher = data_batcher.Data_Batcher(val_data, batch_size)
                examples, ep = val_batcher.next_batch()

            inputs, input_lens, dummy_dec_inputs, target_ids = _unpack_batch(examples)

            # baseline model
            baseline_ids = _baseline_decode(inputs, dummy_dec_inputs, input_lens)

            # greedy decoding
            greedy_ids, greedy_reward = _greedy_decode(inputs, dummy_dec_inputs, input_lens, target_ids)

            # sampled decode: loss and reward only, no parameter update
            feed_dict = _sampling_feed(inputs, dummy_dec_inputs, input_lens, target_ids, greedy_reward)
            rl_loss, sampled_reward = summarize_sess.run([loss, reward], feed_dict=feed_dict)
            val_loss += ((rl_loss - val_loss)/(i+1))
            val_greedy_reward += ((np.mean(greedy_reward) - val_greedy_reward)/(i+1))
            val_sampled_reward += ((np.mean(sampled_reward)- val_sampled_reward)/(i+1))
            
            baseline_rouge_scores.append(_mean_rouge_recall(baseline_ids, target_ids))
            model_rouge_scores.append(_mean_rouge_recall(greedy_ids, target_ids))
            
            # Cap validation at 10000 examples.
            if (i+1)*val_batcher.batch_size >= 10000:
                break
        baseline_rouge_scores = np.mean(baseline_rouge_scores, axis=0)
        model_rouge_scores = np.mean(model_rouge_scores, axis=0)
        log_string = ('Validation - loss: %.3f, greedy reward: %.3f, sampled reward: %.3f, baseline rouge 1,2,L: '
                      '{%.3f, %.3f, %.3f}, model rouge 1,2,L {%.3f, %.3f, %.3f}'
                      % (val_loss, val_greedy_reward, val_sampled_reward,
                         baseline_rouge_scores[0], baseline_rouge_scores[1],
                         baseline_rouge_scores[2], model_rouge_scores[0],
                         model_rouge_scores[1], model_rouge_scores[2]))

        if not DEBUG_MODE:
            functions.write_to_log(log_string, log_file)
        print(log_string)
            
        if not DEBUG_MODE and val_greedy_reward > best_val_reward:
            best_val_reward = val_greedy_reward
            weights_prefix = '{}_weights_epoch_{}'.format(model_name, epoch)
            weights_path = os.path.join(weights_dir, weights_prefix)
            # Save first, so the "saved" message is only emitted for a checkpoint that exists.
            saver.save(summarize_sess, weights_path)
            log_msg = "Weights saved in file: {}\n".format(weights_path)
            print(log_msg)
            functions.write_to_log(log_msg, log_file) 
                
    if itr % deploy_interval == 0:
        # Qualitative check: dump baseline / greedy / sampled summaries for one validation example.
        examples, ep = deploy_batcher.next_batch()
    
        inputs, input_lens, dummy_dec_inputs, target_ids = _unpack_batch(examples)

        # baseline model
        baseline_ids = _baseline_decode(inputs, dummy_dec_inputs, input_lens)
    
        # greedy decoding
        greedy_ids, greedy_reward = _greedy_decode(inputs, dummy_dec_inputs, input_lens, target_ids)

        # sampled decode (predictions only, no update)
        feed_dict = _sampling_feed(inputs, dummy_dec_inputs, input_lens, target_ids, greedy_reward)
        sampled_ids = summarize_sess.run(summarize_predictions, feed_dict=feed_dict)    

        baseline_summaries = ids_to_text(baseline_ids, vocab_lookup)
        sampled_summaries = ids_to_text(sampled_ids, vocab_lookup)
        greedy_summaries = ids_to_text(greedy_ids, vocab_lookup)
        log_string = ('DOCUMENT:\n{}\nBASELINE:\n{}\nGREEDY:\n{}\nSAMPLED:\n{}\nGROUND TRUTH:\n{}'
                      .format(examples[0].source_text, baseline_summaries[0], greedy_summaries[0],
                              sampled_summaries[0], examples[0].target_text))
        print(log_string)
        if not DEBUG_MODE:
            functions.write_to_log(log_string, log_file)

INFO:tensorflow:Restoring parameters from /home/ai2-leia/Documents/code/paul/deep_NLP/Summarization/PubMed/weights/Seq2Seq_Basic_Attn_checkpoints_20171116_151612/Seq2Seq_Basic_Attn_weights_epoch_1_itr_130000
INFO:tensorflow:Restoring parameters from /home/ai2-leia/Documents/code/paul/deep_NLP/Summarization/PubMed/weights/Seq2Seq_Basic_Attn_checkpoints_20171116_151612/Seq2Seq_Basic_Attn_weights_epoch_1_itr_130000
INFO:tensorflow:Restoring parameters from /home/ai2-leia/Documents/code/paul/deep_NLP/Summarization/PubMed/weights/RNN_Classifier_checkpoints_20171117_114517/RNN_Classifier_weights_epoch_0_itr_38000
[0,     1] loss: -3.128, greedy reward: 0.532, sampled reward: 0.478, rouge-1: 0.426, rouge-2: 0.191, rouge-L: 0.373, grad_norm: 1.000
[0,   100] loss: -2.227, greedy reward: 0.556, sampled reward: 0.508, rouge-1: 0.427, rouge-2: 0.176, rouge-L: 0.376, grad_norm: 0.756
[0,   200] loss: -1.625, greedy reward: 0.571, sampled reward: 0.526, rouge-1: 0.441, rouge-2: 0.193, rouge-L: 0.38