In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import importlib
import numpy as np
import tensorflow as tf
import texar as tx

import tensorflow as tf

import texar as tx
from texar.modules import WordEmbedder, UnidirectionalRNNEncoder, \
        MLPTransformConnector, AttentionRNNDecoder, \
        GumbelSoftmaxEmbeddingHelper, Conv1DClassifier
from texar.core import get_train_op
from texar.utils import collect_trainable_variables, get_batch_size


class RLModel(object):
    """Attentional RNN auto-encoder with an extra objective on its own
    greedy decodes.

    Construction builds the whole graph and exposes three dictionaries that
    are meant to be passed to `sess.run`:

    - `train_g`: `loss_g_ae` and `train_op_g_ae` -- the teacher-forced
      reconstruction loss and the op minimizing it over the embedder,
      encoder, connector and decoder variables.
    - `train_g_RL2`: `loss_g_RL2` and `train_op_g_RL2` -- the cross-entropy
      of the greedy-decoding logits against the greedily decoded ids, and
      the op minimizing it over the connector and decoder variables only.
    - `samples`: `batch_size`, `original` (input ids without BOS) and
      `transferred` (greedily decoded ids).

    Args:
        inputs: Batch dict. The keys read here are 'text_ids', 'length'
            and 'labels'.
        vocab: Vocabulary object; `size`, `bos_token_id` and
            `eos_token_id` are read.
        hparams: Model hyperparameters. The `embedder`, `encoder`,
            `decoder` and `opt` sections are read.
    """

    def __init__(self, inputs, vocab,hparams=None):
        self._hparams = tx.HParams(hparams, None)
        self._build_model(inputs, vocab)

    def _build_model(self, inputs, vocab):
        """Builds the graph and populates `train_g_RL2`, `train_g` and
        `samples`.
        """
        embedder = WordEmbedder(
            vocab_size=vocab.size,
            hparams=self._hparams.embedder)
        encoder = UnidirectionalRNNEncoder(hparams=self._hparams.encoder)

        # The encoder input and the reconstruction target are the same
        # sequence: text_ids with the leading BOS token removed, which is
        # therefore one token shorter than inputs['length'].
        target_ids = inputs['text_ids'][:, 1:]
        target_length = inputs['length'] - 1
        enc_outputs, final_state = encoder(
            embedder(target_ids), sequence_length=target_length)

        decoder = AttentionRNNDecoder(
            memory=enc_outputs,
            memory_sequence_length=target_length,
            # Pass the token embedding to the cell unchanged; the attention
            # vector is not mixed into the cell input.
            cell_input_fn=lambda inputs, attention: inputs,
            vocab_size=vocab.size,
            hparams=self._hparams.decoder)

        # Transforms the encoder's final state into the decoder's state
        # structure. Computed once and shared by both decoding passes.
        connector = MLPTransformConnector(decoder.state_size)
        initial_state = connector(final_state)

        # Teacher-forced decoding and the auto-encoding loss.
        g_outputs, _, _ = decoder(
            initial_state=initial_state, inputs=inputs['text_ids'],
            embedding=embedder, sequence_length=target_length)

        loss_g_ae = tx.losses.sequence_sparse_softmax_cross_entropy(
            labels=target_ids,
            logits=g_outputs.logits,
            sequence_length=target_length,
            average_across_timesteps=True,
            sum_over_timesteps=False)

        # Greedy decoding, used for evaluation and for the RL2 objective.
        # inputs['labels'] is used only as a shape template for the start
        # tokens -- assumes it holds one entry per example; TODO confirm.
        start_tokens = tf.ones_like(inputs['labels']) * vocab.bos_token_id
        end_token = vocab.eos_token_id
        outputs, _, length = decoder(
            decoding_strategy='infer_greedy', initial_state=initial_state,
            embedding=embedder, start_tokens=start_tokens, end_token=end_token)

        # RL2 objective: cross-entropy of the greedy logits against the
        # greedy token ids themselves. The per-example decoded `length` is
        # used as the mask so that steps after the end token do not
        # contribute to the summed loss. (A fixed scalar length was used
        # here previously, which left those trailing steps unmasked.)
        loss_g_RL2 = tx.losses.sequence_sparse_softmax_cross_entropy(
            labels=outputs.sample_id,
            logits=outputs.logits,
            sequence_length=length,
            average_across_timesteps=False,
            sum_over_timesteps=True)

        # NOTE: the RL2 train op is created before the auto-encoder train
        # op on purpose; keep this order so that optimizer variable names
        # (and therefore existing checkpoints) stay compatible.
        g_vars_RL2 = collect_trainable_variables([connector, decoder])
        train_op_g_RL2 = get_train_op(
            loss_g_RL2, g_vars_RL2, hparams=self._hparams.opt)

        self.train_g_RL2 = {
            "loss_g_RL2": loss_g_RL2,
            "train_op_g_RL2": train_op_g_RL2
        }

        # Auto-encoder optimizer over all four modules.
        g_vars = collect_trainable_variables(
            [embedder, encoder, connector, decoder])
        train_op_g_ae = get_train_op(
            loss_g_ae, g_vars, hparams=self._hparams.opt)

        # Interface tensors
        self.train_g = {
            "loss_g_ae": loss_g_ae,
            "train_op_g_ae": train_op_g_ae
        }
        self.samples = {
            "batch_size": get_batch_size(inputs['text_ids']),
            "original": target_ids,
            # `outputs` is the result of the 'infer_greedy' decoding pass.
            "transferred": outputs.sample_id
        }
        
        
# Hyperparameters and dataset definitions are read from the RLconfig module.
config = importlib.import_module('RLconfig')

# Build the train / val / test datasets from their config entries. The
# vocabulary is the one the training data exposes at index 0.
train_data, val_data, test_data = [
    tx.data.MultiAlignedData(split_hparams)
    for split_hparams in (config.train_data, config.val_data, config.test_data)
]
vocab = train_data.vocab(0)

# A feedable iterator yields a single `batch` structure; which dataset it
# reads from is chosen per `sess.run` call through the handle placed in
# feed_dict ('train_g', 'val' or 'test').
datasets_by_name = {
    'train_g': train_data,
    'val': val_data,
    'test': test_data,
}
iterator = tx.data.FeedableDataIterator(datasets_by_name)
batch = iterator.get_next()

model = RLModel(batch, vocab, config.model)

def _train_epoch(sess, epoch, verbose=True):
    """Runs `model.train_g` on the 'train_g' dataset until it is exhausted.

    Args:
        sess: Session used to run the training op.
        epoch: Epoch number; only used in the end-of-epoch summary line.
        verbose: If true, prints the recorder contents at step 1 and at
            every fifth step.
    """
    recorder = tx.utils.AverageRecorder(size=10)

    step = 0
    while True:
        step += 1
        try:
            fetched = sess.run(
                model.train_g,
                feed_dict={
                    iterator.handle: iterator.get_handle(sess, 'train_g')
                })
        except tf.errors.OutOfRangeError:
            # The dataset is exhausted: report and stop.
            print('epoch: {}, {}'.format(epoch, recorder.to_str(4)))
            break

        recorder.add(fetched)
        if verbose and (step % 5 == 0 or step == 1):
            print('step: {}, {}'.format(step, recorder.to_str(4)))

        # Periodic validation inside the training loop (restart the 'val'
        # dataset and call _eval_epoch every second step) is currently
        # disabled.
            
def _eval_epoch(sess, epoch, val_or_test='val'):
    """Greedy-decodes a whole dataset in EVAL mode and averages BLEU.

    For every batch the decoded and the reference sentences are printed,
    a corpus BLEU score is computed for the batch, and the score is added
    to a recorder weighted by the batch size.

    Args:
        sess: Session used to run the model.
        epoch: Unused at the moment (only the disabled sample-writing code
            referred to it).
        val_or_test: Name of the dataset to read, as registered with the
            feedable iterator.

    Returns:
        The value of `AverageRecorder.avg()` after the last batch.
    """
    recorder = tx.utils.AverageRecorder()
    sample_keys = list(model.samples.keys())

    while True:
        try:
            fetched = sess.run(model.samples, feed_dict={
                iterator.handle: iterator.get_handle(sess, val_or_test),
                tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
            })
        except tf.errors.OutOfRangeError:
            print('{}: {}'.format(
                val_or_test, recorder.to_str(precision=4)))
            break

        # Move everything out of `fetched`; it is refilled below with the
        # metric to be averaged.
        n_examples = fetched.pop('batch_size')
        id_arrays = tx.utils.dict_pop(fetched, sample_keys)

        hyps = tx.utils.map_ids_to_strs(id_arrays['transferred'], vocab)
        print("samples: ",hyps)

        # One reference per hypothesis, hence the extra axis.
        refs = np.expand_dims(
            tx.utils.map_ids_to_strs(id_arrays['original'], vocab), axis=1)
        print("reference: ",refs)

        fetched['bleu'] = tx.evals.corpus_bleu_moses(refs, hyps)
        recorder.add(fetched, weight=n_examples)

        # Writing the paired reference/hypothesis text to
        # config.sample_path via tx.utils.write_paired_text is currently
        # disabled.

    return recorder.avg()

def write_predif_AE(sess,val_or_test='test'):
    """Greedy-decodes every batch of a dataset and collects the sentences.

    Despite its name this function writes nothing to disk; it only returns
    the collected text.

    Args:
        sess: Session used to run the model.
        val_or_test: Name of the dataset to read, as registered with the
            feedable iterator.

    Returns:
        A tuple `(sample_sents, ref_sents)`. `sample_sents` is a flat list
        of decoded sentences; `ref_sents` has one single-element list per
        example (the reference sentence), in the same order.
    """
    # Called without a dataset name, exactly as in the original code.
    iterator.initialize_dataset(sess)

    sample_sents = []
    ref_sents = []
    n_batches = 0
    while True:
        try:
            vals = sess.run(model.samples, feed_dict={
                iterator.handle: iterator.get_handle(sess, val_or_test),
                tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
            })
        except tf.errors.OutOfRangeError:
            print("all batches finished")
            break

        # Count and report a batch only once it has actually been fetched,
        # so no progress line is printed for the terminating iteration.
        n_batches += 1
        print("batch: ",n_batches)

        samples = tx.utils.dict_pop(vals, list(model.samples.keys()))
        hyps = tx.utils.map_ids_to_strs(samples['transferred'], vocab)
        refs = tx.utils.map_ids_to_strs(samples['original'], vocab)
        # Keep the historical return shape: one [reference] list per example.
        refs = np.expand_dims(refs, axis=1)

        sample_sents.extend(hyps.tolist())
        ref_sents.extend(refs.tolist())

    return sample_sents,ref_sents

# Create the session and initialize all graph state once.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
sess.run(tf.tables_initializer())

# max_to_keep=None: old checkpoints are never deleted by the saver.
saver = tf.train.Saver(max_to_keep=None)

iterator.initialize_dataset(sess)

# NOTE: a scratch copy of RLModel._build_model used to be parked here inside
# a string literal, followed by an experiment that scaled the RL2 loss by a
# scalar reward (loss_g_RL = dif * loss_g_RL2 when dif > 1, otherwise
# loss_g_RL2) and exposed it as model.train_g_RL. It never executed, so it
# has been removed together with the second variable initialization that
# only existed for it.
#
# If variables are added to the graph after this point they must be
# initialized before use. Be aware that re-running
# tf.global_variables_initializer() resets every variable, including any
# that were restored from a checkpoint.

# To resume from the whole-model checkpoint instead of training:
#saver.restore(sess,'RLsave/mydata_wholemodel.ckpt')

# Auto-encoder pre-training from this cell is off by default; flip the flag
# to run it and save the resulting checkpoint.
RUN_AE_PRETRAINING = False
if RUN_AE_PRETRAINING:
    for epoch in range(1, 20):
        # Train
        iterator.restart_dataset(sess, ['train_g'])
        _train_epoch(sess, epoch)

    saver.save(sess,'RLsave/mydata_wholemodel.ckpt')

In [13]:
#saver.save(sess,'RLsave/mydata_embedded_AEmodel1.ckpt')

In [6]:
# Restore all saved variables from the "AEmodel1" checkpoint.
# NOTE(review): the next cell restores a different checkpoint into the same
# session; whichever of the two cells ran last determines the weights.
ae_checkpoint_1 = 'RLsave/mydata_embedded_AEmodel1.ckpt'
saver.restore(sess, ae_checkpoint_1)

INFO:tensorflow:Restoring parameters from RLsave/mydata_embedded_AEmodel1.ckpt


INFO:tensorflow:Restoring parameters from RLsave/mydata_embedded_AEmodel1.ckpt


In [2]:
# Restore all saved variables from the "AEmodel64" checkpoint.
# NOTE(review): the previous cell restores a different checkpoint into the
# same session; whichever of the two cells ran last determines the weights.
ae_checkpoint_64 = 'RLsave/mydata_embedded_AEmodel64.ckpt'
saver.restore(sess, ae_checkpoint_64)

INFO:tensorflow:Restoring parameters from RLsave/mydata_embedded_AEmodel64.ckpt


In [7]:
# Decode a single batch from the 'val' dataset in EVAL mode and print the
# decoded text next to the reference text.
val_handle = iterator.get_handle(sess, 'val')
feed_dict = {
    iterator.handle: val_handle,
    tx.context.global_mode(): tf.estimator.ModeKeys.EVAL,
}
vals = sess.run(model.samples, feed_dict=feed_dict)

# Split the fetched values into the batch size and the id arrays.
batch_size = vals.pop('batch_size')
samples = tx.utils.dict_pop(vals, list(model.samples.keys()))

hyps = tx.utils.map_ids_to_strs(samples['transferred'], vocab)
print("samples: ",hyps)

# One reference per hypothesis, hence the extra axis.
refs = np.expand_dims(
    tx.utils.map_ids_to_strs(samples['original'], vocab), axis=1)
print("reference: ",refs)


samples:  ['the double double is just the best best burger in the']
reference:  [['the double double is just the best best burger in the universe .']]


In [None]:
# Absolute difference between the two predict_sentiment results for the
# first decoded sentence.
# NOTE(review): predict_sentiment, frnn, fTEXT, mrnn and mTEXT are not
# defined in the visible part of this notebook, so this cell cannot run on
# a fresh kernel -- confirm where they are supposed to come from.
first_hyp = str(hyps[0])
score_f = predict_sentiment(first_hyp, frnn, fTEXT)
score_m = predict_sentiment(first_hyp, mrnn, mTEXT)
dif = np.abs(score_f - score_m)

In [6]:
# Run one RL2 step (loss and train op) with whatever `feed_dict` was last
# assigned in the kernel, and display the fetched values.
# NOTE(review): read top to bottom, `feed_dict` is the 'val' / EVAL feed
# built above, so this would apply a parameter update computed on a
# validation batch -- confirm that this is intended.
sess.run(model.train_g_RL2,feed_dict=feed_dict)

{'loss_g_RL2': 5.114773, 'train_op_g_RL2': 5.114773}

In [9]:
# Feed for reading the 'val' dataset with the global mode set to EVAL.
feed_dict = {}
feed_dict[iterator.handle] = iterator.get_handle(sess, 'val')
feed_dict[tx.context.global_mode()] = tf.estimator.ModeKeys.EVAL

In [12]:
# Fetch `model.samples` once and display the Python type of the
# 'transferred' entry (the greedily decoded ids).
type(sess.run(model.samples,feed_dict=feed_dict)['transferred'])

numpy.ndarray

In [13]:
# Display the symbolic tensors behind the `samples` interface dictionary.
model.samples

{'batch_size': <tf.Tensor 'strided_slice_2:0' shape=() dtype=int32>,
 'original': <tf.Tensor 'strided_slice_3:0' shape=(?, ?) dtype=int64>,
 'transferred': <tf.Tensor 'attention_rnn_decoder_5/decoder/transpose_1:0' shape=(?, ?) dtype=int32>}

In [None]:
# Auto-encoder training, with epochs numbered 1 through 19.
epoch_numbers = range(1, 20)
for epoch in epoch_numbers:
    # Rewind the training dataset, then make one full pass over it.
    iterator.restart_dataset(sess, ['train_g'])
    _train_epoch(sess, epoch)

In [None]:
# Run a single RL2 step on one batch from the 'train_g' dataset and print
# the resulting loss.
train_handle = iterator.get_handle(sess, 'train_g')
feed_dict = {iterator.handle: train_handle}
result = sess.run(model.train_g_RL2, feed_dict=feed_dict)
print('loss_g_RL2: ', result['loss_g_RL2'])

In [None]:
# Fetch and display `model.samples` (batch size, original ids, decoded ids)
# using whatever `feed_dict` was last assigned in the kernel.
sess.run(model.samples,feed_dict=feed_dict)