In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import importlib
import numpy as np
import tensorflow as tf
import texar as tx

import tensorflow as tf

import texar as tx
from texar.modules import WordEmbedder, UnidirectionalRNNEncoder, \
        MLPTransformConnector, AttentionRNNDecoder, \
        GumbelSoftmaxEmbeddingHelper, Conv1DClassifier,Conv1DNetwork,BidirectionalRNNEncoder
from texar.core import get_train_op
from texar.utils import collect_trainable_variables, get_batch_size

from RL_model3 import RLModel3

tf.set_random_seed(1)

In [2]:
# All hyperparameters are read from the RLconfig module.
config = importlib.import_module('RLconfig')

# Main data: train / val / test splits, built in that order.
train_data, val_data, test_data = [
    tx.data.MultiAlignedData(split_hparams)
    for split_hparams in (config.train_data, config.val_data, config.test_data)
]
# Vocabulary is taken from the training split (index 0).
vocab = train_data.vocab(0)

iterator = tx.data.FeedableDataIterator({
    'train_g': train_data,
    'val': val_data,
    'test': test_data,
})
batch = iterator.get_next()

# Female data: same layout, with its own feedable iterator.
ftrain_data, fval_data, ftest_data = [
    tx.data.MultiAlignedData(split_hparams)
    for split_hparams in (config.ftrain_data, config.fval_data, config.ftest_data)
]

fiterator = tx.data.FeedableDataIterator({
    'ftrain_g': ftrain_data,
    'fval': fval_data,
    'ftest': ftest_data,
})
fbatch = fiterator.get_next()

# Male data: same layout, with its own feedable iterator.
mtrain_data, mval_data, mtest_data = [
    tx.data.MultiAlignedData(split_hparams)
    for split_hparams in (config.mtrain_data, config.mval_data, config.mtest_data)
]

miterator = tx.data.FeedableDataIterator({
    'mtrain_g': mtrain_data,
    'mval': mval_data,
    'mtest': mtest_data,
})
mbatch = miterator.get_next()

# gamma is handed to RLModel3 as its fifth positional argument. The original
# cell repeated the literal 0.05 in the call, so editing `gamma` had no effect.
# NOTE(review): the meaning of this value is defined inside RL_model3 — confirm there.
gamma = 0.05
model = RLModel3(batch, fbatch, mbatch, vocab, gamma, config.model)

In [3]:
def _train_epoch_AE(sess, epoch, verbose=True):
    """Run ``model.train_op_g_ae`` repeatedly over the 'train_g' split.

    Args:
        sess: session used for every ``run`` call.
        epoch: epoch index; it only appears in the end-of-epoch log line.
        verbose: when true, print the recorder summary at step 1 and at every
            fifth step.

    The loop stops when ``sess.run`` raises ``tf.errors.OutOfRangeError``,
    at which point the recorder summary for the epoch is printed.
    """
    recorder = tx.utils.AverageRecorder(size=5)
    n_steps = 0
    exhausted = False

    while not exhausted:
        n_steps += 1
        try:
            handle = iterator.get_handle(sess, 'train_g')
            fetched = sess.run(model.train_op_g_ae,
                               feed_dict={iterator.handle: handle})
        except tf.errors.OutOfRangeError:
            print('epoch: {}, {}'.format(epoch, recorder.to_str(4)))
            exhausted = True
            continue

        recorder.add(fetched)
        should_log = verbose and (n_steps == 1 or n_steps % 5 == 0)
        if should_log:
            print('step: {}, {}'.format(n_steps, recorder.to_str(4)))
        # A periodic mid-epoch validation pass used to live here; it is disabled.
            
def _eval_epoch_AE(sess, epoch, val_or_test='val'):
    """Evaluate ``model.samples`` over one split and report averaged metrics.

    For each batch the 'transferred' and 'original' id arrays are decoded with
    ``vocab``, printed, and scored with ``tx.evals.corpus_bleu_moses``; the
    BLEU value is recorded with the batch size as weight.

    Args:
        sess: session used for every ``run`` call.
        epoch: not used by the active code (only by the disabled sample writer).
        val_or_test: dataset name passed to ``iterator.get_handle``.

    Returns:
        ``avg()`` of the recorder once the split is exhausted.
    """
    recorder = tx.utils.AverageRecorder()

    while True:
        try:
            fetched = sess.run(
                model.samples,
                feed_dict={
                    iterator.handle: iterator.get_handle(sess, val_or_test),
                    # Run the graph in evaluation mode.
                    tx.context.global_mode(): tf.estimator.ModeKeys.EVAL,
                })
        except tf.errors.OutOfRangeError:
            print('{}: {}'.format(
                val_or_test, recorder.to_str(precision=4)))
            return recorder.avg()

        n_examples = fetched.pop('batch_size')

        # Move the sample entries out of `fetched`, then score them with BLEU.
        texts = tx.utils.dict_pop(fetched, list(model.samples.keys()))
        hyps = tx.utils.map_ids_to_strs(texts['transferred'], vocab)
        print("samples: ",hyps)

        refs = np.expand_dims(
            tx.utils.map_ids_to_strs(texts['original'], vocab), axis=1)
        print("reference: ",refs)

        fetched['bleu'] = tx.evals.corpus_bleu_moses(refs, hyps)
        recorder.add(fetched, weight=n_examples)
        # Writing the paired reference/hypothesis text to disk is disabled.

def f_train_epoch_reg(sess, epoch):
    """Run ``model.ftrain_op_d`` repeatedly over the 'ftrain_g' split.

    Prints the recorder summary at step 1 and at every fifth step, and once
    more (tagged with ``epoch``) when ``tf.errors.OutOfRangeError`` ends the
    pass.

    Args:
        sess: session used for every ``run`` call.
        epoch: epoch index; it only appears in the end-of-epoch log line.
    """
    recorder = tx.utils.AverageRecorder(size=5)
    n_steps = 0
    exhausted = False

    while not exhausted:
        n_steps += 1
        try:
            handle = fiterator.get_handle(sess, 'ftrain_g')
            fetched = sess.run(model.ftrain_op_d,
                               feed_dict={fiterator.handle: handle})
        except tf.errors.OutOfRangeError:
            print('epoch: {}, {}'.format(epoch, recorder.to_str(4)))
            exhausted = True
            continue

        recorder.add(fetched)
        if n_steps == 1 or n_steps % 5 == 0:
            print('step: {}, {}'.format(n_steps, recorder.to_str(4)))
        # A periodic mid-epoch validation pass used to live here; it is disabled.
            
def m_train_epoch_reg(sess, epoch):
    """Run ``model.mtrain_op_d`` repeatedly over the 'mtrain_g' split.

    Prints the recorder summary at step 1 and at every fifth step, and once
    more (tagged with ``epoch``) when ``tf.errors.OutOfRangeError`` ends the
    pass.

    Args:
        sess: session used for every ``run`` call.
        epoch: epoch index; it only appears in the end-of-epoch log line.
    """
    recorder = tx.utils.AverageRecorder(size=5)
    n_steps = 0
    exhausted = False

    while not exhausted:
        n_steps += 1
        try:
            handle = miterator.get_handle(sess, 'mtrain_g')
            fetched = sess.run(model.mtrain_op_d,
                               feed_dict={miterator.handle: handle})
        except tf.errors.OutOfRangeError:
            print('epoch: {}, {}'.format(epoch, recorder.to_str(4)))
            exhausted = True
            continue

        recorder.add(fetched)
        if n_steps == 1 or n_steps % 5 == 0:
            print('step: {}, {}'.format(n_steps, recorder.to_str(4)))
        # A periodic mid-epoch validation pass used to live here; it is disabled.

def fevalf_epoch_reg(sess, val_or_test='fval'):
    """Average ``model.floss_reg_batch`` over one split of ``fiterator``.

    Each ``run`` result is collected and the plain ``np.mean`` of the collected
    values is printed and returned, so every batch counts equally regardless
    of its size.

    Args:
        sess: session used for every ``run`` call.
        val_or_test: dataset name passed to ``fiterator.get_handle``.

    Returns:
        The mean of the fetched values.
    """
    batch_losses = []
    while True:
        try:
            feeds = {
                fiterator.handle: fiterator.get_handle(sess, val_or_test),
                # Run the graph in evaluation mode.
                tx.context.global_mode(): tf.estimator.ModeKeys.EVAL,
            }
            batch_losses.append(
                sess.run(model.floss_reg_batch, feed_dict=feeds))
        except tf.errors.OutOfRangeError:
            mean_loss = np.mean(batch_losses)
            print('{}: {}'.format(
                val_or_test, mean_loss))
            return mean_loss

def mevalm_epoch_reg(sess, val_or_test='mval'):
    """Average ``model.mloss_reg_batch`` over one split of ``miterator``.

    Each ``run`` result is collected and the plain ``np.mean`` of the collected
    values is printed and returned, so every batch counts equally regardless
    of its size.

    Args:
        sess: session used for every ``run`` call.
        val_or_test: dataset name passed to ``miterator.get_handle``.

    Returns:
        The mean of the fetched values.
    """
    batch_losses = []
    while True:
        try:
            feeds = {
                miterator.handle: miterator.get_handle(sess, val_or_test),
                # Run the graph in evaluation mode.
                tx.context.global_mode(): tf.estimator.ModeKeys.EVAL,
            }
            batch_losses.append(
                sess.run(model.mloss_reg_batch, feed_dict=feeds))
        except tf.errors.OutOfRangeError:
            mean_loss = np.mean(batch_losses)
            print('{}: {}'.format(
                val_or_test, mean_loss))
            return mean_loss


def RL_train_epoch(sess,epoch):
    """Run ``model.train_updated_interface`` repeatedly over the 'train_g' split.

    Prints the fetched value at step 1 and at every fifth step, and prints the
    last fetched value (tagged with ``epoch``) when
    ``tf.errors.OutOfRangeError`` ends the pass.

    Args:
        sess: session used for every ``run`` call.
        epoch: epoch index; it only appears in the end-of-epoch log line.
    """
    step = 0
    # Pre-bind the result so the end-of-epoch log cannot hit an unbound local
    # when the very first run() already raises OutOfRangeError (e.g. the
    # dataset was not restarted before calling this function).
    vals_g = None
    while True:
        try:
            step+=1
            feed_dict = {iterator.handle: iterator.get_handle(sess,'train_g')}
            vals_g = sess.run(model.train_updated_interface,feed_dict=feed_dict)

            if step==1 or step%5==0:
                print('step: {}, {}'.format(step,vals_g))

        except tf.errors.OutOfRangeError:
            print('epoch: {}, {}'.format(epoch, vals_g))
            break


def get_sents_AE(sess,val_or_test='test'):
    """Decode the 'transferred' samples of one split and collect ``model.pre_dif``.

    ``iterator.initialize_dataset(sess)`` is called first. A "batch: N" line
    is printed before every fetch attempt, including the final attempt that
    ends the loop.

    Args:
        sess: session used for every ``run`` call.
        val_or_test: dataset name passed to ``iterator.get_handle``.

    Returns:
        ``(sample_sents, pre_dif)``: the decoded sentences and the values
        fetched from ``model.pre_dif``, both flattened across batches.
        NOTE(review): pre_dif is presumably the gap between the female and
        male regressors' predictions — confirm in RLModel3.
    """
    iterator.initialize_dataset(sess)
    decoded_sents = []
    dif_values = []
    batch_no = 1

    while True:
        print("batch: ",batch_no)
        batch_no += 1
        try:
            fetched = sess.run(
                {'a': model.samples, 'b': model.pre_dif},
                feed_dict={
                    iterator.handle: iterator.get_handle(sess, val_or_test),
                    # Run the graph in evaluation mode.
                    tx.context.global_mode(): tf.estimator.ModeKeys.EVAL,
                })
        except tf.errors.OutOfRangeError:
            print("all batches finished")
            return decoded_sents, dif_values

        texts = tx.utils.dict_pop(fetched['a'], list(model.samples.keys()))
        hyps = tx.utils.map_ids_to_strs(texts['transferred'], vocab)

        decoded_sents.extend(hyps.tolist())
        dif_values.extend(fetched['b'].tolist())

In [4]:
# Create the session and run the global-variable, local-variable and
# lookup-table initializers, in that order.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
sess.run(tf.tables_initializer())

# max_to_keep=None: old checkpoints are never deleted by the saver.
saver = tf.train.Saver(max_to_keep=None)

iterator.initialize_dataset(sess)
# fiterator and miterator are only used when training the discriminator; everything else uses iterator.
fiterator.initialize_dataset(sess)
miterator.initialize_dataset(sess)

## 调试的时候用的代码

In [8]:
#saver.save(sess,'RLsave/allmodel64.ckpt')

INFO:tensorflow:RLsave/allmodel64.ckpt is not in all_model_checkpoint_paths. Manually adding it.


'RLsave/allmodel64.ckpt'

In [7]:
saver.save(sess,'RLsave/AE_clas100convnet_finalstate_RL.ckpt') # model after RL training (narrowing pre_dif while keeping the original sent unchanged)

INFO:tensorflow:RLsave/AE_clas100convnet_finalstate_RL.ckpt is not in all_model_checkpoint_paths. Manually adding it.


'RLsave/AE_clas100convnet_finalstate_RL.ckpt'

In [5]:
saver.restore(sess,'RLsave/AE_clas100onlyconvnet_finalstate_RL.ckpt') #model only after AE training

INFO:tensorflow:Restoring parameters from RLsave/AE_clas100onlyconvnet_RL.ckpt


In [5]:
# Fit the female regression model: epochs 1..19, scoring 'fval' after every third epoch.
for epoch in range(1, 20):
    fiterator.restart_dataset(sess, ['ftrain_g'])
    f_train_epoch_reg(sess, epoch)
    if epoch % 3 != 0:
        continue
    print("evaluation on fval:")
    # No dataset name is given here, so restart_dataset runs with its default target.
    fiterator.restart_dataset(sess)
    fevalf_epoch_reg(sess, val_or_test='fval')

step: 1, 19.7001
step: 5, 17.9688
step: 10, 16.1812
step: 15, 15.0004
step: 20, 12.3564
step: 25, 7.5261
step: 30, 3.4269
step: 35, 1.6063
step: 40, 2.8278
step: 45, 1.7427
step: 50, 1.5196
step: 55, 1.6947
step: 60, 1.4191
step: 65, 1.1881
step: 70, 1.4325
step: 75, 1.4281
step: 80, 1.3928
step: 85, 1.1643
step: 90, 1.2019
step: 95, 1.0786
step: 100, 1.1660
step: 105, 1.3316
step: 110, 1.2507
step: 115, 1.4047
step: 120, 1.0872
epoch: 1, 1.2410
step: 1, 0.9108
step: 5, 1.1863
step: 10, 1.1095
step: 15, 0.9863
step: 20, 0.8375
step: 25, 1.1534
step: 30, 1.1056
step: 35, 0.9419
step: 40, 1.0774
step: 45, 0.9252
step: 50, 1.0302
step: 55, 1.0515
step: 60, 0.9571
step: 65, 0.7929
step: 70, 0.9983
step: 75, 0.8963
step: 80, 0.9598
step: 85, 0.9144
step: 90, 0.8375
step: 95, 0.8398
step: 100, 0.7962
step: 105, 0.9041
step: 110, 0.8748
step: 115, 1.0852
step: 120, 0.8208
epoch: 2, 0.8445
step: 1, 0.7349
step: 5, 0.8511
step: 10, 0.8451
step: 15, 0.7351
step: 20, 0.6977
step: 25, 0.9030
step:

step: 115, 0.2857
step: 120, 0.2735
epoch: 18, 0.2042
evaluation on fval:
fval: 0.9495158195495605
step: 1, 0.2399
step: 5, 0.2705
step: 10, 0.2204
step: 15, 0.1546
step: 20, 0.1938
step: 25, 0.2088
step: 30, 0.1890
step: 35, 0.1981
step: 40, 0.2845
step: 45, 0.2552
step: 50, 0.4724
step: 55, 0.2848
step: 60, 0.5633
step: 65, 0.2971
step: 70, 0.8018
step: 75, 0.3686
step: 80, 0.9621
step: 85, 0.4556
step: 90, 1.1052
step: 95, 0.5561
step: 100, 0.9070
step: 105, 0.5117
step: 110, 0.5892
step: 115, 0.5464
step: 120, 0.3760
epoch: 19, 0.4000


In [6]:
# Fit the male regression model: epochs 1..19, scoring 'mval' after every third epoch.
for epoch in range(1, 20):
    miterator.restart_dataset(sess, ['mtrain_g'])
    m_train_epoch_reg(sess, epoch)
    if epoch % 3 != 0:
        continue
    print("evaluation on mval:")
    # No dataset name is given here, so restart_dataset runs with its default target.
    miterator.restart_dataset(sess)
    mevalm_epoch_reg(sess, val_or_test='mval')

step: 1, 18.6572
step: 5, 17.1254
step: 10, 15.5842
step: 15, 12.7524
step: 20, 9.0738
step: 25, 5.1732
step: 30, 2.1053
step: 35, 2.2862
step: 40, 2.2012
step: 45, 1.4117
step: 50, 1.4791
step: 55, 1.4668
step: 60, 1.3457
step: 65, 1.2113
step: 70, 1.4553
step: 75, 1.4488
step: 80, 1.2195
step: 85, 1.4517
step: 90, 1.2983
step: 95, 1.1301
step: 100, 1.4036
step: 105, 1.1456
step: 110, 1.2189
step: 115, 1.0333
step: 120, 1.1050
epoch: 1, 1.1050
step: 1, 0.9457
step: 5, 1.1101
step: 10, 1.0587
step: 15, 1.1210
step: 20, 1.0288
step: 25, 1.0764
step: 30, 1.0834
step: 35, 0.9957
step: 40, 1.0759
step: 45, 0.8869
step: 50, 1.0383
step: 55, 0.8966
step: 60, 0.9770
step: 65, 0.9232
step: 70, 0.9566
step: 75, 1.0466
step: 80, 0.9122
step: 85, 0.9730
step: 90, 0.8381
step: 95, 0.8620
step: 100, 1.0127
step: 105, 0.9004
step: 110, 0.9397
step: 115, 0.8705
step: 120, 0.9070
epoch: 2, 0.9070
step: 1, 0.7378
step: 5, 0.8212
step: 10, 0.8186
step: 15, 0.7672
step: 20, 0.7800
step: 25, 0.8313
step: 

step: 115, 0.2014
step: 120, 0.3958
epoch: 18, 0.3958
evaluation on mval:
mval: 0.9198949933052063
step: 1, 0.3514
step: 5, 0.2203
step: 10, 0.4007
step: 15, 0.2320
step: 20, 0.2989
step: 25, 0.3406
step: 30, 0.2238
step: 35, 0.3320
step: 40, 0.2679
step: 45, 0.2816
step: 50, 0.2394
step: 55, 0.2558
step: 60, 0.1674
step: 65, 0.2319
step: 70, 0.1846
step: 75, 0.2321
step: 80, 0.2070
step: 85, 0.2362
step: 90, 0.1751
step: 95, 0.2019
step: 100, 0.1998
step: 105, 0.2182
step: 110, 0.2166
step: 115, 0.2218
step: 120, 0.2924
epoch: 19, 0.2924


# loss for female and male models

---------------------------

In [8]:
# evaluate female model loss on female data
fiterator.restart_dataset(sess)
fevalf_epoch_reg(sess, val_or_test='ftest')

ftest: 1.1521238088607788


1.1521238

## 得到 f_on_f 的预测结果

In [10]:
## Collect model.freg_sample outputs on female data (f_on_f) and dump them to disk.

all_pre = []
all_Y = []
all_sent = []
val_or_test = 'ftest'

fiterator.restart_dataset(sess)
while True:
    try:
        ffeed_dict = {
            fiterator.handle: fiterator.get_handle(sess, val_or_test),
            # Run the graph in evaluation mode.
            tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
        }
        vals = sess.run(model.freg_sample, feed_dict=ffeed_dict)
        all_pre.extend(vals['fprediction'].tolist())
        all_Y.extend(vals['fground_truth'].tolist())
        all_sent.extend(vals['fsent'].tolist())

    except tf.errors.OutOfRangeError:
        print("end of epoch")
        break

# Decode every id sequence back into a sentence string.
allsent2 = []
for x in all_sent:
    allsent2.append(tx.utils.map_ids_to_strs(x,vocab))

l = len(all_pre)
# The file name is derived from the split that was actually read. The original
# hard-coded 'fmodel_fval_pre' while reading 'ftest', unlike the sibling dump
# cells, which all name the file after the split they evaluate.
# NOTE(review): downstream readers of 'fmodel_fval_pre' must switch to the new name.
out_path = 'data/gender_data/fmodel_{}_pre'.format(val_or_test)
# One line per example: ground truth, prediction, sentence (tab-separated).
with open(out_path,'w') as f:
    for i in range(l):
        f.write(str(all_Y[i])+'\t')
        f.write(str(all_pre[i])+'\t')
        f.write(allsent2[i]+'\n')

all_pre = np.array(all_pre)
all_Y = np.array(all_Y)

# Fraction of examples whose prediction is within 0.5 of the ground truth.
print(np.sum(abs(all_pre-all_Y)<0.5)/l)

end of epoch
0.4223901098901099


---------------------------

In [57]:
# evaluate male model loss on male data
miterator.restart_dataset(sess)
mevalm_epoch_reg(sess, val_or_test='mtest')

mtest: 0.8676668405532837


0.86766684

## 得到 m_on_m 的预测结果

In [42]:
## Collect model.mreg_sample outputs on the 'mtest' split (m_on_m) and dump them to disk.
## (The original header said "f_on_f"; that was a copy-paste leftover.)

all_pre = []
all_Y = []
all_sent = []
val_or_test = 'mtest'

miterator.restart_dataset(sess)
while True:
    try:
        mfeed_dict = {
            miterator.handle: miterator.get_handle(sess, val_or_test),
            tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
        }
        vals = sess.run(model.mreg_sample, feed_dict=mfeed_dict)
        all_pre.extend(vals['mprediction'].tolist())
        all_Y.extend(vals['mground_truth'].tolist())
        all_sent.extend(vals['msent'].tolist())

    except tf.errors.OutOfRangeError:
        print("end of epoch")
        break

# Decode every id sequence back into a sentence string.
allsent2 = []
for x in all_sent:
    allsent2.append(tx.utils.map_ids_to_strs(x,vocab))

l = len(all_pre)
# One line per example: ground truth, prediction, sentence (tab-separated).
with open('data/gender_data/mmodel_mtest_pre','w') as f:
    for i in range(l):
        f.write(str(all_Y[i])+'\t')
        f.write(str(all_pre[i])+'\t')
        f.write(allsent2[i]+'\n')

all_pre = np.array(all_pre)
all_Y = np.array(all_Y)

# Fraction of examples whose prediction is within 0.5 of the ground truth.
print(np.sum(abs(all_pre-all_Y)<0.5)/l)

end of epoch
0.4097222222222222


## female model on male data loss (但是注意测试完了之后要把代码改回来)

In [58]:
# Temporarily rewire the female regression head to read the male inputs
# (self.minputs), then average its batch loss over one split of miterator.
# The existing fconvnet / freg_embedder / fdense_layer objects are called
# again on the new inputs; their constructors stay commented out.
self = model
#self.fconvnet = Conv1DNetwork(hparams=self._hparams.convnet) #[batch_size, time_steps, embedding_dim] (default input)
#convnet = Conv1DNetwork()
#self.freg_embedder = WordEmbedder(vocab_size=self.vocab.size,hparams=self._hparams.embedder) #(64, 26, 100) (output shape of clas_embedder(ids=inputs['text_ids'][:, 1:]))
self.fconv_output = self.fconvnet(inputs=self.freg_embedder(ids=self.minputs['text_ids'][:, 1:])) #(64, 128)  TODO: handle finputs later
#p = {"type": "Dense", "kwargs": {'units':1}}
#self.fdense_layer = tx.core.layers.get_layer(hparams=p)
self.freg_output = self.fdense_layer(inputs=self.fconv_output)

self.fprediction = tf.reshape(self.freg_output,[-1])
self.fground_truth = tf.to_float(self.minputs['labels'])
self.freg_sample = {
        "fprediction": self.fprediction,
        "fground_truth": self.fground_truth,
        "fsent": self.minputs['text_ids'][:, 1:]
    }

self.floss_reg_single = tf.pow(self.fprediction - self.fground_truth,2) # per-example loss; can later be used in RL to update over a whole batch
self.floss_reg_batch = tf.reduce_mean(self.floss_reg_single) # loss averaged over one batch


avg_meters = []
# NOTE(review): this evaluates on the training split 'mtrain_g', while the
# sibling cross-evaluation cell uses a test split — confirm this is intended.
val_or_test = 'mtrain_g'
miterator.restart_dataset(sess)
while True:
    try:
        mfeed_dict = {
            miterator.handle: miterator.get_handle(sess, val_or_test),
            tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
        }
        vals = sess.run(model.floss_reg_batch, feed_dict=mfeed_dict)
        avg_meters.append(vals)

    except tf.errors.OutOfRangeError:
        # Plain mean over the fetched per-batch values.
        avg_loss = np.mean(avg_meters)
        print('{}: {}'.format(
            val_or_test, avg_loss))
        break
print(avg_loss)

mtrain_g: 0.8748442530632019
0.87484425


### 得到 f_on_m 的预测结果

In [63]:
## Collect model.freg_sample outputs on the 'mtest' split (f_on_m) and dump them to disk.
## (The original header said "f_on_f"; that was a copy-paste leftover.)

all_pre = []
all_Y = []
all_sent = []
val_or_test = 'mtest'

miterator.restart_dataset(sess)
while True:
    try:
        mfeed_dict = {
            miterator.handle: miterator.get_handle(sess, val_or_test),
            tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
        }
        vals = sess.run(model.freg_sample, feed_dict=mfeed_dict)
        all_pre.extend(vals['fprediction'].tolist())
        all_Y.extend(vals['fground_truth'].tolist())
        all_sent.extend(vals['fsent'].tolist())

    except tf.errors.OutOfRangeError:
        print("end of epoch")
        break

# Decode every id sequence back into a sentence string.
allsent2 = []
for x in all_sent:
    allsent2.append(tx.utils.map_ids_to_strs(x,vocab))

l = len(all_pre)
# One line per example: ground truth, prediction, sentence (tab-separated).
with open('data/gender_data/fmodel_mtest_pre','w') as f:
    for i in range(l):
        f.write(str(all_Y[i])+'\t')
        f.write(str(all_pre[i])+'\t')
        f.write(allsent2[i]+'\n')

all_pre = np.array(all_pre)
all_Y = np.array(all_Y)

# Fraction of examples whose prediction is within 0.5 of the ground truth.
print(np.sum(abs(all_pre-all_Y)<0.5)/l)

end of epoch
0.44027777777777777


In [64]:
## Revert: wire the female regression head back to the female inputs (self.finputs).

self = model
#self.fconvnet = Conv1DNetwork(hparams=self._hparams.convnet) #[batch_size, time_steps, embedding_dim] (default input)
#convnet = Conv1DNetwork()
#self.freg_embedder = WordEmbedder(vocab_size=self.vocab.size,hparams=self._hparams.embedder) #(64, 26, 100) (output shape of clas_embedder(ids=inputs['text_ids'][:, 1:]))
self.fconv_output = self.fconvnet(inputs=self.freg_embedder(ids=self.finputs['text_ids'][:, 1:])) #(64, 128)  TODO: handle finputs later
#p = {"type": "Dense", "kwargs": {'units':1}}
#self.fdense_layer = tx.core.layers.get_layer(hparams=p)
self.freg_output = self.fdense_layer(inputs=self.fconv_output)

self.fprediction = tf.reshape(self.freg_output,[-1])
self.fground_truth = tf.to_float(self.finputs['labels'])
self.freg_sample = {
        "fprediction": self.fprediction,
        "fground_truth": self.fground_truth,
        "fsent": self.finputs['text_ids'][:, 1:]
    }

self.floss_reg_single = tf.pow(self.fprediction - self.fground_truth,2) # per-example loss; can later be used in RL to update over a whole batch
self.floss_reg_batch = tf.reduce_mean(self.floss_reg_single) # loss averaged over one batch

## male model on female data (但是注意测试完了之后要把代码改回来)

In [12]:
# Temporarily rewire the male regression head to read the female inputs
# (self.finputs), then average its batch loss over the 'ftest' split.
# The existing mconvnet / mreg_embedder / mdense_layer objects are called
# again on the new inputs; their constructors stay commented out.
self = model
#self.mconvnet = Conv1DNetwork(hparams=self._hparams.convnet) #[batch_size, time_steps, embedding_dim] (default input)
#convnet = Conv1DNetwork()
#self.mreg_embedder = WordEmbedder(vocab_size=self.vocab.size,hparams=self._hparams.embedder) #(64, 26, 100) (output shape of clas_embedder(ids=inputs['text_ids'][:, 1:]))
self.mconv_output = self.mconvnet(inputs=self.mreg_embedder(ids=self.finputs['text_ids'][:, 1:])) #(64, 128)
#p = {"type": "Dense", "kwargs": {'units':1}}
#self.mdense_layer = tx.core.layers.get_layer(hparams=p)
self.mreg_output = self.mdense_layer(inputs=self.mconv_output)

self.mprediction = tf.reshape(self.mreg_output,[-1])
self.mground_truth = tf.to_float(self.finputs['labels'])
self.mreg_sample = {
        "mprediction": self.mprediction,
        "mground_truth": self.mground_truth,
        "msent": self.finputs['text_ids'][:, 1:]
}

self.mloss_reg_single = tf.pow(self.mprediction - self.mground_truth,2) # per-example loss; can later be used in RL to update over a whole batch
self.mloss_reg_batch = tf.reduce_mean(self.mloss_reg_single) # loss averaged over one batch


avg_meters = []
val_or_test = 'ftest'
fiterator.restart_dataset(sess)
while True:
    try:
        ffeed_dict = {
            fiterator.handle: fiterator.get_handle(sess, val_or_test),
            tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
        }
        vals = sess.run(model.mloss_reg_batch, feed_dict=ffeed_dict)
        avg_meters.append(vals)

    except tf.errors.OutOfRangeError:
        # Plain mean over the fetched per-batch values.
        avg_loss = np.mean(avg_meters)
        print('{}: {}'.format(
            val_or_test, avg_loss))
        break
print(avg_loss)

ftest: 0.8671502470970154
0.86715025


In [8]:
## Collect model.mreg_sample outputs on the 'ftest' split (m_on_f) and dump them to disk.

all_pre = []
all_Y = []
all_sent = []
val_or_test = 'ftest'

fiterator.restart_dataset(sess)
while True:
    try:
        ffeed_dict = {
            fiterator.handle: fiterator.get_handle(sess, val_or_test),
            tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
        }
        vals = sess.run(model.mreg_sample, feed_dict=ffeed_dict)
        all_pre.extend(vals['mprediction'].tolist())
        all_Y.extend(vals['mground_truth'].tolist())
        all_sent.extend(vals['msent'].tolist())

    except tf.errors.OutOfRangeError:
        print("end of epoch")
        break

# Decode every id sequence back into a sentence string.
allsent2 = []
for x in all_sent:
    allsent2.append(tx.utils.map_ids_to_strs(x,vocab))

l = len(all_pre)
# One line per example: ground truth, prediction, sentence (tab-separated).
with open('data/gender_data/mmodel_ftest_pre','w') as f:
    for i in range(l):
        f.write(str(all_Y[i])+'\t')
        f.write(str(all_pre[i])+'\t')
        f.write(allsent2[i]+'\n')

all_pre = np.array(all_pre)
all_Y = np.array(all_Y)

# Fraction of examples whose prediction is within 0.5 of the ground truth.
print(np.sum(abs(all_pre-all_Y)<0.5)/l)

end of epoch
0.4491758241758242


## 把代码改回来

In [14]:
# Revert: wire the male regression head back to the male inputs (self.minputs).
self = model
#self.mconvnet = Conv1DNetwork(hparams=self._hparams.convnet) #[batch_size, time_steps, embedding_dim] (default input)
#convnet = Conv1DNetwork()
#self.mreg_embedder = WordEmbedder(vocab_size=self.vocab.size,hparams=self._hparams.embedder) #(64, 26, 100) (output shape of clas_embedder(ids=inputs['text_ids'][:, 1:]))
self.mconv_output = self.mconvnet(inputs=self.mreg_embedder(ids=self.minputs['text_ids'][:, 1:])) #(64, 128)
# NOTE(review): `p` is assigned here but not used anywhere in this cell
# (the get_layer call below is commented out).
p = {"type": "Dense", "kwargs": {'units':1}}
#self.mdense_layer = tx.core.layers.get_layer(hparams=p)
self.mreg_output = self.mdense_layer(inputs=self.mconv_output)

self.mprediction = tf.reshape(self.mreg_output,[-1])
self.mground_truth = tf.to_float(self.minputs['labels'])
self.mreg_sample = {
        "mprediction": self.mprediction,
        "mground_truth": self.mground_truth,
        "msent": self.minputs['text_ids'][:, 1:]
}

self.mloss_reg_single = tf.pow(self.mprediction - self.mground_truth,2) # per-example loss; can later be used in RL to update over a whole batch
self.mloss_reg_batch = tf.reduce_mean(self.mloss_reg_single) # loss averaged over one batch

## 得到all_data的预测结果 （最后把代码改回来）

In [43]:
# Temporarily rewire BOTH regression heads to read the main iterator's inputs
# (self.inputs) so predictions can be collected over the combined data.
# The loss tensors (floss_reg_* / mloss_reg_*) are not rebuilt in this cell.
self = model
############################ female sentiment regression model
# Only the convnet is used for now and its quality is unknown; later, try RNN decoding to check regression accuracy, or combine the two (concatenate into one vector).
#self.fconvnet = Conv1DNetwork(hparams=self._hparams.convnet) #[batch_size, time_steps, embedding_dim] (default input)
#convnet = Conv1DNetwork()
#self.freg_embedder = WordEmbedder(vocab_size=self.vocab.size,hparams=self._hparams.embedder) #(64, 26, 100) (output shape of clas_embedder(ids=inputs['text_ids'][:, 1:]))
self.fconv_output = self.fconvnet(inputs=self.freg_embedder(ids=self.inputs['text_ids'][:, 1:])) #(64, 128)  TODO: handle finputs later
#p = {"type": "Dense", "kwargs": {'units':1}}
#self.fdense_layer = tx.core.layers.get_layer(hparams=p)
self.freg_output = self.fdense_layer(inputs=self.fconv_output)

self.fprediction = tf.reshape(self.freg_output,[-1])
self.fground_truth = tf.to_float(self.inputs['labels'])
self.freg_sample = {
        "fprediction": self.fprediction,
        "fground_truth": self.fground_truth,
        "fsent": self.inputs['text_ids'][:, 1:]
    }

############################ male sentiment regression model
#self.mconvnet = Conv1DNetwork(hparams=self._hparams.convnet) #[batch_size, time_steps, embedding_dim] (default input)
#convnet = Conv1DNetwork()
#self.mreg_embedder = WordEmbedder(vocab_size=self.vocab.size,hparams=self._hparams.embedder) #(64, 26, 100) (output shape of clas_embedder(ids=inputs['text_ids'][:, 1:]))
self.mconv_output = self.mconvnet(inputs=self.mreg_embedder(ids=self.inputs['text_ids'][:, 1:])) #(64, 128)
#p = {"type": "Dense", "kwargs": {'units':1}}
#self.mdense_layer = tx.core.layers.get_layer(hparams=p)
self.mreg_output = self.mdense_layer(inputs=self.mconv_output)

self.mprediction = tf.reshape(self.mreg_output,[-1])
self.mground_truth = tf.to_float(self.inputs['labels'])
self.mreg_sample = {
        "mprediction": self.mprediction,
        "mground_truth": self.mground_truth,
        "msent": self.inputs['text_ids'][:, 1:]
}

In [25]:
# Collect model.mreg_sample outputs over the 'train_g' split of the main iterator.
# NOTE(review): only the male head is fetched here, yet a later cell copies
# all_pre/all_Y/all_sent into fpre/fY/fsent. Presumably this cell was also run
# with model.freg_sample and the 'f*' keys and then edited — confirm before
# trusting a Restart & Run All.
all_pre = []
all_Y = []
all_sent = []
val_or_test = 'train_g'

iterator.restart_dataset(sess)
while True:
    try:
        feed_dict = {
            iterator.handle: iterator.get_handle(sess, val_or_test),
            tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
        }
        vals = sess.run(model.mreg_sample, feed_dict=feed_dict)
        all_pre.extend(vals['mprediction'].tolist())
        all_Y.extend(vals['mground_truth'].tolist())
        all_sent.extend(vals['msent'].tolist())

    except tf.errors.OutOfRangeError:
        print("end of epoch")
        break

end of epoch


In [11]:
fpre = []
fY = []
fsent = []

In [18]:
fpre.extend(all_pre)
fY.extend(all_Y)
fsent.extend(all_sent)

In [19]:
len(fpre)

19301

In [21]:
mpre = []
mY = []
msent = []

In [26]:
mpre.extend(all_pre)
mY.extend(all_Y)
msent.extend(all_sent)

In [27]:
# Decode each stored id sequence back into a sentence string (male list first, then female).
msent2 = [tx.utils.map_ids_to_strs(ids, vocab) for ids in msent]
fsent2 = [tx.utils.map_ids_to_strs(ids, vocab) for ids in fsent]

In [28]:
l = len(fpre)

In [30]:
# One line per example: ground truth, prediction, sentence (tab-separated).
# The row count comes from fpre itself rather than the global `l`, which is
# set in a separate cell and rebound by several other cells.
with open('data/gender_data/fmodel_preall','w') as f:
    for i in range(len(fpre)):
        f.write(str(fY[i])+'\t')
        f.write(str(fpre[i])+'\t')
        f.write(fsent2[i]+'\n')

In [31]:
# One line per example: ground truth, prediction, sentence (tab-separated).
# The row count comes from mpre. The original looped over `l`, which was
# computed from the female list (len(fpre)) in another cell.
with open('data/gender_data/mmodel_preall','w') as f:
    for i in range(len(mpre)):
        f.write(str(mY[i])+'\t')
        f.write(str(mpre[i])+'\t')
        f.write(msent2[i]+'\n')

## 把代码改回来

In [55]:
# Revert the "all data" rewiring: the female head reads self.finputs again and
# the male head reads self.minputs again.
#
# Two fixes compared with the original cell:
#   * `self` is bound explicitly, so the cell no longer relies on another cell
#     having run `self = model` first;
#   * "fsent"/"msent" now come from finputs/minputs. The original left them on
#     self.inputs, so the sentences in each sample dict came from the main
#     iterator while prediction and ground truth came from the
#     gender-specific one.
self = model

############################ female sentiment regression model
# Only the convnet is used for now and its quality is unknown; later, try RNN
# decoding to check regression accuracy, or combine the two (concatenate into
# one vector).
# The existing fconvnet / freg_embedder / fdense_layer objects are called
# again on the new inputs; no new layers are constructed.
self.fconv_output = self.fconvnet(inputs=self.freg_embedder(ids=self.finputs['text_ids'][:, 1:]))
self.freg_output = self.fdense_layer(inputs=self.fconv_output)

self.fprediction = tf.reshape(self.freg_output,[-1])
self.fground_truth = tf.to_float(self.finputs['labels'])
self.freg_sample = {
        "fprediction": self.fprediction,
        "fground_truth": self.fground_truth,
        "fsent": self.finputs['text_ids'][:, 1:]
    }

############################ male sentiment regression model
# The existing mconvnet / mreg_embedder / mdense_layer objects are called
# again on the new inputs; no new layers are constructed.
self.mconv_output = self.mconvnet(inputs=self.mreg_embedder(ids=self.minputs['text_ids'][:, 1:]))
self.mreg_output = self.mdense_layer(inputs=self.mconv_output)

self.mprediction = tf.reshape(self.mreg_output,[-1])
self.mground_truth = tf.to_float(self.minputs['labels'])
self.mreg_sample = {
        "mprediction": self.mprediction,
        "mground_truth": self.mground_truth,
        "msent": self.minputs['text_ids'][:, 1:]
}

## 训练 AE 模型

In [None]:
# Run 20 autoencoder training epochs (epoch = 0..19). restart_dataset is
# called without a dataset name before each pass.
for epoch in range(20):
    iterator.restart_dataset(sess)
    _train_epoch_AE(sess, epoch, verbose=True)

step: 1, 20.8082
step: 5, 19.5076
step: 10, 13.6885
step: 15, 10.1947
step: 20, 8.8356
step: 25, 8.1333
step: 30, 7.8104
step: 35, 7.5171
step: 40, 7.3809
step: 45, 7.2361
step: 50, 7.2126
step: 55, 7.0266
step: 60, 6.8872
step: 65, 6.8924
step: 70, 6.8515
step: 75, 6.7434
step: 80, 6.6031
step: 85, 6.4644
step: 90, 6.2775
step: 95, 6.2208
step: 100, 6.0819
step: 105, 5.9561
step: 110, 6.0665
step: 115, 6.0655
step: 120, 5.9292
step: 125, 5.9144
step: 130, 5.8478
step: 135, 5.9537
step: 140, 5.8630
step: 145, 5.9022
step: 150, 5.8361
step: 155, 5.8827
step: 160, 5.8259
step: 165, 5.6868
step: 170, 5.7853
step: 175, 5.7930
step: 180, 5.8100
step: 185, 5.8827
step: 190, 5.7765
step: 195, 5.8223
step: 200, 5.7640
step: 205, 5.6567
step: 210, 5.7569
step: 215, 5.5886
step: 220, 5.6213
step: 225, 5.6601
step: 230, 5.7383
step: 235, 5.5888
step: 240, 5.7189
epoch: 0, 5.6081
step: 1, 5.5904
step: 5, 5.6259
step: 10, 5.4694
step: 15, 5.5500
step: 20, 5.5463
step: 25, 5.6128
step: 30, 5.5433
st

In [12]:
# Decode the 'train_g' split and collect the matching pre_dif values.
# The two empty-list initialisations that preceded this call were dead stores
# (both names are rebound by the tuple assignment) and have been removed.
sample_sents,pre_dif = get_sents_AE(sess,'train_g')

batch:  1
batch:  2
batch:  3
batch:  4
batch:  5
batch:  6
batch:  7
batch:  8
batch:  9
batch:  10
batch:  11
batch:  12
batch:  13
batch:  14
batch:  15
batch:  16
batch:  17
batch:  18
batch:  19
batch:  20
batch:  21
batch:  22
batch:  23
batch:  24
batch:  25
batch:  26
batch:  27
batch:  28
batch:  29
batch:  30
batch:  31
batch:  32
batch:  33
batch:  34
batch:  35
batch:  36
batch:  37
batch:  38
batch:  39
batch:  40
batch:  41
batch:  42
batch:  43
batch:  44
batch:  45
batch:  46
batch:  47
batch:  48
batch:  49
batch:  50
batch:  51
batch:  52
batch:  53
batch:  54
batch:  55
batch:  56
batch:  57
batch:  58
batch:  59
batch:  60
batch:  61
batch:  62
batch:  63
batch:  64
batch:  65
batch:  66
batch:  67
batch:  68
batch:  69
batch:  70
batch:  71
batch:  72
batch:  73
batch:  74
batch:  75
batch:  76
batch:  77
batch:  78
batch:  79
batch:  80
batch:  81
batch:  82
batch:  83
batch:  84
batch:  85
batch:  86
batch:  87
batch:  88
batch:  89
batch:  90
batch:  91
batch:  

In [13]:
sample_sents

['great airport ! very clean , true lines were not long . and free dessert is always a',
 'clean and open airport . loved it !',
 'omg i can recommend the salsa ignored . wish this place were 3 to home .',
 'the best !',
 'very cute small charming place . food is amazing .',
 'i am this wall into its low small and dirty and the staff are rude',
 'love the atmosphere but the beers are overpriced . the food is sad , champ though flavor .',
 'love this place !',
 'great sushi . though not all you can eat . the holy tear either is awesome',
 'horrible old food customer service food was so server beef foie sause this place',
 'burgr really grand about helping people feel better and difficult their health .',
 'good beers , good food .',
 'in-n-out expectations ! best meal of the week !',
 'awesome place ! manager room and great service !! great willing selections and awesome time !',
 'slow . dirty . bad food . what i have come to expect from the cheese .',
 'was happy to see they added vio

In [14]:
pre_dif

[0.28280210494995117,
 0.5692028999328613,
 0.34029555320739746,
 0.12000322341918945,
 0.44179677963256836,
 0.038399457931518555,
 0.7282156944274902,
 0.3369460105895996,
 0.14321088790893555,
 0.1899864673614502,
 0.02309131622314453,
 0.029394865036010742,
 0.91827392578125,
 1.253425121307373,
 0.03418707847595215,
 0.038986921310424805,
 0.9803295135498047,
 0.39992380142211914,
 0.1776435375213623,
 0.11961507797241211,
 0.3994426727294922,
 1.5113942623138428,
 0.7163684368133545,
 0.20705127716064453,
 0.16541671752929688,
 0.5437569618225098,
 0.41033172607421875,
 0.15586495399475098,
 1.3468024730682373,
 0.4690227508544922,
 0.33864927291870117,
 1.1696500778198242,
 0.3090333938598633,
 0.8178620338439941,
 0.2736630439758301,
 0.7948358058929443,
 0.10985994338989258,
 2.4021897315979004,
 0.9060754776000977,
 0.4758143424987793,
 0.8565521240234375,
 0.874504566192627,
 0.23162579536437988,
 0.21396207809448242,
 0.5469112396240234,
 0.12356758117675781,
 0.11176729202

In [16]:
# One line per decoded sentence: its pre_dif value, a tab, then the sentence.
with open('data/gender_data/traing_sample_predif','w') as f:
    for i, sent in enumerate(sample_sents):
        f.write(str(pre_dif[i])+'\t'+sent+'\n')

In [None]:
def train_epoch_RL(sess, epoch, verbose=True):
    """Run `model.train_updated` on the 'train_g' split until it is exhausted.

    Args:
        sess: Session used both to obtain the data handle and to run the fetch.
        epoch: Epoch index; only used in the end-of-epoch summary line.
        verbose: If true, print the recorder summary at step 1 and at every
            5th step.

    Returns:
        None. Progress is reported through `print`.
    """
    recorder = tx.utils.AverageRecorder(size=5)
    step = 0
    while True:
        step += 1
        try:
            train_handle = iterator.get_handle(sess, 'train_g')
            fetched = sess.run(
                model.train_updated,
                feed_dict={iterator.handle: train_handle})
            recorder.add(fetched)

            at_log_step = step == 1 or step % 5 == 0
            if verbose and at_log_step:
                print('step: {}, {}'.format(step, recorder.to_str(4)))

            # Periodic validation is currently disabled:
            #   if verbose and step % 2 == 0:
            #       iterator.restart_dataset(sess, 'val')
            #       _eval_epoch(sess, epoch)
        except tf.errors.OutOfRangeError:
            # The split is exhausted: print the epoch summary and stop.
            print('epoch: {}, {}'.format(epoch, recorder.to_str(4)))
            return

In [6]:
def train_epoch_AERL(sess, epoch, verbose=True):
    """Run `model.train_AERL` on the 'train_g' split until it is exhausted.

    Args:
        sess: Session used both to obtain the data handle and to run the fetch.
        epoch: Epoch index; only used in the end-of-epoch summary line.
        verbose: If true, print the recorder summary at step 1 and at every
            3rd step.

    Returns:
        None. Progress is reported through `print`.
    """
    meters = tx.utils.AverageRecorder(size=3)
    step = 0
    while True:
        step += 1
        try:
            feed = {iterator.handle: iterator.get_handle(sess, 'train_g')}
            meters.add(sess.run(model.train_AERL, feed_dict=feed))

            if verbose:
                if step == 1 or step % 3 == 0:
                    print('step: {}, {}'.format(step, meters.to_str(4)))

            # Periodic validation is currently disabled:
            #   if verbose and step % 2 == 0:
            #       iterator.restart_dataset(sess, 'val')
            #       _eval_epoch(sess, epoch)
        except tf.errors.OutOfRangeError:
            # The split is exhausted: print the epoch summary and leave the loop.
            print('epoch: {}, {}'.format(epoch, meters.to_str(4)))
            break

In [9]:
# Feed dict selecting the 'test' split of `iterator` and setting texar's
# global mode to EVAL. It is bound to the notebook-wide name `feed_dict`,
# which the following inspection cells reuse.
feed_dict = {
                iterator.handle: iterator.get_handle(sess, 'test'),
                tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
            }

In [7]:
# Run a single pass (epoch index 0) of `train_epoch_AERL`.
# `restart_dataset` is called without a dataset name — presumably this
# restarts every split registered on `iterator`; confirm against texar.
for epoch in range(1):
    iterator.restart_dataset(sess)
    train_epoch_AERL(sess,epoch)

step: 1, 2.2672
step: 3, 2.4824
step: 6, 3.2406
step: 9, 2.5132
step: 12, 2.2575
step: 15, 2.3685
step: 18, 2.1977
step: 21, 2.2019
step: 24, 2.1880
step: 27, 2.1512
step: 30, 2.1358
step: 33, 2.1509
step: 36, 2.0757
step: 39, 2.2304
step: 42, 2.1953
step: 45, 2.0338
step: 48, 2.0274
step: 51, 2.1109
step: 54, 2.1282
step: 57, 2.0467
step: 60, 1.9304
step: 63, 2.1750
step: 66, 1.8937
step: 69, 1.8738
step: 72, 2.1083
step: 75, 2.0120
step: 78, 1.8796
step: 81, 1.9938
step: 84, 1.8826
step: 87, 1.7569
step: 90, 1.9558


KeyboardInterrupt: 

In [10]:
# Evaluate `model.loss_g_ae` with the current notebook-wide `feed_dict`.
# NOTE(review): each `sess.run` presumably pulls a fresh batch, so this value
# is not computed on the same batch as the neighbouring loss cells — confirm.
sess.run(model.loss_g_ae,feed_dict=feed_dict)

1.9101827

In [12]:
# `model.updated_loss_per_batch` scaled by 0.003.
# NOTE(review): 0.003 is a magic number — presumably the weight this term gets
# inside `model.loss_AERL`; confirm against RLModel3 and name the constant.
0.003*sess.run(model.updated_loss_per_batch,feed_dict=feed_dict)

0.6062019653320313

In [13]:
# Evaluate `model.loss_AERL` with the current notebook-wide `feed_dict`.
# NOTE(review): each `sess.run` presumably pulls a fresh batch, so this is not
# directly comparable with the values of the two cells before it — confirm.
sess.run(model.loss_AERL,feed_dict=feed_dict)

2.3648825

In [14]:
# Mean of `model.pre_dif` from a single `sess.run` on the 'train_g' split in
# EVAL mode (i.e. presumably one batch, not the whole split).
# NOTE: this rebinds the notebook-wide `feed_dict`; cells executed afterwards
# that reuse `feed_dict` read from 'train_g'.
feed_dict = {
    iterator.handle: iterator.get_handle(sess, 'train_g'),
    tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
}
np.mean(sess.run(model.pre_dif,feed_dict=feed_dict))

0.4149837

In [15]:
def get_sents_AERL(sess,val_or_test='test'):
    """Collect sentences, labels and predictions for every batch of a split.

    All datasets of `iterator` are (re)initialised, then the split named by
    `val_or_test` is consumed batch by batch in EVAL mode until TensorFlow
    raises `OutOfRangeError`.

    Args:
        sess: Session used to initialise the iterator and run the fetches.
        val_or_test: Name of the dataset registered on `iterator` to read
            (e.g. 'val' or 'test').

    Returns:
        A 9-tuple of Python lists, each with one entry per example:
        `(label, sample_sents, ref_sents, pre_dif, RL_fpre, RL_mpre,
        Ypre_dif, Yfpre, Ympre)`, taken respectively from
        `model.inputs['labels']`, the decoded `model.samples['transferred']`,
        the decoded `model.samples['original']`, `model.pre_dif`,
        `model.RL_fprediction`, `model.RL_mprediction`, `model.Ypre_dif`,
        `model.YRL_fprediction` and `model.YRL_mprediction`.
    """
    iterator.initialize_dataset(sess)

    # The split handle and the mode are the same for every batch, so the feed
    # dict and the fetch dict are built once instead of once per iteration.
    feed_dict = {
        iterator.handle: iterator.get_handle(sess, val_or_test),
        tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
    }
    fetches = {
        'samples': model.samples,
        'pre_dif': model.pre_dif,
        'Ypre_dif': model.Ypre_dif,
        'labels': model.inputs['labels'],
        'RL_f': model.RL_fprediction,
        'RL_m': model.RL_mprediction,
        'YRL_f': model.YRL_fprediction,
        'YRL_m': model.YRL_mprediction,
    }

    label = []
    sample_sents = []
    ref_sents = []
    pre_dif = []
    RL_fpre = []
    # This list used to be created under the misspelled name `RL_mre`, so the
    # function silently appended to (and returned) a *global* `RL_mpre`, and
    # raised NameError when no such global existed.
    RL_mpre = []
    Ypre_dif = []
    Yfpre = []
    Ympre = []

    n_batches = 0
    while True:
        try:
            vals = sess.run(fetches, feed_dict=feed_dict)
        except tf.errors.OutOfRangeError:
            print("all batches finished")
            break

        # Count and report only batches that were actually fetched.
        n_batches += 1
        print("batch: ", n_batches)

        samples = tx.utils.dict_pop(vals['samples'], list(model.samples.keys()))
        hyps = tx.utils.map_ids_to_strs(samples['transferred'], vocab)
        refs = tx.utils.map_ids_to_strs(samples['original'], vocab)

        sample_sents.extend(hyps.tolist())
        ref_sents.extend(refs.tolist())
        pre_dif.extend(vals['pre_dif'].tolist())
        RL_fpre.extend(vals['RL_f'].tolist())
        RL_mpre.extend(vals['RL_m'].tolist())
        Ypre_dif.extend(vals['Ypre_dif'].tolist())
        Yfpre.extend(vals['YRL_f'].tolist())
        Ympre.extend(vals['YRL_m'].tolist())
        label.extend(vals['labels'].tolist())

    return label,sample_sents,ref_sents,pre_dif,RL_fpre,RL_mpre,Ypre_dif,Yfpre,Ympre

In [16]:
# Bind every result name to its own fresh empty list, then replace them all
# with the values collected by `get_sents_AERL` over the 'test' split.
(sample_sents, ref_sents, pre_dif, RL_fpre, RL_mpre,
 Ypre_dif, Yfpre, Ympre, label) = ([] for _ in range(9))
(label, sample_sents, ref_sents, pre_dif, RL_fpre, RL_mpre,
 Ypre_dif, Yfpre, Ympre) = get_sents_AERL(sess, 'test')

batch:  1
batch:  2
batch:  3
batch:  4
batch:  5
batch:  6
batch:  7
batch:  8
batch:  9
batch:  10
batch:  11
batch:  12
batch:  13
batch:  14
batch:  15
batch:  16
batch:  17
batch:  18
batch:  19
batch:  20
batch:  21
batch:  22
batch:  23
batch:  24
batch:  25
batch:  26
batch:  27
batch:  28
batch:  29
batch:  30
batch:  31
batch:  32
batch:  33
batch:  34
batch:  35
batch:  36
batch:  37
batch:  38
batch:  39
batch:  40
batch:  41
batch:  42
batch:  43
batch:  44
batch:  45
batch:  46
batch:  47
all batches finished


In [161]:
len(Yfpre)

2896

In [20]:
# Export one tab-separated row per collected example (same column order as
# before). The header is now joined with tabs as well: it used to be joined
# with ", " while the rows used "\t", so TSV readers saw a one-column header.
with open('pre_dif/AERL_all2','w') as f:
    f.write('\t'.join(['label', 'Ypre_dif', 'Yfpre', 'Ympre',
                       'generated_pre_dif', 'generated_fpre', 'generated_mpre',
                       'generated_sents', 'ref_sents']) + '\n')
    for i in range(len(Yfpre)):
        # Numeric fields are stringified; the two sentences are already text.
        f.write('\t'.join([str(label[i]), str(Ypre_dif[i]), str(Yfpre[i]), str(Ympre[i]),
                           str(pre_dif[i]), str(RL_fpre[i]), str(RL_mpre[i]),
                           sample_sents[i], ref_sents[i]]) + '\n')

In [136]:
# Fetch samples, prediction gaps, labels and the four prediction tensors with
# the current `feed_dict`. The single-letter keys are kept because the
# inspection cells index `vals` by them.
vals = sess.run(
    {
        'a': model.samples,
        'b': model.pre_dif,
        'c': model.Ypre_dif,
        'd': model.inputs['labels'],
        'e': model.RL_fprediction,
        'f': model.RL_mprediction,
        'g': model.YRL_fprediction,
        'h': model.YRL_mprediction
    },
    feed_dict=feed_dict
)

# Pull the sample tensors out of `vals['a']` and map the transferred and
# original ids to strings with `vocab` (no BLEU is computed in this cell).
samples = tx.utils.dict_pop(vals['a'], list(model.samples.keys()))
hyps = tx.utils.map_ids_to_strs(samples['transferred'], vocab)
refs = tx.utils.map_ids_to_strs(samples['original'], vocab)

In [49]:
# Show entry 6 of the fetched batch: pre_dif ('b'), Ypre_dif ('c'), then the
# generated and the reference sentence, one per line.
print(vals['b'][6], vals['c'][6], hyps[6], refs[6], sep='\n')

0.76738334
1.2452769
good food for daniel wrap but they can do better with their service ... so also too
good food for amusement park but they can do better with their service ... so darn slow


In [123]:
def lookup(n):
    """Print the label, predictions and sentences of entry `n`.

    Reads the notebook-level `vals` (keys 'b'-'h'), `hyps` and `refs` and
    prints one captioned line per field.

    Args:
        n: Index of the entry to display.
    """
    fields = (
        ("label: ", vals['d']),
        ("gpre_dif: ", vals['b']),
        ("gfpre: ", vals['e']),
        ("gmpre: ", vals['f']),
        ("Ypre_dif: ", vals['c']),
        ("Yfpre: ", vals['g']),
        ("Ympre: ", vals['h']),
        ("gen sent: ", hyps),
        ("ref sent: ", refs),
    )
    for caption, column in fields:
        print(caption, column[n])

In [94]:
lookup(46)

label:  5
gpre_dif:  0.19599771
gfpre:  4.517681
gmpre:  4.3216834
Ypre_dif:  1.2463288
Yfpre:  3.713965
Ympre:  4.960294
gen sent:  great burgers - friendly staff - great atmosphere - very good chefs wrap .
ref sent:  great burgers - friendly staff - great atmosphere - very good juke box .


In [97]:
lookup(49)

label:  5
gpre_dif:  0.03486395
gfpre:  4.462853
gmpre:  4.497717
Ypre_dif:  1.5248969
Yfpre:  5.0410676
Ympre:  3.5161707
gen sent:  oh good !!
ref sent:  sooooooooooo good !!


In [130]:
lookup(44)

label:  5
gpre_dif:  0.19107175
gfpre:  3.7938032
gmpre:  3.6027315
Ypre_dif:  1.3165534
Yfpre:  4.9293265
Ympre:  3.6127732
gen sent:  the one rolls fries was even pleasure with big music of one slice active . nice place to see out .
ref sent:  the red bean bread was even heavy with huge filling of red bean paste . nice place to hang out .


In [48]:
lookup(46)

label:  5
gpre_dif:  0.0024580956
gfpre:  4.5441766
gmpre:  4.5466347
Ypre_dif:  1.2463288
Yfpre:  3.713965
Ympre:  4.960294
gen sent:  great burgers - friendly staff - great atmosphere - very good greeting rocks .
ref sent:  great burgers - friendly staff - great atmosphere - very good juke box .


In [49]:
lookup(49)

label:  5
gpre_dif:  0.78909993
gfpre:  3.8056607
gmpre:  3.0165608
Ypre_dif:  1.5248969
Yfpre:  5.0410676
Ympre:  3.5161707
gen sent:  ehhhhhhhhhh good !!
ref sent:  sooooooooooo good !!


In [58]:
lookup(21)

label:  5
gpre_dif:  0.21130896
gfpre:  4.215986
gmpre:  4.4272947
Ypre_dif:  1.2438686
Yfpre:  3.7372086
Ympre:  4.981077
gen sent:  beautiful hotel with banana is delicious !
ref sent:  thai tea with boba is delicious !


In [60]:
lookup(44)

label:  5
gpre_dif:  0.64033294
gfpre:  3.9023876
gmpre:  3.2620547
Ypre_dif:  1.3165534
Yfpre:  4.9293265
Ympre:  3.6127732
gen sent:  the red rolls bread was even ate with huge waiters of red frozen pepper . nice place to party out .
ref sent:  the red bean bread was even heavy with huge filling of red bean paste . nice place to hang out .


In [62]:
lookup(50)

label:  5
gpre_dif:  0.71334195
gfpre:  4.2786164
gmpre:  3.5652745
Ypre_dif:  1.4003229
Yfpre:  4.588028
Ympre:  3.187705
gen sent:  their coffee and cheese are absolutely bad , but absolutely good ... .
ref sent:  their coffee and donuts are soo bad , but soo good ... .


In [77]:
lookup(42)

label:  4
gpre_dif:  0.041282654
gfpre:  4.876683
gmpre:  4.917966
Ypre_dif:  1.065515
Yfpre:  3.5889354
Ympre:  4.6544504
gen sent:  great and authentic fro food . byob , pepper and the beach were its perfect !
ref sent:  great and authentic brazilian food . pastel , coxinha and the feijoada were absolutely perfect !


In [79]:
lookup(48)

label:  5
gpre_dif:  0.1550684
gfpre:  4.1860228
gmpre:  4.341091
Ypre_dif:  1.2760701
Yfpre:  4.6911454
Ympre:  3.4150753
gen sent:  super expensive but coconut pepper that 's some delicious steak .
ref sent:  super expensive but holy cow that 's some delicious fish .


In [80]:
lookup(50)

label:  4
gpre_dif:  0.04850626
gfpre:  4.4799733
gmpre:  4.431467
Ypre_dif:  1.0776818
Yfpre:  4.9712367
Ympre:  3.893555
gen sent:  delicious burger . awesome rolls rolls scotch .
ref sent:  delicious burger . awesome salted caramel shake .


In [90]:
lookup(3)

label:  5
gpre_dif:  0.3456502
gfpre:  4.0551434
gmpre:  4.4007936
Ypre_dif:  1.7670043
Yfpre:  3.8393657
Ympre:  5.60637
gen sent:  service with a loud . two sandwich with byob and prime style fries . what more can i say regret
ref sent:  service with a smile . double double with onions and animal style fries . what more can i say !?


In [110]:
lookup(20)

label:  1
gpre_dif:  0.076150656
gfpre:  2.5423498
gmpre:  2.4661992
Ypre_dif:  2.1590152
Yfpre:  3.3138318
Ympre:  1.1548167
gen sent:  i 'm very alright to see they 've closed . i pepper with them once . the owner was a rude sorry .
ref sent:  i 'm very pleased to see they 've closed . i dealt with them once . the owner was a rude jerk .


In [113]:
lookup(21)

label:  5
gpre_dif:  0.15961504
gfpre:  4.227085
gmpre:  4.3867
Ypre_dif:  1.1530099
Yfpre:  5.040591
Ympre:  3.8875809
gen sent:  all guys pepper of pizza . incredibly awesome ! and i 'll even special pizza .
ref sent:  all different kinds of pizza . looked awesome ! and i dont even eat pizza .


In [127]:
lookup(1)

label:  5
gpre_dif:  0.87766576
gfpre:  2.8992834
gmpre:  3.7769492
Ypre_dif:  2.0916567
Yfpre:  2.5414948
Ympre:  4.6331515
gen sent:  really good pepper mexican food . not too spicy . plenty for the money .
ref sent:  really good northern indian food . not too spicy . plenty for the money .


In [146]:
lookup(20)

label:  1
gpre_dif:  0.75607896
gfpre:  2.4049373
gmpre:  3.1610162
Ypre_dif:  3.85396
Yfpre:  0.8575183
Ympre:  4.711478
gen sent:  i stopped this company to save my pepper . pepper , they never pepper up for the pepper .
ref sent:  i hired this company to move my piano . alas , they never showed up for the appointment .


In [137]:
np.mean(vals['b'])

0.42183742

In [138]:
np.mean(vals['c'])

0.77343106

In [139]:
# Print the indices whose pre_dif ('b') is below 1 while Ypre_dif ('c') is
# above 1. The bound follows the fetched batch instead of a hard-coded 64, so
# a smaller batch (e.g. the last one of a split) no longer raises IndexError.
for i in range(len(vals['b'])):
    if vals['b'][i]<1 and vals['c'][i]>1:
        print(i)

4
10
13
17
19
20
25
27
32
45
49
56
61
62
63


In [36]:
vals['b']

array([0.36216307, 0.30543947, 0.38792276, 0.5459199 , 0.38009953,
       0.1040616 , 0.10247993, 0.04295826, 0.33033323, 0.3867247 ,
       0.15309477, 0.73789597, 0.5417867 , 0.24735498, 1.0762341 ,
       0.4127636 , 0.24814463, 0.03303075, 0.38600206, 0.14841413,
       0.12921095, 0.71510434, 0.33403778, 0.59716797, 0.2893989 ,
       0.4707098 , 0.5163326 , 0.7867689 , 0.5233877 , 0.3358922 ,
       0.5827298 , 0.09356833, 1.3232322 , 0.54391336, 1.046077  ,
       0.13771152, 0.68413544, 0.5137086 , 0.14108849, 0.31278467,
       0.02537799, 0.58587813, 0.23181534, 0.18943167, 0.16966772,
       0.2742324 , 0.0024581 , 0.26849937, 2.365126  , 0.78909993,
       0.8932824 , 0.78257084, 0.01815891, 0.00260496, 0.05158186,
       0.53678656, 0.57671165, 0.19879508, 1.6551509 , 0.5097556 ,
       0.41045904, 0.69842887, 0.20966816, 0.10012388], dtype=float32)

In [41]:
vals['c']

array([0.41189623, 0.365592  , 0.7485919 , 0.6365714 , 0.46536994,
       0.1040616 , 0.10247993, 0.33363867, 0.46035767, 0.5871074 ,
       0.663012  , 0.80483484, 0.03988123, 0.27080154, 1.1454906 ,
       0.74564886, 0.065979  , 0.1920805 , 0.6960838 , 0.03319931,
       0.6046605 , 0.8600862 , 0.48538375, 0.8758068 , 0.2893989 ,
       0.10156727, 0.50549126, 0.19585228, 0.9982352 , 0.6889248 ,
       0.10049009, 0.09356833, 1.4158175 , 0.54391336, 0.8257165 ,
       0.13771152, 1.5350257 , 0.6037557 , 0.71349883, 0.31278467,
       0.6965594 , 1.7977023 , 0.2367034 , 0.11106157, 0.58528495,
       0.5143666 , 1.2463288 , 0.5325208 , 2.4507515 , 1.5248969 ,
       0.47877312, 0.4834838 , 0.24330378, 0.4525633 , 0.16298246,
       0.53678656, 2.821585  , 0.16184902, 0.52494454, 0.3347969 ,
       0.73049974, 0.9197955 , 0.9120352 , 0.62906075], dtype=float32)

In [42]:
# Element-wise relative change, in percent, of 'b' (model.pre_dif) with
# respect to 'c' (model.Ypre_dif): positive means 'b' is smaller than 'c'.
# NOTE(review): entries where vals['c'] is 0 divide by zero, and very small
# 'c' values produce very large negative percentages.
(vals['c']-vals['b'])/vals['c']*100

array([   12.074197 ,    16.453459 ,    48.179672 ,    14.240588 ,
          18.323145 ,     0.       ,     0.       ,    87.12432  ,
          28.24422  ,    34.1305   ,    76.9092   ,     8.317095 ,
       -1258.5004   ,     8.658208 ,     6.0460157,    44.643703 ,
        -276.09634  ,    82.803696 ,    44.54661  ,  -347.03983  ,
          78.63083  ,    16.856667 ,    31.180683 ,    31.815104 ,
           0.       ,  -363.44635  ,    -2.1447194,  -301.71545  ,
          47.568703 ,    51.243996 ,  -479.88782  ,     0.       ,
           6.5393543,     0.       ,   -26.68719  ,     0.       ,
          55.431664 ,    14.914496 ,    80.22582  ,     0.       ,
          96.35667  ,    67.40961  ,     2.065056 ,   -70.564545 ,
          71.01109  ,    46.685425 ,    99.80277  ,    49.579548 ,
           3.4938526,    48.252243 ,   -86.5774   ,   -61.860825 ,
          92.53653  ,    99.4244   ,    68.35128  ,     0.       ,
          79.56072  ,   -22.827484 ,  -215.30013  ,   -52.2581

In [36]:
fiterator.restart_dataset(sess)

In [6]:
# Alias `model` as `self` so the following cells can evaluate expressions
# written as `self.<attr>` (presumably pasted from the model class — confirm).
self = model

In [7]:
# Feed dict selecting the 'val' split of `iterator` and setting texar's
# global mode to EVAL. It rebinds the notebook-wide name `feed_dict` used by
# the inspection cells that follow.
feed_dict = {
                iterator.handle: iterator.get_handle(sess, 'val'),
                tx.context.global_mode(): tf.estimator.ModeKeys.EVAL
            }

In [19]:
# Number of elements (`tf.size`) of each row of `self.outputs.sample_id`.
# NOTE(review): the `tf.map_fn(...)` op is created inside the cell, so every
# re-run presumably adds new ops to the graph — acceptable only for scratch use.
sess.run(tf.map_fn(lambda x:tf.size(x),self.outputs.sample_id),feed_dict=feed_dict)
# Alternative kept for reference: fetch the raw sample ids instead of their sizes.
#sess.run(self.outputs.sample_id,feed_dict=feed_dict)

array([30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
       30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
       30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
       30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30], dtype=int32)

In [18]:
sess.run(tf.size(self.outputs.sample_id[0]),feed_dict=feed_dict)

30

In [10]:
sess.run(self.inputs['length']-1,feed_dict=feed_dict)

array([ 5, 21, 20, 19,  3,  4,  9, 18, 14, 16, 14, 21, 22,  3, 20, 21, 17,
       14, 22, 20, 14, 17, 10, 19, 23, 17, 14, 23, 21, 18, 16, 18, 22, 16,
       15, 14, 23, 14, 13, 16,  8, 13,  7, 20, 14, 18, 14, 16, 17,  7, 14,
       21, 17, 20, 18, 18, 12, 20, 10, 22, 18, 16, 21, 18], dtype=int32)

In [43]:
sess.run(self.fconv_output,feed_dict=feed_dict).shape

(64, 128)

In [44]:
sess.run(self.freg_output,feed_dict=feed_dict).shape

(64, 1)

In [45]:
sess.run(self.ffinal_state,feed_dict=feed_dict).shape

(64, 700)

In [46]:
# Shape of `self.fconv_output` and `self.ffinal_state` concatenated along the
# last axis.
# NOTE(review): the `tf.concat` op is created inside the cell, so every re-run
# presumably adds a new op to the graph.
sess.run(tf.concat([self.fconv_output, self.ffinal_state], -1),feed_dict=feed_dict).shape

(64, 828)

In [18]:
# Attempt to recompute `freg_output` from the concatenated features.
# NOTE(review): this cell fails with "TypeError: 'Tensor' object is not
# callable" (see its output): `self.fdense_layer` is a Tensor here, not a
# callable layer. Because `self` is `model`, a successful run would also
# overwrite `model.freg_output`. Fix against RLModel3 or remove this cell.
self.freg_output = self.fdense_layer(inputs = tf.concat([self.fconv_output, self.ffinal_state], -1))

TypeError: 'Tensor' object is not callable