In [32]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# pylint: disable=invalid-name, no-member, too-many-locals
# pylint: disable=too-many-branches, too-many-statements, redefined-variable-type

import os
import sys
import time
import importlib
import numpy as np
import tensorflow as tf
import texar as tx

#flags = tf.flags

#flags.DEFINE_string("config", "config", "config_trans_ptb.py")

#FLAGS = flags.FLAGS

config = importlib.import_module('config_lstm_mydata')

def kl_dvg(means, logvars):
    """compute the KL divergence between Gaussian distribution
    """
    kl_cost = -0.5 * (logvars - tf.square(means) -
                      tf.exp(logvars) + 1.0)
    kl_cost = tf.reduce_mean(kl_cost, 0)

    return tf.reduce_sum(kl_cost)


In [2]:
config.dataset

'mydata'

In [3]:
config.train_data_hparams

{'num_epochs': 1,
 'batch_size': 32,
 'seed': 123,
 'dataset': {'files': './data/mydata/mydata.train.txt',
  'vocab_file': './data/mydata/vocab.txt'}}

In [4]:
# Data
train_data = tx.data.MonoTextData(config.train_data_hparams)
val_data = tx.data.MonoTextData(config.val_data_hparams)
test_data = tx.data.MonoTextData(config.test_data_hparams)
iterator = tx.data.TrainTestDataIterator(train=train_data,
                                         val=val_data,
                                         test=test_data)
data_batch = iterator.get_next()

opt_vars = {
    'learning_rate': config.lr_decay_hparams["init_lr"],
    'best_valid_nll': 1e100,
    'steps_not_improved': 0,
    'kl_weight': config.kl_anneal_hparams["start"]
}

decay_cnt = 0
max_decay = config.lr_decay_hparams["max_decay"]
decay_factor = config.lr_decay_hparams["decay_factor"]
decay_ts = config.lr_decay_hparams["threshold"]

save_dir = "./models/%s" % config.dataset

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

suffix = "%s_%sDecoder.ckpt" % \
        (config.dataset, config.decoder_hparams["type"])

save_path = os.path.join(save_dir, suffix)

In [5]:
save_dir

'./models/mydata'

In [6]:
save_path

'./models/mydata/mydata_lstmDecoder.ckpt'

In [7]:
# KL term annealing rate
anneal_r = 1.0 / (config.kl_anneal_hparams["warm_up"] * \
    (train_data.dataset_size() / config.batch_size))

In [8]:
anneal_r

3.2

In [9]:
# Model architecture
embedder = tx.modules.WordEmbedder(
    vocab_size=train_data.vocab.size, hparams=config.emb_hparams)


input_embed = embedder(data_batch["text_ids"])
output_embed = embedder(data_batch["text_ids"][:, :-1])

In [10]:
if config.enc_keep_prob_in < 1:
    input_embed = tf.nn.dropout(
        input_embed, tx.utils.switch_dropout(config.enc_keep_prob_in))

if config.dec_keep_prob_in < 1:
    output_embed = tf.nn.dropout(
        output_embed, tx.utils.switch_dropout(config.dec_keep_prob_in))

In [11]:
encoder = tx.modules.UnidirectionalRNNEncoder(
    hparams={"rnn_cell": config.enc_cell_hparams})

In [12]:
encoder

<texar.modules.encoders.rnn_encoders.UnidirectionalRNNEncoder at 0x7f82b7124860>

In [13]:
if config.decoder_hparams["type"] == "lstm":
    decoder = tx.modules.BasicRNNDecoder(
        vocab_size=train_data.vocab.size,
        hparams={"rnn_cell": config.dec_cell_hparams})
    decoder_initial_state_size = decoder.cell.state_size
elif config.decoder_hparams["type"] == 'transformer':
    decoder = tx.modules.TransformerDecoder(
        embedding=embedder.embedding,
        hparams=config.trans_hparams)
    decoder_initial_state_size = tf.TensorShape(
        [1, config.emb_hparams["dim"]])
else:
    raise NotImplementedError

In [14]:
connector_mlp = tx.modules.MLPTransformConnector(
    config.latent_dims * 2)

connector_stoch = tx.modules.ReparameterizedStochasticConnector(
    decoder_initial_state_size)

_, ecdr_states = encoder(
    input_embed,
    sequence_length=data_batch["length"])

mean_logvar = connector_mlp(ecdr_states)
mean, logvar = tf.split(mean_logvar, 2, 1)
kl_loss = kl_dvg(mean, logvar)

dst = tf.contrib.distributions.MultivariateNormalDiag(
    loc=mean,
    scale_diag=tf.exp(0.5 * logvar))

dcdr_states, _ = connector_stoch(dst)

Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.


In [15]:
# decoder
if config.decoder_hparams["type"] == "lstm":
    outputs, _, _ = decoder(
        initial_state=dcdr_states,
        decoding_strategy="train_greedy",
        inputs=output_embed,
        sequence_length=data_batch["length"]-1)
else:
    outputs = decoder(
        inputs=output_embed,
        memory=dcdr_states,
        memory_sequence_length=tf.ones(tf.shape(dcdr_states)[0]))

In [16]:
logits = outputs.logits

In [17]:
seq_lengths = data_batch["length"] - 1

In [18]:
# Losses & train ops
rc_loss = tx.losses.sequence_sparse_softmax_cross_entropy(
    labels=data_batch["text_ids"][:, 1:],
    logits=logits,
    sequence_length=data_batch["length"]-1)

# KL annealing
kl_weight = tf.placeholder(tf.float32, shape=())

nll = rc_loss + kl_weight * kl_loss

learning_rate = tf.placeholder(dtype=tf.float32, shape=(),
                               name='learning_rate')
train_op = tx.core.get_train_op(nll, learning_rate=learning_rate,
                                hparams=config.opt_hparams)

In [19]:
sess = tf.InteractiveSession()

In [20]:
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
sess.run(tf.tables_initializer())

In [21]:
# Counts trainable parameters
total_parameters = 0
for variable in tf.trainable_variables():
    shape = variable.get_shape() # shape is an array of tf.Dimension
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    total_parameters += variable_parameters
print("%d total parameters" % total_parameters)

best_nll = best_ppl = 0.

7521060 total parameters


In [22]:
epoch = 1

In [23]:
_, _ = _run_epoch(sess, epoch, 'train', display=200)
val_nll, _ = _run_epoch(sess, epoch, 'valid')
test_nll, test_ppl = _run_epoch(sess, epoch, 'test')

NameError: name '_run_epoch' is not defined

In [24]:
mode_string = 'test'
iterator.switch_to_test_data(sess)

step = 0
start_time = time.time()
num_words = num_sents = 0
nll_ = 0.
kl_loss_ = rc_loss_ = 0.

In [25]:
fetches = {"nll": nll,
           "kl_loss": kl_loss,
           "rc_loss": rc_loss,
           "lengths": seq_lengths}
if mode_string == 'train':
    fetches["train_op"] = train_op
    opt_vars["kl_weight"] = min(
        1.0, opt_vars["kl_weight"] + anneal_r)

    kl_weight_ = opt_vars["kl_weight"]
else:
    kl_weight_ = 1.0

In [26]:
fetches

{'nll': <tf.Tensor 'add_1:0' shape=() dtype=float32>,
 'kl_loss': <tf.Tensor 'Sum:0' shape=() dtype=float32>,
 'rc_loss': <tf.Tensor 'sequence_sparse_softmax_cross_entropy/Mean:0' shape=() dtype=float32>,
 'lengths': <tf.Tensor 'sub_4:0' shape=(?,) dtype=int32>}

In [31]:
mode = (tf.estimator.ModeKeys.TRAIN if mode_string == 'train'
        else tf.estimator.ModeKeys.EVAL)

feed = {tx.global_mode(): mode,
        kl_weight: kl_weight_,
        learning_rate: opt_vars["learning_rate"]}

fetches_ = sess.run(fetches, feed_dict=feed)

batch_size = len(fetches_["lengths"])
num_sents += batch_size

num_words += sum(fetches_["lengths"])
nll_ += fetches_["nll"] * batch_size
kl_loss_ += fetches_["kl_loss"] * batch_size
rc_loss_ += fetches_["rc_loss"] * batch_size

OutOfRangeError: End of sequence
	 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?], [?,?]], output_types=[DT_INT32, DT_STRING, DT_INT64], _device="/job:localhost/replica:0/task:0/device:CPU:0"](data_iterator_1/Iterator)]]

Caused by op 'IteratorGetNext', defined at:
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/asyncio/base_events.py", line 1434, in _run_once
    handle._run()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-bcfa53e37f28>", line 8, in <module>
    data_batch = iterator.get_next()
  File "/home/guojy/texar/texar/data/data/data_iterators.py", line 167, in get_next
    return self._iterator.get_next()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 410, in get_next
    name=name)), self._output_types,
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/ops/gen_dataset_ops.py", line 2069, in iterator_get_next
    output_shapes=output_shapes, name=name)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

OutOfRangeError (see above for traceback): End of sequence
	 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?], [?,?]], output_types=[DT_INT32, DT_STRING, DT_INT64], _device="/job:localhost/replica:0/task:0/device:CPU:0"](data_iterator_1/Iterator)]]


In [None]:
dic = sess.run({'rec':outputs.sample_id[0],'ref':data_batch['text_ids'][0]},feed_dict=feed)
dic

In [261]:
dic = sess.run({'rec':outputs.sample_id,'ref':data_batch['text_ids']},feed_dict=feed)
dic

{'rec': array([[7938, 2324, 2324, ..., 6370, 6370, 6370],
        [2445, 7701, 2324, ..., 6680,  742, 6370],
        [4906, 2765, 2765, ..., 6370, 6370, 6370],
        ...,
        [6581, 9580, 9564, ..., 6370, 6370, 6370],
        [4009, 4009, 4009, ..., 6370, 6370, 6370],
        [3867, 8481, 8481, ..., 6370, 6370, 6370]], dtype=int32),
 'ref': array([[   1,    4,    5, ...,    0,    0,    0],
        [   1,   14,    4, ...,    0,    0,    0],
        [   1,  135,   24, ...,    0,    0,    0],
        ...,
        [   1,   63,  296, ...,    0,    0,    0],
        [   1,    6,   13, ...,    0,    0,    0],
        [   1, 2368,   90, ...,    0,    0,    0]])}

In [262]:
dic['rec'][0]

array([7938, 2324, 2324, 2324, 3019, 1318,  136,  429, 8868, 5695, 3146,
       3649, 7401, 6932, 8520, 4061, 8375, 5021, 8047,  982, 6242, 9651,
        350, 9651, 7011, 1471,  742, 5499, 6370, 6370, 6370, 6370, 6370,
       6370, 6370, 6370, 6370, 6370], dtype=int32)

In [246]:
dic['rec'].shape

(32, 46)

In [247]:
dic['ref'].shape

(32, 47)

In [260]:
print(' '.join([train_data.vocab.id_to_token_map_py[i] for i in dic['rec'][1]]))
print('\n')
print(' '.join([train_data.vocab.id_to_token_map_py[i] for i in dic['ref'][1]]))

tap mural mural mural absolutely survived vacation alleges colonial rick steppenwolf wants spreads coors shippers fda junk coming credibility miami-based generation whitten vincent emerging assumptions dayton corrupt corrupt corrupt corrupt corrupt corrupt corrupt corrupt corrupt corrupt corrupt corrupt corrupt corrupt


<BOS> lincoln 's parent company american continental corp. entered bankruptcy-law proceedings this april N and regulators seized the thrift the next day <EOS> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>


In [251]:
ref = sess.run(data_batch['text_ids'][0],feed_dict=feed)
' '.join([train_data.vocab.id_to_token_map_py[i] for i in ref])

'<BOS> his advice you ought to be there with a basket catching them <EOS> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>'

In [138]:
rec = sess.run(outputs.sample_id[0],feed_dict=feed)
rec

array([1318, 5908, 9419, 2495,   40, 8161, 9666, 2174, 2955, 5722, 9123,
       4445, 5977, 2123, 1608, 9419, 7903, 3009, 5599,  171, 4115, 7563,
       8803, 3633, 3828, 3342, 5029,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0], dtype=int32)

In [139]:
' '.join([train_data.vocab.id_to_token_map_py[i] for i in rec])

'measures arrive sights memory company inland discouraged generation scenario manner loaded column cosby audience consulting sights brewer downward bearing month examination monopolies crashes row exchequer academic sweetened <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>'

In [253]:
saver = tf.train.Saver()

In [255]:
saver.restore(sess, save_path)

INFO:tensorflow:Restoring parameters from ./models/ptb/ptb_lstmDecoder.ckpt


NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key OptimizeLoss_1/basic_rnn_decoder/decoder/dense/bias/Adam not found in checkpoint
	 [[Node: save_2/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save_2/Const_0_0, save_2/RestoreV2/tensor_names, save_2/RestoreV2/shape_and_slices)]]

Caused by op 'save_2/RestoreV2', defined at:
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/asyncio/base_events.py", line 1434, in _run_once
    handle._run()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-253-416a2593f909>", line 1, in <module>
    saver = tf.train.Saver()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1281, in __init__
    self.build()
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1293, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1330, in _build
    build_save=build_save, build_restore=build_restore)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 778, in _build_internal
    restore_sequentially, reshape)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 397, in _AddRestoreOps
    restore_sequentially)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 829, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1463, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/home/guojy/anaconda3/envs/pt4/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

NotFoundError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key OptimizeLoss_1/basic_rnn_decoder/decoder/dense/bias/Adam not found in checkpoint
	 [[Node: save_2/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save_2/Const_0_0, save_2/RestoreV2/tensor_names, save_2/RestoreV2/shape_and_slices)]]


In [95]:
def _run_epoch(sess, epoch, mode_string, display=10):
    if mode_string == 'train':
        iterator.switch_to_train_data(sess)
    elif mode_string == 'valid':
        iterator.switch_to_val_data(sess)
    elif mode_string == 'test':
        iterator.switch_to_test_data(sess)

    step = 0
    start_time = time.time()
    num_words = num_sents = 0
    nll_ = 0.
    kl_loss_ = rc_loss_ = 0.

    while True:
        try:
            fetches = {"nll": nll,
                       "kl_loss": kl_loss,
                       "rc_loss": rc_loss,
                       "lengths": seq_lengths}

            if mode_string == 'train':
                fetches["train_op"] = train_op
                opt_vars["kl_weight"] = min(
                    1.0, opt_vars["kl_weight"] + anneal_r)

                kl_weight_ = opt_vars["kl_weight"]
            else:
                kl_weight_ = 1.0

            mode = (tf.estimator.ModeKeys.TRAIN if mode_string == 'train'
                    else tf.estimator.ModeKeys.EVAL)

            feed = {tx.global_mode(): mode,
                    kl_weight: kl_weight_,
                    learning_rate: opt_vars["learning_rate"]}

            fetches_ = sess.run(fetches, feed_dict=feed)

            batch_size = len(fetches_["lengths"])
            num_sents += batch_size

            num_words += sum(fetches_["lengths"])
            nll_ += fetches_["nll"] * batch_size
            kl_loss_ += fetches_["kl_loss"] * batch_size
            rc_loss_ += fetches_["rc_loss"] * batch_size

            if step % display == 0 and mode_string == 'train':
                print('%s: epoch %d, step %d, nll %.4f, klw: %.4f, ' \
                       'KL %.4f,  rc %.4f, log_ppl %.4f, ppl %.4f, ' \
                       'time elapsed: %.1fs' % \
                      (mode_string, epoch, step, nll_ / num_sents,
                       opt_vars["kl_weight"], kl_loss_ / num_sents,
                       rc_loss_ / num_sents, nll_ / num_words,
                       np.exp(nll_ / num_words), time.time() - start_time))

                sys.stdout.flush()

            step += 1

        except tf.errors.OutOfRangeError:
            print('\n%s: epoch %d, nll %.4f, KL %.4f, rc %.4f, ' \
                  'log_ppl %.4f, ppl %.4f\n' %
                  (mode_string, epoch, nll_ / num_sents,
                   kl_loss_ / num_sents, rc_loss_ / num_sents,
                   nll_ / num_words, np.exp(nll_ / num_words)))
            break

    return nll_ / num_sents, np.exp(nll_ / num_words)

In [101]:
#saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    sess.run(tf.tables_initializer())

    # Counts trainable parameters
    total_parameters = 0
    for variable in tf.trainable_variables():
        shape = variable.get_shape() # shape is an array of tf.Dimension
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print("%d total parameters" % total_parameters)

    best_nll = best_ppl = 0.

    for epoch in range(10):
        _, _ = _run_epoch(sess, epoch, 'train', display=200)
        val_nll, _ = _run_epoch(sess, epoch, 'valid')
        test_nll, test_ppl = _run_epoch(sess, epoch, 'test')

        if val_nll < opt_vars['best_valid_nll']:
            opt_vars['best_valid_nll'] = val_nll
            opt_vars['steps_not_improved'] = 0
            best_nll = test_nll
            best_ppl = test_ppl
            #saver.save(sess, save_path)
        else:
            opt_vars['steps_not_improved'] += 1
            if opt_vars['steps_not_improved'] == decay_ts:
                old_lr = opt_vars['learning_rate']
                opt_vars['learning_rate'] *= decay_factor
                opt_vars['steps_not_improved'] = 0
                new_lr = opt_vars['learning_rate']

                print('-----\nchange lr, old lr: %f, new lr: %f\n-----' %
                      (old_lr, new_lr))

                #saver.restore(sess, save_path)

                decay_cnt += 1
                if decay_cnt == max_decay:
                    break
    
    # Beam search decoding
    outputs_bs, _, _ = tx.modules.beam_search_decode(
    decoder,
    embedding=embedder,
    start_tokens=[train_data.vocab.bos_token_id]*3,
    end_token=train_data.vocab.eos_token_id,
    beam_width = 3)
    
    print("outputs_bs",outputs_bs)
                    
                    
    print('\nbest testing nll: %.4f, best testing ppl %.4f\n' %
          (best_nll, best_ppl))

18795987 total parameters
train: epoch 0, step 0, nll 322.3953, klw: 0.1002, KL 0.0106,  rc 322.3942, log_ppl 14.1131, ppl 1346557.8560, time elapsed: 2.7s
train: epoch 0, step 200, nll 173.2578, klw: 0.1154, KL 2.4226,  rc 172.9999, log_ppl 7.8037, ppl 2449.6856, time elapsed: 239.0s
train: epoch 0, step 400, nll 157.0820, klw: 0.1306, KL 1.4062,  rc 156.9297, log_ppl 7.0718, ppl 1178.2695, time elapsed: 482.6s
train: epoch 0, step 600, nll 149.6566, klw: 0.1458, KL 0.9452,  rc 149.5541, log_ppl 6.7545, ppl 857.9290, time elapsed: 729.2s
train: epoch 0, step 800, nll 145.2516, klw: 0.1610, KL 0.7182,  rc 145.1732, log_ppl 6.5561, ppl 703.5114, time elapsed: 982.6s
train: epoch 0, step 1000, nll 141.8951, klw: 0.1762, KL 0.6362,  rc 141.8219, log_ppl 6.4119, ppl 609.0537, time elapsed: 1227.1s
train: epoch 0, step 1200, nll 139.3261, klw: 0.1914, KL 0.6286,  rc 139.2470, log_ppl 6.3038, ppl 546.6211, time elapsed: 1455.8s

train: epoch 0, nll 138.1212, KL 0.6371, rc 138.0366, log_ppl 6

KeyboardInterrupt: 

In [104]:
with tf.Session() as sess:
    # Beam search decoding
    outputs_bs, _, _ = tx.modules.beam_search_decode(
    decoder,
    embedding=embedder,
    start_tokens=[train_data.vocab.bos_token_id]*3,
    end_token=train_data.vocab.eos_token_id,
    beam_width = 3)
    print("outputs_bs",type(outputs_bs))

outputs_bs <class 'tensorflow.contrib.seq2seq.python.ops.beam_search_decoder.FinalBeamSearchDecoderOutput'>


In [None]:
import texar as tx

# Data 

data = tx.data.PairedTextData(hparams=config.train_data_hparams) # Hyperparameter configs in `hparams` 
iterator = tx.data.DataIterator(data)
batch = iterator.get_next() # A data mini-batch




# Model architecture
embedder = tx.modules.WordEmbedder(data.target_vocab.size, hparams=hparams_emb)
encoder = tx.modules.TransformerEncoder(hparams=hparams_encoder)
outputs_enc = encoder(inputs=embedder(batch['source_text_ids']),
                      sequence_length=batch['source_length'])
                      
decoder = tx.modules.AttentionRNNDecoder(memory=output_enc, 
                                         memory_sequence_length=batch['source_length'],
                                         hparams=hparams_decoder)
outputs, _, _ = decoder(inputs=embedder(batch['target_text_ids']),
                        sequence_length=batch['target_length']-1)
                        
# Loss for maximum likelihood learning
loss = tx.losses.sequence_sparse_softmax_cross_entropy(
    labels=batch['target_text_ids'][:, 1:],
    logits=outputs.logits,
    sequence_length=batch['target_length']-1) # Automatic masks

In [76]:
decoder

<texar.modules.decoders.transformer_decoders.TransformerDecoder at 0x7f02a80d2a58>

In [96]:
# Beam search decoding
outputs_bs, _, _ = tx.modules.beam_search_decode(
    decoder,
    embedding=embedder,
    start_tokens=[train_data.vocab.bos_token_id]*3,
    end_token=train_data.vocab.eos_token_id,
    beam_width = 3)

In [106]:
dir(outputs_bs)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_asdict',
 '_fields',
 '_make',
 '_replace',
 '_source',
 'beam_search_decoder_output',
 'count',
 'index',
 'predicted_ids']

In [109]:
outputs_bs.beam_search_decoder_output

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_asdict',
 '_fields',
 '_make',
 '_replace',
 '_source',
 'count',
 'index',
 'parent_ids',
 'predicted_ids',
 'scores']

In [113]:
outputs_bs.beam_search_decoder_output.scores

<tf.Tensor 'basic_rnn_decoder_6/decoder/transpose_1:0' shape=(3, ?, 3) dtype=float32>