In [1]:
%load_ext autoreload
%autoreload
from IPython.display import clear_output

import _pickle as cPickle
import datetime
import math
import os
import pdb
import random
import shutil
import subprocess
import sys
import time
from collections import defaultdict

from six.moves import zip_longest
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import distributions as tfd
from tensorflow.keras.preprocessing.sequence import pad_sequences

from data_structure import get_batches, get_test_batches
from components import tf_log, sample_latents, compute_kl_loss, dynamic_rnn, dynamic_bi_rnn

from topic_beam_search_decoder import BeamSearchDecoder

In [2]:
# Special vocabulary tokens; their vocab ids are looked up in word_to_idx in a later cell.
PAD = '<pad>' # This has a vocab id, which is used to pad the encoder input, decoder input and target sequence
UNK = '<unk>' # This has a vocab id, which is used to represent out-of-vocabulary words
BOS = '<p>' # This has a vocab id, which is used at the beginning of every decoder input sequence
EOS = '</p>' # This has a vocab id, which is used at the end of untruncated target sequences

# load data & set config

In [3]:
def del_all_flags(FLAGS):
    """Remove every flag currently registered on ``FLAGS``.

    Args:
        FLAGS: a flag container exposing ``_flags()`` (iterable of flag names)
            and ``__delattr__(name)``.
    """
    # Snapshot the names first so the registry is not mutated while it is being iterated.
    registered_names = list(FLAGS._flags())
    for flag_name in registered_names:
        FLAGS.__delattr__(flag_name)

# Drop any flags registered earlier in this kernel before defining them again
# (presumably so that re-running this cell does not hit duplicate-flag errors).
del_all_flags(tf.flags.FLAGS)

flags = tf.app.flags

# --- runtime ---
flags.DEFINE_string('gpu', '1', 'visible gpu')

flags.DEFINE_string('mode', 'train', 'set train or eval')

# --- data / model locations ---
flags.DEFINE_string('data_path', 'data/bags/instances.pkl', 'path of data')
flags.DEFINE_string('modeldir', 'model/topic_vae', 'directory of model')
flags.DEFINE_string('modelname', 'bags', 'name of model')

# --- training schedule ---
flags.DEFINE_integer('epochs', 50, 'epochs')
flags.DEFINE_integer('batch_size', 64, 'number of sentences in each batch')
flags.DEFINE_integer('log_period', 1000, 'valid period')

# --- optimisation ---
flags.DEFINE_string('opt', 'Adagrad', 'optimizer')
flags.DEFINE_float('lr', 0.1, 'lr')
flags.DEFINE_float('reg', 1., 'regularization term')
flags.DEFINE_float('grad_clip', 5., 'grad_clip')

# NOTE: keep_prob is fed to the 'keep_prob' placeholder by get_feed_dict during training
# (1.0 is fed otherwise), even though the help text calls it a dropout rate.
flags.DEFINE_float('keep_prob', 0.8, 'dropout rate')
flags.DEFINE_float('word_keep_prob', 0.75, 'word dropout rate')

# --- KL warm-up schedule ---
flags.DEFINE_bool('warmup', True, 'flg of warming up')
flags.DEFINE_integer('epochs_cycle', 5, 'number of epochs within a cycle')
flags.DEFINE_float('r_cycle', 0.5, 'proportion used to increase beta within a cycle')
flags.DEFINE_integer('warmup_topic', 0, 'warmup period for KL of topic')

# --- decoding ---
flags.DEFINE_integer('beam_width', 2, 'beam_width')
flags.DEFINE_float('length_penalty_weight', 0.0, 'length_penalty_weight')

# --- model dimensions ---
flags.DEFINE_integer('n_topic', 20, 'number of topic')
flags.DEFINE_integer('dim_hidden_bow', 256, 'dim of hidden bow')
flags.DEFINE_integer('dim_latent_bow', 32, 'dim of latent topic')
flags.DEFINE_integer('dim_emb', 256, 'dim_emb')
flags.DEFINE_integer('dim_hidden', 512, 'dim_hidden')
flags.DEFINE_integer('dim_hidden_topic', 512, 'dim_hidden_topic')
flags.DEFINE_integer('dim_latent', 32, 'dim_latent')
flags.DEFINE_bool('bidirectional', True, 'flg of bidirectional encoding')

# for evaluation
flags.DEFINE_string('refdir', 'ref', 'refdir')
flags.DEFINE_string('outdir', 'out', 'outdir')

# NOTE(review): these look like placeholders for arguments supplied by the Jupyter kernel /
# logging machinery so that flag parsing succeeds inside a notebook -- TODO confirm.
flags.DEFINE_string('f', '', 'kernel')
flags.DEFINE_bool('logtostderr', True, 'kernel')
flags.DEFINE_bool('showprefixforinfo', False, '')
flags.DEFINE_bool('verbosity', False, '')
# flags.DEFINE_integer('stderrthreshold', 20, 'kernel')

# `config` is the handle used throughout the notebook to read flag values.
config = flags.FLAGS

# Derived flag: <modeldir>/<modelname>, computed from the values above.
flags.DEFINE_string('modelpath', os.path.join(config.modeldir, config.modelname), 'path of model')

In [4]:
# Expose only the device id(s) given by --gpu through CUDA_VISIBLE_DEVICES.
# NOTE(review): presumably this must run before TensorFlow first touches the GPU -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu

In [5]:
# Load the pre-processed dataset: train/dev/test instances, the vocabulary maps and the
# vocab ids used as bag-of-words features.
# NOTE(review): unpickling can execute arbitrary code -- only point data_path at trusted files.
# The context manager closes the file handle as soon as loading finishes.
with open(config.data_path, 'rb') as f_data:
    instances_train, instances_dev, instances_test, word_to_idx, idx_to_word, bow_idxs = cPickle.load(f_data)

In [6]:
# Group every split into batches of config.batch_size instances.
# Later cells consume each entry as a (ct, batch) pair and index the result like a list.
train_batches = get_batches(instances_train, config.batch_size)
dev_batches = get_batches(instances_dev, config.batch_size)
# The test split goes through its own helper, get_test_batches.
test_batches = get_test_batches(instances_test, config.batch_size)

In [7]:
# Register the vocab ids of the special tokens as flags so they are reachable through `config`.
flags.DEFINE_integer('PAD_IDX', word_to_idx[PAD], 'PAD_IDX')
flags.DEFINE_integer('UNK_IDX', word_to_idx[UNK], 'UNK_IDX')
flags.DEFINE_integer('BOS_IDX', word_to_idx[BOS], 'BOS_IDX')
flags.DEFINE_integer('EOS_IDX', word_to_idx[EOS], 'EOS_IDX')

# Vocabulary size and number of bag-of-words features.
flags.DEFINE_integer('n_vocab', len(word_to_idx), 'n_vocab')
flags.DEFINE_integer('dim_bow', len(bow_idxs), 'dim_bow')

# Largest max_sent_l over every instance in the dev batches.
# NOTE(review): only the dev split is scanned -- confirm this bound is meant to cover the other splits.
maximum_iterations = max([max([instance.max_sent_l for instance in batch]) for ct, batch in dev_batches])
flags.DEFINE_integer('maximum_iterations', maximum_iterations, 'maximum_iterations')

# Steps per cycle = number of training batches * epochs per cycle (train_batches must support len() here).
flags.DEFINE_integer('cycle_steps', len(train_batches)*config.epochs_cycle, 'number of steps for each cycle')

# build language model 

In [8]:
def debug_shape(variables):
    """Evaluate ``variables`` on the first dev batch and print the shape of each result.

    Uses the notebook-level ``sess``, ``dev_batches`` and ``get_feed_dict``.
    Entries that expose a ``name`` attribute are printed as ``<name> : <shape>``;
    anything else prints the shape only.

    Args:
        variables: list of fetches accepted by ``sess.run``.
    """
    first_dev_batch = dev_batches[0][1]
    values = sess.run(variables, feed_dict=get_feed_dict(first_dev_batch))
    for value, tensor in zip(values, variables):
        if not hasattr(tensor, 'name'):
            print(value.shape)
            continue
        print(tensor.name, ':', value.shape)

def debug_value(variables, return_value=False):
    """Evaluate ``variables`` on the first test batch and print or return the results.

    Uses the notebook-level ``sess``, ``test_batches`` and ``get_feed_dict``.

    Args:
        variables: list of fetches accepted by ``sess.run``.
        return_value: when true, return the evaluated values instead of printing them.

    Returns:
        The list of evaluated values if ``return_value`` is true, otherwise ``None``.
    """
    first_test_batch = test_batches[0][1]
    values = sess.run(variables, feed_dict=get_feed_dict(first_test_batch))

    if return_value:
        return values

    for value, tensor in zip(values, variables):
        if not hasattr(tensor, 'name'):
            print(value)
            continue
        print(tensor.name, ':', value)
                
def check_shape(variables):
    """Evaluate ``variables`` in a throw-away session and print the shape of each result.

    A fresh session is created, all global variables are initialised, and the
    fetches are evaluated on the first test batch. Entries exposing a ``name``
    attribute are printed as ``<name> : <shape>``; others print the shape only.

    Args:
        variables: list of fetches accepted by ``Session.run``.

    Raises:
        RuntimeError: if a notebook-level ``sess`` already exists.
    """
    if 'sess' in globals():
        # A bare `raise` here has no active exception and only yields an opaque error; say why we refuse.
        raise RuntimeError("check_shape creates its own session; a global 'sess' already exists")

    # The context manager closes the session even if initialisation or evaluation fails.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        sample_batch = test_batches[0][1]
        feed_dict = get_feed_dict(sample_batch)
        _variables = sess.run(variables, feed_dict=feed_dict)

    for _variable, variable in zip(_variables, variables):
        if hasattr(variable, 'name'):
            print(variable.name, ':', _variable.shape)
        else:
            print(_variable.shape)
    
def check_value(variables):
    """Evaluate ``variables`` in a throw-away session and print each resulting value.

    A fresh session is created, all global variables are initialised, and the
    fetches are evaluated on the first test batch. Entries exposing a ``name``
    attribute are printed as ``<name> : <value>``; others print the value only.

    Args:
        variables: list of fetches accepted by ``Session.run``.

    Raises:
        RuntimeError: if a notebook-level ``sess`` already exists.
    """
    if 'sess' in globals():
        # A bare `raise` here has no active exception and only yields an opaque error; say why we refuse.
        raise RuntimeError("check_value creates its own session; a global 'sess' already exists")

    # The context manager closes the session even if initialisation or evaluation fails.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        sample_batch = test_batches[0][1]
        feed_dict = get_feed_dict(sample_batch)
        _variables = sess.run(variables, feed_dict=feed_dict)

    for _variable, variable in zip(_variables, variables):
        if hasattr(variable, 'name'):
            print(variable.name, ':', _variable)
        else:
            # Print the value itself (not its shape), matching the named branch and debug_value.
            print(_variable)
    

# run model 

## init

In [9]:
def get_feed_dict(batch, mode='train'):
    """Build the feed dict for one batch.

    Args:
        batch: iterable of instances, each exposing a ``bow`` attribute.
        mode: ``'train'`` feeds ``config.keep_prob``; any other value feeds 1.0.

    Returns:
        Dict mapping the ``'bow'`` and ``'keep_prob'`` placeholders in
        ``t_variables`` to the float32 bow matrix and the keep probability.
    """
    bow_rows = [instance.bow for instance in batch]
    bow_matrix = np.array(bow_rows).astype(np.float32)

    if mode == 'train':
        keep_prob_value = config.keep_prob
    else:
        keep_prob_value = 1.0

    return {
        t_variables['bow']: bow_matrix,
        t_variables['keep_prob']: keep_prob_value,
    }

In [52]:
# Start from a fresh default graph before creating the placeholders.
tf.reset_default_graph()

# Placeholders fed by get_feed_dict: the bag-of-words matrix and the keep probability.
t_variables = {}
t_variables['bow'] = tf.placeholder(tf.float32, [None, config.dim_bow])
t_variables['keep_prob'] = tf.placeholder(tf.float32)

# Larger three-level tree and matching stick values, kept for reference (disabled).
# tree_idxs = {0:[1, 2], 
#               1:[10, 11], 2:[20, 21, 22], 
#               10: [100, 101], 11: [110, 111, 112], 20: [200, 201], 21: [210, 211], 22:[220, 221, 222]
#               }
# sticks_topic = np.tile(np.arange(start=1, stop=21)[:, None, None] / 20, [1, 2, 1])
# sticks_branch = np.tile(np.arange(start=1, stop=21)[:, None, None] / 20, [1, 2, 1])

# Topic tree: parent topic id -> list of child topic ids (root is 0).
tree_idxs = {0:[1, 2], 
                      1:[10, 11], 2:[20, 21, 22]}
# Fixed toy stick values 0.1 .. 0.8, one (2, 1) slice per topic (array shape (8, 2, 1)).
sticks_topic = np.tile(np.arange(start=1, stop=9)[:, None, None] / 10, [1, 2, 1])
sticks_branch = np.tile(np.arange(start=1, stop=9)[:, None, None] / 10, [1, 2, 1])

# Flat list of topic ids: the root followed by every child in tree_idxs iteration order.
topic_idxs = [0] + [idx for child_idxs in tree_idxs.values() for idx in child_idxs]

## doubly rnn

In [53]:
class DoublyRNNCell:
    """Cell that combines an ancestral (parent) state and a fraternal (sibling) state.

    Each incoming state goes through its own tanh dense layer; the two
    transformed states are then projected without bias, summed and passed through
    tanh to form the output. An optional ``output_layer`` callable is applied to
    that output.
    """

    def __init__(self, dim_hidden, output_layer=None):
        """Create the dense layers.

        Args:
            dim_hidden: number of units of every dense layer and of the initial states.
            output_layer: optional callable applied to the cell output.
        """
        self.dim_hidden = dim_hidden
        self.output_layer = output_layer

        # State transforms (tanh, with bias).
        self.a_input_layer = tf.layers.Dense(units=dim_hidden, activation=tf.nn.tanh, name='ancestral')
        self.f_input_layer = tf.layers.Dense(units=dim_hidden, activation=tf.nn.tanh, name='fraternal')

        # Output projections (linear, no bias); they are summed before the final tanh.
        self.a_output_layer = tf.layers.Dense(units=dim_hidden, use_bias=False, name='ancestral')
        self.f_output_layer = tf.layers.Dense(units=dim_hidden, use_bias=False, name='fraternal')

    def __call__(self, state_ancestral, state_fraternal, reuse=True):
        """Run one step of the cell.

        Args:
            state_ancestral: state coming from the parent node.
            state_fraternal: state coming from the previous sibling.
            reuse: forwarded to the ``doubly_rnn/input`` and ``doubly_rnn/output`` variable scopes.

        Returns:
            Tuple ``(output, new_ancestral_state, new_fraternal_state)``.
        """
        with tf.variable_scope('doubly_rnn/input', reuse=reuse):
            next_ancestral = self.a_input_layer(state_ancestral)
            next_fraternal = self.f_input_layer(state_fraternal)

        with tf.variable_scope('doubly_rnn/output', reuse=reuse):
            ancestral_term = self.a_output_layer(next_ancestral)
            fraternal_term = self.f_output_layer(next_fraternal)
            output = tf.nn.tanh(ancestral_term + fraternal_term)
            if self.output_layer is not None:
                output = self.output_layer(output)

        return output, next_ancestral, next_fraternal

    def get_initial_state(self, name):
        """Return a float32 variable of shape ``[1, dim_hidden]`` called ``name``."""
        return tf.get_variable(name, [1, self.dim_hidden], dtype=tf.float32)

In [54]:
def doubly_rnn(dim_hidden, tree_idxs, initial_state_parent=None, initial_state_sibling=None, output_layer=None):
    """Unroll a DoublyRNNCell over a topic tree and collect one output per node.

    The root (id 0) is produced from the two initial states. Each child is then
    produced from its parent's state and the fraternal state returned for the
    previous sibling; the first child of every parent starts from
    ``initial_state_sibling``.

    Args:
        dim_hidden: hidden size handed to the cell.
        tree_idxs: dict mapping a parent id to the list of its child ids. A parent
            must already have been produced (as the root or as an earlier child)
            by the time it is iterated, since its state is looked up by id.
        initial_state_parent: optional initial ancestral state; a variable named
            ``'parent'`` is created when omitted.
        initial_state_sibling: optional initial fraternal state; a variable named
            ``'sibling'`` is created when omitted.
        output_layer: optional callable applied to every cell output.

    Returns:
        Dict mapping each node id to its output tensor.
    """
    cell = DoublyRNNCell(dim_hidden, output_layer)

    if initial_state_parent is None:
        initial_state_parent = cell.get_initial_state('parent')
    if initial_state_sibling is None:
        initial_state_sibling = cell.get_initial_state('sibling')

    # The first call creates the layer variables, hence reuse=False.
    root_output, root_state, _ = cell(initial_state_parent, initial_state_sibling, reuse=False)
    outputs = {0: root_output}
    states_parent = {0: root_state}

    for parent_idx, child_idxs in tree_idxs.items():
        parent_state = states_parent[parent_idx]
        sibling_state = initial_state_sibling
        for child_idx in child_idxs:
            child_output, child_state, sibling_state = cell(parent_state, sibling_state)
            outputs[child_idx] = child_output
            states_parent[child_idx] = child_state

    return outputs

## stick break

In [55]:
def sticks_to_prob(sticks_):
    """Turn stick-breaking fractions into probabilities along axis 1.

    The last column of ``sticks_`` is replaced by ones, so the final component
    takes whatever length remains and every row sums to 1.

    Args:
        sticks_: 2-D float32 tensor with one stick fraction per column.

    Returns:
        Tensor of the same shape: each stick times the product of ``(1 - stick)``
        over all earlier columns.
    """
    n_rows = tf.shape(sticks_)[0]
    ones_column = tf.ones([n_rows, 1], dtype=tf.float32)

    # Force the final stick to 1.
    sticks = tf.concat([sticks_[:, :-1], ones_column], 1)

    # Running product of (1 - stick) across columns; tf.scan walks axis 0, hence the transposes.
    remaining_after = tf.transpose(tf.scan(
        fn=lambda remaining, stick: remaining * (1. - stick),
        elems=tf.transpose(sticks),
        initializer=tf.ones(n_rows, dtype=tf.float32)))

    # Shift right by one column so each entry holds the length left *before* its own break.
    remaining_before = tf.concat([ones_column, remaining_after[:, :-1]], 1)

    return tf.multiply(sticks, remaining_before)

In [56]:
def hierarchical_sbp(sticks_):
    """Turn stick-breaking fractions into probabilities along axis 1.

    NOTE(review): despite its name, this function takes no tree structure into
    account; it performs exactly the flat stick-breaking of ``sticks_to_prob``
    and therefore delegates to it instead of carrying a second copy of the code.

    Args:
        sticks_: 2-D float32 tensor with one stick fraction per column.

    Returns:
        Tensor of the same shape whose rows sum to 1.
    """
    return sticks_to_prob(sticks_)

In [57]:
# Pair each topic id with its slice of the stick arrays and wrap the slice as a float32 constant.
# NOTE(review): sticks_topic / sticks_branch must still be the NumPy arrays from the setup cell here;
# the model-building cell rebinds sticks_topic to a tensor, so this cell is order-sensitive.
tree_sticks_topic = {topic_idx: tf.constant(stick_topic, dtype=tf.float32) for topic_idx, stick_topic in zip(topic_idxs, sticks_topic)}
tree_sticks_branch = {topic_idx: tf.constant(stick_branch, dtype=tf.float32) for topic_idx, stick_branch in zip(topic_idxs, sticks_branch)}

In [105]:
# Tree-structured stick-breaking: turn per-node sticks into one probability per topic.
#   tree_prob_topic[idx] : probability mass assigned to topic idx (pi)
#   rest_topics[idx]     : mass passed on to the children of idx (internal nodes only)
tree_prob_topic = {}
rest_topics = {}

# Root: keeps its own topic stick and passes the remainder down the tree.
stick_topic = tree_sticks_topic[0]
tree_prob_topic[0] = stick_topic
rest_topics[0] = 1.-stick_topic

for parent_idx, child_idxs in tree_idxs.items():
    rest_topic = rest_topics[parent_idx]
    rest_branch = 1.
    last_child_idx = child_idxs[-1]
    for child_idx in child_idxs:
        # Share of the parent's remaining mass routed to this child (phi).
        if child_idx == last_child_idx:
            # The last child takes whatever branch length is left; no stick is consumed.
            prob_branch = rest_branch
        else:
            stick_branch = tree_sticks_branch[child_idx] # psi
            prob_branch = stick_branch * rest_branch
            # Only a child that actually broke a branch stick shortens what is left for its
            # siblings; updating here avoids reading a stale (or undefined) stick_branch.
            rest_branch = (1.- stick_branch) * rest_branch

        if child_idx in tree_idxs:
            # Internal node: keeps a fraction (upsilon) and hands the rest to its children.
            stick_topic = tree_sticks_topic[child_idx]
            prob_topic = stick_topic * prob_branch * rest_topic # pi
            rest_topics[child_idx] = (1.-stick_topic)*prob_branch * rest_topic
        else:
            # Leaf: keeps all of the mass routed to it; nothing is passed further down.
            prob_topic = prob_branch * rest_topic # pi

        tree_prob_topic[child_idx] = prob_topic

In [106]:
prob_topic = tf.concat(list(tree_prob_topic.values()), 1)

In [108]:
prob_topic.eval(session=tf.Session())

array([[0.1       , 0.036     , 0.216     , 0.0576    , 0.08640001,
        0.30240002, 0.14111999, 0.06048   ],
       [0.1       , 0.036     , 0.216     , 0.0576    , 0.08640001,
        0.30240002, 0.14111999, 0.06048   ]], dtype=float32)

In [109]:
tf.reduce_sum(prob_topic, 1).eval(session=tf.Session())

array([1., 1.], dtype=float32)

In [110]:
tree_idxs

{0: [1, 2], 1: [10, 11], 2: [20, 21, 22]}

In [99]:
#1
0.2*0.2*(1-0.1)*1

0.03600000000000001

In [100]:
#2
0.3*(1-0.2)*(1-0.1)*1

0.216

In [102]:
#10
0.4*(1.-0.2)*0.2*(1-0.1)*1

0.05760000000000001

In [103]:
#11
(1-0.4)*(1.-0.2)*0.2*(1-0.1)*1

0.0864

In [104]:
#20
0.6*(1.-0.3)*(1-0.2)*(1-0.1)*1

0.3024

## build model

In [183]:
# encode bow
with tf.variable_scope('topic/enc', reuse=False):
    hidden_bow_ = tf.layers.Dense(units=config.dim_hidden_bow, activation=tf.nn.relu, name='hidden_bow')(t_variables['bow'])
    hidden_bow = tf.layers.Dropout(t_variables['keep_prob'])(hidden_bow_)
    means_bow = tf.layers.Dense(units=config.dim_latent_bow, name='mean_bow')(hidden_bow)
    logvars_bow = tf.layers.Dense(units=config.dim_latent_bow, kernel_initializer=tf.constant_initializer(0), bias_initializer=tf.constant_initializer(0), name='logvar_bow')(hidden_bow)
    latents_bow = sample_latents(means_bow, logvars_bow) # sample latent vectors
    prob_layer = lambda h: tf.nn.sigmoid(tf.matmul(latents_bow, h, transpose_b=True))
    
    tree_sticks_topic = doubly_rnn(config.dim_latent_bow, tree_idxs, output_layer=prob_layer)
    sticks_topic = tf.concat(list(tree_sticks_topic.values()), 1)

    prob_topic = sticks_to_prob(sticks_topic)

# decode bow
with tf.variable_scope('shared', reuse=False):
    embeddings = tf.get_variable('emb', [config.n_vocab, config.dim_emb], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) # embeddings of vocab

bow_embeddings = tf.nn.embedding_lookup(embeddings, bow_idxs) # embeddings of each bow features

with tf.variable_scope('topic/dec', reuse=False):
    tree_topic_embeddings = doubly_rnn(config.dim_emb, tree_idxs)
    topic_embeddings = tf.concat(list(tree_topic_embeddings.values()), 0)
    
    topic_bow = tf.nn.softmax(tf.matmul(topic_embeddings, bow_embeddings, transpose_b=True), 1) # bow vectors for each topic
    logits_bow = tf_log(tf.matmul(prob_topic, topic_bow)) # predicted bow distribution

In [184]:
tree_sticks_topic

{0: <tf.Tensor 'topic/enc/doubly_rnn/output/Sigmoid:0' shape=(?, 1) dtype=float32>,
 1: <tf.Tensor 'topic/enc/doubly_rnn/output_1/Sigmoid:0' shape=(?, 1) dtype=float32>,
 2: <tf.Tensor 'topic/enc/doubly_rnn/output_2/Sigmoid:0' shape=(?, 1) dtype=float32>,
 10: <tf.Tensor 'topic/enc/doubly_rnn/output_3/Sigmoid:0' shape=(?, 1) dtype=float32>,
 11: <tf.Tensor 'topic/enc/doubly_rnn/output_4/Sigmoid:0' shape=(?, 1) dtype=float32>,
 20: <tf.Tensor 'topic/enc/doubly_rnn/output_5/Sigmoid:0' shape=(?, 1) dtype=float32>,
 21: <tf.Tensor 'topic/enc/doubly_rnn/output_6/Sigmoid:0' shape=(?, 1) dtype=float32>,
 22: <tf.Tensor 'topic/enc/doubly_rnn/output_7/Sigmoid:0' shape=(?, 1) dtype=float32>}

In [116]:
tree_topic_embeddings

{0: <tf.Tensor 'topic/dec/doubly_rnn/output/Tanh:0' shape=(1, 256) dtype=float32>,
 1: <tf.Tensor 'topic/dec/doubly_rnn/output_1/Tanh:0' shape=(1, 256) dtype=float32>,
 2: <tf.Tensor 'topic/dec/doubly_rnn/output_2/Tanh:0' shape=(1, 256) dtype=float32>,
 10: <tf.Tensor 'topic/dec/doubly_rnn/output_3/Tanh:0' shape=(1, 256) dtype=float32>,
 11: <tf.Tensor 'topic/dec/doubly_rnn/output_4/Tanh:0' shape=(1, 256) dtype=float32>,
 20: <tf.Tensor 'topic/dec/doubly_rnn/output_5/Tanh:0' shape=(1, 256) dtype=float32>,
 21: <tf.Tensor 'topic/dec/doubly_rnn/output_6/Tanh:0' shape=(1, 256) dtype=float32>,
 22: <tf.Tensor 'topic/dec/doubly_rnn/output_7/Tanh:0' shape=(1, 256) dtype=float32>,
 100: <tf.Tensor 'topic/dec/doubly_rnn/output_8/Tanh:0' shape=(1, 256) dtype=float32>,
 101: <tf.Tensor 'topic/dec/doubly_rnn/output_9/Tanh:0' shape=(1, 256) dtype=float32>,
 110: <tf.Tensor 'topic/dec/doubly_rnn/output_10/Tanh:0' shape=(1, 256) dtype=float32>,
 111: <tf.Tensor 'topic/dec/doubly_rnn/output_11/Tanh:0

In [100]:
sticks_topic

<tf.Tensor 'concat:0' shape=(?, 20) dtype=float32>

In [117]:
check_shape([sticks_topic, prob_topic, logits_bow])

topic/enc/concat:0 : (53, 20)
topic/enc/Mul_2:0 : (53, 20)
Log:0 : (53, 1035)


## define loss

In [32]:
# define losses
# Per-document reconstruction loss: negative sum of bow counts times predicted log-probabilities.
topic_losses_recon = -tf.reduce_sum(tf.multiply(t_variables['bow'], logits_bow), 1)
topic_loss_recon = tf.reduce_mean(topic_losses_recon) # negative log likelihood of each words

topic_loss_kl = compute_kl_loss(means_bow, logvars_bow) # KL divergence b/w latent dist & gaussian std

# Diversity regulariser: push the cosine-similarity matrix of the topic-word vectors towards identity.
topic_bow_norm = topic_bow / tf.norm(topic_bow, axis=1, keepdims=True)
topic_dots = tf.clip_by_value(tf.matmul(topic_bow_norm, tf.transpose(topic_bow_norm)), -1., 1.)
# topic_dots has one row/column per tree node, so the identity must be sized by the tree
# (len(topic_idxs)) rather than by config.n_topic, which need not match the tree.
topic_loss_reg = tf.reduce_mean(tf.square(topic_dots - tf.eye(len(topic_idxs))))

global_step = tf.Variable(0, name='global_step',trainable=False)

loss = topic_loss_recon + topic_loss_kl + config.reg * topic_loss_reg

# define optimizer
if config.opt == 'Adam':
    optimizer = tf.train.AdamOptimizer(config.lr)
elif config.opt == 'Adagrad':
    optimizer = tf.train.AdagradOptimizer(config.lr)
else:
    # Fail here with a clear message instead of a NameError on `optimizer` below.
    raise ValueError("unsupported optimizer %r: expected 'Adam' or 'Adagrad'" % config.opt)

grad_vars = optimizer.compute_gradients(loss)
# Element-wise clipping to [-grad_clip, grad_clip]; variables without a gradient are skipped
# because clip_by_value cannot take None.
clipped_grad_vars = [(tf.clip_by_value(grad, -config.grad_clip, config.grad_clip), var)
                     for grad, var in grad_vars if grad is not None]
opt = optimizer.apply_gradients(clipped_grad_vars, global_step=global_step)

# monitor
n_bow = tf.reduce_sum(t_variables['bow'], 1)
# Per-document loss divided by its bow total (floored at 1e-5 to avoid division by zero).
topic_ppls = tf.divide(topic_losses_recon, tf.maximum(1e-5, n_bow))
# Positions (within the bow feature axis) of the 10 highest-weighted features of each topic.
topics_freq_bow_indices = tf.nn.top_k(topic_bow, 10, name='topic_freq_bow').indices

In [51]:
def get_loss(sess, batches):
    """Evaluate the loss terms and perplexity over ``batches`` in test mode.

    Args:
        sess: session used to run the graph.
        batches: iterable of ``(ct, batch)`` pairs.

    Returns:
        Tuple ``(loss, recon, kl, reg, ppl)``: the first four are means over
        batches; ``ppl`` is ``exp`` of the mean per-instance value of ``topic_ppls``.
    """
    fetches = [loss, topic_loss_recon, topic_loss_kl, topic_loss_reg, topic_ppls]
    batch_losses = []
    instance_ppls = []
    for _, batch in batches:
        # mode='test' feeds a keep probability of 1.0.
        *loss_terms, ppls_batch = sess.run(fetches, feed_dict=get_feed_dict(batch, mode='test'))
        batch_losses.append(loss_terms)
        instance_ppls.extend(ppls_batch)

    loss_mean, topic_loss_recon_mean, topic_loss_kl_mean, topic_loss_reg_mean = np.mean(batch_losses, 0)
    ppl_mean = np.exp(np.mean(instance_ppls))
    return loss_mean, topic_loss_recon_mean, topic_loss_kl_mean, topic_loss_reg_mean, ppl_mean

def print_topic_sample():
    """Print the top bag-of-words terms of every topic, one line per topic.

    Uses the notebook-level ``sess``, ``topics_freq_bow_indices``, ``bow_idxs``,
    ``topic_idxs`` and ``idx_to_word``.
    """
    top_positions = sess.run(topics_freq_bow_indices)
    # Translate positions on the bow feature axis into vocab ids.
    topics_freq_bow_idxs = bow_idxs[top_positions]
    assert len(topics_freq_bow_idxs) == len(topic_idxs)
    for topic_idx, word_ids in zip(topic_idxs, topics_freq_bow_idxs):
        words = [idx_to_word[word_id] for word_id in word_ids]
        print(topic_idx, ' BOW:', ' '.join(words))

In [34]:
# Close a session left over from a previous run of this cell, then start a fresh one
# and initialise every global variable.
if 'sess' in globals(): sess.close()
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Training state that the training cell reads and updates across runs.
losses_train = []
ppls_train = []
loss_min = np.inf
beta_eval = 1.
epoch = 0
# iterator=True: the training cell re-creates this object after every epoch.
train_batches = get_batches(instances_train, config.batch_size, iterator=True)
saver = tf.train.Saver(max_to_keep=10)

# Empty log table with two-level column headers (group, metric); one row is added per log step.
log_df = pd.DataFrame(columns=pd.MultiIndex.from_tuples(
                    list(zip(*[['','','','TRAIN:','TM','','','','VALID:','TM','','',''],
                            ['Time','Ep','Ct','LOSS','PPL','NLL','KL','REG','LOSS','PPL','NLL','KL','REG']]))))

In [53]:
# An empty log means nothing has been trained in this session yet: start from a clean model directory.
if len(log_df) == 0:
    # Use shutil/os instead of shelling out to `rm -r` / `mkdir`: no whitespace-splitting of the
    # path, no dependency on a POSIX shell toolset, and missing parent directories are created.
    shutil.rmtree(config.modeldir, ignore_errors=True)
    os.makedirs(config.modeldir, exist_ok=True)

# Training loop. `epoch`, `losses_train`, `ppls_train` and `log_df` live at notebook level,
# so re-running this cell resumes from the current values rather than starting over.
time_start = time.time()
while epoch < config.epochs:
    for ct, batch in train_batches:
        feed_dict = get_feed_dict(batch)

        # One optimisation step; the loss terms are fetched in the same run.
        _, loss_batch, topic_loss_recon_batch, topic_loss_kl_batch, topic_loss_reg_batch, ppls_batch = \
        sess.run([opt, loss, topic_loss_recon, topic_loss_kl, topic_loss_reg, topic_ppls], feed_dict = feed_dict)

        # These lists are never reset in this cell, so the TRAIN columns below are running
        # averages over everything accumulated since the lists were created.
        losses_train += [[loss_batch, topic_loss_recon_batch, topic_loss_kl_batch, topic_loss_reg_batch]]
        ppls_train += list(ppls_batch)

        if ct%config.log_period==0:
            loss_train, topic_loss_recon_train, topic_loss_kl_train, topic_loss_reg_train = np.mean(losses_train, 0)
            ppl_train = np.exp(np.mean(ppls_train))
            loss_dev, topic_loss_recon_dev, topic_loss_kl_dev, topic_loss_reg_dev, ppl_dev = get_loss(sess, dev_batches)
            global_step_log = sess.run(tf.train.get_global_step())

            # Checkpointing on dev-loss improvement is currently disabled.
#             if loss_dev < loss_min:
#                 loss_min = loss_dev
#                 saver.save(sess, config.modelpath, global_step=global_step_log)

            # Replace the previous cell output with the refreshed table.
            clear_output()

            # Seconds elapsed since the previous log entry (time_start is reset below).
            time_log = int(time.time() - time_start)
            log_series = pd.Series([time_log, epoch, ct, \
                    '%.2f'%loss_train, '%.0f'%ppl_train, '%.2f'%topic_loss_recon_train, '%.2f'%topic_loss_kl_train, '%.2f'%topic_loss_reg_train, \
                    '%.2f'%loss_dev, '%.0f'%ppl_dev, '%.2f'%topic_loss_recon_dev, '%.2f'%topic_loss_kl_dev, '%.2f'%topic_loss_reg_dev],
                    index=log_df.columns)
            # Rows are keyed by the global step.
            log_df.loc[global_step_log] = log_series
            display(log_df)

            # visualize topic
            print_topic_sample()

            time_start = time.time()

    epoch += 1
    # Fresh batch iterator for the next epoch (presumably the previous one is exhausted -- confirm).
    train_batches = get_batches(instances_train, config.batch_size, iterator=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,TRAIN:,TM,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,VALID:,TM,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0
Unnamed: 0_level_1,Time,Ep,Ct,LOSS,PPL,NLL,KL,REG,LOSS,PPL.1,NLL,KL,REG
1,1,0,0,136.85,1037,135.42,0.49,0.95,116.98,1007,115.73,0.3,0.95
1001,18,0,1000,114.02,571,113.51,0.22,0.29,105.47,524,104.93,0.29,0.25
2001,18,0,2000,113.31,552,112.68,0.37,0.26,105.08,505,104.27,0.59,0.22
2276,5,1,0,113.14,547,112.49,0.4,0.26,105.35,516,104.58,0.55,0.22
3276,18,1,1000,112.81,537,112.09,0.47,0.24,104.87,500,104.13,0.53,0.22
4276,18,1,2000,112.77,532,112.01,0.52,0.24,104.77,497,104.01,0.56,0.21
4551,5,2,0,112.69,530,111.92,0.53,0.23,104.92,498,104.15,0.56,0.2
5551,18,2,1000,112.57,527,111.78,0.56,0.23,104.72,494,103.92,0.6,0.2
6551,18,2,2000,112.46,523,111.66,0.58,0.22,104.51,487,103.69,0.66,0.16
6826,5,3,0,112.44,522,111.64,0.59,0.22,104.5,485,103.69,0.66,0.15


0  BOW: ! cover love color price bottom nice bought pro 'm
1  BOW: carry pockets strap shoulder back work handle straps ! quality
2  BOW: pocket & room ; ipad power mouse small perfect inside
10  BOW: quality zipper inside material design made inch size big find
11  BOW: sleeve & ; protection neoprene inch snug air pro inside
20  BOW: quality zipper inside material design made inch size big find
21  BOW: sleeve & ; protection neoprene inch snug air pro inside
22  BOW: nice inside ; sleeve & zipper inch protection air material
100  BOW: chemical tab charge print odor display suppose kinda happier packaging
101  BOW: ! nice tab bought perfect & protecting perfectly chemical hard
110  BOW: chemical tab charge print odor display suppose kinda happier packaging
111  BOW: ! nice tab bought perfect & protecting perfectly chemical hard
112  BOW: nice ! tab perfect bought perfectly & - stars small
200  BOW: chemical tab charge print odor display suppose kinda happier packaging
201  BOW: ! nice 

KeyboardInterrupt: 

# confirm variables

In [None]:
debug_value([topic_losses_recon, n_bow])

In [None]:
debug_value([tf.exp(-tf.divide(topic_losses_recon, n_bow))])

### test

In [12]:
debug_shape([bow, hidden_bow, latents_bow, prob_topic, bow_embeddings, topic_embeddings, topic_bow, prob_bow])

Placeholder:0 : (32, 4022)
topic/enc/dropout/cond/Merge:0 : (32, 256)
topic/enc/add:0 : (32, 32)
topic/enc/prob/Softmax:0 : (32, 50)
embedding_lookup:0 : (4022, 256)
topic/dec/topic_emb:0 : (50, 256)
topic/dec/Softmax:0 : (50, 4022)
topic/dec/Log:0 : (32, 4022)


In [27]:
debug_shape([topic_losses_recon, topic_loss_recon, n_bow, ppls, topic_embeddings_norm, tf.expand_dims(topic_angles_mean, -1), topic_angles_vars])

Sum:0 : (32,)
Neg:0 : ()
Sum_2:0 : (32,)
Neg_1:0 : (32,)
truediv_1:0 : (50, 256)
ExpandDims_1:0 : (1,)
Mean_3:0 : ()


In [14]:
debug_value([tf.reduce_sum(tf.square(topic_embeddings_norm), 1)], return_value=True)[0]

array([1.        , 0.99999994, 0.9999999 , 1.        , 1.        ,
       1.0000001 , 0.9999999 , 1.0000001 , 1.0000001 , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.9999999 , 0.9999999 , 0.99999994, 1.        ,
       1.        , 0.9999999 , 1.0000001 , 1.        , 1.        ,
       1.        , 0.99999994, 1.        , 0.99999994, 0.99999994,
       1.0000001 , 0.9999999 , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 0.9999999 , 1.        ,
       0.9999999 , 1.        , 1.        , 1.        , 0.99999994,
       1.0000001 , 1.        , 1.        , 0.99999994, 1.        ],
      dtype=float32)

In [15]:
debug_value([tf.reduce_sum(prob_topic, -1), tf.reduce_sum(topic_bow, -1), tf.reduce_sum(tf.exp(prob_bow), 1)])

Sum_4:0 : [1.0000001  1.         1.0000001  0.99999994 1.         1.
 1.         0.99999994 0.99999994 1.         1.         1.
 0.99999994 1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         0.9999999
 1.         0.99999994 1.         1.0000001  0.99999994 1.
 1.         1.        ]
Sum_5:0 : [1.         0.99999994 1.         0.99999994 0.99999994 1.
 1.         0.9999998  1.         1.         1.         1.
 1.         1.         1.         1.0000001  0.99999994 1.
 1.         0.99999994 0.9999999  0.99999994 1.0000001  1.
 1.         1.         1.         0.99999994 0.9999999  1.
 1.         1.         0.99999994 1.         0.99999994 1.
 1.         0.9999999  1.         1.         1.         1.
 0.99999994 1.         0.99999994 0.99999994 0.99999994 0.99999994
 1.         1.        ]
Sum_6:0 : [1.         1.         1.0000001  0.99999994 1.         1.
 0.9999999  0.99999994 0.9999999  1.         1.0000001  1.
 0.9999999  1.        

In [16]:
# Recompute the KL term with tensorflow.distributions; the following cell prints it next to
# topic_loss_kl (from compute_kl_loss) for comparison.
sigma_bow = tf.exp(0.5 * logvars_bow) # standard deviation from log-variance
dist_bow = tfd.Normal(means_bow, sigma_bow)
dist_std = tfd.Normal(0., 1.)
# Sum the element-wise KL over axis 1, then average over axis 0.
topic_loss_kl_tmp = tf.reduce_mean(tf.reduce_sum(tfd.kl_divergence(dist_bow, dist_std), 1))

In [17]:
debug_value([topic_loss_recon, topic_loss_kl, topic_loss_kl_tmp])

Neg:0 : 405.38312
Mean_1:0 : 0.32056683
Mean_4:0 : 0.32056683


In [110]:
_logvars, _means, _kl_losses, _latents, _output_logits = sess.run([logvars, means, kl_losses, latents, output_logits], feed_dict=feed_dict)


In [111]:
_logvars.shape, _means.shape, _kl_losses.shape, _latents.shape

((32, 32), (32, 32), (32,), (32, 32))

In [112]:
_output_logits

array([[[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]],

       [[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]],

       [[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]],

       ...,

       [[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]],

       [[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]],

       [[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]]], dtype=float32)

In [109]:
_output_logits, _dec_target_idxs_do, _dec_mask_tokens_do, _recon_loss, _kl_losses, _ = sess.run([output_logits, dec_target_idxs_do, dec_mask_tokens_do, recon_loss, kl_losses, opt], feed_dict=feed_dict)


NameError: name 'dec_target_idxs_do' is not defined

In [44]:
tf.reduce_max(output_logits, 2).eval(session=sess, feed_dict=feed_dict).shape

(120, 46)

In [31]:
_output_logits.shape, _dec_target_idxs_do.shape, _dec_mask_tokens_do.shape

((120, 46, 20000), (120, 46), (120, 46))

In [32]:
_logits = np.exp(_output_logits) / np.sum(np.exp(_output_logits), 2)[:, :, None]

In [33]:
_idxs = _dec_target_idxs_do

In [35]:
_losses = np.array([[-np.log(_logits[i, j, _idxs[i, j]]) for j in range(_idxs.shape[1])] for i in range(_idxs.shape[0])]) * _dec_mask_tokens_do

In [36]:
np.sum(_losses)/np.sum(_dec_mask_tokens_do)

9.903732

In [37]:
_recon_loss

9.903732

In [38]:
_kl_losses.shape

(120,)