In [1]:
import tensorflow as tf
import numpy as np
from tqdm import tqdm

In [2]:
maxlen = 20
max_vocab = 20000

In [3]:
word2idx = tf.keras.datasets.imdb.get_word_index()
word2idx = {k: (v + 4) for k, v in word2idx.items()}
word2idx['<PAD>'] = 0
word2idx['<START>'] = 1
word2idx['<UNK>'] = 2
word2idx['<END>'] = 3
idx2word = {i: w for w, i in word2idx.items()}

In [4]:
(train_X, _), (test_X, _) = tf.contrib.keras.datasets.imdb.load_data(num_words = max_vocab, index_from= 4)

In [5]:
X = np.concatenate([train_X, test_X])

In [6]:
X = np.concatenate((tf.keras.preprocessing.sequence.pad_sequences(
                            X, maxlen, truncating='post', padding='post'),
                        tf.keras.preprocessing.sequence.pad_sequences(
                            X, maxlen, truncating='pre', padding='post')))

In [7]:
Y_input = X[:]
Y_output = np.concatenate([X[:, 1:], np.full([X.shape[0], 1], word2idx['<END>'])], 1)
X = X[:, 1:]

In [8]:
X.shape, Y_input.shape, Y_output.shape

((100000, 19), (100000, 20), (100000, 20))

In [9]:
from tensorflow.python.util import nest
from tensorflow.contrib.seq2seq.python.ops.beam_search_decoder import _beam_search_step
import tensorflow as tf
import numpy as np


class ModifiedBasicDecoder(tf.contrib.seq2seq.BasicDecoder):
    def __init__(self, cell, helper, initial_state, concat_z, output_layer=None):
        super().__init__(cell, helper, initial_state, output_layer = output_layer)
        self.z = concat_z

    def initialize(self, name=None):
        (finished, first_inputs, initial_state) =  self._helper.initialize() + (self._initial_state,)
        first_inputs = tf.concat([first_inputs, self.z], -1)
        return (finished, first_inputs, initial_state)

    def step(self, time, inputs, state, name=None):
        with tf.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
            cell_outputs, cell_state = self._cell(inputs, state)
        print(self._output_layer)
        if self._output_layer is not None:
            cell_outputs = self._output_layer(cell_outputs)
        print(cell_outputs)
        sample_ids = self._helper.sample(
            time=time, outputs=cell_outputs, state=cell_state)
        (finished, next_inputs, next_state) = self._helper.next_inputs(
            time=time,
            outputs=cell_outputs,
            state=cell_state,
            sample_ids=sample_ids)
        outputs = tf.contrib.seq2seq.BasicDecoderOutput(cell_outputs, sample_ids)
        next_inputs = tf.concat([next_inputs, self.z], -1)
        return (outputs, next_state, next_inputs, finished)


class ModifiedBeamSearchDecoder(tf.contrib.seq2seq.BeamSearchDecoder):
    def __init__(self,
                 cell,
                 embedding,
                 start_tokens,
                 end_token,
                 initial_state,
                 beam_width,
                 concat_z,
                 output_layer=None,
                 length_penalty_weight=0.0):
        super().__init__(cell, embedding, start_tokens, end_token, initial_state, beam_width, output_layer, length_penalty_weight)
        self.z = concat_z

    def initialize(self, name=None):
        finished, start_inputs = self._finished, self._start_inputs

        start_inputs = tf.concat([start_inputs, self.z], -1)

        log_probs = tf.one_hot(  # shape(batch_sz, beam_sz)
            tf.zeros([self._batch_size], dtype=tf.int32),
            depth=self._beam_width,
            on_value=0.0,
            off_value=-np.Inf,
            dtype=nest.flatten(self._initial_cell_state)[0].dtype)

        initial_state = tf.contrib.seq2seq.BeamSearchDecoderState(
            cell_state=self._initial_cell_state,
            log_probs=log_probs,
            finished=finished,
            lengths=tf.zeros(
                [self._batch_size, self._beam_width], dtype=tf.int64),
            accumulated_attention_probs=())

        return (finished, start_inputs, initial_state)

    def step(self, time, inputs, state, name=None):
        batch_size = self._batch_size
        beam_width = self._beam_width
        end_token = self._end_token
        length_penalty_weight = self._length_penalty_weight

        with tf.name_scope(name, "BeamSearchDecoderStep", (time, inputs, state)):
            cell_state = state.cell_state
            inputs = nest.map_structure(
                lambda inp: self._merge_batch_beams(inp, s=inp.shape[2:]), inputs)
            cell_state = nest.map_structure(self._maybe_merge_batch_beams, cell_state,
                                            self._cell.state_size)
            cell_outputs, next_cell_state = self._cell(inputs, cell_state)
            cell_outputs = nest.map_structure(
                lambda out: self._split_batch_beams(out, out.shape[1:]), cell_outputs)
            next_cell_state = nest.map_structure(
                self._maybe_split_batch_beams, next_cell_state, self._cell.state_size)
            print(self._output_layer)
            if self._output_layer is not None:
                cell_outputs = self._output_layer(cell_outputs)

            beam_search_output, beam_search_state = _beam_search_step(
                time=time,
                logits=cell_outputs,
                next_cell_state=next_cell_state,
                beam_state=state,
                batch_size=batch_size,
                beam_width=beam_width,
                end_token=end_token,
                length_penalty_weight=length_penalty_weight,
                coverage_penalty_weight = 0.0)

            finished = beam_search_state.finished
            sample_ids = beam_search_output.predicted_ids
            next_inputs = tf.cond(
                tf.reduce_all(finished), lambda: self._start_inputs,
                lambda: self._embedding_fn(sample_ids))

            next_inputs = tf.concat([next_inputs, self.z], -1)

        return (beam_search_output, beam_search_state, next_inputs, finished)

In [10]:
class VAE:
    def __init__(self, size_layer, num_layers, embedded_size, dict_size, learning_rate,
                beam_size = 15, latent_size = 16, anneal_max = 1.0, anneal_bias = 6000):
        
        def cells(reuse=False):
            return tf.nn.rnn_cell.GRUCell(size_layer, reuse=reuse)
        
        def kl_w_fn(global_step):
            return anneal_max * tf.sigmoid((10 / anneal_bias) * \
                                           (tf.to_float(global_step) - tf.constant(anneal_bias / 2)))
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y_input = tf.placeholder(tf.int32, [None, None])
        self.Y_output = tf.placeholder(tf.int32, [None, None])
        
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y_seq_len = tf.count_nonzero(self.Y_input, 1, dtype=tf.int32)
        batch_size = tf.shape(self.X)[0]
        main = tf.strided_slice(self.Y_input, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], word2idx['<START>']), main], 1)
        
        embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
        x = tf.nn.embedding_lookup(embeddings, self.X)
        
        _, encoder_state = tf.nn.dynamic_rnn(
            cell = tf.nn.rnn_cell.MultiRNNCell([cells() for _ in range(num_layers)]), 
            inputs = x,
            sequence_length = self.X_seq_len,
            dtype = tf.float32)
        encoder_state = encoder_state[-1]
        
        z_mean = tf.layers.dense(encoder_state, latent_size)
        z_var = tf.layers.dense(encoder_state, latent_size)
        
        posterior = tf.contrib.distributions.MultivariateNormalDiag(z_mean, z_var)
        prior = tf.contrib.distributions.MultivariateNormalDiag(tf.zeros_like(z_mean),
                                                            tf.ones_like(z_var))
        z = posterior.sample()
        init_state = tf.layers.dense(z, size_layer, tf.nn.elu)
        print(dict_size)
        output_proj = tf.layers.Dense(dict_size)
        print(output_proj)
        decoder_cell = cells()
        
        helper = tf.contrib.seq2seq.TrainingHelper(
                inputs = tf.nn.embedding_lookup(embeddings, decoder_input),
                sequence_length = self.Y_seq_len)
        
        decoder = ModifiedBasicDecoder(
                cell = decoder_cell,
                helper = helper,
                initial_state = init_state,
                output_layer = output_proj,
                concat_z = z)
        
        decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = decoder,
                impute_finished = True,
                maximum_iterations = tf.reduce_max(self.Y_seq_len))
        
        self.training_logits = decoder_output.rnn_output
        out_dist = tf.distributions.Categorical(self.training_logits)
        global_step = tf.Variable(0, trainable=False)
        self.out_dist = out_dist.log_prob(self.Y_output)
        nll_loss = -tf.reduce_sum(self.out_dist)
        self.nll_loss = nll_loss
        kl_w = kl_w_fn(global_step)
        self.kl_w = kl_w
        kl_loss = tf.reduce_sum(tf.distributions.kl_divergence(posterior, prior))
        self.kl_loss = kl_loss
        self.cost = nll_loss + kl_w * kl_loss
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost, 
                                                                        global_step = global_step)
        
        tiled_z = tf.tile(tf.expand_dims(z, 1), [1, beam_size, 1])
        decoder = ModifiedBeamSearchDecoder(
                cell = decoder_cell,
                embedding = embeddings,
                start_tokens = tf.tile(tf.constant([word2idx['<START>']], tf.int32),
                                       [batch_size]),
                end_token = word2idx['<END>'],
                initial_state = tf.contrib.seq2seq.tile_batch(init_state, beam_size),
                beam_width = beam_size,
                output_layer = output_proj,
                concat_z = tiled_z)
        decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                maximum_iterations = tf.reduce_max(self.X_seq_len),
                decoder = decoder)
        self.predict_ids = decoder_output.predicted_ids[:, :, 0]

In [11]:
size_layer = 128
num_layers = 2
embedded_size = 128
learning_rate = 1e-3
batch_size = 16
epoch = 20

In [12]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = VAE(size_layer, num_layers, embedded_size, len(word2idx), learning_rate,
           latent_size = size_layer)
sess.run(tf.global_variables_initializer())

Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.
88588
<tensorflow.python

In [13]:
def word_dropout(x):
    is_dropped = np.random.binomial(1, 0.5, x.shape)
    fn = np.vectorize(lambda x, k: word2idx['<UNK>'] if (
                      k and (x not in range(4))) else x)
    return fn(x, is_dropped)

def inf_inp(test_strs):
    x = [[word2idx.get(w, 2) for w in s.split()] for s in test_strs]
    x = tf.keras.preprocessing.sequence.pad_sequences(
        x, maxlen, truncating='post', padding='post')
    return x

In [14]:
test_strings = ['i love this film and i think it is one of the best films',
             'this movie is a waste of time and there is no point to watch it']

In [15]:
inf_inp(test_strings)

array([[ 14, 120,  15,  23,   6,  14, 105,  13,  10,  32,   8,   5, 119,
        109,   0,   0,   0,   0,   0,   0],
       [ 15,  21,  10,   7, 438,   8,  59,   6,  51,  10,  58, 214,   9,
        107,  13,   0,   0,   0,   0,   0]], dtype=int32)

In [16]:
y_input = word_dropout(Y_input[:2])
y_output = Y_output[:2]
x = X[:2]

In [17]:
sess.run([model.cost, model.out_dist, model.nll_loss, model.kl_loss, model.training_logits],
         feed_dict = {model.X: x, model.Y_input: y_input,
                      model.Y_output: y_output})

[459.78128,
 array([[-11.39117  , -11.388631 , -11.388212 , -11.389152 , -11.397253 ,
         -11.3795185, -11.378984 , -11.402033 , -11.378487 , -11.396691 ,
         -11.401232 , -11.407129 , -11.379465 , -11.389895 , -11.420394 ,
         -11.384628 , -11.385522 , -11.402591 , -11.404229 , -11.391129 ],
        [-11.390028 , -11.358953 , -11.391425 , -11.400146 , -11.411399 ,
         -11.379161 , -11.400605 , -11.399052 , -11.405903 , -11.384191 ,
         -11.404421 , -11.401221 , -11.382693 , -11.3972645, -11.398771 ,
         -11.386691 , -11.408179 , -11.412223 , -11.3879795, -11.381235 ]],
       dtype=float32),
 455.73785,
 604.13904,
 array([[[ 1.1628436e-02, -8.6111017e-05, -1.8049239e-03, ...,
          -3.8907779e-04, -1.8487602e-02,  1.6926985e-02],
         [ 1.6618120e-02, -3.0542999e-03, -5.1304079e-03, ...,
          -1.0260832e-03, -1.5437857e-02,  1.8255688e-02],
         [ 1.4503996e-02,  1.7808471e-04, -1.0922158e-02, ...,
           3.9447471e-03, -9.6551636e-0

In [18]:
r_aug = sess.run(model.predict_ids, feed_dict = {model.X: inf_inp(test_strings)})[0]
' '.join([idx2word[r] for r in r_aug])

"enthusast maisie casket casket casket casket casket evasive evasive beaux beaux glyn glyn pigeon 'f"

In [19]:
epoch = 10
batch_size = 32

In [20]:
for e in range(epoch):
    pbar = tqdm(
        range(0, len(X), batch_size), desc = 'minibatch loop')
    cost = 0
    for i in pbar:
        index = min(i + batch_size, len(X))
        y_input = word_dropout(Y_input[i: index])
        y_output = Y_output[i: index]
        x = X[i: index]
        c, _ = sess.run([model.cost, model.optimizer],
         feed_dict = {model.X: x, model.Y_input: y_input,
                      model.Y_output: y_output})
        cost += c
        pbar.set_postfix(cost = c)
    cost /= (len(X) / batch_size)
    r_aug = sess.run(model.predict_ids, feed_dict = {model.X: inf_inp(test_strings)})[0]
    print('epoch %d, average loss %f'%(e + 1, cost))
    print('real string: %s'%(test_strings[0]))
    print('augmented string: %s'%(' '.join([idx2word[r] for r in r_aug])))

minibatch loop: 100%|██████████| 3125/3125 [12:15<00:00,  4.26it/s, cost=3.66e+3]
minibatch loop:   0%|          | 0/3125 [00:00<?, ?it/s]

epoch 1, average loss 3930.666338
real string: i love this film and i think it is one of the best films
augmented string: this a movie you it a of movie you to it the of time this


minibatch loop: 100%|██████████| 3125/3125 [12:14<00:00,  4.26it/s, cost=3.7e+3] 
minibatch loop:   0%|          | 0/3125 [00:00<?, ?it/s]

epoch 2, average loss 3849.483341
real string: i love this film and i think it is one of the best films
augmented string: to it i it a movie i it a movie i it it the of


minibatch loop: 100%|██████████| 3125/3125 [12:15<00:00,  4.25it/s, cost=3.69e+3]
minibatch loop:   0%|          | 0/3125 [00:00<?, ?it/s]

epoch 3, average loss 3807.555884
real string: i love this film and i think it is one of the best films
augmented string: i recommend movie i it the of i it i it i it the of


minibatch loop: 100%|██████████| 3125/3125 [12:15<00:00,  4.24it/s, cost=3.64e+3]
minibatch loop:   0%|          | 0/3125 [00:00<?, ?it/s]

epoch 4, average loss 3771.349666
real string: i love this film and i think it is one of the best films
augmented string: i saw movie i it the was i it i it it a of and


minibatch loop: 100%|██████████| 3125/3125 [12:14<00:00,  4.25it/s, cost=3.59e+3]
minibatch loop:   0%|          | 0/3125 [00:00<?, ?it/s]

epoch 5, average loss 3741.504500
real string: i love this film and i think it is one of the best films
augmented string: i to it the i this was the movie i it have seen a of


minibatch loop: 100%|██████████| 3125/3125 [12:15<00:00,  4.25it/s, cost=3.61e+3]
minibatch loop:   0%|          | 0/3125 [00:00<?, ?it/s]

epoch 6, average loss 3718.190249
real string: i love this film and i think it is one of the best films
augmented string: to this i this is one the i ever it i it have the of


minibatch loop: 100%|██████████| 3125/3125 [12:15<00:00,  4.25it/s, cost=3.58e+3]
minibatch loop:   0%|          | 0/3125 [00:00<?, ?it/s]

epoch 7, average loss 3697.350334
real string: i love this film and i think it is one of the best films
augmented string: i to it it a movie i it to it i it a out 10


minibatch loop: 100%|██████████| 3125/3125 [12:15<00:00,  4.25it/s, cost=3.54e+3]
minibatch loop:   0%|          | 0/3125 [00:00<?, ?it/s]

epoch 8, average loss 3680.469965
real string: i love this film and i think it is one of the best films
augmented string: i this was a movie the of film i recommend to it anyone the of


minibatch loop: 100%|██████████| 3125/3125 [12:15<00:00,  4.26it/s, cost=3.58e+3]
minibatch loop:   0%|          | 0/3125 [00:00<?, ?it/s]

epoch 9, average loss 3664.853960
real string: i love this film and i think it is one of the best films
augmented string: that would been to it the is a movie i it have seen a of


minibatch loop: 100%|██████████| 3125/3125 [12:15<00:00,  4.25it/s, cost=3.54e+3]

epoch 10, average loss 3650.547580
real string: i love this film and i think it is one of the best films
augmented string: film i this is a movie is a movie i recommend to it anyone likes





In [21]:
r_aug = sess.run(model.predict_ids, feed_dict = {model.X: inf_inp(test_strings)})[0]
print('augmented string: %s'%(' '.join([idx2word[r] for r in r_aug])))

augmented string: there no but the was too but was i it have the and it a
