In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.contrib.rnn as tf_rnn
import json
from jamesbot.utils.padding import pad_sequences, pad_eos_indices
from tensorflow.contrib import seq2seq
from tensorflow.python.layers.core import Dense

In [3]:
word_dict = json.load(open('./data/movie_dialogs/word_dict.json'))
embeddings = json.load(open('./data/movie_dialogs/word_embeddings.json'))
train_data = json.load(open('./data/movie_dialogs/samples_train.json'))
corpus_filtered = json.load(open('./data/movie_dialogs/corpus_filtered.json'))

word_index = {val: key for (key, val) in word_dict.items()}

In [334]:
import nltk

def embed(text):
    return [word_dict.get(token, 2) for token in nltk.word_tokenize(str(text).lower())]

def decode(token_ids):
    return ' '.join([word_index[token_id] for token_id in token_ids[:-1]])

def discount_rewards(rewards, discount_factor=0.8):
    result = np.zeros(len(rewards))
    total_reward = 0.0
    
    for i in reversed(range(len(rewards))):
        total_reward = discount_factor*total_reward + rewards[i]
        result[i] = total_reward
    
    return result     

def gather_token_probabilities(decoder_outputs, token_ids):
    batch_size, n_tokens, _ = tf.unstack(tf.shape(decoder_outputs))

    gather_indices = tf.concat([
        tf.tile(tf.reshape(tf.range(batch_size), [batch_size, 1, 1]), [1, n_tokens, 1]),
        tf.tile(tf.reshape(tf.range(n_tokens), [1, n_tokens, 1]), [batch_size, 1, 1]),
        tf.expand_dims(token_ids, -1)
    ], -1)


    return tf.gather_nd(decoder_outputs, gather_indices)

In [418]:
class ChatbotAgent:
    
    def __init__(self, inputs, inputs_length, previous_outputs, previous_outputs_length, targets=None, targets_length=None, word_embeddings_shape=None, cell_size=300, train=False, decoder_sampling_p = 0.0):
        assert word_embeddings_shape is not None
        if train:
            assert targets is not None
            assert targets_length is not None
        
        self._inputs = inputs
        self._inputs_length = inputs_length
        self._previous_outputs = previous_outputs
        self._previous_outputs_length = previous_outputs_length
        
        self._targets = targets
        self._targets_length = targets_length
        
        self._word_embeddings_shape = list(word_embeddings_shape)
        self._cell_size = int(cell_size)
        self._train = bool(train)
        self._decoder_sampling_p = decoder_sampling_p
        
        self._embeddings_module()
        self._input_encoder_module()
        self._output_decoder_module()
        
        print('Embeddings:', self._word_embeddings_shape)
    
    def _base_rnn_cell(self):
        return tf_rnn.GRUCell(self._cell_size, activation=tf.nn.tanh)
    
    def _encoder(self, inputs, inputs_length, reuse=False):
        with tf.variable_scope('encoder', reuse = reuse):
            _birnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                self._base_rnn_cell(), self._base_rnn_cell(),
                inputs = inputs,
                sequence_length = inputs_length,
                dtype = tf.float32
            )
            
            _outputs, _state = tf.nn.dynamic_rnn(
                self._base_rnn_cell(),
                inputs = tf.concat(_birnn_outputs, -1),
                sequence_length = inputs_length,
                dtype = tf.float32
            )
            
            return _outputs, _state
    
    def _embeddings_module(self):
        with tf.name_scope('embeddings_module'):
            self._word_embeddings = tf.Variable(tf.random_normal(self._word_embeddings_shape, stddev=.3), trainable=True)
            
            self._inputs_embedded = tf.nn.embedding_lookup(self._word_embeddings, self._inputs)
            self._previous_outputs_embedded = tf.nn.embedding_lookup(self._word_embeddings, self._previous_outputs)
    
    def _input_encoder_module(self):
        with tf.name_scope('input_encoder'):
            self._input_encoder_outputs, self._input_encoder_state = self._encoder(self._inputs_embedded, self._inputs_length)
            
        with tf.name_scope('previous_output_encoder'):
            self._previous_output_encoder_outputs, self._previous_output_encoder_state = self._encoder(self._previous_outputs_embedded, self._previous_outputs_length, reuse=True)

    def _output_decoder_module(self):
        with tf.variable_scope('output_decoder_module'):
            batch_size, _ = tf.unstack(tf.shape(self._inputs))

            if self._train:
                # Training helper
                embedded_targets = tf.nn.embedding_lookup(self._word_embeddings, self._targets)
                helper = seq2seq.ScheduledEmbeddingTrainingHelper(
                    inputs = embedded_targets,
                    sequence_length = self._targets_length,
                    embedding = self._word_embeddings,
                    sampling_probability = self._decoder_sampling_p
                )  
            else:
                # Inference helper
                print('Inference')
                helper = seq2seq.GreedyEmbeddingHelper(
                    embedding = self._word_embeddings,
                    start_tokens = tf.tile([0], [batch_size]),
                    end_token = 1
                )

            cell, initial_state = self._decoder_cell()            
            decoder = seq2seq.BasicDecoder(
                cell = cell,
                helper = helper,
                initial_state = initial_state,
                output_layer = Dense(self._word_embeddings_shape[0])
            )
            
            decoder_outputs, _, _ = seq2seq.dynamic_decode(
                decoder = decoder,
                impute_finished = True,
                maximum_iterations = 51
            )
            self._decoder_logits = decoder_outputs.rnn_output
            self.decoder_token_p = tf.nn.softmax(self._decoder_logits, -1)
            self.decoder_token_ids = tf.argmax(self._decoder_logits, -1, output_type=tf.int32)
    
    def _decoder_cell(self):
        batch_size, _ = tf.unstack(tf.shape(self._inputs))
        
        # Project encoder state to lower dimensionality
        _encoder_state_proj = tf.layers.dense(
            tf.concat([self._input_encoder_state, self._previous_output_encoder_state], -1),
            self._cell_size
        )

        _attention_mechanism = seq2seq.BahdanauAttention(
            num_units = self._cell_size,
            memory = self._input_encoder_outputs,
            memory_sequence_length = self._inputs_length
        )
        
        _attentive_cell = seq2seq.AttentionWrapper(
            cell = self._base_rnn_cell(),
            attention_mechanism = _attention_mechanism,
            attention_layer_size = self._cell_size,
            initial_cell_state = _encoder_state_proj
        )
        
        stacked_cell = tf_rnn.MultiRNNCell([
            _attentive_cell,
            self._base_rnn_cell()
        ])        
        
        initial_state = tuple([
            _attentive_cell.zero_state(batch_size, dtype=tf.float32),
            _encoder_state_proj
        ])
        
        return stacked_cell, initial_state
    

class ChatbotAgentRunner:
    
    def __init__(self, embeddings_shape, checkpoint_path):
        self._graph = tf.Graph()
        self._sess = tf.InteractiveSession(graph=self._graph)
        
        with self._graph.as_default():
            self.inputs = tf.placeholder(tf.int32, [None, None])
            self.inputs_length = tf.placeholder(tf.int32, [None])
            self.previous_outputs = tf.placeholder(tf.int32, [None, None])
            self.previous_outputs_length = tf.placeholder(tf.int32, [None])
            
            self.agent = ChatbotAgent(
                self.inputs, self.inputs_length, self.previous_outputs, self.previous_outputs_length,
                word_embeddings_shape=embeddings_shape, train=False,
            )
            
            self._update_ops()
            
            saver = tf.train.Saver()
            saver.restore(self._sess, checkpoint_path)
            
    def _update_ops(self):
        self.responses = tf.placeholder(tf.int32, [None, None])
        self.responses_length = tf.placeholder(tf.int32, [None])
        self.rewards = tf.placeholder(tf.float32, [None])
        
        token_mask = tf.sequence_mask(self.responses_length, tf.reduce_max(self.responses_length), dtype=tf.float32)
        self._token_probabilities = tf.log(gather_token_probabilities(self.agent.decoder_token_p, self.responses)) * token_mask
        
        self.loss = -tf.reduce_mean(self._token_probabilities * tf.expand_dims(self.rewards, -1))
        
            
    def run(self, inputs, previous_outputs):
        ops = [
            self.agent.decoder_token_ids,
            self.agent.decoder_token_p,
            tf.concat([
                self.agent._input_encoder_state,
                self.agent._previous_output_encoder_state
            ], -1)
        ]
        
        fd = {
            self.inputs: inputs,
            self.inputs_length: list(map(len, inputs)),
            self.previous_outputs: previous_outputs,
            self.previous_outputs_length: list(map(len, previous_outputs))   
        }
        
        return self._sess.run(ops, feed_dict=fd)

class Trainer:
    pass
    
class CrossEntropyTrainer(Trainer):
    
    def __init__(self, embeddings_shape):
        self.inputs = tf.placeholder(tf.int32, [None, None])
        self.inputs_length = tf.placeholder(tf.int32, [None])
        self.previous_outputs = tf.placeholder(tf.int32, [None, None])
        self.previous_outputs_length = tf.placeholder(tf.int32, [None])
        
        self.targets = tf.placeholder(tf.int32, [None, None])
        self.targets_length = tf.placeholder(tf.int32, [None])
        self._padded_targets = pad_eos_indices(self.targets, self.targets_length)
        self._padded_targets_length = self.targets_length + 2
        
        self.agent = ChatbotAgent(
            self.inputs, self.inputs_length, self.previous_outputs, self.previous_outputs_length,
            self._padded_targets, self._padded_targets_length,
            word_embeddings_shape=embeddings_shape, train=True,
            decoder_sampling_p=0.0
        )
        
        self._build_loss()
    
    def embeddings_initializer(self):
        placeholder = tf.placeholder(tf.float32)
        init_op = self.agent._word_embeddings.assign(placeholder)
        return placeholder, init_op
        
    def _build_loss(self):
        loss_targets = pad_eos_indices(self.targets, self.targets_length, prepad=False)
        
        stepwise_ce = tf.nn.softmax_cross_entropy_with_logits(
            labels = tf.one_hot(loss_targets, self.agent._word_embeddings_shape[0]),
            logits = self.agent._decoder_logits
        )
        self.loss = tf.reduce_mean(stepwise_ce)
        
        self.accuracy = tf.reduce_mean(tf.reduce_mean(tf.cast(tf.equal(loss_targets, self.agent.decoder_token_ids), tf.float32), -1))
        
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)
        
class A2CTrainer(Trainer):
    
    DULL_SET = [
        "I don't know what you're talking about.",
        "I don't know.",
        "You don't know.",
        "You know what I mean.",
        "I know what you mean.",
        "You know what I'm saying.",
        "You don't know anything.",
        "I'm not sure."
    ]
    
    LR = 1e-2
    
    def __init__(self, embeddings_shape, seq2seq_checkpoint):
        self._seq2seq_checkpoint = seq2seq_checkpoint
        self._dull_set_embedded = list(map(embed, self.DULL_SET))
        self._previous_state = None    
            
        self._agent1 = ChatbotAgentRunner(embeddings_shape, seq2seq_checkpoint)
        self._agent2 = ChatbotAgentRunner(embeddings_shape, seq2seq_checkpoint)
        
        with self._agent1._graph.as_default():
            self._agent1_train = tf.train.GradientDescentOptimizer(learning_rate=self.LR).minimize(self._agent1.loss)
            
        with self._agent2._graph.as_default():
            self._agent2_train = tf.train.GradientDescentOptimizer(learning_rate=self.LR).minimize(self._agent2.loss)
            
    def _dull_response_reward(self, decoder_token_p):
        result = []
        for tokens in decoder_token_p:
            n_tokens = tokens.shape[0] - 1
            dull_p = []
            
            for dull_seq in self._dull_set_embedded:
                if n_tokens > len(dull_seq):
                    dull_seq_p = tokens[range(len(dull_seq)), dull_seq]
                else:
                    dull_seq_p = tokens[range(n_tokens), dull_seq[:n_tokens]]

                dull_p.append(np.log(np.prod(dull_seq_p) + 1e-12) / len(dull_seq))

            result.append(-np.sum(dull_p) / len(self._dull_set_embedded))
            
        if np.any(np.isnan(result)):
            print('dull:', result)
        
        return np.asarray(result, dtype=np.float32)
    
    def _information_flow_reward(self, previous_state, state):
        if previous_state is None:
            return np.asarray([1.0]*state.shape[0])
        
        vec1_norm = np.sqrt(np.sum(np.square(previous_state), axis=-1))
        vec2_norm = np.sqrt(np.sum(np.square(state), axis=-1))
        
        result = -np.log(np.clip(np.dot(previous_state, state.T) / (vec1_norm * vec2_norm), 1e-12, np.inf))
        
        if np.any(np.isnan(result)):
            print('info:', result, vec1_norm, vec2_norm, np.sum(previous_state * state, axis=-1))
            
        return result

    def _num_unk_reward(self, decoder_token_p):
        token_ids = np.argmax(decoder_token_p, axis=-1)
        
        return token_ids[token_ids == 2].sum()

    def _reward(self, decoder_token_p, previous_state, state):
        dull_response_reward = self._dull_response_reward(decoder_token_p)
        information_flow_reward = self._information_flow_reward(previous_state, state)
        unk_reward = self._num_unk_reward(decoder_token_p)
        
        return 0.25*dull_response_reward + 0.25*information_flow_reward - 0.25*unk_reward
    
    def _append_agent_state(self, agent, inputs, previous_output, response, state, reward):
        self._state[agent]['inputs'].append(list(inputs))
        self._state[agent]['previous_outputs'].append(list(previous_output))
        self._state[agent]['responses'].append(list(response))
        self._state[agent]['rewards'].append(reward)
        self._state[agent]['previous_state'] = state
        
    def _parse_states(self, agent):
        previous_outputs = self._state[agent]['previous_outputs']
        previous_outputs_length = list(map(len, previous_outputs))
        inputs = self._state[agent]['inputs']
        inputs_length = list(map(len, inputs))
        responses = self._state[agent]['responses']
        responses_length = list(map(len, responses))
        
        return {
            'previous_outputs': pad_sequences(previous_outputs, max(previous_outputs_length)),
            'previous_outputs_length': previous_outputs_length,
            'inputs': pad_sequences(inputs, max(inputs_length)),
            'inputs_length': inputs_length,
            'responses': pad_sequences(responses, max(responses_length)),
            'responses_length': responses_length,
            'discounted_rewards': discount_rewards(self._state[agent]['rewards'])
        }   
    
    def reset(self):
        self._state = {
            agent: {
                'inputs': [],
                'previous_outputs': [],
                'responses': [],
                'rewards': [],
                'previous_state': None
            } for agent in ['agent_1', 'agent_2']
        }

    def simulate(self, inputs, display=True):
        if display:
            print('Initial:', decode(inputs[0]))
        
        ag1_output, token_p, state = self._agent1.run(inputs, [[0]])
        self._append_agent_state('agent_1', inputs[0], [0], ag1_output[0], state, self._reward(token_p, self._state['agent_1']['previous_state'], state)[0])
        if display:
            print('Agent 1:', decode(ag1_output[0]))
        
        ag2_output, token_p, state = self._agent2.run(ag1_output, inputs)
        self._append_agent_state('agent_2', ag1_output[0], inputs[0], ag2_output[0], state, self._reward(token_p, self._state['agent_2']['previous_state'], state)[0])
        if display:
            print('Agent 2:', decode(ag2_output[0]))
            print('----------------------------')
            
        for i in range(np.random.randint(5, 12)):
            ag1_previous_output = ag1_output
            ag1_output, token_p, state = self._agent1.run(ag2_output, ag1_previous_output)
            self._append_agent_state('agent_1', ag2_output[0], ag1_previous_output[0], ag1_output[0], state, self._reward(token_p, self._state['agent_1']['previous_state'], state)[0])
            if display:
                print('Agent 1:', decode(ag1_output[0]))
            
            ag2_previous_output = ag2_output
            ag2_output, token_p, state = self._agent2.run(ag1_output, ag2_output)
            self._append_agent_state('agent_2', ag1_output[0], ag2_previous_output[0], ag2_output[0], state, self._reward(token_p, self._state['agent_2']['previous_state'], state)[0])
            if display:
                print('Agent 2:', decode(ag2_output[0]))
                print('----------------------------')  
    
    def update(self):
        agent = self._agent1
        state = self._parse_states('agent_1')
    
        _, loss_val = agent._sess.run([self._agent1_train, self._agent1.loss], feed_dict = {
            agent.inputs: state['inputs'],
            agent.inputs_length: state['inputs_length'],
            agent.previous_outputs: state['previous_outputs'],
            agent.previous_outputs_length: state['previous_outputs_length'],
            agent.responses: state['responses'],
            agent.responses_length: state['responses_length'],
            agent.rewards: state['discounted_rewards']
        })
        
        print('Agent 1 - loss:', loss_val, 'total reward:', np.sum(state['discounted_rewards']))
        
        agent = self._agent2
        state = self._parse_states('agent_2')
    
        _, loss_val = agent._sess.run([self._agent2_train, self._agent2.loss], feed_dict = {
            agent.inputs: state['inputs'],
            agent.inputs_length: state['inputs_length'],
            agent.previous_outputs: state['previous_outputs'],
            agent.previous_outputs_length: state['previous_outputs_length'],
            agent.responses: state['responses'],
            agent.responses_length: state['responses_length'],
            agent.rewards: state['discounted_rewards']
        })
        
        print('Agent 2 - loss:', loss_val, 'total reward:', np.sum(state['discounted_rewards']))
        
trainer = A2CTrainer([len(embeddings), 300], './models/gcloud_simple_agent_15/model.ckpt-19')

Inference
Embeddings: [9858, 300]
INFO:tensorflow:Restoring parameters from ./models/gcloud_simple_agent_15/model.ckpt-19
Inference
Embeddings: [9858, 300]
INFO:tensorflow:Restoring parameters from ./models/gcloud_simple_agent_15/model.ckpt-19


In [419]:
for dialog in corpus_filtered:
    trainer.reset()
    trainer.simulate([embed(dialog['turns'][0])], display=False)
    trainer.update()

Agent 1 - loss: 1.19111 total reward: 50.9938213825
Agent 2 - loss: 1.74954 total reward: 44.6959036589
Agent 1 - loss: 0.932775 total reward: 24.4135916233
Agent 2 - loss: 1.49049 total reward: 24.332575202
Agent 1 - loss: 1.1643 total reward: 46.1533446312
Agent 2 - loss: 0.420521 total reward: 29.7255089283
Agent 1 - loss: 0.893503 total reward: 19.075879097
Agent 2 - loss: 1.29521 total reward: 21.700063467
Agent 1 - loss: 1.45319 total reward: 24.724006772
Agent 2 - loss: 0.224982 total reward: 12.8042414784
Agent 1 - loss: 1.6543 total reward: 19.9990317822
Agent 2 - loss: 1.00361 total reward: 14.936003089
Agent 1 - loss: 1.14114 total reward: 22.4859746695
Agent 2 - loss: 1.17767 total reward: 18.1365824938
Agent 1 - loss: 1.4884 total reward: 44.5918121338
Agent 2 - loss: 1.02417 total reward: 41.6990408897
Agent 1 - loss: 1.49253 total reward: 42.5035976171
Agent 2 - loss: -0.518873 total reward: 5.13156783581
Agent 1 - loss: 1.11955 total reward: 35.2150957584
Agent 2 - loss

KeyboardInterrupt: 

In [421]:
token_ids, _, _ = trainer._agent1.run([embed('What are you doing?')], [[0]])
decode(token_ids[0])

"i 'm not ."

In [363]:
token_ps

2.0641787

In [340]:
sess = tf.InteractiveSession()
ps = tf.constant([[[0.1, 1.1, 2.2, 3.3, 4.4]]*5]*3, tf.float32)

chosen_tokens = tf.constant([
    [1, 3, 0, 0, 0],
    [1, 2, 3, 0, 0],
    [2, 4, 1, 2, 0],
], tf.int32)

batch_size, n_tokens, _ = tf.unstack(tf.shape(ps))

token_indices = tf.concat([
    tf.tile(tf.reshape(tf.range(batch_size), [batch_size, 1, 1]), [1, n_tokens, 1]),
    tf.tile(tf.reshape(tf.range(n_tokens), [1, n_tokens, 1]), [batch_size, 1, 1]),
    tf.expand_dims(chosen_tokens, -1)
], -1)


p = tf.gather_nd(ps, token_indices)


sess.run(tf.sequence_mask([1, 2, 4], 4))

array([[ True, False, False, False],
       [ True,  True, False, False],
       [ True,  True,  True,  True]], dtype=bool)

In [303]:
sess.run(ps)

array([[[ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.]],

       [[ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.]],

       [[ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.],
        [ 0.,  1.,  2.,  3.,  4.]]], dtype=float32)

In [274]:
token_ps = np.argmax(token_ps, axis=-1)

In [282]:
for i, response in enumerate(trainer._state['agent_1']['responses']):
    print(i, np.all(token_ps[i][:len(response)] == response))

0 True
1 True
2 True
3 True
4 True
5 True


In [239]:
agent = 'agent_1'

for i in range(len(trainer._state[agent]['responses'])):
    print('Previous output:', decode(trainer._state[agent]['previous_outputs'][i]))
    print('Input:', decode(trainer._state[agent]['inputs'][i]))
    print('Response:', decode(trainer._state[agent]['responses'][i]))
    print('Reward:', trainer._state[agent]['rewards'][i])
    print()

Previous output: 
Input: how are you today
Response: fine .
Reward: 1.20768195391

Previous output: fine .
Input: no harm has n't missed his books .
Response: no . let 's stick up .
Reward: 3.23449

Previous output: no . let 's stick up .
Input: no .
Response: 
Reward: 0.133586

Previous output: 
Input: push her burnt station .
Response: they did n't even notice it .
Reward: 1.7796

Previous output: they did n't even notice it .
Input: come on .
Response: but you ca n't move out night , <UNK> <UNK> alone in the next day , did n't he ?
Reward: 2.54831

Previous output: but you ca n't move out night , <UNK> <UNK> alone in the next day , did n't he ?
Input: shoot .
Response: do n't probably stop one . it 's probably a good story .
Reward: 2.19818



In [190]:
discount_rewards(ag1_rewards, discount_factor=0.8)

array([ 9.35192207,  9.36028142,  7.65724176,  9.40456923,  9.53121285,
        8.7286303 ,  8.16306739,  7.44175142,  7.95393311,  8.05174573,
        6.67757938,  6.43976879,  6.93575635,  5.41345495,  4.03126113,
        2.78128719])

In [146]:
print(trainer._reward([embed('What would you like to do tomorrow?')]))
print(trainer._reward([embed('How about dinner?')]))

[ 1.56754887]
[ 2.33131266]


In [130]:
trainer._previous_state = np.asarray([[1, 1, 1, 1]])
trainer._information_flow_reward(np.asarray([[1, 1, 1, 1]]))

array([-0.])

In [121]:
np.log(1.)

0.0

In [52]:
p[[0, 1], [2, 8]]

array([  1.31403327e-01,   5.40749334e-10], dtype=float32)

In [97]:
def samples_iterator(data, batch_size=64, step = 2):
    def turns_iterator():
        for dialog in data:
            for turn_idx in range(0, len(dialog)-1, 2):
                yield dialog[turn_idx], dialog[turn_idx+1], [0] if turn_idx == 0 else dialog[turn_idx-1]
        
    batch_inputs, batch_targets, batch_previous_outputs = [], [], []
    batch_input_lengths, batch_target_lengths, batch_previous_output_lengths = [], [], []
    for (inputs, targets, previous_outputs) in turns_iterator():
        batch_inputs.append(inputs)
        batch_targets.append(targets)
        batch_previous_outputs.append(previous_outputs)
        batch_input_lengths.append(len(inputs))
        batch_target_lengths.append(len(targets))
        batch_previous_output_lengths.append(len(previous_outputs))
        
        if len(batch_inputs) >= batch_size:
            yield {
                'inputs': pad_sequences(batch_inputs, min(50, max(batch_input_lengths))),
                'inputs_length': np.clip(batch_input_lengths, 0, 50).tolist(),
                'previous_outputs': pad_sequences(batch_previous_outputs, min(50, max(batch_previous_output_lengths))),
                'previous_outputs_length': np.clip(batch_previous_output_lengths, 0, 50).tolist(),
                'targets': pad_sequences(batch_targets, min(50, max(batch_target_lengths))),
                'targets_length': np.clip(batch_target_lengths, 0, 50).tolist(),
            }
            
            batch_inputs, batch_targets, batch_previous_outputs = [], [], []
            batch_input_lengths, batch_target_lengths, batch_previous_output_lengths = [], [], []

a = 0
for idx, batch in enumerate(samples_iterator(train_data)):
    a += 1
    po_len = batch['previous_outputs_length'][1]
#     assert np.all(batch['targets'][0][:po_len] == batch['previous_outputs'][1][:po_len])
    
    if len(batch['targets']) > 64:
        print(len(batch['targets']))
        raise ValueError
#     break

print(a)

139


In [91]:
with tf.Graph().as_default():
    trainer = CrossEntropyTrainer([len(embeddings), 300])
    embeddings_ph, embeddings_init_op = trainer.embeddings_initializer()
    
    train_op = tf.train.AdamOptimizer().minimize(trainer.loss)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(embeddings_init_op, feed_dict={embeddings_ph: embeddings})
        
        for idx, batch in enumerate(samples_iterator(train_data)):
            _, loss_val = sess.run([train_op, trainer.loss], feed_dict = {
                trainer.inputs: batch['inputs'],
                trainer.inputs_length: batch['inputs_length'],
                trainer.previous_outputs: batch['previous_outputs'],
                trainer.previous_outputs_length: batch['previous_outputs_length'],
                trainer.targets: batch['targets'],
                trainer.targets_length: batch['targets_length']
            })
            
#             break
            
            print(loss_val)
            

Embeddings: [9858, 300]
9.19813
9.14735
9.02763
8.8638
8.67223


KeyboardInterrupt: 

In [148]:
chatbot_graph = tf.Graph()

with chatbot_graph.as_default():
    inputs_ph = tf.placeholder(tf.int32, [1, None])
    inputs_length_ph = tf.placeholder(tf.int32, [1])
    previous_outputs_ph = tf.placeholder(tf.int32, [1, None])
    previous_outputs_length_ph = tf.placeholder(tf.int32, [1])
    
    chatbot = ChatbotAgent(
        inputs_ph, inputs_length_ph, previous_outputs_ph, previous_outputs_length_ph,
        word_embeddings_shape=[len(embeddings), 300], train=False,
        decoder_sampling_p=0.0
    )
    
    saver = tf.train.Saver(max_to_keep=None)

Inference
Embeddings: [9858, 300]


In [5]:
import nltk

def embed(text):
    return [word_dict.get(token, 2) for token in nltk.word_tokenize(str(text).lower())]

def decode(token_ids):
    return ' '.join([word_index[token_id] for token_id in token_ids])

In [47]:
corpus_filtered[8]

{'movie_id': 'm0',
 'turns': ["Listen, I know you hate having to sit home because I'm not Susie High School.",
  'Like you care.',
  "I do care. But I'm a firm believer in doing something for your own reasons, not someone else ' s .",
  "I wish I had that luxury. I'm the only sophomore that got asked to the prom and I can't go, because you won ' t."]}

In [149]:
with tf.Session(graph=chatbot_graph) as sess:
    saver.restore(sess, './models/gcloud_simple_agent_15/model.ckpt-19')
    
    previous_output = [0]
    message = embed('Can we make this quick?')
    print(message)

    previous_output = [0]
    while True:
        message = embed(input('> '))
        response_token_ids = sess.run(chatbot.decoder_token_ids, feed_dict={
            inputs_ph: [message],
            inputs_length_ph: [len(message)],
            previous_outputs_ph: [previous_output],
            previous_outputs_length_ph: [len(previous_output)]
        })
        previous_output = response_token_ids[0]
        print(decode(response_token_ids[0]))

INFO:tensorflow:Restoring parameters from ./models/gcloud_simple_agent_15/model.ckpt-19
[44, 36, 110, 23, 994, 39]
> Can we make this quick?
got ta catch 'em , ray . <EOS>
> who?
fucker . <EOS>
> aha
pig days . <EOS>
> that suck
'cause he 's dead . <EOS>
> who killed him?
he 's a <UNK> . he 's in a room . he 's a <UNK> . he 's a <UNK> . he 's a <UNK> . he 's a <UNK> . he 's a <UNK> . he 's a <UNK> . he 's a <UNK> . he 's gon na be
> okay
<UNK> , too . <EOS>


KeyboardInterrupt: 

In [20]:
with tf.Graph().as_default():
    with tf.Session() as sess:
        trainer = CrossEntropyTrainer([len(embeddings), 300])
        
        seq_lens = tf.constant([1, 3, 7, 2, 10], tf.int32)
        data = tf.sequence_mask(seq_lens, 10, dtype=tf.int32) * 2
        
        seq_lens, data = sess.run([seq_lens, data])
        
        targets, loss_targets = sess.run([trainer._padded_targets, trainer.loss_targets], feed_dict={
            trainer.targets: data,
            trainer.targets_length: seq_lens
        })
        
        print(targets)
        print(loss_targets)

Embeddings: [9858, 300]
[[0 2 1 0 0 0 0 0 0 0 0 0]
 [0 2 2 2 1 0 0 0 0 0 0 0]
 [0 2 2 2 2 2 2 2 1 0 0 0]
 [0 2 2 1 0 0 0 0 0 0 0 0]
 [0 2 2 2 2 2 2 2 2 2 2 1]]
[[2 1 0 0 0 0 0 0 0 0 0 0]
 [2 2 2 1 0 0 0 0 0 0 0 0]
 [2 2 2 2 2 2 2 1 0 0 0 0]
 [2 2 1 0 0 0 0 0 0 0 0 0]
 [2 2 2 2 2 2 2 2 2 2 1 0]]


In [51]:
sess = tf.InteractiveSession()

In [55]:
seq_lens = tf.constant([0, 1, 0, 0, 1], tf.int32)

data = tf.ones([5, 5], dtype=tf.int32)

sess.run(tf.expand_dims(seq_lens, -1) * data)



array([[0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1]], dtype=int32)