In [1]:
import numpy as np
import json

In [2]:
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.contrib import seq2seq
from tensorflow.python.layers.core import Dense

In [3]:
import string

CHARACTERS = list(string.ascii_lowercase + string.digits + string.punctuation)

In [124]:
def temporal_rnn(cell, inputs, rates, sequence_length=None, initial_state=None, dtype=None, time_major=False, parallel_iterations=32, swap_memory=False):
    if not time_major:
        inputs = tf.transpose(inputs, [1,0,2])
    
    _, _, input_size = inputs.get_shape().as_list()
    max_time, batch_size, _ = tf.unstack(tf.shape(inputs))
    
    if sequence_length is None:
        sequence_length = tf.ones([batch_size], dtype=tf.int32) * max_time
    
    inputs_ta = tf.TensorArray(tf.float32, max_time).unstack(inputs)
    outputs_ta = tf.TensorArray(tf.float32, max_time)
    
    zero_output = tf.zeros([max_time, batch_size, cell.output_size], dtype=dtype)
    
    def _get_initial_state():
        if initial_state is None:
            assert dtype is not None
            return cell.zero_state(batch_size, dtype)
        return initial_state
    
    def _time_step(time, output_ta, state):
        input_t = inputs_ta.read(time)
        
        call_cell = lambda: cell(input_t, state)
        
        if sequence_length is None:
            output, new_state = call_cell()
        else:
            output, new_state = call_cell()
        
        output_ta.write(time, output)

        return time+1, output_ta, new_state
    
    state = _get_initial_state()
    time = tf.constant(0, dtype=tf.int32, name='time')
    
    _, outputs_ta, final_state = tf.while_loop(
        cond = lambda time, *_: time < max_time,
        body = _time_step,
        loop_vars = (time, outputs_ta, state),
        parallel_iterations = parallel_iterations,
        swap_memory = swap_memory
    )
    
    return outputs_ta.stack(), final_state

with tf.Graph().as_default():
    inputs_ph = tf.placeholder(tf.float32, [64, 12, 300])
    
    cell = rnn.MultiRNNCell([
        rnn.GRUCell(300),
        rnn.GRUCell(300),
        rnn.GRUCell(300),
    ])
    
    outputs, state = temporal_rnn(
        cell,
        inputs_ph,
        [4, 2, 1],
        time_major=True,
        dtype=tf.float32
    )
    
    print(outputs, state)

Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x109f49470>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
Tensor("TensorArrayStack/TensorArrayGatherV3:0", shape=(?, 12, 300), dtype=float32) (<tf.Tensor 'while/Exit_2:0' shape=(?, 300) dtype=float32>, <tf.Tensor 'while/Exit_3:0' shape=(?, 300) dtype=float32>, <tf.Tensor 'while/Exit_4:0' shape=(?, 300) dtype=float32>)


In [30]:
def add_pad_eos(indices, sequence_length, eos_token = 1, pre_pad = True):
    batch_size, max_length = tf.unstack(tf.shape(indices))
    
    pad = tf.zeros([batch_size, 1], dtype=tf.int32)
    if pre_pad:
        eos = tf.one_hot(sequence_length+1, max_length+2, dtype=tf.int32) * eos_token
        return tf.concat([pad, indices, pad], 1) + eos
    else:
        eos = tf.one_hot(sequence_length, max_length+2, dtype=tf.int32) * eos_token
        return tf.concat([indices, pad, pad], 1) + eos

class Agent(object):
    
    def __init__(self, previous_state, inputs, inputs_length, previous_output, previous_output_length, query_result_state, query_result_slots, query_result_values, query_result_slots_count, query_result_values_length, word_embeddings_shape, n_slots, n_actions, decoder_targets=None, decoder_targets_length=None, decoder_sampling_p=0.0, trainable_embeddings=True, hidden_size=300, dropout=0.0):
        super(Agent, self).__init__()
        
        # Inputs
        self._previous_state = previous_state
        self._inputs = inputs
        self._inputs_length = inputs_length
        self._previous_output = previous_output
        self._previous_output_length = previous_output_length
        
        # Query result
        self._query_result_state = query_result_state
        self._query_result_slots = query_result_slots
        self._query_result_values = query_result_values
        self._query_result_slots_count = query_result_slots_count
        self._query_result_values_length = query_result_values_length
            
        # Decoder targets (only for training)
        self._decoder_targets = decoder_targets
        self._decoder_targets_length = decoder_targets_length
        self._decoder_sampling_p = decoder_sampling_p
        
        # Conf
        self._hidden_size = int(hidden_size)
        self._word_embeddings_shape = list(word_embeddings_shape)
        self._n_slots = int(n_slots)
        self._n_actions = int(n_actions)
        self._n_query_states = 3
        self._trainable_embeddings = bool(trainable_embeddings)
        self._dropout = dropout
        tf.summary.scalar('dropout', self._dropout)

        print('Agent(hidden_size={0}, n_slots={1}, n_actions={2})'.format(self._hidden_size, self._n_slots, self._n_actions))
        
        # Build
        self._embeddings_module()
        self._input_encoder()
        self._slot_parser()
        self._query_result_encoder()
        self._policy()
        self._response_generator()
        
        self._saver_ops()
        
    @property
    def policy_state_size(self):
        return 2*self._hidden_size
        
    def _saver_ops(self):
        self.saver = tf.train.Saver(max_to_keep=None)
        
    def _embeddings_module(self):
        with tf.name_scope('embeddings'):
            self._word_embeddings = tf.Variable(tf.zeros(self._word_embeddings_shape), trainable=self._trainable_embeddings, name='word_embeddings')
            self._slot_embeddings = tf.Variable(tf.zeros([self._n_slots, self._word_embeddings_shape[1]]), trainable=True, name='slot_embeddings')
            
            self._inputs_embedded = tf.nn.embedding_lookup(self._word_embeddings, self._inputs)
            self._previous_output_embedded = tf.nn.embedding_lookup(self._word_embeddings, self._previous_output)
            
            self._query_result_slots_embedded = tf.nn.embedding_lookup(self._slot_embeddings, self._query_result_slots)
            self._query_result_values_embedded = tf.nn.embedding_lookup(self._word_embeddings, self._query_result_values)
            
    def _rnn_cell(self, size=None, activation=None, dropout=None, residual=False):
        cell = rnn.GRUCell((size or self._hidden_size), activation=activation)

        if residual:
            cell = rnn.ResidualWrapper(cell)

        if dropout is not None:
            cell = rnn.DropoutWrapper(cell, input_keep_prob=(1.0 - dropout))

        return cell
        
    def _text_encoder(self, inputs, inputs_length, scope='text_encoder', reuse=False):
        with tf.variable_scope(scope, reuse=reuse):
            _outputs, _state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = self._rnn_cell(activation=tf.nn.tanh),
                cell_bw = self._rnn_cell(activation=tf.nn.tanh),
                inputs = inputs,
                sequence_length = inputs_length,
                dtype = tf.float32
            )

        return (
            tf.concat(_outputs, -1),
            tf.concat(_state, -1)
        )
 
    def _input_encoder(self):
        with tf.name_scope('inputs_encoder'):            
            (self._inputs_encoder_outputs,
             self._inputs_encoder_state) = self._text_encoder(self._inputs_embedded, self._inputs_length)
            (self._previous_output_encoder_outputs,
             self._previous_output_encoder_state) = self._text_encoder(self._previous_output_embedded, self._previous_output_length, reuse=True)
            
    def _slot_parser(self):
        with tf.variable_scope('slot_parser'):
            # Project text encoder state
            e_inputs = tf.layers.dense(
                self._inputs_encoder_outputs,
                self._hidden_size,
                activation = tf.nn.tanh,
                name = 'text_encoder_outputs_projection'
            )
            e_previous_output = tf.layers.dense(
                self._previous_output_encoder_outputs,
                self._hidden_size,
                activation = tf.nn.tanh,
                name = 'text_encoder_outputs_projection',
                reuse = True
            )
            
            # Compare inputs with agent's previous output
            e = tf.matmul(e_inputs, e_previous_output, transpose_b=True, name='e')
            beta = tf.matmul(tf.nn.softmax(e), self._previous_output_encoder_outputs)
            
            inputs_compared = tf.layers.dense(
                tf.concat([self._inputs_encoder_outputs, beta], 2),
                self._hidden_size,
                activation = tf.nn.tanh
            )
            
            # Final slot logits/probabilities
            self._slot_logits = tf.layers.dense(
                tf.layers.dropout(inputs_compared, rate=self._dropout),
                self._n_slots
            )
            self.slot_probabilities = tf.nn.softmax(self._slot_logits)
            self.slot_ids = tf.argmax(self._slot_logits, -1)

            # Slot (any)
            state_proj = tf.layers.dense(
                tf.concat([self._inputs_encoder_state, self._previous_output_encoder_state], -1),
                self._hidden_size,
                activation = tf.nn.tanh
            )
            
            self._slot_any_logits = tf.layers.dense(
                tf.layers.dropout(state_proj, rate=self._dropout),
                self._n_slots
            )
            self.slot_any_probabilites = tf.sigmoid(self._slot_any_logits)
            self.slot_any = tf.greater(self.slot_any_probabilites, .5)
    
    def _query_result_encoder(self):
        with tf.name_scope('query_result_encoder'):
            batch_size, n_slots, n_tokens = tf.unstack(tf.shape(self._query_result_values))
    
            _, _value_encoder_state = self._text_encoder(
                inputs = tf.reshape(self._query_result_values_embedded, [-1, n_tokens, self._word_embeddings_shape[1]]),
                inputs_length = tf.reshape(self._query_result_values_length, [-1]),
                reuse = True
            )
        
            query_result_slot_value = tf.concat([
                self._query_result_slots_embedded,
                tf.reshape(_value_encoder_state, [batch_size, n_slots, 2*self._hidden_size])
            ], -1)
            
            _, self._query_result_encoder_state = tf.nn.dynamic_rnn(
                self._rnn_cell(activation=tf.nn.tanh, dropout=self._dropout),
                inputs = query_result_slot_value,
                sequence_length = self._query_result_slots_count,
                dtype = tf.float32
            )
        
    def _policy(self):
        with tf.name_scope('policy'):
            policy_context = tf.layers.dense(
                tf.concat([
                    self._inputs_encoder_state,
                    self._query_result_encoder_state,
                    tf.one_hot(self._query_result_state, self._n_query_states, dtype=tf.float32),
                ], -1),
                2*self.policy_state_size
            )

            policy_cell = rnn.MultiRNNCell([
                self._rnn_cell(self.policy_state_size, activation=tf.nn.tanh, dropout=self._dropout),
                self._rnn_cell(self.policy_state_size, activation=tf.nn.tanh, dropout=self._dropout),
                self._rnn_cell(self.policy_state_size, activation=tf.nn.tanh, dropout=self._dropout)
            ])

            self._policy_output, self.policy_state = policy_cell(
                policy_context,
                tuple([self._previous_state[i,:,:] for i in range(3)])
            )

            # Value
            value_l1 = tf.layers.dense(
                tf.layers.dropout(self._policy_output, rate=self._dropout),
                self._hidden_size,
                activation = tf.nn.tanh
            )
            self.value = tf.layers.dense(
                tf.layers.dropout(value_l1, rate=self._dropout),
                1
            )
            self.value = tf.squeeze(self.value, -1)

            # Action
            action_l1 = tf.layers.dense(
                tf.layers.dropout(self._policy_output, rate=self._dropout),
                self._hidden_size,
                activation = tf.nn.tanh
            )
            self._action_logits = tf.layers.dense(
                tf.layers.dropout(action_l1, rate=self._dropout),
                self._n_actions
            )
            self.action_probabilities = tf.nn.softmax(self._action_logits)
            self.action_ids = tf.argmax(self._action_logits, -1, output_type=tf.int32)
        
    def _response_generator(self):
        with tf.name_scope('response_generator'):
            batch_size, _ = tf.unstack(tf.shape(self._inputs))
            self._decoder_output_layer = Dense(self._word_embeddings_shape[0])
            
            if self._decoder_targets is not None:
                print('Training decoder helper.')
                
                decoder_targets_embedded = tf.nn.embedding_lookup(
                    self._word_embeddings,
                    add_pad_eos(self._decoder_targets, self._decoder_targets_length)
                )
                helper = seq2seq.ScheduledEmbeddingTrainingHelper(
                    inputs = decoder_targets_embedded,
                    sequence_length = (self._decoder_targets_length + 2),
                    embedding = self._word_embeddings,
                    sampling_probability = self._decoder_sampling_p
                )
                tf.summary.scalar('decoder_sampling_p', self._decoder_sampling_p)
                
            else:
                print('Inference decoder helper.')
                
                helper = seq2seq.GreedyEmbeddingHelper(
                    embedding = self._word_embeddings,
                    start_tokens = tf.tile([0], [batch_size]),
                    end_token = 1
                )
                
            decoder_cell, decoder_initial_state = self._decoder_cell()
            decoder = seq2seq.BasicDecoder(
                decoder_cell,
                helper = helper,
                initial_state = decoder_initial_state,
                output_layer = self._decoder_output_layer
            )
            
            decoder_outputs, _, _ = seq2seq.dynamic_decode(
                decoder = decoder,
                impute_finished = True
            )
            
            self._decoder_logits = decoder_outputs.rnn_output
            self.decoder_token_ids = tf.argmax(self._decoder_logits, -1, output_type=tf.int32)
            
    def _decoder_cell(self):
        batch_size, _ = tf.unstack(tf.shape(self._policy_output))

        attention = seq2seq.BahdanauAttention(
            num_units = 2*self._hidden_size,
            memory = self._inputs_encoder_outputs,
            memory_sequence_length = self._inputs_length
        )
        
        attentive_cell = seq2seq.AttentionWrapper(
            cell = self._rnn_cell(self.policy_state_size, activation=tf.nn.tanh),
            attention_mechanism = attention,
            attention_layer_size = 2*self._hidden_size,
            initial_cell_state = self._policy_output
        )

        cell = rnn.MultiRNNCell([
            attentive_cell,
            self._rnn_cell(self.policy_state_size, activation=tf.nn.tanh),
        ])

        initial_state = tuple([
            attentive_cell.zero_state(batch_size, tf.float32),
            self._policy_output
        ])

        return cell, initial_state


In [32]:
class Trainer(object):

    DILATATION_RATES = [4, 2, 1]
    
    def __init__(self, n_slots, n_actions, word_embeddings_shape, save_path, hidden_size=300, graph=None, batch_size=64):
        self._sess = tf.Session(graph=graph)
        self._save_path = save_path
        
        self._n_slots = n_slots
        self._n_actions = n_actions
        self._word_embeddings_shape = word_embeddings_shape
        self._hidden_size = hidden_size
        
        self._batch_size = batch_size
        
        self.reset()

    @property
    def states_memory_shape(self):
        return (4, 3, self._batch_size, 2*self._hidden_size)
    
    def _metrics_writers(self):
        print('Metrics path {0}/metrics/'.format(self._save_path))
        
        self._train_writer = tf.summary.FileWriter('{0}/metrics/train'.format(self._save_path), self._sess.graph)
        self._test_writer = tf.summary.FileWriter('{0}/metrics/test'.format(self._save_path), self._sess.graph)
        self._metrics_op = tf.summary.merge_all()
        
    def initialize_word_embeddings(self, embeddings):
        embeddings_ph = tf.placeholder(tf.float32, self._word_embeddings_shape)
        init_op = self.agent._word_embeddings.assign(embeddings_ph)
        
        return self._sess.run(init_op, feed_dict={embeddings_ph: embeddings})

    def save_checkpoint(self, step):
        print('Write checkpoint:', self.agent.saver.save(self._sess, '{0}/checkpoints/model.ckpt'.format(self._save_path), global_step=step))
    
    def reset(self):
        self._states_index = np.zeros(self._batch_size, dtype=np.int32)
        self._states_memory = np.zeros(self.states_memory_shape, dtype=np.float32)

        assert self._states_memory.shape == self.states_memory_shape
        print('Reset states:', self._states_memory.shape)

    def _reset_states(self, predicate):
        self._states_index[predicate] = 0
        self._states_memory[:,:,predicate] = np.zeros(self.states_memory_shape[3], dtype=np.float32)            

        assert self._states_memory.shape == self.states_memory_shape

    def _get_states(self):
        cell_states = []
        for cell_id, dilatation in enumerate(self.DILATATION_RATES):
            batch_states = []
            for batch_sample_id, location_get_id in enumerate(self._states_index % dilatation):
                batch_states.append(self._states_memory[location_get_id,cell_id,batch_sample_id,:])
            cell_states.append(batch_states)
        return np.asarray(cell_states)
        
    def _update_states(self, states):
        states = np.array(states, copy=True)

        for cell_id, dilatation in enumerate(self.DILATATION_RATES):
            location_update_id = (dilatation - 1) - self._states_index % dilatation
            self._states_memory[location_update_id,cell_id,:,:] = states[cell_id,:,:]

        self._states_index += 1
        assert self._states_memory.shape == self.states_memory_shape

class SupervisedTrainer(Trainer):
    
    def __init__(self, **kwargs):
        super(SupervisedTrainer, self).__init__(**kwargs)
        
        self.state = tf.placeholder(tf.float32, self.states_memory_shape[1:])
        self.inputs = tf.placeholder(tf.int32, [None, None])
        self.inputs_length = tf.placeholder(tf.int32, [None])
        self.previous_output = tf.placeholder(tf.int32, [None, None])
        self.previous_output_length = tf.placeholder(tf.int32, [None])

        self.query_result_state = tf.placeholder(tf.int32, [None])
        self.query_result_slots = tf.placeholder(tf.int32, [None, None])
        self.query_result_values = tf.placeholder(tf.int32, [None, None, None])
        self.query_result_slots_count = tf.placeholder(tf.int32, [None])
        self.query_result_values_length = tf.placeholder(tf.int32, [None, None])
        
        self.slot_targets = tf.placeholder(tf.int32, [None, None])
        self.slot_any_targets = tf.placeholder(tf.int32, [None, None])
        self.action_targets = tf.placeholder(tf.int32, [None])
        self.value_targets = tf.placeholder(tf.float32, [None])
        self.targets = tf.placeholder(tf.int32, [None, None])
        self.targets_length = tf.placeholder(tf.int32, [None])
        
        self._decoder_sampling_p = tf.placeholder(tf.float32, [])
        self._loss_mixture_weights = tf.placeholder(tf.float32, [None])
        self._dropout = tf.placeholder(tf.float32, [])
        
        self.agent = Agent(
            self.state, self.inputs, self.inputs_length, self.previous_output, self.previous_output_length, self.query_result_state,
            self.query_result_slots, self.query_result_values, self.query_result_slots_count, self.query_result_values_length,
            word_embeddings_shape = self._word_embeddings_shape,
            n_slots = self._n_slots, n_actions = self._n_actions,
            decoder_targets = self.targets, decoder_targets_length = self.targets_length, decoder_sampling_p = self._decoder_sampling_p,
            hidden_size = self._hidden_size, dropout=self._dropout
        )
        
        self._loss()
        self._optimizer()
        self._metrics_writers()
        print('Dilatation rates:', self.DILATATION_RATES)
        
    def _loss(self):
        is_speak_sample = tf.cast(tf.equal(self.action_targets, 1), tf.float32)
        padded_targets = add_pad_eos(self.targets, self.targets_length, pre_pad=False)
        
        # Decoder
        stepwise_ce = tf.nn.softmax_cross_entropy_with_logits(
            logits = self.agent._decoder_logits,
            labels = tf.one_hot(padded_targets, self._word_embeddings_shape[0])
        )
        stepwise_ce *= tf.sequence_mask(self.targets_length+2, dtype=tf.float32)
        
        self.decoder_loss = tf.reduce_sum((tf.reduce_sum(stepwise_ce, -1) / tf.cast(self.targets_length+2, tf.float32)) * is_speak_sample) / tf.reduce_sum(is_speak_sample)
        self.decoder_accuracy = tf.reduce_mean(tf.cast(tf.equal(padded_targets, self.agent.decoder_token_ids), tf.float32))
        
        # Slot parser
        slotwise_ce = tf.nn.softmax_cross_entropy_with_logits(
            logits = self.agent._slot_logits,
            labels = tf.one_hot(self.slot_targets, self._n_slots)
        )
        slotwise_ce *= tf.sequence_mask(self.inputs_length, dtype=tf.float32)
        
        self.slots_loss = tf.reduce_sum((tf.reduce_sum(slotwise_ce, -1) / tf.cast(self.inputs_length, tf.float32)) * is_speak_sample) / tf.reduce_sum(is_speak_sample)
        self.slots_accuracy = tf.reduce_mean(tf.cast(tf.equal(self.slot_targets, tf.argmax(self.agent.slot_probabilities, -1, output_type=tf.int32)), tf.float32))

        # Slot states
        slot_any_onehot_targets = tf.reduce_sum(tf.one_hot(self.slot_any_targets, self._n_slots), 1)
        slot_any_ce = tf.nn.sigmoid_cross_entropy_with_logits(
            logits = self.agent._slot_any_logits,
            labels = slot_any_onehot_targets
        )
        
        self.slot_any_loss = tf.reduce_mean(slot_any_ce)
        self.slot_any_accuracy = tf.reduce_mean(tf.cast(tf.equal(slot_any_onehot_targets, tf.cast(self.agent.slot_any, tf.float32)), tf.float32))
        
        # Policy
        action_ce = tf.nn.softmax_cross_entropy_with_logits(
            logits = self.agent._action_logits,
            labels = tf.one_hot(self.action_targets, self._n_actions)
        )

        self.action_loss = tf.reduce_mean(action_ce)
        self.action_accuracy = tf.reduce_mean(tf.cast(tf.equal(self.action_targets, self.agent.action_ids), tf.float32))

        # Value
        self.value_loss = tf.losses.mean_squared_error(
            labels = self.value_targets,
            predictions = self.agent.value
        )
        
        # Total Loss
        self.loss = tf.reduce_sum(tf.multiply(
            tf.stack([self.decoder_loss, self.slots_loss, self.slot_any_loss, self.action_loss, self.value_loss]),
            self._loss_mixture_weights
        ))
        
        # Metrics
        tf.summary.scalar('decoder_loss', self.decoder_loss)
        tf.summary.scalar('decoder_accuracy', self.decoder_accuracy)
        tf.summary.scalar('slots_loss', self.slots_loss)
        tf.summary.scalar('slots_accuracy', self.slots_accuracy)
        tf.summary.scalar('slot_any_loss', self.slot_any_loss)
        tf.summary.scalar('slot_any_accuracy', self.slot_any_accuracy)
        tf.summary.scalar('action_loss', self.action_loss)
        tf.summary.scalar('action_accuracy', self.action_accuracy)
        tf.summary.scalar('value_loss', self.value_loss)
        tf.summary.scalar('loss', self.loss)
        
    def _optimizer(self):
        self.train_op = tf.train.AdamOptimizer().minimize(self.loss)

    def _compute_decoder_sampling_p(self, e, z=0.125, k=0.1):
        return ((2*z) / (1 + np.exp(-k*e)) - z)
        
    def _feed_dict(self, e, batch, opts={}):
        fd = {
            self.state: self._get_states(),
            self.inputs: batch['inputs'],
            self.inputs_length: batch['inputs_length'],
            self.previous_output: batch['previous_output'],
            self.previous_output_length: batch['previous_output_length'],
            self.targets: batch['targets'],
            self.targets_length: batch['targets_length'],
            self.slot_targets: batch['slot_targets'],
            self.slot_any_targets: batch['slot_any_targets'],
            self.action_targets: batch['action_targets'],
            self.value_targets: batch['value_targets'],
            self.query_result_state: batch['query_result_state'],
            self.query_result_slots: batch['query_result']['slots'],
            self.query_result_values: batch['query_result']['values'],
            self.query_result_slots_count: batch['query_result']['slots_count'],
            self.query_result_values_length: batch['query_result']['values_length'],
            self._loss_mixture_weights: [1., 1., 1., 1., 1.],
            self._decoder_sampling_p: self._compute_decoder_sampling_p(e)
        }

        for opt, val in opts.items():
            fd[opt] = val

        return fd
        
    def train_batch(self, e, i, batch):
        self._reset_states(batch['reset_state'])
        
        _, new_states, metrics_val = self._sess.run(
            [self.train_op, self.agent.policy_state, self._metrics_op],
            feed_dict=self._feed_dict(e, batch, {self._dropout: 0.3})
        )

        if i % 20 == 0:
            self._train_writer.add_summary(metrics_val)
        self._update_states(new_states)

    def test_batch(self, e, i, batch):
        self._reset_states(batch['reset_state'])
        
        new_states, metrics_val = self._sess.run(
            [self.agent.policy_state, self._metrics_op],
            feed_dict=self._feed_dict(e, batch, {self._dropout: 0.0})
        )

        if i % 20 == 0:
            self._test_writer.add_summary(metrics_val)
        self._update_states(new_states)
        
with tf.Graph().as_default():
    SupervisedTrainer(n_slots=5, n_actions=4, word_embeddings_shape=[10000, 300], save_path='./mdata')

Reset states: (4, 3, 64, 600)
Agent(hidden_size=300, n_slots=5, n_actions=4)
Training decoder helper.
Metrics path ./mdata/metrics/
Dilatation rates: [4, 2, 1]


### Load data

In [172]:
word_dict = json.load(open('./data/frames_v2/word_dictionary.json', 'r'))
embeddings = np.asarray(json.load(open('./data/frames_v2/embeddings.json', 'r')))
slots_dict = json.load(open('./data/frames_v2/slots_dictionary.json', 'r'))
actions_dict = json.load(open('./data/frames_v2/actions_dictionary.json', 'r'))
action_frequencies = json.load(open('./data/frames_v2/action_frequencies.json'))

samples = json.load(open('./data/frames_v2/embedded_frames.json'))

In [28]:
action_weights = np.zeros(len(actions_dict))
for action, freq in action_frequencies.items():
    action_weights[actions_dict[action]] = 1. - freq
action_weights

array([ 0.        ,  0.33606191,  0.7744456 ,  0.9384581 ,  0.95103438])

In [132]:
samples[0][0]['slot_states']

[0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1]

#### Train

In [178]:
from jamesbot.utils.padding import pad_sequences
import random

def pad_complex(struct, max_keys_len, max_values_len, pad_value=0, shuffle=True):
    '''
    :param struct: Dictionary of arrays. 
    :returns: Vector of slot indices, matrix of value indices, number of slots and a vector of value lengths.
    '''
    if len(struct) == 0:
        return (
            np.zeros(shape=(max_keys_len), dtype=np.int32),
            np.zeros(shape=(max_keys_len, max_values_len), dtype=np.int32)
        )
    
    struct_items = list(struct.items())
    if shuffle == True:
        random.shuffle(struct_items)
        
    keys, values = [], []
    for (key, value) in struct_items:
        keys.append(key)
        values.append(value)
    if len(keys) < max_keys_len:
        keys += [pad_value]*(max_keys_len-len(keys))
        values += [[pad_value]]*(max_keys_len-len(values))
    
    return (
        np.array(keys).astype(int),
        pad_sequences(values, max_values_len)
    )

def pad_array_of_complex(structs):
    '''
    :param structs: An array of structs
    :returns: Padded keys, values, struct sizes and value lengths
    '''
    key_counts = [len(struct) for struct in structs]
    max_keys_count = np.clip(max(key_counts), 1, 15)
    
    value_lengths = [[len(value) for (_, value) in struct.items()] for struct in structs]
    max_values_length = np.clip(max([(max(lens) if len(lens) > 0 else 0) for lens in value_lengths]), 1, 20)
    
    if len(structs) == 0:
        return (
            np.zeros(shape=(1,max_keys_count), dtype=np.int32),
            np.zeros(shape=(1,max_keys_count,max_values_length), dtype=np.int32),
            np.zeros(shape=(1,), dtype=np.int32),
            np.zeros(shape=(1,max_keys_count), dtype=np.int32)
        )
    
    keys_padded, values_padded = [], []
    for struct in structs:
        keys, values = pad_complex(struct, max_keys_count, max_values_length)
        keys_padded.append(keys)
        values_padded.append(values)
    
    keys_padded = np.array(keys_padded, dtype=np.int32)
    values_padded = np.array(values_padded, dtype=np.int32)
    
    for i in range(len(structs)):
        if len(value_lengths[i]) < max_keys_count:
            value_lengths[i] += [0]*(max_keys_count-len(value_lengths[i]))
    
    return {
        'slots': keys_padded,
        'values': values_padded,
        'slots_count': np.array(key_counts, dtype=np.int32),
        'values_length': np.array(value_lengths, dtype=np.int32)
    }


class SamplesIterator(object):
    
    def __init__(self, samples, batch_size=64, max_sequence_len=50):
        self._samples = samples
        
        self._batch_size = batch_size
        self._max_sequence_len = max_sequence_len
        
    def _next_batch(self):
        for i in range(self._batch_size):
            dialog_id = self._dialog_indices[i]
            turn_id = self._turn_indices[i]
            
            if len(self._samples[dialog_id]) > turn_id:
                yield dict(self._samples[dialog_id][turn_id], reset_state=False)
                
                self._turn_indices[i] += 1
            else:
                if len(self._samples) == self._next_dialog_idx:
                    continue
                    
                yield dict(self._samples[self._next_dialog_idx][0], reset_state=True)
                
                self._dialog_indices[i] = self._next_dialog_idx
                self._turn_indices[i] = 0
                self._next_dialog_idx += 1
                
    def _reset(self):
        self._dialog_indices = list(range(self._batch_size))
        self._next_dialog_idx = self._batch_size
        self._turn_indices = [0]*self._batch_size
                
    def batches(self):
        self._reset()

        while True:
            reset_state = []
            inputs, inputs_length, slot_targets = [], [], []
            previous_output, previous_output_length = [], []
            targets, targets_length = [], []
            action_targets, query_result_states, query_results = [], [], []
            slot_any_targets, slot_any_target_lengths = [], []
            values = []
            
            for batch_sample in self._next_batch():
                reset_state.append(batch_sample['reset_state'])
                inputs.append(batch_sample['token_ids'])
                inputs_length.append(len(batch_sample['token_ids']))
                slot_targets.append(batch_sample['token_slot_ids'])
                previous_output.append(batch_sample['previous_response_delexicalized_token_ids'])
                previous_output_length.append(len(batch_sample['previous_response_delexicalized_token_ids']))
                targets.append(batch_sample['next_response_delexicalized_token_ids'])
                targets_length.append(len(batch_sample['next_response_delexicalized_token_ids']))
                action_targets.append(batch_sample['next_action']),
                query_result_states.append(batch_sample['query_state'])
                query_results.append(batch_sample['query_result'])
                values.append(batch_sample['was_booked'])
                slot_any_targets.append(batch_sample['slot_any'])
                slot_any_target_lengths.append(len(batch_sample['slot_any']))
            
            if len(inputs) != self._batch_size:
                return
            
            max_inputs_length = np.clip(max(inputs_length), 1, self._max_sequence_len)
            max_previous_output_length = np.clip(max(previous_output_length), 1, self._max_sequence_len)
            max_targets_length = np.clip(max(targets_length), 1, self._max_sequence_len)
            max_slot_any_length = max(slot_any_target_lengths)
            
            yield {
                'reset_state': reset_state,
                'inputs': pad_sequences(inputs, max_inputs_length),
                'inputs_length': np.clip(inputs_length, 1, self._max_sequence_len),
                'slot_targets': pad_sequences(slot_targets, max_inputs_length),
                'previous_output': pad_sequences(previous_output, max_previous_output_length),
                'previous_output_length': np.clip(previous_output_length, 1, self._max_sequence_len),
                'targets': pad_sequences(targets, max_targets_length),
                'targets_length': np.clip(targets_length, 1, self._max_sequence_len),
                'action_targets': action_targets,
                'query_result_state': query_result_states,
                'query_result': pad_array_of_complex(query_results),
                'value_targets': values,
                'slot_any_targets': pad_sequences(slot_any_targets, max_slot_any_length, -1)
            }

sess = tf.InteractiveSession()
iterator = SamplesIterator(samples)
for i, batch in enumerate(iterator.batches()):
    print(batch['slot_any_targets'])
    break
#     print(batch['targets'].shape)
#     print(max(batch['targets_length']))
    if max(batch['targets_length']) != batch['targets'].shape[1]:
        raise ValueError('Not match.')

[[-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [11  4]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [ 1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [ 2 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]
 [-1 -1]]


In [115]:
with tf.Graph().as_default():
    trainer = SupervisedTrainer(n_slots=len(slots_dict), n_actions=len(actions_dict), word_embeddings_shape=embeddings.shape, save_path='./data')
    iterator = SamplesIterator(samples)
    
    trainer._sess.run(tf.global_variables_initializer())
    trainer.initialize_word_embeddings(embeddings)
    
    for i, batch in enumerate(iterator.batches()):
        print(i, trainer.train_batch(i, batch))

Reset states: (64, 600)
Agent(hidden_size=300, n_slots=12, n_actions=10)
Training decoder helper.
Metrics path ./data/metrics/
0 6.59309
1 6.36613
2 5.94242
3 5.59887
4 5.61605
5 5.60507
6 5.56894
7 5.58158
8 5.53801
9 5.32534
10 5.47998
11 5.49389
12 5.46485
13 5.41271
14 5.36853
15 5.395
16 5.48433
17 5.38326
18 5.27748
19 5.29896
20 5.24432
21 5.41015
22 5.32368
23 5.13847
24 5.13333
25 5.24538
26 5.12053
27 5.14955
28 5.11167
29 5.10806
30 5.06245
31 5.01074
32 4.95219
33 5.04302
34 5.21646
35 5.24043
36 5.12876
37 5.15678
38 5.10059
39 5.21341
40 4.97453
41 5.15807
42 5.2777
43 5.23047
44 5.24202
45 5.15746
46 5.14154
47 5.10926
48 5.1259
49 5.15923
50 4.98816
51 4.96738
52 4.81295
53 4.80407
54 4.84855
55 4.90375
56 5.07379
57 5.21744
58 5.21676
59 5.10267
60 5.25777
61 5.21309
62 5.05522
63 5.03737
64 5.00446
65 5.0099
66 5.00137
67 4.9684
68 4.94365
69 5.0175
70 4.76601
71 5.07694
72 5.20457
73 5.16069
74 5.01017
75 4.89955
76 5.2051
77 5.1509
78 4.99855


KeyboardInterrupt: 

In [60]:
a = np.zeros((3, 12, 5))
z = np.ones((3, 12, 5)) * 5
b = np.zeros(12)

for _ in range(40):
    for i, d in enumerate([1, 2, 4]):
        pred = b % d == 0
        a[i,pred,:] += z[i,pred,:]
#         print(i, d, pred)
    b += 1
a

array([[[ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.],
        [ 200.,  200.,  200.,  200.,  200.]],

       [[ 100.,  100.,  100.,  100.,  100.],
        [ 100.,  100.,  100.,  100.,  100.],
        [ 100.,  100.,  100.,  100.,  100.],
        [ 100.,  100.,  100.,  100.,  100.],
        [ 100.,  100.,  100.,  100.,  100.],
        [ 100.,  100.,  100.,  100.,  100.],
        [ 100.,  100.,  100.,  100.,  100.],
        [ 100.,  100.,  100.,  100.,  100.],
        [ 100.,  100.,  100.,  100.,  100.],
        [ 100.,  100.,  100.,  100.,  100.],
        

In [84]:
dilatation = 4
a = ['a', 'b', 'c', 'd']

for d in range(12):
    update_id = (dilatation-1) - d % dilatation
    get_id = d % dilatation
    a[update_id] += a[update_id][-1]
    print('Step:', d, 'Update:', update_id, 'Get:', get_id, a[get_id])
    print()

a

Step: 0 Update: 3 Get: 0 a

Step: 1 Update: 2 Get: 1 b

Step: 2 Update: 1 Get: 2 cc

Step: 3 Update: 0 Get: 3 dd

Step: 4 Update: 3 Get: 0 aa

Step: 5 Update: 2 Get: 1 bb

Step: 6 Update: 1 Get: 2 ccc

Step: 7 Update: 0 Get: 3 ddd

Step: 8 Update: 3 Get: 0 aaa

Step: 9 Update: 2 Get: 1 bbb

Step: 10 Update: 1 Get: 2 cccc

Step: 11 Update: 0 Get: 3 dddd



['aaaa', 'bbbb', 'cccc', 'dddd']