In [38]:
import tensorflow as tf
import numpy as np

In [39]:
def masked_softmax(values, lengths, time_major=False, mask_value=-np.inf):
    '''
    Softmax along the sequence axis with positions past each sequence length masked out.

    :param values: Scores to normalize. The sequence axis is 0 when time_major=True and the
        last axis otherwise. Must be broadcastable against the mask built below
        ([num_steps x batch_size x 1] for time_major, [batch_size x 1 x num_steps] otherwise).
    :param lengths: Number of valid positions per sequence. Shape: [batch_size]
    :param time_major: Selects which axis of values is the sequence axis (see above).
    :param mask_value: Score written into masked positions before the softmax. Use -inf or a
        large negative number; a value close to zero does NOT suppress masked positions.
    :return: Softmax of the masked scores along the sequence axis.
    '''
    with tf.name_scope('MaskedSoftmax'):
        axis = 0 if time_major else -1

        # Size the mask from values instead of max(lengths): when values is padded beyond the
        # longest sequence in the batch the two would otherwise fail to broadcast.
        max_len = tf.shape(values)[axis]
        mask = tf.sequence_mask(lengths, max_len, dtype=tf.float32)

        if time_major:
            # [batch_size x num_steps] -> [num_steps x batch_size x 1]
            mask = tf.expand_dims(tf.transpose(mask), -1)
        else:
            # [batch_size x num_steps] -> [batch_size x 1 x num_steps]
            mask = tf.expand_dims(mask, -2)

        inf_mask = (1 - mask) * mask_value
        # With an infinite mask_value the valid positions evaluate to 0 * inf = NaN; reset them to 0.
        inf_mask = tf.where(tf.is_nan(inf_mask), tf.zeros_like(inf_mask), inf_mask)

        return tf.nn.softmax(tf.multiply(values, mask) + inf_mask, axis)

class InputPipeline(object):
    '''
    Queue based input pipeline that reads serialized SequenceExamples from TFRecord files
    and produces length-bucketed, padded batches.
    '''

    def __init__(self, filenames, batch_size=32, n_epochs=50, capacity=1e4):
        '''
        :param filenames: List of TFRecord file paths to read.
        :param batch_size: Number of examples per batch.
        :param n_epochs: Number of passes over the files before the queue is exhausted.
        :param capacity: Capacity used for both the filename queue and the bucketing queues.
        '''
        self._filenames = filenames
        self._batch_size = batch_size
        self._n_epochs = n_epochs
        self._capacity = capacity

        print('InputPipeline - batch_size:', self._batch_size, 'n_epochs:', self._n_epochs, 'capacity:', self._capacity)

    def _queue_reader(self):
        '''
        Reads one serialized example from the file queue and parses it.

        :return: (context_features, sequence_features) dictionaries as returned by
            tf.parse_single_sequence_example.
        '''
        with tf.device('/cpu:0'):
            file_queue = tf.train.string_input_producer(
                self._filenames,
                num_epochs = self._n_epochs,
                # The default capacity is a float (1e4); queue sizes are integral.
                capacity = int(self._capacity)
            )

            reader = tf.TFRecordReader()
            _, serialized_example = reader.read(file_queue)

            return tf.parse_single_sequence_example(
                serialized_example,
                context_features = {
                    'input_seq': tf.VarLenFeature(tf.int64),
                    'target_seq': tf.VarLenFeature(tf.int64),
                    'input_seq_len': tf.FixedLenFeature([], tf.int64),
                    'target_seq_len': tf.FixedLenFeature([], tf.int64),
                    'history_size': tf.FixedLenFeature([], tf.int64),
                    'history_seq_len': tf.VarLenFeature(tf.int64),
                },
                sequence_features = {
                    'history': tf.VarLenFeature(tf.int64)
                }
            )

    def inputs(self):
        '''
        Builds the batching ops.

        Examples are bucketed by the mean of their input and target lengths. Sequence tensors
        are densified, cast to int32 and transposed so that the batch axis comes second.

        :return: Tuple (input_seq, target_seq, history, input_seq_len, target_seq_len,
            history_size, history_seq_len). input_seq/target_seq are [num_steps x batch_size],
            history is [n_messages x batch_size x num_steps] and history_seq_len is
            [n_messages x batch_size]; the remaining tensors are [batch_size].
        '''
        with tf.device('/cpu:0'):
            context_features, sequence_features = self._queue_reader()

            # Bucketing key: mean of input and target length.
            bucket_key = tf.reduce_mean([
                tf.cast(context_features['input_seq_len'], tf.int32),
                tf.cast(context_features['target_seq_len'], tf.int32)
            ])

            _, (input_seq, output_seq, input_seq_len, output_seq_len, history, history_size, history_seq_len) = tf.contrib.training.bucket_by_sequence_length(
                bucket_key,
                [
                    context_features['input_seq'],
                    context_features['target_seq'],
                    context_features['input_seq_len'],
                    context_features['target_seq_len'],
                    sequence_features['history'],
                    context_features['history_size'],
                    context_features['history_seq_len']
                ],
                batch_size = self._batch_size,
                bucket_boundaries=[5, 10, 15, 20, 30, 40, 50],
                num_threads=8,
                capacity = int(self._capacity),
                allow_smaller_final_batch = True,
                dynamic_pad=True
            )

            return (
                tf.transpose(tf.cast(tf.sparse_tensor_to_dense(input_seq), tf.int32)),
                tf.transpose(tf.cast(tf.sparse_tensor_to_dense(output_seq), tf.int32)),
                tf.transpose(tf.cast(tf.sparse_tensor_to_dense(history), tf.int32), [1,0,2]),
                tf.cast(input_seq_len, tf.int32),
                tf.cast(output_seq_len, tf.int32),
                tf.cast(history_size, tf.int32),
                # Transposed to [n_messages x batch_size] so it lines up with `history`:
                # Seq2Seq._context_encoder flattens both tensors and pairs them element-wise,
                # which only matches when both use the same (message-major) layout.
                tf.transpose(tf.cast(tf.sparse_tensor_to_dense(history_seq_len), tf.int32)),
            )
        
class DenseLayer(object):
    '''
    Fully connected layer computing activation(inputs x W + b).

    The weight matrix is drawn from a truncated normal (stddev 0.1), the bias starts at zero
    and both variables get histogram summaries.
    '''

    def __init__(self, input_size, output_size, activation=None, name='DenseLayer'):
        '''
        :param input_size: Width of the last dimension of the inputs.
        :param output_size: Width of the produced outputs.
        :param activation: Optional callable applied to the linear output.
        :param name: Name scope under which the variables and summaries are created.
        '''
        self.input_size = input_size
        self.output_size = output_size
        self._activation = activation

        with tf.name_scope(name):
            initial_weights = tf.truncated_normal([self.input_size, self.output_size], mean=0, stddev=0.1)
            self._W = tf.Variable(initial_weights, dtype=tf.float32, name='W')

            initial_bias = tf.zeros([self.output_size])
            self._b = tf.Variable(initial_bias, dtype=tf.float32, name='b')

            tf.summary.histogram('weights', self._W)
            tf.summary.histogram('bias', self._b)

    def __call__(self, inputs):
        '''
        :param inputs: Rank 2 tensor. Shape: [batch_size x input_size]
        :return: Tensor of shape [batch_size x output_size]
        '''
        projected = tf.add(tf.matmul(inputs, self._W), self._b)

        if self._activation is None:
            return projected

        return self._activation(projected)

In [40]:
# Attention mechanisms

class _Attention(object):
    
    def __init__(self, memory, memory_len=None):
        '''
        Abstract attention mechanism class.
        
        :param memory: Memory tensor to query. Shape: [num_steps x batch_size x embedding_size]
        :param memory_len: Optional memory tensor length. Shape: [batch_size]
        '''
        
        assert len(memory.get_shape()) == 3, 'Memory must have rank 3. [num_steps x batch_size x embedding_size]'
        assert memory.get_shape()[-1] is not None, 'Last dimension of memory can not be None'
        
        self._memory = memory
        self._memory_len = memory_len
        self._memory_width = int(self._memory.get_shape()[-1])
        
    def __call__(self, query):
        raise NotImplementedError
    
    def _align_query_with_memory(self, query):
        num_steps, _, _ = tf.unstack(tf.shape(self._memory))
        
        return tf.concat([
            tf.tile(tf.expand_dims(query, 0), [num_steps,1,1]),
            self._memory
        ], 2)

        
class BahdanauAttention(_Attention):
    '''
    Attention that scores every memory step with a single linear projection of the
    concatenated [query, memory] pair and normalizes the scores over the steps.
    '''

    def __init__(self, memory, mask_value=-np.inf, **kwargs):
        '''
        :param memory: Memory tensor to query. Shape: [num_steps x batch_size x embedding_size]
        :param mask_value: Score given to steps beyond memory_len before the softmax.
        :param kwargs: Forwarded to _Attention (memory_len).
        '''
        super(BahdanauAttention, self).__init__(memory, **kwargs)
        self._mask_value = mask_value

        with tf.name_scope('BahdanauAttention'):
            initial_weights = tf.truncated_normal([self._memory_width*2, 1], mean=0, stddev=0.1)
            self._attention_W = tf.Variable(initial_weights, dtype=tf.float32, name='weights')
            self._attention_b = tf.Variable(tf.zeros([1]), dtype=tf.float32, name='bias')

            tf.summary.histogram('attention_w', self._attention_W)
            tf.summary.histogram('attention_b', self._attention_b)

    def _attention_weights(self, query):
        '''
        :param query: Query tensor. Shape: [batch_size x memory_width]
        :return: Weights normalized over the steps. Shape: [num_steps x batch_size x 1]
        '''
        num_steps, batch_size, _ = tf.unstack(tf.shape(self._memory))
        pair_width = self._memory_width*2

        # Flatten steps and batch so that all pairs are scored by one matmul.
        flat_pairs = tf.reshape(self._align_query_with_memory(query), [-1, pair_width])
        flat_scores = tf.add(tf.matmul(flat_pairs, self._attention_W), self._attention_b)
        scores = tf.reshape(flat_scores, [num_steps, batch_size, 1])

        if self._memory_len is None:
            return tf.nn.softmax(scores, 0)

        return masked_softmax(scores, self._memory_len, time_major=True, mask_value=self._mask_value)

    def __call__(self, query):
        '''
        :param query: Query tensor whose last dimension equals the memory width.
        :return: Weighted sum of the memory over the steps. Shape: [batch_size x memory_width]
        '''
        assert query.get_shape()[-1] == self._memory_width, 'Last dimension of query must have size %d' % (self._memory_width)

        step_weights = self._attention_weights(query)
        weighted_memory = tf.multiply(step_weights, self._memory)

        return tf.reduce_sum(weighted_memory, 0)
    

class GatedAttention(_Attention):
    '''
    Attention that gates every memory element with a sigmoid computed from the concatenated
    [query, memory] pair and sums the gated memory over the steps.
    '''

    def __init__(self, memory, residual=False, **kwargs):
        '''
        :param memory: Memory tensor to query. Shape: [num_steps x batch_size x embedding_size]
        :param residual: When True the query is added to the returned context.
        :param kwargs: Forwarded to _Attention (memory_len).
        '''
        super(GatedAttention, self).__init__(memory, **kwargs)
        self._residual = residual

        with tf.name_scope('GatedAttention'):
            initial_weights = tf.truncated_normal([self._memory_width*2, self._memory_width], mean=0, stddev=0.1)
            self._attention_W = tf.Variable(initial_weights, dtype=tf.float32, name='weights')
            self._attention_b = tf.Variable(tf.zeros([self._memory_width]), dtype=tf.float32, name='bias')

            tf.summary.histogram('attention_w', self._attention_W)
            tf.summary.histogram('attention_b', self._attention_b)

    def _attention_weights(self, query):
        '''
        :param query: Query tensor. Shape: [batch_size x memory_width]
        :return: Sigmoid gates. Shape: [num_steps x batch_size x memory_width]
        '''
        num_steps, batch_size, _ = tf.unstack(tf.shape(self._memory))
        pair_width = self._memory_width*2

        # Flatten steps and batch so that all pairs are projected by one matmul.
        flat_pairs = tf.reshape(self._align_query_with_memory(query), [-1, pair_width])
        flat_gates = tf.add(tf.matmul(flat_pairs, self._attention_W), self._attention_b)
        gate_logits = tf.reshape(flat_gates, [num_steps, batch_size, self._memory_width])

        return tf.sigmoid(gate_logits)

    def __call__(self, query):
        '''
        :param query: Query tensor whose last dimension equals the memory width.
        :return: Gated memory summed over the steps (plus the query when residual=True).
        '''
        assert query.get_shape()[-1] == self._memory_width, 'Last dimension of query must have size %d' % (self._memory_width)

        gates = self._attention_weights(query)
        context = tf.reduce_sum(tf.multiply(gates, self._memory), 0)

        if not self._residual:
            return context

        return tf.add(query, context)

In [53]:
class AttentionWrapper(tf.contrib.rnn.RNNCell):
    '''
    RNN cell wrapper that mixes an attention context into the state of the wrapped cell.

    After every step the new state, the new output and the attention context are concatenated
    and projected back to state size through a tanh dense layer. The dense layer input is sized
    as 3 * cell.state_size, i.e. output and context are expected to be as wide as the state.
    '''

    def __init__(self, cell, attention):
        '''
        :param cell: Wrapped RNNCell instance.
        :param attention: _Attention instance queried with the new cell state.
        '''
        assert isinstance(attention, _Attention), 'Param :attention should be an _Attention class instance'

        self._cell = cell
        self._attention = attention

        with tf.name_scope('AttentionWrapper'):
            fused_width = cell.state_size*3
            self._candidate_state = DenseLayer(fused_width, cell.state_size, activation=tf.tanh, name='candidate_state')

    @property
    def state_size(self):
        # State size is dictated by the wrapped cell.
        return self._cell.state_size

    @property
    def output_size(self):
        # Output size is dictated by the wrapped cell.
        return self._cell.output_size

    def zero_state(self, batch_size, dtype):
        scope_name = type(self).__name__+'ZeroState'

        with tf.name_scope(scope_name, values=[batch_size]):
            return self._cell.zero_state(batch_size, dtype)

    def __call__(self, inputs, state, scope=None):
        '''
        Runs one step of the wrapped cell and replaces its state with the attended state.

        :return: Tuple (cell output, attended state)
        '''
        cell_output, cell_state = self._cell(inputs, state, scope=scope)

        context = self._attention(cell_state)
        fused = tf.concat([cell_state, cell_output, context], 1)

        return (cell_output, self._candidate_state(fused))
    

class MultiAttentionWrapper(tf.contrib.rnn.RNNCell):
    '''
    RNN cell wrapper that mixes the contexts of several attention mechanisms into the state
    of the wrapped cell.

    The new state, the new output and one context per attention are concatenated and projected
    back to state size through a tanh dense layer sized as (2 + n_attentions) * cell.state_size.
    '''

    def __init__(self, cell, *attentions):
        '''
        :param cell: Wrapped RNNCell instance.
        :param attentions: _Attention instances, each queried with the new cell state.
        '''
        assert all(isinstance(attention, _Attention) for attention in attentions), 'Param :attentions should be an _Attention class instance'

        self._cell = cell
        self._attentions = attentions

        # The scope name (including its spelling) is part of the variable names; keep it as is.
        with tf.name_scope('MultiAttentionWraper'):
            n_parts = 2+len(self._attentions)
            self._candidate_state = DenseLayer(cell.state_size*n_parts, cell.state_size, activation=tf.tanh, name='candidate_state')

    @property
    def state_size(self):
        # State size is dictated by the wrapped cell.
        return self._cell.state_size

    @property
    def output_size(self):
        # Output size is dictated by the wrapped cell.
        return self._cell.output_size

    def zero_state(self, batch_size, dtype):
        scope_name = type(self).__name__+'ZeroState'

        with tf.name_scope(scope_name, values=[batch_size]):
            return self._cell.zero_state(batch_size, dtype)

    def __call__(self, inputs, state, scope=None):
        '''
        Runs one step of the wrapped cell and replaces its state with the attended state.

        :return: Tuple (cell output, attended state)
        '''
        cell_output, cell_state = self._cell(inputs, state, scope=scope)

        per_attention_contexts = []
        for attention in self._attentions:
            per_attention_contexts.append(attention(cell_state))

        contexts = tf.concat(per_attention_contexts, 1)
        fused = tf.concat([cell_state, cell_output, contexts], 1)

        return (cell_output, self._candidate_state(fused))

In [6]:
class Decoder(object):
    '''
    Placeholder for a standalone decoder. It only stores its configuration;
    __call__ is not implemented yet and returns None.
    '''

    def __init__(self, cell, initial_state, encoder_outputs, embeddings, targets=None, targets_length=None, feed_previous=False):
        '''
        :param cell: Tensorflow RNNCell instance
        :param initial_state: Initial cell state (encoder final state)
        :param encoder_outputs: Encoder outputs
        :param embeddings: Embeddings used by the decoder
        :param targets: Optional decoder true outputs
        :param targets_length: Optional lengths of the decoder true outputs
        :param feed_previous: Flag stored for later use; defaults to False
        '''
        # Decoding configuration
        self._feed_previous = feed_previous
        self._targets = targets
        self._targets_length = targets_length

        # Model pieces
        self._cell = cell
        self._initial_state = initial_state
        self._encoder_outputs = encoder_outputs
        self._embeddings = embeddings

    def __call__(self, encoder_state, encoder_outputs):
        # Not implemented yet.
        return None

In [54]:
class Seq2Seq(object):
    '''
    Sequence to sequence model with a conversation history (context) encoder.

    The graph consists of a bi-directional GRU encoder over every history message, a
    bi-directional + stacked GRU encoder over the input sequence and a three layer GRU decoder
    whose first layer attends over both the encoder outputs and the encoded history.
    Embedding id 0 is used as padding and id 1 as end-of-sequence / first decoder input.
    '''

    # Score written into padded history slots before the attention softmax. It has to be a
    # large NEGATIVE number so those slots get ~zero weight (a tiny positive value such as
    # 1e-18 leaves them with a logit of ~0, i.e. fully attended). It is kept finite instead of
    # -inf so an example without any history yields uniform weights rather than NaN.
    _CONTEXT_MASK_VALUE = -1e18

    def __init__(self, state_size, embeddings_shape, feed_previous=False):
        '''
        :param state_size: Number of units of every GRU cell.
        :param embeddings_shape: [vocabulary_size, embedding_size]
        :param feed_previous: When True the decoder is fed its own previous prediction,
        otherwise the true targets (teacher forcing).
        '''
        self._state_size = state_size
        self._embeddings_shape = embeddings_shape
        self._feed_previous = feed_previous

        print('Seq2Seq - stacked:')
        print('State size:', self._state_size)
        print('Embeddings shape:', self._embeddings_shape)
        print('Feed previous:', self._feed_previous)

    def graph(self, input_seq, target_seq, input_seq_len, target_seq_len, history, history_size, history_seq_len):
        '''
        Builds the whole model graph and returns the loss tensor.

        :param input_seq: Input sequence tensor. Shape: [num_steps x batch_size]
        :param target_seq: Target sequence tensor. Shape: [num_steps x batch_size]
        :param input_seq_len: Input sequence length tensor. Shape: [batch_size]
        :param target_seq_len: Target sequence length tensor. Shape: [batch_size]
        :param history: Sequence history tensor. Shape: [n_messages x batch_size x num_steps]
        :param history_size: Tensor representing number of sequences in history. Shape: [batch_size]
        :param history_seq_len: History sequences length. It is flattened and paired element-wise
        with the flattened history, so it must use the same [n_messages x batch_size] layout.
        '''
        self._input_seq = input_seq
        self._target_seq = target_seq
        self._input_seq_len = input_seq_len
        self._target_seq_len = target_seq_len
        # History
        self._history = history
        self._history_size = history_size
        self._history_seq_len = history_seq_len

        # Build
        self._initialize_embeddings()
        self._context_encoder()
        self._encoder()
        self._context_attention()
        self._decoder()

        return self._loss()

    def _initialize_embeddings(self):
        '''
        Creates the embedding matrix and the embedded input, history, pad and eos tensors.
        '''
        self._embeddings = tf.Variable(tf.random_uniform(self._embeddings_shape, -0.5, 0.5), dtype=tf.float32, name='embeddings')

        with tf.device('/cpu:0'):
            _, batch_size = tf.unstack(tf.shape(self._input_seq))

            self._input_seq_embedded = tf.nn.embedding_lookup(self._embeddings, self._input_seq)
            self._history_embbedded = tf.nn.embedding_lookup(self._embeddings, self._history)
            # One embedded pad (id 0) / eos (id 1) token per batch element.
            self._pad = tf.nn.embedding_lookup(self._embeddings, tf.zeros([batch_size], dtype=tf.int32))
            self._eos = tf.nn.embedding_lookup(self._embeddings, tf.ones([batch_size], dtype=tf.int32))

    def _context_encoder(self):
        '''
        Encodes every history message into one vector with a bi-directional GRU.
        '''
        # _context shape: [num_sentences x batch_size x embedding_size]
        with tf.variable_scope('context_encoder'):
            n_messages, batch_size, num_steps, _ = tf.unstack(tf.shape(self._history_embbedded))
            # Treat every (message, batch element) pair as an independent sequence:
            # [n_messages*batch_size x num_steps x emb] -> time major [num_steps x n_messages*batch_size x emb]
            history_flattened = tf.transpose(tf.reshape(self._history_embbedded, [-1, num_steps, self._embeddings_shape[1]]), [1,0,2])

            fw_cell = tf.contrib.rnn.GRUCell(self._state_size)
            bw_cell = tf.contrib.rnn.GRUCell(self._state_size)

            _, encoder_state = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell,
                history_flattened,
                # Flattened in the same order as the history above.
                sequence_length=tf.reshape(self._history_seq_len, [-1]),
                dtype=tf.float32,
                time_major=True
            )

            # Project [2*state_size] -> [state_size]
            projected_state = tf.layers.dense(tf.concat(encoder_state, 1), self._state_size, activation=tf.tanh, name='context_encoder_state_projection')

            self._context = tf.reshape(projected_state, [n_messages, batch_size, self._state_size])
            tf.summary.histogram('context', self._context)

    def _encoder(self):
        '''
        Encodes the input sequence: a bi-directional GRU followed by a two layer GRU stack.
        Exposes the stack outputs and the final state of its last layer.
        '''
        with tf.variable_scope('encoder'):
            with tf.name_scope('bi-directional'):
                fw_cell = tf.contrib.rnn.GRUCell(self._state_size)
                bw_cell = tf.contrib.rnn.GRUCell(self._state_size)

                bi_encoder_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell,
                    self._input_seq_embedded,
                    sequence_length = self._input_seq_len,
                    dtype=tf.float32,
                    time_major=True
                )

                # Forward and backward outputs side by side: [num_steps x batch_size x 2*state_size]
                bi_encoder_outputs = tf.concat(bi_encoder_outputs, 2)

            with tf.name_scope('uni-directional'):
                cell = tf.contrib.rnn.MultiRNNCell([
                    tf.contrib.rnn.GRUCell(self._state_size),
                    tf.contrib.rnn.GRUCell(self._state_size),
                ], state_is_tuple=True)

                uni_encoder_outputs, uni_encoder_state = tf.nn.dynamic_rnn(
                    cell,
                    bi_encoder_outputs,
                    sequence_length = self._input_seq_len,
                    dtype = tf.float32,
                    time_major = True
                )

                self._encoder_outputs = uni_encoder_outputs
                # Final state of the last (top) layer only.
                self._encoder_state = uni_encoder_state[-1]

            tf.summary.histogram('encoder_outputs', self._encoder_outputs)
            tf.summary.histogram('encoder_state', self._encoder_state)

    def _context_attention(self):
        '''
        Attends over the encoded history with the final encoder state as query. The result is
        used as the initial decoder state.
        '''
        with tf.name_scope('context_attention'):
            context_attention = BahdanauAttention(self._context, memory_len=self._history_size, mask_value=self._CONTEXT_MASK_VALUE)

            self._encoder_state_with_context = context_attention(self._encoder_state)

    def _initialize_decoder_params(self):
        '''
        Creates the output projection layer and, for teacher forcing, the embedded targets
        TensorArray read by the decoder loop.
        '''
        self._output_projection_layer = DenseLayer(self._state_size, self._embeddings_shape[0], name='output_projection')

        # Prepare targets tensor array if feed_previous=False
        if not self._feed_previous:
            with tf.device('/cpu:0'):
                _target_seq_embedded = tf.nn.embedding_lookup(self._embeddings, self._target_seq)

            _targets_ta = tf.TensorArray(dtype=tf.float32, size=tf.reduce_max(self._target_seq_len))
            self._targets_ta = _targets_ta.unstack(_target_seq_embedded)

    def _decoder(self):
        '''
        Builds the decoder and exposes its logits and predicted embedding ids.
        '''
        with tf.variable_scope('decoder'):
            self._initialize_decoder_params()

            attention_cell = MultiAttentionWrapper(
                tf.contrib.rnn.GRUCell(self._state_size),
                BahdanauAttention(self._encoder_outputs, memory_len=self._input_seq_len),
                BahdanauAttention(self._context, memory_len=self._history_size, mask_value=self._CONTEXT_MASK_VALUE)
            )

            cell = tf.contrib.rnn.MultiRNNCell([
                # Only first cell has attention
                attention_cell,
                # Other cells
                tf.contrib.rnn.GRUCell(self._state_size),
                tf.contrib.rnn.GRUCell(self._state_size)
            ], state_is_tuple=True)

            decoder_outputs_ta, _, _ = tf.nn.raw_rnn(cell, self._decoder_loop_fn)
            decoder_outputs = decoder_outputs_ta.stack()

            tf.summary.histogram('decoder_outputs', decoder_outputs)

            num_steps, batch_size, decoder_output_size = tf.unstack(tf.shape(decoder_outputs))
            # Project all steps at once: flatten to rank 2, project, restore [num_steps x batch_size x vocab].
            self._decoder_logits = tf.reshape(
                self._output_projection_layer(tf.reshape(decoder_outputs, [-1, decoder_output_size])),
                [num_steps, batch_size, self._embeddings_shape[0]]
            )

            self.decoder_embedding_ids = tf.cast(tf.argmax(self._decoder_logits, 2), tf.int32)

    def _decoder_loop_fn(self, time, previous_output, previous_state, previous_loop_state):
        '''
        Loop function for tf.nn.raw_rnn.

        :return: Tuple (finished flags, next input, next cell state, emitted output, loop state)
        '''
        is_finished = tf.greater_equal(time, self._target_seq_len)

        if previous_state is None:
            # Initial state
            # First input is the eos embedding; all three decoder layers start from the same state.
            return (is_finished, self._eos, tuple([self._encoder_state_with_context]*3), None, None)

        def _next_input():
            if not self._feed_previous:
                # Teacher forcing: the input at step `time` is the true token of step `time - 1`.
                return self._targets_ta.read(time - 1)

            embedding_ids = tf.argmax(self._output_projection_layer(previous_output), 1)
            with tf.device('/cpu:0'):
                return tf.nn.embedding_lookup(self._embeddings, embedding_ids)

        # Once every sequence in the batch is finished only padding is fed.
        next_input = tf.cond(tf.reduce_all(is_finished), lambda: self._pad, _next_input)

        return (is_finished, next_input, previous_state, previous_output, None)

    def _loss(self):
        '''
        Mean softmax cross entropy and token accuracy over all [num_steps x batch_size]
        positions. Padded positions are included in both means.

        :return: Scalar loss tensor (also available as self.loss).
        '''
        stepwise_ce = tf.nn.softmax_cross_entropy_with_logits(
            labels = tf.one_hot(self._target_seq, self._embeddings_shape[0]),
            logits = self._decoder_logits
        )

        self.loss = tf.reduce_mean(stepwise_ce)
        tf.summary.scalar('loss', self.loss)

        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self._target_seq, self.decoder_embedding_ids), tf.float32))
        tf.summary.scalar('accuracy', self.accuracy)

        return self.loss


# Smoke test: build the complete model graph on placeholders (nothing is executed).
# Targets use a fixed [50 x 32] shape, every other input is fully dynamic.
with tf.Graph().as_default() as graph:
    input_seq = tf.placeholder(dtype=tf.int32, shape=[None, None])
    target_seq = tf.placeholder(dtype=tf.int32, shape=[50, 32])
    input_seq_len = tf.placeholder(dtype=tf.int32, shape=[None])
    target_seq_len = tf.placeholder(dtype=tf.int32, shape=[32])
    history = tf.placeholder(dtype=tf.int32, shape=[None, None, None])
    history_size = tf.placeholder(dtype=tf.int32, shape=[None])
    history_seq_len = tf.placeholder(dtype=tf.int32, shape=[None, None])

    model = Seq2Seq(state_size=500, embeddings_shape=[7800,300])
    model.graph(
        input_seq=input_seq,
        target_seq=target_seq,
        input_seq_len=input_seq_len,
        target_seq_len=target_seq_len,
        history=history,
        history_size=history_size,
        history_seq_len=history_seq_len
    )
    print('Done')

Seq2Seq - stacked:
State size: 500
Embeddings shape: [7800, 300]
Feed previous: False
Done


In [48]:
# Train a small model on the number-copy task, reading batches from the TFRecord queue.
with tf.Graph().as_default() as graph:
    model = Seq2Seq(150, [15,50])
    queue = InputPipeline(['./data/test_number_copy.tfrecords'], batch_size = 64, n_epochs = 50)

    input_seq, target_seq, history, input_seq_len, target_seq_len, history_size, history_seq_len = queue.inputs()
    model.graph(input_seq, target_seq, input_seq_len, target_seq_len, history, history_size, history_seq_len)

    global_step = tf.Variable(0, trainable=False, name='global_step')
    train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(model.loss, global_step=global_step)


    with tf.Session() as sess:
        # Local variables hold the epoch counter of the filename queue.
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            while not coord.should_stop():
                _, l, a, s, targets, generated = sess.run([train_op, model.loss, model.accuracy, global_step, model._target_seq, model.decoder_embedding_ids])

                print('Loss:', l, 'Accuracy:', a, 'Step:', s)
                if s % 100 == 0:
                    # targets/generated are [num_steps x batch_size]; the final batch may hold
                    # fewer than 3 sequences.
                    for i in range(min(3, targets.shape[1])):
                        print("GT:", targets.T[i])
                        print("Predicted:", generated.T[i])
                        print('------')
        except tf.errors.OutOfRangeError:
            # Raised by the input queue once all epochs have been consumed.
            print('Epoch limit reached, training finished.')
        finally:
            # Always ask the queue runner threads to stop, also on interrupt or error.
            coord.request_stop()

        coord.join(threads)

Seq2Seq - stacked:
State size: 150
Embeddings shape: [15, 50]
Feed previous: False
InputPipeline - batch_size: 64 n_epochs: 50 capacity: 10000.0
Loss: 2.71297 Accuracy: 0.178125 Step: 1
Loss: 2.59408 Accuracy: 0.15625 Step: 2
Loss: 2.48051 Accuracy: 0.2375 Step: 3
Loss: 2.42493 Accuracy: 0.174107 Step: 4
Loss: 2.3006 Accuracy: 0.240625 Step: 5
Loss: 2.35138 Accuracy: 0.174107 Step: 6
Loss: 2.15192 Accuracy: 0.271875 Step: 7
Loss: 2.20017 Accuracy: 0.200893 Step: 8
Loss: 2.15671 Accuracy: 0.221875 Step: 9
Loss: 2.11477 Accuracy: 0.229911 Step: 10
Loss: 2.14174 Accuracy: 0.203125 Step: 11
Loss: 2.08963 Accuracy: 0.272321 Step: 12
Loss: 2.06514 Accuracy: 0.359375 Step: 13
Loss: 2.12444 Accuracy: 0.28125 Step: 14
Loss: 2.02451 Accuracy: 0.359375 Step: 15
Loss: 2.10033 Accuracy: 0.294643 Step: 16
Loss: 2.0054 Accuracy: 0.384375 Step: 17
Loss: 2.0999 Accuracy: 0.287946 Step: 18
Loss: 2.04236 Accuracy: 0.3625 Step: 19
Loss: 2.06664 Accuracy: 0.274554 Step: 20
Loss: 2.06098 Accuracy: 0.35 Step

KeyboardInterrupt: 

In [135]:
# Inference setup: build the model on a fresh graph bound to an interactive session and
# restore the trained weights. `sess` and `model` are used by later cells.
graph = tf.Graph()
sess = tf.InteractiveSession(graph=graph)

input_seq = tf.placeholder(tf.int32, [None, None])
target_seq = tf.placeholder(tf.int32, [None, None])
input_seq_len = tf.placeholder(tf.int32, [None])
target_seq_len = tf.placeholder(tf.int32, [None])
history = tf.placeholder(tf.int32, [None, None, None])
history_size = tf.placeholder(tf.int32, [None])
history_seq_len = tf.placeholder(tf.int32, [None])


# feed_previous=True: at inference time no target sequence is available, so the decoder has
# to consume its own previous prediction instead of reading the (unfed) targets.
# The set of variables is the same in both modes, so the checkpoint layout is unaffected.
model = Seq2Seq(500, [7800,300], feed_previous=True)
model.graph(input_seq, target_seq, input_seq_len, target_seq_len, history, history_size, history_seq_len)
# Restoring requires the variable names of this graph to match the keys stored in the checkpoint.
tf.train.Saver().restore(sess, './models/gcloud_seq2seq_v5/6/model.ckpt-83266')

INFO:tensorflow:Restoring parameters from ./models/gcloud_seq2seq_v5/6/model.ckpt-83266


NotFoundError: Key decoder/candidate_state/W not found in checkpoint
	 [[Node: save/RestoreV2_12 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/RestoreV2_12/tensor_names, save/RestoreV2_12/shape_and_slices)]]

Caused by op 'save/RestoreV2_12', defined at:
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2827, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-135-ef9ffcb1523c>", line 15, in <module>
    tf.train.Saver().restore(sess, './models/gcloud_seq2seq_v5/6/model.ckpt-83266')
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1056, in __init__
    self.build()
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1086, in build
    restore_sequentially=self._restore_sequentially)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 691, in build
    restore_sequentially, reshape)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 407, in _AddRestoreOps
    tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 247, in restore_op
    [spec.tensor.dtype])[0])
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 669, in restore_v2
    dtypes=dtypes, name=name)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/marekgalovic/.virtualenvs/data/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

NotFoundError (see above for traceback): Key decoder/candidate_state/W not found in checkpoint
	 [[Node: save/RestoreV2_12 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/RestoreV2_12/tensor_names, save/RestoreV2_12/shape_and_slices)]]


In [86]:
import json
import nltk
# Vocabulary: token -> embedding id. The file is closed as soon as it has been parsed.
with open('./data/frames/word_index.json') as word_index_file:
    word_index = json.load(word_index_file)

# Reverse lookup: embedding id -> token, used to turn model output back into text.
dictionary = {idx: word for word, idx in word_index.items()}

def parse_message(message):
    '''
    Tokenizes a message and maps every token to its id in word_index.
    Tokens missing from word_index are mapped to the id of '<UNK>'.

    :param message: Raw message text.
    :return: 1-D numpy array of token ids.
    '''
    token_ids = []

    for token in nltk.word_tokenize(message):
        if token in word_index:
            token_ids.append(word_index[token])
        else:
            token_ids.append(word_index['<UNK>'])

    return np.array(token_ids)

def pad_sequences(values, length, add_eos=False):
    '''
    Brings every sequence to exactly `length` items.

    Short sequences are right-padded with 0, long ones are truncated. With add_eos=True an
    end-of-sequence id (1) is appended first and is kept as the last item when truncating.

    :param values: Iterable of sequences of ids.
    :param length: Target length of every row.
    :param add_eos: Whether to append the eos id (1) to every sequence.
    :return: numpy array with one row per sequence.
    '''
    padded_rows = []

    for sequence in values:
        tokens = list(sequence) + ([1] if add_eos else [])
        overflow = len(tokens) - length

        if overflow < 0:
            # Too short: fill up with the pad id.
            tokens = tokens + [0] * (-overflow)
        elif overflow > 0:
            # Too long: cut, keeping the eos id in the last slot when requested.
            tokens = tokens[:length-1] + [1] if add_eos else tokens[:length]

        padded_rows.append(tokens)

    return np.array(padded_rows)

def conversation_input(message, history_messages):
    '''
    Converts a message and its conversation history into model input arrays for a batch of one.

    :param message: Raw text of the current message.
    :param history_messages: List of raw texts of the previous messages.
    :return: Tuple (message ids [num_tokens x 1], [num_tokens],
        history ids [n_messages x 1 x longest_message], [n_messages])
    '''
    encoded_history = [parse_message(text) for text in history_messages]
    n_messages = len(encoded_history)
    longest_message = max(map(len, encoded_history))

    # [1 x n_messages x longest_message] -> [n_messages x 1 x longest_message]
    history_batch = np.array([pad_sequences(encoded_history, longest_message)]).swapaxes(0,1)
    # [1 x num_tokens] -> [num_tokens x 1]
    message_batch = np.array([parse_message(message)]).T

    return message_batch, [message_batch.shape[0]], history_batch, [n_messages]

def response(message, history_messages):
    '''
    Generates a reply to `message` given the previous conversation messages.

    Runs the global `model` through the global session `sess` for a batch of one and decodes
    message length + 10 steps. No target sequence is fed, so the model has to be built with
    feed_previous=True (otherwise the target placeholder must be fed as well).

    :param message: Raw text of the current message.
    :param history_messages: List of raw texts of the previous messages.
    :return: Generated tokens joined by spaces.
    '''
    message, message_len, history, history_size = conversation_input(message, history_messages)

    # Token count of every history message. The context encoder uses it as RNN sequence length,
    # so it has to be fed; for a batch of one the flat [n_messages] layout matches the history.
    history_seq_len = np.array([len(parse_message(m)) for m in history_messages], dtype=np.int32)

    output, output_seq_len = sess.run([model.decoder_embedding_ids, model._target_seq_len], feed_dict={
        model._input_seq: message,
        model._input_seq_len: message_len,
        model._target_seq_len: np.array(message_len) + 10,
        model._history: history,
        model._history_size: history_size,
        model._history_seq_len: history_seq_len
    })

    print(output_seq_len)

    return ' '.join([dictionary[i] for i in output.reshape(-1)])

In [125]:
# Example query: the current message plus the preceding conversation turns as history.
# response() runs through the global `sess`, so that session must still be open.
response('business', ['Hi', 'Hello', 'How much does it cost to fly to bahamas?', 'Economy class or business?'])

RuntimeError: Attempted to use a closed Session.