In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import time
import collections

# Print NumPy arrays with 2 decimal places and allow up to 200 characters per output line.
np.set_printoptions(precision=2, linewidth=200)

# Enable the autoreload extension in mode 2 so imported modules are reloaded before code runs
# (useful while editing the local helper modules imported in the following cells).
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import seaborn as sns

In [3]:
from utils.reader import europarl_raw_data

In [4]:
from utils.attn_graph import AttentionModel

In [5]:
def show_dict_contents(d):
    """Print a one-line summary for every entry of a two-level dictionary.

    Args:
        d: mapping of ``outer_key -> {inner_key: value}``.

    For each inner value one line is printed containing the dotted key
    (``outer.inner``), the value's type, and additionally:

    * ``shape=...`` when the value is a NumPy array;
    * ``len=..., contents type=...`` when the value is a list, where the
      contents type is taken from the first element (``n/a`` for an empty list).
    """
    for k, v in d.items():
        for k2, v2 in v.items():
            shape_info = ' shape={0}'.format(v2.shape) if isinstance(v2, np.ndarray) else ''
            if isinstance(v2, list):
                # An empty list has no first element to inspect; report 'n/a'
                # instead of failing with IndexError.
                contents_type = type(v2[0]) if v2 else 'n/a'
                list_info = ' len={0}, contents type={1}'.format(len(v2), contents_type)
            else:
                list_info = ''
            print('{0: <30}: type={1: <30}{2}{3}'.format(
                '{0}.{1}'.format(k, k2),
                str(type(v2)),
                shape_info,
                list_info,
            ))

In [6]:
def unvectorize_sentence(sentence, idx2word):
    """Convert a sequence of token indices into a space-separated string.

    Every index in ``sentence`` is looked up in ``idx2word``; the marker
    tokens '<bos>', '<eos>' and '<pad>' are left out of the result.
    """
    markers = ('<bos>', '<eos>', '<pad>')
    words = []
    for token_id in sentence:
        word = idx2word[token_id]
        if word not in markers:
            words.append(word)
    return ' '.join(words)

In [7]:
def sample_group(data, group_num, num_samples=10):
    """Print randomly chosen sentence pairs from one training group.

    Row indices are drawn with ``np.random.choice`` (its default settings, so
    the same row may be picked more than once). For each pick, the 'X' row is
    printed using the 'lang1_idx2word' vocabulary, then the 'y' row using
    'lang2_idx2word', followed by an empty line.
    """
    train = data['train']
    picks = np.random.choice(len(train['X'][group_num]), size=num_samples)
    for pick in picks:
        for side, vocab_key in (('X', 'lang1_idx2word'), ('y', 'lang2_idx2word')):
            print(unvectorize_sentence(train[side][group_num][pick], data['vocab'][vocab_key]))
        print()

In [9]:
# Load the dataset and summarise what each entry of the returned dictionary holds.
data = europarl_raw_data()
show_dict_contents(data)

# List the (input shape, target shape) of every training group, ordered by the
# inputs' second dimension and skipping groups whose inputs are at most 2 wide.
print([
    (src.shape, tgt.shape)
    for src, tgt in sorted(
        zip(data['train']['X'], data['train']['y']),
        key=lambda pair: pair[0].shape[1],
    )
    if src.shape[1] > 2
])

vocab.lang1_idx2word          : type=<class 'list'>                 len=93800, contents type=<class 'str'>
vocab.lang1_word2idx          : type=<class 'dict'>                
vocab.lang2_idx2word          : type=<class 'list'>                 len=44248, contents type=<class 'str'>
vocab.lang2_word2idx          : type=<class 'dict'>                
train.X                       : type=<class 'list'>                 len=4, contents type=<class 'numpy.ndarray'>
train.y                       : type=<class 'list'>                 len=4, contents type=<class 'numpy.ndarray'>
val.X                         : type=<class 'numpy.ndarray'>        shape=(160000, 604)
val.y                         : type=<class 'list'>                 len=160000, contents type=<class 'str'>
test.X                        : type=<class 'numpy.ndarray'>        shape=(160209, 640)
test.y                        : type=<class 'list'>                 len=160209, contents type=<class 'str'>
[((63624, 8), (63624, 32)), ((29

In [10]:
# Print five randomly drawn sentence pairs from training group 0 as a sanity check.
sample_group(data, 0, num_samples=5)

Das ist das Problem .
Therein lies the problem .

Mitglied der Kommission .
Member of the Commission .

( Beifall )
( Applause )

Europa leidet an knappen Kassen .
It is budgetary resources that Europe sorely lacks .

Die Aussprache ist geschlossen .
The debate is closed .



In [11]:
# Inspect the shape of the training-input array at group index 1.
data['train']['X'][1].shape

(297434, 16)

In [24]:
# Start from an empty default graph so re-running this cell does not pile
# duplicate ops on top of the ones created by a previous run.
tf.reset_default_graph()
# NOTE(review): the meaning of the numeric arguments (64, 128 and 16, 30, 32) is
# defined in utils/attn_graph.py, which is not shown here. 16 appears as the first
# dimension of the training placeholders, so it is presumably the batch size — confirm.
model = AttentionModel(data['vocab']['lang1_idx2word'], data['vocab']['lang2_idx2word'], 64, 128)
all_graphs = model.make_all_graphs(16, data['train']['X'], data['train']['y'])
eval_graph = model.make_eval_graph(16, 30, 32, data['vocab']['lang2_word2idx']['<bos>'])
# Write the graph definition to 'logs' for TensorBoard. The writer is closed
# (which also flushes) rather than only flushed, because the name `writer` is
# rebound by the training cell and this instance would otherwise never be closed.
writer = tf.summary.FileWriter(logdir='logs', graph=tf.get_default_graph())
writer.close()

In [27]:
# Inspect the 'inputs' placeholder of the training graph at index 1 (name, shape, dtype).
all_graphs[1]['inputs_and_outputs']['placeholders']['inputs']

<tf.Tensor 'placeholders_len14/inputs:0' shape=(16, 14) dtype=int32>

In [15]:
# List the entries of the evaluation graph dictionary together with their types.
show_dict_contents(eval_graph)

placeholders.inputs           : type=<class 'tensorflow.python.framework.ops.Tensor'>
outputs.outputs               : type=<class 'tensorflow.python.framework.ops.Tensor'>
outputs.attention_weights     : type=<class 'tensorflow.python.framework.ops.Tensor'>


In [16]:
# Inspect the shape of the training-input array at group index 3.
data['train']['X'][3].shape

(241295, 32)

In [17]:
# Widen printed NumPy output to 250 characters per line (precision stays at 2 decimals);
# this replaces the linewidth=200 configured in the first cell.
np.set_printoptions(precision=2, linewidth=250)

In [19]:
# Global seaborn configuration for the plots below: apply seaborn's defaults with fonts scaled by 1.2.
sns.set(font_scale=1.2)
# NOTE(review): 'savefig.dpi' is a matplotlib rcParam rather than a seaborn style key —
# confirm that set_style actually applies it in the installed seaborn version.
sns.set_style({'savefig.dpi': 100})
def plot_attention_map(eval_input, eval_output_tokens, eval_output_weights, correct_output, lang1_idx2word, lang2_idx2word):
    """Print one translation example and draw its attention weights as a heatmap.

    Args:
        eval_input: sequence of source-token indices (looked up in ``lang1_idx2word``).
        eval_output_tokens: sequence of produced target-token indices (looked up in
            ``lang2_idx2word``).
        eval_output_weights: 2-D array-like passed to ``sns.heatmap``. Columns are
            labelled with the input words and rows with the output words, so it is
            expected to have one row per output token and one column per input token.
        correct_output: reference translation; printed as given.
        lang1_idx2word: index -> word lookup for the source language.
        lang2_idx2word: index -> word lookup for the target language.

    The three text lines are printed first, then a 12x12 inch figure is shown.
    """
    print('Input:', unvectorize_sentence(eval_input, lang1_idx2word))
    print('Model output:', unvectorize_sentence(eval_output_tokens, lang2_idx2word))
    print('Correct output:', correct_output)

    # Unlike the printed sentences, the axis labels keep every token (including
    # any marker tokens) so that there is exactly one label per heatmap cell.
    input_words = [lang1_idx2word[i] for i in eval_input]
    output_words = [lang2_idx2word[i] for i in eval_output_tokens]

    fig, ax = plt.subplots(figsize=(12, 12))
    # Hand the labels to seaborn instead of overwriting tick labels afterwards:
    # with its default "auto" labelling seaborn may draw only a subset of ticks,
    # in which case a full list of words would end up on the wrong cells.
    sns.heatmap(
        eval_output_weights,
        linewidths=0.1,
        xticklabels=input_words,
        yticklabels=output_words,
        ax=ax,
    )
    ax.xaxis.tick_top()
    ax.invert_yaxis()
    plt.xticks(rotation=90)
    # Keep the output words horizontal so they stay readable.
    plt.yticks(rotation=0)
    plt.show()


In [25]:
# Training configuration.
INITIAL_LR = 5e0             # value fed to the 'learning_rate' placeholder on every step
MAX_NORM = 1                 # value fed to the 'max_norm' placeholder (presumably a gradient-clipping norm — confirm in attn_graph)
BUCKET = 1                   # index of the training group / graph trained by this cell
BATCH_SIZE = 16              # rows per step; the placeholders of this graph have a fixed first dimension of 16
BATCHES_PER_CHECKPOINT = 100 # training steps between two checkpoints
NUM_EPOCHS = 50              # number of passes over the selected training group

with tf.Session() as sess:
    run_id = time.time()
    logdir = 'logs/{0}'.format(run_id)
    print('run_id:', run_id)
    writer = tf.summary.FileWriter(logdir, sess.graph)
    coord = tf.train.Coordinator()
    threads = []
    try:
        sess.run(tf.global_variables_initializer())
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        learning_rate = INITIAL_LR
        max_norm = MAX_NORM
        train_graph = all_graphs[BUCKET]['inputs_and_outputs']
        placeholders = train_graph['placeholders']
        # Everything fetched on each step: the graph's outputs plus its train ops.
        # NOTE(review): the recorded run of this cell failed with "You must feed a
        # value for placeholder tensor 'placeholders_len6/inputs'", i.e. one of the
        # fetched tensors depends on another group's placeholder. Presumably the
        # 'summary' op merges summaries of all groups — confirm in utils/attn_graph.py.
        training_outputs = {
            **train_graph['outputs'],
            **train_graph['train_ops'],
        }
        train_X = data['train']['X'][BUCKET]
        train_y = data['train']['y'][BUCKET]
        # Number of complete chunks of BATCHES_PER_CHECKPOINT batches in the group
        # (the label 'num_batches' is kept as in the original output).
        num_batches = (train_X.shape[0] // BATCHES_PER_CHECKPOINT) // BATCH_SIZE
        print('num_batches: ', num_batches)
        for i in range(num_batches * NUM_EPOCHS):
            for j in range(BATCHES_PER_CHECKPOINT):
                # `i % num_batches` wraps around so every epoch revisits the same chunks.
                batch_idx = (i % num_batches) * BATCHES_PER_CHECKPOINT + j
                start_idx = batch_idx * BATCH_SIZE
                end_idx = start_idx + BATCH_SIZE
                # The first and last column of the inputs are dropped
                # (presumably the <bos>/<eos> positions — confirm against the reader).
                inputs = train_X[start_idx:end_idx, 1:-1]
                targets = train_y[start_idx:end_idx]
                outputs = sess.run(
                    training_outputs,
                    feed_dict={
                        placeholders['inputs']: inputs,
                        placeholders['targets']: targets,
                        placeholders['learning_rate']: learning_rate, # * (10.0 / (10.0 + np.sqrt(i))),
                        placeholders['max_norm']: max_norm,
                    }
                )
                # Record the step number so TensorBoard can plot the summaries over time.
                writer.add_summary(outputs['summary'], global_step=i * BATCHES_PER_CHECKPOINT + j)
            model.save(sess, '{0}/model.ckpt'.format(logdir), global_step=i)
    finally:
        # Bookkeeping: runs even when a training step raises, so the event file is
        # closed and the queue-runner threads are stopped instead of being leaked.
        writer.close()
        coord.request_stop()
        coord.join(threads)
    
# Disabled evaluation / attention-plotting step, parked inside a string literal so that it is
# never executed. Its indentation suggests it was meant to run inside the outer training loop
# of the cell above — TODO confirm, then either move it into a function or delete it.
commented_out = """
        samples = np.random.choice(len(data['val']['X']), size=16)
        eval_inputs = data['val']['X'][samples][:,1:31]
        correct_outputs = [data['val']['y'][sample] for sample in samples]
#         samples = np.arange(16)
#         eval_inputs = data['train']['X'][3][samples][:,1:31]
#         correct_outputs = [
#             unvectorize_sentence(data['train']['y'][3][sample], data['vocab']['lang2_idx2word'])
#             for sample in samples
#         ]
        eval_outputs = sess.run(
            eval_graph['outputs'],
            feed_dict={
                eval_graph['placeholders']['inputs']: eval_inputs,
            }
        )
        print('-' * 40)
        print('i = {0}'.format(i))
        print(outputs)
        for sample_idx, sample in enumerate(samples):
            plot_attention_map(eval_inputs[sample_idx], eval_outputs['outputs'][sample_idx], eval_outputs['attention_weights'][sample_idx], correct_outputs[sample_idx], data['vocab']['lang1_idx2word'], data['vocab']['lang2_idx2word'])
#             print(unvectorize_sentence(eval_inputs[sample_idx], data['vocab']['lang1_idx2word']))
#             print(correct_outputs[sample_idx])
#             print(unvectorize_sentence(eval_outputs['outputs'][sample_idx], data['vocab']['lang2_idx2word']))
#             print(eval_outputs['attention_weights'][sample_idx])
#             print()
        print('-' * 40)
"""

run_id: 1518575718.5619578


InvalidArgumentError: You must feed a value for placeholder tensor 'placeholders_len6/inputs' with dtype int32 and shape [16,6]
	 [[Node: placeholders_len6/inputs = Placeholder[dtype=DT_INT32, shape=[16,6], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'placeholders_len6/inputs', defined at:
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-24-b2e06807e2af>", line 3, in <module>
    all_graphs = model.make_all_graphs(16, data['train']['X'], data['train']['y'])
  File "/Users/jsamet/nmt-attention-tensorflow/utils/attn_graph.py", line 588, in make_all_graphs
    y.shape[1],
  File "/Users/jsamet/nmt-attention-tensorflow/utils/attn_graph.py", line 318, in make_training_graph
    name='inputs',
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1548, in placeholder
    return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2094, in _placeholder
    name=name)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/jsamet/anaconda2/envs/py3k/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'placeholders_len6/inputs' with dtype int32 and shape [16,6]
	 [[Node: placeholders_len6/inputs = Placeholder[dtype=DT_INT32, shape=[16,6], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
