In [1]:
# !pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl numpy matplotlib torchtext 

In [2]:
# Standard PyTorch imports
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math, copy
from torch.autograd import Variable

# For plots
%matplotlib inline
import matplotlib.pyplot as plt


import tensorflow as tf

#!conda install torchtext spacy
# !python -m spacy download en
# !python -m spacy download de

from torchtext import data
from torchtext import datasets

import re
import spacy

spacy_de = spacy.load('de')
spacy_en = spacy.load('en')

url = re.compile('(<url>.*</url>)')


def tokenize_de(text):
    return [tok.text for tok in spacy_de.tokenizer(url.sub('@URL@', text))]


def tokenize_en(text):
    return [tok.text for tok in spacy_en.tokenizer(url.sub('@URL@', text))]


# Testing IWSLT
DE = data.Field(tokenize=tokenize_de, init_token='<bos>', eos_token='<eos>', include_lengths=True)
EN = data.Field(tokenize=tokenize_en, init_token='<bos>', eos_token='<eos>', include_lengths=True)

train, val, test = datasets.IWSLT.splits(exts=('.de', '.en'), fields=(DE, EN))


train_it = data.Iterator(train, batch_size=4, sort_within_batch=True, train=True, repeat=False, shuffle=True)
MIN_WORD_FREQ = 10
MAX_NUM_WORDS = 1000
DE.build_vocab(train.src, min_freq=MIN_WORD_FREQ, max_size=MAX_NUM_WORDS)
EN.build_vocab(train.trg, min_freq=MIN_WORD_FREQ, max_size=MAX_NUM_WORDS)

DE.vocab.itos[:7]


[93m    Linking successful[0m
    /home/lee/anaconda3/lib/python3.6/site-packages/en_core_web_sm -->
    /home/lee/anaconda3/lib/python3.6/site-packages/spacy/data/en

    You can now load the model via spacy.load('en')


[93m    Linking successful[0m
    /home/lee/anaconda3/lib/python3.6/site-packages/de_core_news_sm -->
    /home/lee/anaconda3/lib/python3.6/site-packages/spacy/data/de

    You can now load the model via spacy.load('de')



['<unk>', '<pad>', '<bos>', '<eos>', ',', '.', 'die']

In [3]:
num_wds_input = len(DE.vocab.itos)
num_wds_output = len(EN.vocab.itos)

num_wds_input, num_wds_output


(1004, 1004)

In [4]:
# X = tf.ones((5, 4, 3))

# xflat = tf.reshape(X, (-1, 3))

# xflat

# xflat.shape[-1].value

# bs, length, ndims = [v.value for v in X.shape]


# bs

# q, k, v = [tf.layers.dense(X, 3) for _ in range(3)]

# q_expanded = tf.expand_dims(q, 1)
# k_expanded = tf.expand_dims(k, 2)

# q_expanded.shape, k_expanded.shape

# tf.reduce_sum(q_expanded * k_expanded, -1).shape

# s_raw = tf.reduce_sum(q_expanded * k_expanded, -1)

# s = tf.expand_dims(tf.nn.softmax(s_raw, 1), -1)

# v_expanded = tf.expand_dims(v, 1)

# v_expanded.shape

# s.shape

# a = tf.reduce_sum(v_expanded * s, 1)

# a.shape



# tf.reduce_max(a, (1, 2))



In [107]:
import pdb
tf.reset_default_graph()
from nn_utils import *
from tensorflow.contrib.layers import layer_norm
def masked_softmax(v, mask, dim=1):
    v_masked = v * mask
    v_max = tf.reshape(tf.reduce_max(v_masked, (1, 2)), (-1, 1, 1))
    v_stable = v_masked - v_max
    v_exp = tf.exp(v_stable)
    v_exp_masked = v_exp * mask
    v_exp_summed = tf.expand_dims(tf.reduce_sum(v_exp_masked, dim), dim)
    
    return v_exp_masked / v_exp_summed
    
    
class AttentionLayer:
    def __init__(self, X, mask, X_decode = None, decode_mask = None, ff_layer = True):
        #If X_decode is not none, this is the decoder module that takes in two embeddings
        #Otherwise, this is the standard self-attention layer
        bs, length, ndims = [v.value for v in X.shape]
        if X_decode is None:
            self.q, self.k, self.v = [tf.tanh(tf.layers.dense(X, ndims)) for _ in range(3)]
        else:
            self.k, self.v = [tf.tanh(tf.layers.dense(X, ndims)) for _ in range(2)]
            self.q = tf.tanh(tf.layers.dense(X_decode, ndims))
        #dimensions are batch, attn head, attn tail, emb
        self.q_expanded = tf.expand_dims(self.q, 1)
        self.k_expanded = tf.expand_dims(self.k, 2)
        self.s_raw = tf.reduce_sum(self.q_expanded * self.k_expanded, -1)
        enc_mask = tf.expand_dims(mask, 1)
        if decode_mask is None:
            dec_mask = tf.expand_dims(mask, 2)
        else:
            dec_mask = tf.expand_dims(decode_mask, 2)
        self.combined_mask = combined_mask = enc_mask * dec_mask
        #(4, 62, 62)
        self.s = masked_softmax(self.s_raw, combined_mask, dim=1)
        #(4, 1, 62, 20)
        self.v_expanded = tf.expand_dims(self.v, 1)
        self.a = tf.reduce_sum(self.v_expanded * tf.expand_dims(self.s, -1), 2)
        if X_decode is None:
            residual_identity = X
        else:
            residual_identity = X_decode
        self.e_raw = self.a + residual_identity
        self.e = layer_norm(self.e_raw)
        if ff_layer:
            self.e_tilde_raw = tf.layers.dense(X, X.shape[-1].value) + X
            self.e_tilde = layer_norm(self.e_tilde_raw)
            self.output = self.e_tilde
        else:
            self.output = self.e
class Encoder:
    def __init__(self, num_wds, wd_ind, mask, ndims = 20, n_layers = 2):
        self.wd_ind = wd_ind
        self.num_wds = num_wds
        self.mask = mask
        self.wd_emb = tf.Variable(
            tf.random_uniform([self.num_wds, ndims],minval = -1, maxval = 1.))
        self.length = tf.shape(self.wd_ind)[1]
        self.wd_vec = tf.nn.embedding_lookup(self.wd_emb, wd_ind)
        self.position = tf.reshape(tf.range(tf.cast(self.length, tf.float32), dtype=tf.float32)/10000, (1, -1, 1))
        
        # for debugging purposes
        self.encodings = []
        #self.attn_layers = []
        last_encoding = self.wd_vec + self.position
        for _ in range(n_layers):
            attn_layer = AttentionLayer(last_encoding, mask)
            last_encoding = attn_layer.output
            #last_encoding = LinearResNorm(attn_layer.e)
            #self.attn_layers.append(attn_layer)
            self.encodings.append(last_encoding)
        
        
class Decoder:
    def __init__(self, num_wds, wd_ind, mask, encoder, ndims = 20, n_layers = 2):
        self.encoder = encoder
        self.encodings = encoder.encodings
        self.wd_ind = wd_ind
        input_mask = encoder.mask
        self.num_wds = num_wds
        self.wd_ind = wd_ind
        self.length = tf.shape(self.wd_ind)[1]
        self.num_wds = num_wds
        self.wd_emb = tf.Variable(
            tf.random_uniform([self.num_wds, ndims],minval = -1, maxval = 1.))
        self.wd_vec = tf.nn.embedding_lookup(self.wd_emb, wd_ind)
        self.position = tf.reshape(tf.range(tf.cast(self.length, tf.float32), dtype=tf.float32)/10000, (1, -1, 1))
        self.first_encoding = last_encoding = self.wd_vec + self.position
        self.self_attentions = []
        self.second_attentions = []
        self.dec_encodings = [last_encoding]
        for idx in range(n_layers):
            encodings = self.encodings[idx]
            self_attention = AttentionLayer(last_encoding, mask, ff_layer = False)
            second_attention = AttentionLayer(encodings, input_mask, X_decode = self_attention.output,
                                             decode_mask = mask)
            last_encoding = second_attention.output
            self.dec_encodings.append(last_encoding)
            self.self_attentions.append(self_attention)
            self.second_attentions.append(second_attention)
            #linear_res_norm = LinearResNorm(attn_layer.e)
            
        self.presoftmax_output = tf.layers.dense(last_encoding, num_wds)
        #self.output = tf.nn.softmax(self.presoftmax_output)
        self.output = masked_softmax(self.presoftmax_output, mask, dim=1)



class Transformer:
    def __init__(self, num_wds):
        self.num_wds = num_wds
        self.learning_rate = tf.placeholder(tf.float32, ())
        self.wd_ind_src = wd_ind_src = tf.placeholder(tf.int32, (None, None))
        self.wd_ind_trg = wd_ind_trg = tf.placeholder(tf.int32, (None, None))
        self.input_lengths = tf.placeholder(tf.int32, [None])
        self.output_lengths = tf.placeholder(tf.int32, [None])
        self.input_mask = tf.sequence_mask(
            self.input_lengths, maxlen = tf.shape(self.wd_ind_src)[-1], dtype = tf.float32)
        self.output_mask = tf.sequence_mask(
            self.output_lengths, maxlen = tf.shape(self.wd_ind_trg)[-1], dtype = tf.float32)
        self.encoder = Encoder(num_wds, wd_ind_src, self.input_mask)
        self.decoder = Decoder(num_wds, wd_ind_trg, self.output_mask, self.encoder)
        self.presoftmax_output = self.decoder.presoftmax_output
        self.output = self.decoder.output
        opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels = self.wd_ind_trg, logits = self.presoftmax_output)
        self.optimizer, self.grad_norm_total = apply_clipped_optimizer(opt, self.loss)

In [108]:
transformer = Transformer(num_wds_input)
trn_feed_dict = {transformer.wd_ind_src : src_tensor, transformer.input_lengths : src_len,
                    transformer.wd_ind_trg : trg_tensor, transformer.output_lengths : trg_len,
                    transformer.learning_rate : 1e-2}

In [109]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [124]:
sess.run(transformer.decoder.second_attentions[0].s, trn_feed_dict).shape

InvalidArgumentError: Incompatible shapes: [4,30,33] vs. [4,33,30]
	 [[Node: mul_17 = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Sum_9, mul_16)]]

Caused by op 'mul_17', defined at:
  File "/home/lee/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/lee/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/lee/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 497, in start
    self.io_loop.start()
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/lee/anaconda3/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/lee/anaconda3/lib/python3.6/asyncio/base_events.py", line 1434, in _run_once
    handle._run()
  File "/home/lee/anaconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/lee/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/lee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-108-2cf50e2e92a5>", line 1, in <module>
    transformer = Transformer(num_wds_input)
  File "<ipython-input-107-b1710d7f4bf2>", line 124, in __init__
    self.decoder = Decoder(num_wds, wd_ind_trg, self.output_mask, self.encoder)
  File "<ipython-input-107-b1710d7f4bf2>", line 98, in __init__
    decode_mask = mask)
  File "<ipython-input-107-b1710d7f4bf2>", line 37, in __init__
    self.s = masked_softmax(self.s_raw, combined_mask, dim=1)
  File "<ipython-input-107-b1710d7f4bf2>", line 6, in masked_softmax
    v_masked = v * mask
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 850, in binary_op_wrapper
    return func(x, y, name=name)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 1094, in _mul_dispatch
    return gen_math_ops.mul(x, y, name=name)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 4936, in mul
    "Mul", x=x, y=y, name=name)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Incompatible shapes: [4,30,33] vs. [4,33,30]
	 [[Node: mul_17 = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Sum_9, mul_16)]]


In [123]:
sess.run(transformer.decoder.second_attentions[0].v_expanded, trn_feed_dict).shape

(4, 1, 30, 20)

In [122]:
sess.run(transformer.decoder.self_attentions[0].output, trn_feed_dict).shape

(4, 33, 20)

In [82]:
[b.shape for b in blah]

[(4, 62, 20), (4, 59, 20), (4, 59, 20)]

In [20]:
transformer.presoftmax_output.shape

TensorShape([Dimension(None), Dimension(None), Dimension(1004)])

In [21]:
trg_tensor.shape

(36, 4)

In [30]:

src_tensor  = train_batch.src[0].data.cpu().numpy()
src_len = train_batch.src[1].cpu().numpy()
trg_tensor  = train_batch.trg[0].data.cpu().numpy()
trg_len = train_batch.trg[1].cpu().numpy()

In [43]:
a, b = sess.run([transformer.wd_ind_trg, transformer.presoftmax_output], trn_feed_dict)

In [119]:
src_tensor.shape, trg_tensor.shape

((4, 30), (4, 33))

In [45]:
a.shape, b.shape

((4, 62), (4, 59, 1004))

In [42]:
src_tensor.shape, src_len.shape, trg_tensor.shape, trg_len.shape

((4, 59), (4,), (4, 62), (4,))

In [47]:
trg_tensor.shape

(4, 62)

In [114]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for train_batch in train_it:
    src_tensor  = train_batch.src[0].data.cpu().numpy().transpose()
    src_len = train_batch.src[1].cpu().numpy()
    trg_tensor  = train_batch.trg[0].data.cpu().numpy().transpose()
    trg_len = train_batch.trg[1].cpu().numpy()
#     print(src_tensor.shape, src_len.shape, trg_tensor.shape, trg_len.shape)
#     print(src_tensor, src_len, trg_tensor, trg_len)
    trn_feed_dict = {transformer.wd_ind_src : src_tensor, transformer.input_lengths : src_len,
                    transformer.wd_ind_trg : trg_tensor, transformer.output_lengths : trg_len,
                    transformer.learning_rate : 1e-2}
    _, loss = sess.run([transformer.optimizer, transformer.loss], trn_feed_dict)
    print(loss)


InvalidArgumentError: assertion failed: [] [Condition x == y did not hold element-wise:] [x (SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [4 33] [y (SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [4 30]
	 [[Node: SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert = Assert[T=[DT_STRING, DT_STRING, DT_STRING, DT_INT32, DT_STRING, DT_INT32], summarize=3, _device="/job:localhost/replica:0/task:0/device:CPU:0"](SparseSoftmaxCrossEntropyWithLogits/assert_equal/All, SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert/data_0, SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert/data_1, SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert/data_2, SparseSoftmaxCrossEntropyWithLogits/Shape_1, SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert/data_4, SparseSoftmaxCrossEntropyWithLogits/strided_slice)]]

Caused by op 'SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert', defined at:
  File "/home/lee/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/lee/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/lee/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 497, in start
    self.io_loop.start()
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/lee/anaconda3/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/lee/anaconda3/lib/python3.6/asyncio/base_events.py", line 1434, in _run_once
    handle._run()
  File "/home/lee/anaconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/lee/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/lee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-108-2cf50e2e92a5>", line 1, in <module>
    transformer = Transformer(num_wds_input)
  File "<ipython-input-107-b1710d7f4bf2>", line 129, in __init__
    labels = self.wd_ind_trg, logits = self.presoftmax_output)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 2076, in sparse_softmax_cross_entropy_with_logits
    array_ops.shape(logits)[:-1]))
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/check_ops.py", line 382, in assert_equal
    return control_flow_ops.Assert(condition, data, summarize=summarize)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/tf_should_use.py", line 118, in wrapped
    return _add_should_use_warning(fn(*args, **kwargs))
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 149, in Assert
    return gen_logging_ops._assert(condition, data, summarize, name="Assert")
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 51, in _assert
    name=name)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/home/lee/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): assertion failed: [] [Condition x == y did not hold element-wise:] [x (SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [4 33] [y (SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [4 30]
	 [[Node: SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert = Assert[T=[DT_STRING, DT_STRING, DT_STRING, DT_INT32, DT_STRING, DT_INT32], summarize=3, _device="/job:localhost/replica:0/task:0/device:CPU:0"](SparseSoftmaxCrossEntropyWithLogits/assert_equal/All, SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert/data_0, SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert/data_1, SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert/data_2, SparseSoftmaxCrossEntropyWithLogits/Shape_1, SparseSoftmaxCrossEntropyWithLogits/assert_equal/Assert/Assert/data_4, SparseSoftmaxCrossEntropyWithLogits/strided_slice)]]


In [None]:
train_batch

In [None]:
train_batch.src[0].data

In [None]:
from torchtext.datasets import WMT14

In [None]:
WMT14('data/', ('.en', '.de'))

In [None]:
# Load words from IWSLT

#!pip install torchtext spacy
#!python -m spacy download en
#!python -m spacy download de
from torchtext import data, datasets
import spacy
spacy_de = spacy.load('de')
spacy_en = spacy.load('en')

def tokenize_de(text):
    return [tok.text for tok in spacy_de.tokenizer(text)]

def tokenize_en(text):
    return [tok.text for tok in spacy_en.tokenizer(text)]

BOS_WORD = '<s>'
EOS_WORD = '</s>'
BLANK_WORD = "<blank>"
SRC = data.Field(tokenize=tokenize_de, pad_token=BLANK_WORD)
TGT = data.Field(tokenize=tokenize_en, init_token = BOS_WORD, 
                 eos_token = EOS_WORD, pad_token=BLANK_WORD)

MAX_LEN = 100
train, val, test = datasets.IWSLT.splits(exts=('.de', '.en'), fields=(SRC, TGT), 
                                         filter_pred=lambda x: len(vars(x)['src']) <= MAX_LEN and 
                                         len(vars(x)['trg']) <= MAX_LEN)
MIN_FREQ = 1
SRC.build_vocab(train.src, min_freq=MIN_FREQ)
TGT.build_vocab(train.trg, min_freq=MIN_FREQ)

In [None]:
dir(train)

In [None]:
!pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl numpy matplotlib spacy torchtext seaborn 