## Based on the followings:
* http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf
* http://adventuresinmachinelearning.com/keras-lstm-tutorial/
* https://machinelearningmastery.com/configure-encoder-decoder-model-neural-machine-translation/
* https://machinelearningmastery.com/develop-encoder-decoder-model-sequence-sequence-prediction-keras/
* https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
* https://github.com/farizrahman4u/seq2seq
* https://blog.keras.io/using-pre-trained-word-embeddings-in-a-keras-model.html

## TODO
* ~~look into categorical representation~~
* ~~look into the number of missing words over the total~~
* look into different training data generators (e.g. simple sentence2sentence)
* look into different models (attention, hierachical, etc.)
* look into character-level representation

In [1]:
import os
import re
import numpy as np
import glob
# import seq2seq

from string import punctuation
from itertools import islice
from nltk import corpus, stem
from gensim.models import KeyedVectors
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Masking, Embedding
from keras.preprocessing import sequence
from keras.utils import to_categorical

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
DATA_PATH = 'data'
OUTPUT_PATH = 'output'

punct = set(punctuation)

In [3]:
w2v = KeyedVectors.load_word2vec_format(os.path.join(DATA_PATH, 'GoogleNews-vectors-negative300.bin'), binary=True)

In [4]:
def expand_contractions(text, w2v):
    cont = Contractions(w2v_model=w2v)
    return cont.expand_texts(text=text, precise=True)

In [5]:
vocab_dim = w2v.vector_size
eos_vector = np.ones((vocab_dim))
unk_vector = np.zeros((vocab_dim))

def preprocess(text):
    text = re.sub(repl=' ', string=text, pattern='-')
    return re.sub(repl='', string=text, pattern='[{}\n\t\\\\]'.format(''.join(punctuation)))

def build_vocabulary(file_list, w2v):
    idx2word = set() # only count unique words
    missing_words = set()
    for file in file_list:
        print('build_vocabulary: processing [{}]'.format(file))
        with open(file, 'r', encoding='utf-8') as f:
            for i,line in enumerate(f):
                line = preprocess(line)
                if len(line) == 0:
                    print('Line {} is empty. Skipping it.'.format(i+1))
                    continue
                
                for word in line.split(' '):
                    # skip words without embeddings. They'll be assigned the <UNK> token
                    if len(word) > 0:
                        if word in w2v:
                            idx2word.add(word)
                        else:
                            missing_words.add(word)
                        
    missing_words = sorted(list(missing_words))
    idx2word = sorted(list(idx2word))
    idx2word.insert(0, '<EOS>')
    idx2word.insert(1, '<UNK>')
    word2idx = {w:i for i,w in enumerate(idx2word)}
    # skip EOS and UNK when looking up word embeddings
    word2embeddings = {**{'<EOS>': eos_vector, '<UNK>': unk_vector}, **{w:w2v[w] for w in idx2word[2:]}}
    return idx2word, word2idx, word2embeddings, missing_words

def prepare_data(file_list, word2idx):
    vocab_size = len(word2idx)    
    data = []
    for file in file_list:
        print('prepare_data: processing [{}]'.format(file))
        with open(file, 'r', encoding='utf-8') as f:
            file_data = []
            for i,line in enumerate(f):
                line = preprocess(line)
                if len(line) == 0:
                    print('Line {} is empty. Skipping it.'.format(i+1))
                    continue
                # return the integer representation of the sentence
                file_data.append([word2idx[w] if w in word2idx else word2idx['<UNK>'] for w in line.split(' ')])
        data.append(file_data)
    return data
                                 
def get_embedding_matrix(word2embeddings):
    embedding_dim = len(list(word2embeddings.values())[0])
    embedding_matrix = np.zeros(shape=(len(word2embeddings), embedding_dim))
    for i, w in enumerate(word2embeddings):
        embedding_matrix[i] = word2embeddings[w]
    return embedding_matrix
                                
def prepare_input(input_text, word2embeddings):
    return [word2embeddings[word] if word in word2embeddings else unk_vector for word in preprocess(input_text).split(' ') if len(word) > 0]

In [6]:
file_list = glob.glob('data/parsed/*.txt')
idx2word, word2idx, word2embeddings, missing_words = build_vocabulary(file_list, w2v)
data = prepare_data(file_list, word2idx)
vocab_size = len(idx2word)

build_vocabulary: processing [data/parsed\parsed-12heads.txt]
build_vocabulary: processing [data/parsed\parsed-1893.txt]
build_vocabulary: processing [data/parsed\parsed-20160221-thesueno-utf8.txt]
build_vocabulary: processing [data/parsed\parsed-20160221-thesueno.txt]
build_vocabulary: processing [data/parsed\parsed-20160327-unrealcity-lifeonmars.txt]
build_vocabulary: processing [data/parsed\parsed-3card-deadmanshill-2016Ap24.txt]
build_vocabulary: processing [data/parsed\parsed-69krakatoa.txt]
build_vocabulary: processing [data/parsed\parsed-905-shrapnel.txt]
build_vocabulary: processing [data/parsed\parsed-abno.txt]
build_vocabulary: processing [data/parsed\parsed-acg-crossbow.txt]
build_vocabulary: processing [data/parsed\parsed-acitw.txt]
Line 171 is empty. Skipping it.
build_vocabulary: processing [data/parsed\parsed-actofmurder.txt]
build_vocabulary: processing [data/parsed\parsed-adverbum.txt]
build_vocabulary: processing [data/parsed\parsed-afdfr.txt]
build_vocabulary: proces

build_vocabulary: processing [data/parsed\parsed-orevore.txt]
build_vocabulary: processing [data/parsed\parsed-park.txt]
build_vocabulary: processing [data/parsed\parsed-partyfoul.txt]
build_vocabulary: processing [data/parsed\parsed-pathway.txt]
build_vocabulary: processing [data/parsed\parsed-pax.txt]
build_vocabulary: processing [data/parsed\parsed-pax2.txt]
build_vocabulary: processing [data/parsed\parsed-pax2011.txt]
build_vocabulary: processing [data/parsed\parsed-pepper.txt]
build_vocabulary: processing [data/parsed\parsed-photograph.txr.txt]
build_vocabulary: processing [data/parsed\parsed-photograph.txt]
build_vocabulary: processing [data/parsed\parsed-plan6-waker.txt]
build_vocabulary: processing [data/parsed\parsed-plunderedhearts.txt]
build_vocabulary: processing [data/parsed\parsed-pnnsi1.txt]
build_vocabulary: processing [data/parsed\parsed-pnnsi2.txt]
build_vocabulary: processing [data/parsed\parsed-primrose-edited.txt]
build_vocabulary: processing [data/parsed\parsed-pr

prepare_data: processing [data/parsed\parsed-clipperbeta.txt]
prepare_data: processing [data/parsed\parsed-cokeandspeed.txt]
prepare_data: processing [data/parsed\parsed-cove.txt]
prepare_data: processing [data/parsed\parsed-crescent.txt]
prepare_data: processing [data/parsed\parsed-csbb.txt]
prepare_data: processing [data/parsed\parsed-cull.txt]
prepare_data: processing [data/parsed\parsed-death.txt]
prepare_data: processing [data/parsed\parsed-defra.txt]
prepare_data: processing [data/parsed\parsed-degeneracy.txt]
Line 248 is empty. Skipping it.
prepare_data: processing [data/parsed\parsed-demoparty.txt]
prepare_data: processing [data/parsed\parsed-dialcforcupcakes-103014-utf8.txt]
prepare_data: processing [data/parsed\parsed-dialcforcupcakes-103014.txt]
prepare_data: processing [data/parsed\parsed-divis.txt]
prepare_data: processing [data/parsed\parsed-djinni.txt]
prepare_data: processing [data/parsed\parsed-dramaqueen.txt]
prepare_data: processing [data/parsed\parsed-dualtransform.

prepare_data: processing [data/parsed\parsed-shelter.txt]
prepare_data: processing [data/parsed\parsed-sherbet.txt]
prepare_data: processing [data/parsed\parsed-simplethefts.txt]
prepare_data: processing [data/parsed\parsed-sinsagainstmimesis.txt]
prepare_data: processing [data/parsed\parsed-six.txt]
prepare_data: processing [data/parsed\parsed-smittenkittens.txt]
prepare_data: processing [data/parsed\parsed-snacktime.txt]
prepare_data: processing [data/parsed\parsed-softfood.txt]
prepare_data: processing [data/parsed\parsed-sorcerer.txt]
prepare_data: processing [data/parsed\parsed-spring.txt]
prepare_data: processing [data/parsed\parsed-spur.txt]
prepare_data: processing [data/parsed\parsed-ssi.txt]
prepare_data: processing [data/parsed\parsed-ssos.txt]
prepare_data: processing [data/parsed\parsed-starborn.txt]
prepare_data: processing [data/parsed\parsed-statue.txt]
prepare_data: processing [data/parsed\parsed-stewie-escapade.txt]
prepare_data: processing [data/parsed\parsed-stf.txt

In [7]:
print('Unique words found (<UNK>, <EOS> + embeddings):', len(word2idx))
print('Missing words (no embeddings):', len(missing_words))

Unique words found (<UNK>, <EOS> + embeddings): 34160
Missing words (no embeddings): 9120


In [8]:
embedding_matrix = get_embedding_matrix(word2embeddings)

In [14]:
# Define a batch generator
class BatchGenerator(object):            
    def __init__(self, data, batch_size=1):
        self.data = data
        self.batch_size = batch_size
        self.UNK = word2idx['<UNK>']
        self.EOS = word2idx['<EOS>']
        self.PAD = 0
        self.eye = np.eye(len(word2idx))
        
    def generate_batch(self): 
        def window(seq, n=3, step=1):
            "Returns a sliding window (of width n) over data from the iterable"
            "   s -> (s[0],...s[n-1]), (s[0+skip_n],...,s[n-1+skip_n]), ...   "
            it = iter(seq)
            result = tuple(islice(it, n))
            if len(result) == n:
                yield result    

            result = result[step:]
            for elem in it:
                result = result + (elem,)
                if len(result) == n:
                    yield result
                    result = result[step:]
                    
        def to_categorical(sentence):
            return [self.eye[wordidx] for wordidx in sentence]
                    
        # every three lines comprise a sample sequence where the first two items
        # are the input and the last one is the output
        i  = 1 # batch counter        
        x_enc = []
        x_dec = []
        y  = []
        while True:
            for play in self.data:
                j  = 1 # sample counter
                for scene, command, reply in window(play, n=3, step=2):
                    scene_command = scene + command
                    
                    encoder_input  = np.array(scene_command + [self.EOS])
                    decoder_input  = np.array(reply)
                    decoder_output = np.array(to_categorical(reply[1:] + [self.EOS]))
                
                    x_enc.append(encoder_input)
                    x_dec.append(decoder_input)
                    y.append(decoder_output)
                    if i == self.batch_size or j == len(play):
                        if self.batch_size > 1:
                            # pad and return the batch
                            x_enc = sequence.pad_sequences(x_enc, padding='post', value=self.PAD)
                            x_dec = sequence.pad_sequences(x_dec, padding='post', value=self.PAD)
                            y     = sequence.pad_sequences(y,     padding='post', value=self.PAD) 

                        x_out, y_out = [np.array(x_enc.copy()), np.array(x_dec.copy())], np.array(y.copy())

                        i  = 1
                        x_enc = []
                        x_dec = []
                        y  = []

                        yield (x_out, y_out)
                    else:
                        i += 1 # next sample per batch
                    j += 1 # next sample
                    
            # no more data, just stop the generator
            break

In [16]:
# returns train, inference_encoder and inference_decoder models
def define_models(src_vocab_dim, dst_vocab_dim=None, latent_dim=300, mask_value=0, embedding_matrix=None):
    # define training encoder. We use return_state to retrieve the hidden states for the encoder and
    # provide them as input to the decoder
    if dst_vocab_dim is None:
        dst_vocab_dim = src_vocab_dim
        
    encoder_inputs = Input(shape=(None,)) # timesteps, features (one-hot encoding)
    encoder_masking = Masking(mask_value=mask_value)(encoder_inputs)
    
    if embedding_matrix is not None:
        encoder_masking = Embedding(input_dim=src_vocab_dim, output_dim=latent_dim, weights=[embedding_matrix], 
                                   trainable=False)(encoder_masking)
        
    encoder = LSTM(units=latent_dim, return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_masking)
    encoder_states = [state_h, state_c]
    
    # define training decoder. It is initialized with the encoder hidden states
    decoder_inputs = Input(shape=(None,))
    decoder_masking = Masking(mask_value=mask_value)(decoder_inputs)
    
    if embedding_matrix is not None:
        decoder_masking = Embedding(input_dim=src_vocab_dim, output_dim=latent_dim, weights=[embedding_matrix], 
                                   trainable=False)(decoder_masking)
    
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_masking, initial_state=encoder_states)
    decoder_dense = Dense(dst_vocab_dim, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    
    # define inference encoder
    encoder_model = Model(encoder_inputs, encoder_states)
    
    # define inference decoder
    decoder_state_input_h = Input(shape=(latent_dim,))
    decoder_state_input_c = Input(shape=(latent_dim,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(decoder_masking, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
    
    # return all models
    return model, encoder_model, decoder_model

In [12]:
# import keras.backend as K

# def cos_distance(y_true, y_pred):
#     y_true = K.l2_normalize(y_true, axis=-1)
#     y_pred = K.l2_normalize(y_pred, axis=-1)
#     return K.mean(1 - K.sum((y_true * y_pred), axis=-1))

model, encinf, decinf = define_models(src_vocab_dim=vocab_size, embedding_matrix=embedding_matrix)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.summary(line_length=110)

______________________________________________________________________________________________________________
Layer (type)                        Output Shape            Param #      Connected to                         
input_1 (InputLayer)                (None, None)            0                                                 
______________________________________________________________________________________________________________
input_2 (InputLayer)                (None, None)            0                                                 
______________________________________________________________________________________________________________
masking_1 (Masking)                 (None, None)            0            input_1[0][0]                        
______________________________________________________________________________________________________________
masking_2 (Masking)                 (None, None)            0            input_2[0][0]                        
_

In [17]:
batch_generator = BatchGenerator(data, batch_size=16)
model.fit_generator(batch_generator.generate_batch(), steps_per_epoch=1000, epochs=25)

Epoch 1/25
   4/1000 [..............................] - ETA: 39:50 - loss: 4.3611

ResourceExhaustedError: OOM when allocating tensor with shape[16,237,34160] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: training/RMSprop/gradients/loss/dense_1_loss/truediv_grad/Neg = Neg[T=DT_FLOAT, _class=["loc:@loss/dense_1_loss/truediv"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](dense_1/truediv)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: loss/mul/_111 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_3128_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'training/RMSprop/gradients/loss/dense_1_loss/truediv_grad/Neg', defined at:
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tornado\platform\asyncio.py", line 112, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\asyncio\base_events.py", line 421, in run_forever
    self._run_once()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\asyncio\base_events.py", line 1431, in _run_once
    handle._run()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tornado\platform\asyncio.py", line 102, in _handle_events
    handler_func(fileobj, events)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\IPython\core\interactiveshell.py", line 2856, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-0f4ce427ae4c>", line 2, in <module>
    model.fit_generator(batch_generator.generate_batch(), steps_per_epoch=1000, epochs=25)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\engine\training.py", line 2080, in fit_generator
    self._make_train_function()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\engine\training.py", line 990, in _make_train_function
    loss=self.total_loss)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\optimizers.py", line 244, in get_updates
    grads = self.get_gradients(loss, params)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\optimizers.py", line 78, in get_gradients
    grads = K.gradients(loss, params)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\backend\tensorflow_backend.py", line 2515, in gradients
    return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 611, in gradients
    lambda: grad_fn(op, *out_grads))
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 377, in _MaybeCompile
    return grad_fn()  # Exit early
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 611, in <lambda>
    lambda: grad_fn(op, *out_grads))
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\ops\math_grad.py", line 864, in _RealDivGrad
    grad * math_ops.realdiv(math_ops.realdiv(-x, y), y), ry), sy))
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 3123, in _neg
    "Neg", x=x, name=name)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\framework\ops.py", line 3271, in create_op
    op_def=op_def)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\framework\ops.py", line 1650, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op 'loss/dense_1_loss/truediv', defined at:
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
[elided 7 identical lines from previous traceback]
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tornado\ioloop.py", line 760, in _run_callback
    ret = callback()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\IPython\core\interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-fe748bc89ac9>", line 9, in <module>
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\engine\training.py", line 830, in compile
    sample_weight, mask)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\engine\training.py", line 429, in weighted
    score_array = fn(y_true, y_pred)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\losses.py", line 69, in categorical_crossentropy
    return K.categorical_crossentropy(y_true, y_pred)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\keras\backend\tensorflow_backend.py", line 3004, in categorical_crossentropy
    True)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\ops\math_ops.py", line 934, in binary_op_wrapper
    return func(x, y, name=name)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1030, in _truediv_python3
    return gen_math_ops._real_div(x, y, name=name)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 3757, in _real_div
    "RealDiv", x=x, y=y, name=name)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\framework\ops.py", line 3271, in create_op
    op_def=op_def)
  File "C:\Users\daniele\AppData\Local\conda\conda\envs\ml\lib\site-packages\tensorflow\python\framework\ops.py", line 1650, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[16,237,34160] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: training/RMSprop/gradients/loss/dense_1_loss/truediv_grad/Neg = Neg[T=DT_FLOAT, _class=["loc:@loss/dense_1_loss/truediv"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](dense_1/truediv)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: loss/mul/_111 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_3128_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [None]:
from scipy.spatial import distance

def decode_sequence(input_seq, vocab_dim, eos_vector, tol=1e-2, max_output_len=200):
    # Encode the input as state vectors.
    states_value = encinf.predict(input_seq)

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, vocab_dim))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0] = eos_vector

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    i = 0 # number of sampled words
    while not stop_condition:
        output_embedding, h, c = decinf.predict([target_seq] + states_value)
        output_embedding = output_embedding[0,0,:]

        # Exit condition: either hit max length
        # or find stop character.
        eos_distance = distance.cosine(output_embedding, eos_vector)
        if eos_distance < tol or i > max_output_len:
            print(eos_distance, tol)
            stop_condition = True
            
        # Sample a token
        if distance.cosine(output_embedding, unk_vector) < tol:
            sampled_word = unk_vector
        else:
            sampled_word = w2v.most_similar(positive=[output_embedding], topn=1)
        decoded_sentence += sampled_word[0][0] + ' '     

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, vocab_dim))
        target_seq[0, 0] = output_embedding

        # Update states
        states_value = [h, c]
        i += 1

    return decoded_sentence

In [281]:
test_line = 'every muscle in your body strains, and you feel the grinding of faraway pulleys as the portcullis slowly lifts open. at last the heavy machinery catches, and you relax. east'
input_seq = np.array(prepare_input(test_line, word2embeddings))

In [282]:
input_seq = np.reshape(input_seq, (1, 30, 300))
input_seq.shape

(1, 30, 300)

In [283]:
decode_sequence(input_seq, w2v.vector_size, eos_vector)

  dist = 1.0 - uv / np.sqrt(uu * vv)


0.2657773540215731 0.01


'Perrine_Bridge butterflyer butterflyer butterflyer UniCredit UniCredit UniCredit UniCredit UniCredit UniCredit Woodbourne_Correctional_Facility Woodbourne_Correctional_Facility Woodbourne_Correctional_Facility Woodbourne_Correctional_Facility unsuspecting_sockeye_salmon unsuspecting_sockeye_salmon unsuspecting_sockeye_salmon unsuspecting_sockeye_salmon unsuspecting_sockeye_salmon replacing_eager_earmarkers replacing_eager_earmarkers replacing_eager_earmarkers replacing_eager_earmarkers replacing_eager_earmarkers replacing_eager_earmarkers replacing_eager_earmarkers replacing_eager_earmarkers replacing_eager_earmarkers replacing_eager_earmarkers replacing_eager_earmarkers Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett Nicole_Haislett