In [1]:
%matplotlib inline

In [2]:
from keras.models import Model
from keras.layers import Input,  Dense, LSTM, GRU
from keras.layers import Embedding

from keras.utils import plot_model

from keras.optimizers import RMSprop

from keras.callbacks import ModelCheckpoint,EarlyStopping,TensorBoard

Using TensorFlow backend.


In [None]:
vocab_size=200000

In [None]:
word2vec_dim=300

In [3]:
rnn_output_dim = 512

# Encoder

In [4]:
def connect_encoder():
    # Start the neural network with its input-layer.
    encoder_input = Input(shape=(None, ), name='encoder_input')
    
    # Connect the embedding-layer.
    encoder_embeddings = Embedding(input_dim=vocab_size, output_dim=word2vec_dim, name='encoder_embedding')(encoder_input)

    # Connect all the GRU-layers.
    encoder_layer_1 = GRU(rnn_output_dim, name='encoder_layer_1', return_sequences=True)(encoder_embeddings)
    encoder_layer_2 = GRU(rnn_output_dim, name='encoder_layer_2', return_sequences=True)(encoder_layer_1)

    # This is the output of the encoder.
    encoder_output = GRU(rnn_output_dim, name='encoder_output', return_sequences=False)(encoder_layer_2)
    
    return encoder_input, encoder_output

In [5]:
encoder_input, encoder_output = connect_encoder()

Instructions for updating:
Colocations handled automatically by placer.


In [6]:
model_encoder = Model(inputs=[encoder_input], outputs=[encoder_output])

In [7]:
plot_model(model_encoder)

# Decoder

In [8]:
decoder_initial_state = Input(shape=(latent_dim,), name='decoder_initial_state')

In [9]:
def connect_decoder(initial_state, suffix=""):
    # Start the decoder-network with its input-layer.
    decoder_input = Input(shape=(None, ), name='decoder_input_' + suffix)

    # Connect the embedding-layer.
    decoder_embedding = Embedding(input_dim=vocab_size, output_dim=word2vec_dim,  name='decoder_embedding_'+suffix)(decoder_input)
    
    # Connect all the GRU-layers.
    decoder_layer_1 = GRU(rnn_output_dim, name='decoder_layer_1_' + suffix, return_sequences=True)(decoder_embedding, initial_state=initial_state)
    decoder_layer_2 = GRU(rnn_output_dim, name='decoder_layer_2_' + suffix, return_sequences=True)(decoder_layer_1, initial_state=initial_state)
    decoder_layer_3 = GRU(rnn_output_dim, name='decoder_layer_3_' + suffix, return_sequences=True)(decoder_layer_2, initial_state=initial_state)

    # Connect the final dense layer that converts to
    # one-hot encoded arrays.
    decoder_output = Dense(100000, activation='linear', name='decoder_output_'+suffix)(decoder_layer_3)
    
    return decoder_input, decoder_output

In [10]:
decoder_input, decoder_output = connect_decoder(decoder_initial_state)

In [11]:
model_decoder = Model(inputs=[decoder_input, decoder_initial_state], outputs=[decoder_output])

In [12]:
plot_model(model_decoder)

# Model Input and Output

In [13]:
decoder_input_1, decoder_output_1 = connect_decoder(encoder_output,"1")

In [14]:
decoder_input_2, decoder_output_2 = connect_decoder(encoder_output,"2")

In [15]:
decoder_input_3, decoder_output_3 = connect_decoder(encoder_output,"3")

In [17]:
model_train = Model(
    inputs = [encoder_input, decoder_input_1, decoder_input_2, decoder_input_3], 
    outputs = [decoder_output_1,decoder_output_2, decoder_output_3]
)

In [18]:
plot_model(model_train)

# Cost Function

In [19]:
model_train.compile(optimizer=RMSprop(lr=1e-3),
              loss={
                  'decoder_output_1': 'sparse_categorical_crossentropy', 
                  'decoder_output_2': 'sparse_categorical_crossentropy', 
                  'decoder_output_3': 'sparse_categorical_crossentropy' 
              })


# Checkpoints


In [20]:
callback_checkpoint = ModelCheckpoint(filepath="checkpoint", monitor='val_loss', verbose=1, save_weights_only=True, save_best_only=True)

In [21]:
callback_early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

In [22]:
callback_tensorboard = TensorBoard(log_dir='./logs/', histogram_freq=0, write_graph=False)

In [23]:
callbacks = [callback_early_stopping, callback_checkpoint, callback_tensorboard]

# Training

In [25]:
model_train.fit(x=\
          {
              'encoder_input': None, 
              'decoder_input_1': None,
              'decoder_input_2': None,
              'decoder_input_3': None,
          },
          y=\
          {
               'decoder_output_1': None, 
               'decoder_output_2': None, 
               'decoder_output_3': None 
          },
          batch_size=32, 
          epochs=50,
          validation_split=0.2,
          callbacks=callbacks)

AttributeError: 'NoneType' object has no attribute 'shape'

In [None]:
def translate(input_text, true_output_text=None):
    """Translate a single text-string."""

    # Convert the input-text to integer-tokens.
    # Note the sequence of tokens has to be reversed.
    # Padding is probably not necessary.
    input_tokens = tokenizer_src.text_to_tokens(text=input_text, reverse=True,padding=True)
    
    # Get the output of the encoder's GRU which will be
    # used as the initial state in the decoder's GRU.
    # This could also have been the encoder's final state
    # but that is really only necessary if the encoder
    # and decoder use the LSTM instead of GRU because
    # the LSTM has two internal states.
    initial_state = model_encoder.predict(input_tokens)

    # Max number of tokens / words in the output sequence.
    max_tokens = tokenizer_dest.max_tokens

    # Pre-allocate the 2-dim array used as input to the decoder.
    # This holds just a single sequence of integer-tokens,
    # but the decoder-model expects a batch of sequences.
    shape = (1, max_tokens)
    decoder_input_data = np.zeros(shape=shape, dtype=np.int)

    # The first input-token is the special start-token for 'ssss '.
    token_int = token_start

    # Initialize an empty output-text.
    output_text = ''

    # Initialize the number of tokens we have processed.
    count_tokens = 0

    # While we haven't sampled the special end-token for ' eeee'
    # and we haven't processed the max number of tokens.
    while token_int != token_end and count_tokens < max_tokens:
        # Update the input-sequence to the decoder
        # with the last token that was sampled.
        # In the first iteration this will set the
        # first element to the start-token.
        decoder_input_data[0, count_tokens] = token_int

        # Wrap the input-data in a dict for clarity and safety,
        # so we are sure we input the data in the right order.
        x_data = \
        {
            'decoder_initial_state': initial_state,
            'decoder_input': decoder_input_data
        }

        # Note that we input the entire sequence of tokens
        # to the decoder. This wastes a lot of computation
        # because we are only interested in the last input
        # and output. We could modify the code to return
        # the GRU-states when calling predict() and then
        # feeding these GRU-states as well the next time
        # we call predict(), but it would make the code
        # much more complicated.

        # Input this data to the decoder and get the predicted output.
        decoder_output = model_decoder.predict(x_data)

        # Get the last predicted token as a one-hot encoded array.
        token_onehot = decoder_output[0, count_tokens, :]
        
        # Convert to an integer-token.
        token_int = np.argmax(token_onehot)

        # Lookup the word corresponding to this integer-token.
        sampled_word = tokenizer_dest.token_to_word(token_int)

        # Append the word to the output-text.
        output_text += " " + sampled_word

        # Increment the token-counter.
        count_tokens += 1

    # Sequence of tokens output by the decoder.
    output_tokens = decoder_input_data[0]
    
    # Print the input-text.
    print("Input text:")
    print(input_text)
    print()

    # Print the translated output-text.
    print("Translated text:")
    print(output_text)
    print()

    # Optionally print the true translated text.
    if true_output_text is not None:
        print("True output text:")
        print(true_output_text)
        print()