In [2]:
# importing libraries
from preprocessing import *

from tensorflow import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model

import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [3]:
latent_dim = 256
batch_size = 64
epochs = 50

In [4]:
# encoder training setup
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_hidden, state_cell = encoder_lstm(encoder_inputs)
encoder_states = [state_hidden, state_cell]

In [5]:
# decoder training setup:
decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, decoder_state_hidden, decoder_state_cell = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [6]:
# defining the model
training_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

print("Model summary:\n")
training_model.summary()
print("\n\n")

Model summary:

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, 434)    0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None, 756)    0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, 256), (None, 707584      input_1[0][0]                    
__________________________________________________________________________________________________
lstm_2 (LSTM)                   [(None, None, 256),  1037312     input_2[0][0]                    
                                                                 lstm_1[0][1

In [7]:
training_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [8]:
# training the model
training_model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size = batch_size, epochs = epochs, validation_split = 0.2)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 800 samples, validate on 201 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x7fe6b531afd0>

In [9]:
training_model.save('training_model.h5')

In [10]:
enc_inp = training_model.input[0]
enc_out, state_h_enc, state_c_enc = training_model.layers[2].output
enc_states = [state_h_enc, state_c_enc]

enc_model = Model(enc_inp, enc_states)

In [11]:
dec_state_inp_h = Input(shape=(latent_dim, ))
dec_state_inp_c = Input(shape=(latent_dim, ))
dec_state_inp = [dec_state_inp_h, dec_state_inp_c]
dec_out, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=dec_state_inp)
dec_states = [state_h, state_c]
dec_out = decoder_dense(dec_out)

dec_model = Model([decoder_inputs] + dec_state_inp, [dec_out] + dec_states)

In [16]:
def decode_seq(test_input):
    # encoding input as state vectors
    states_v = enc_model.predict(test_input)
    
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    target_seq[0, 0, target_features_dict['<START>']] = 1.
    
    decoded_sent = ''
    
    stop = False
    while not stop:
        out_tokens, h_state, c_state = dec_model.predict([target_seq] + states_v)
        s_token_idx = np.argmax(out_tokens[0, -1, :])
        s_token = reverse_target_features_dict[s_token_idx]
        decoded_sent += ' ' + s_token
        
        if s_token == '<END>' or len(decoded_sent) > max_decoder_seq_length:
            stop = True
            
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, s_token_idx] = 1.
        
        states_v = [h_state, c_state]
    return decoded_sent

In [17]:
for seq_index in range(20):
  test_input = encoder_input_data[seq_index: seq_index + 1]
  decoded_sentence = decode_seq(test_input)
  print('-')
  print('Input sentence:', input_docs[seq_index])
  print('Decoded sentence:', decoded_sentence)

-
Input sentence: Go.
Decoded sentence:  Komm raus . <END>
-
Input sentence: Hi.
Decoded sentence:  Warte ! <END>
-
Input sentence: Hi.
Decoded sentence:  Warte ! <END>
-
Input sentence: Run!
Decoded sentence:  ! <END>
-
Input sentence: Run.
Decoded sentence:  Lauf ! <END>
-
Input sentence: Wow!
Decoded sentence:  Fantastisch !
-
Input sentence: Wow!
Decoded sentence:  Fantastisch !
-
Input sentence: Fire!
Decoded sentence:  Feuer ! <END>
-
Input sentence: Help!
Decoded sentence:  Nicht ! <END>
-
Input sentence: Help!
Decoded sentence:  Nicht ! <END>
-
Input sentence: Stop!
Decoded sentence:  Nicht ! <END>
-
Input sentence: Wait!
Decoded sentence:  Warte ! <END>
-
Input sentence: Wait.
Decoded sentence:  Warte nicht !
-
Input sentence: Begin.
Decoded sentence:  Ruf mich an .
-
Input sentence: Go on.
Decoded sentence:  Mach dich ! <END>
-
Input sentence: Hello!
Decoded sentence:  Warte ! <END>
-
Input sentence: Hurry!
Decoded sentence:  Beeil ! <END>
-
Input sentence: Hurry!
Decoded sen