In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Change into the directory that contains the "data" folder (replace the placeholder below with its path).
%cd <path-to-data-folder>


Mounted at /content/drive
/content/drive/My Drive/DLE_assignment/DLE_assignment


## English to French translation

We start with the Vanilla Seq2Seq model and try out a few architectural changes to achieve better performance.

The Seq2Seq model is introduced in this publication: https://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf. 

### SOLUTION :

**Starting point**

1. Vanilla Seq2Seq model with a single layer, implemented using LSTM cells.

**Changes made and things tried out :**

1. Two Seq2Seq models using **LSTM and GRU models** have been trained. 
2. Bidirectional RNNs have been used and have proven to be more effective. **Only the encoders have been made bidirectional.**
3. Deeper LSTM/GRU models have been used to create the Seq2Seq network. Specifically, **3 layer networks** have been created for both LSTM and GRU based frameworks.
4. Initialize LSTM/GRU units with a **uniform distribution** between -0.08 and 0.08 (according to Seq2Seq paper). 
5. Using ReLU activation function instead of tanh **did not help.**
6. **Input string was reversed** when feeding to the encoder (according to Seq2Seq paper).
7. **Using BLEU score to quantify performance.** The reported score is the average of the BLEU scores of 1000 randomly sampled sentences from the dataset.

**Observations:**

1. Using dropout results in a lower validation error, but doesn't yield a significantly better BLEU score.
2. LSTM (BLEU: ~0.36) gives a better performance compared to GRU (BLEU: ~0.32). 
3. The saved weights cover three scenarios: a) a plain bidirectional RNN, b) a bidirectional RNN with reversed input strings and dropout, and c) a bidirectional RNN with reversed input strings and no dropout.

**Note:**

Attention mechanism has **not** been used because of the huge increase in computational complexity and little increase in performance, especially since this is a Character-level RNN. 

**################################################################**

### DATA PREPROCESS

**################################################################**

In [None]:
import random
import numpy as np

In [None]:
# TRAINING DETAILS

batch_size = 64       # Batch size for training.
epochs = 30           # Number of epochs to train for.
latent_dim = 256      # Latent dimensionality of the encoding space.
num_samples = 10000   # Number of samples to train on.
data_path = './data/fra.txt' # Path to the data txt file on disk.

# OPEN FILE AND READ SENTENCES
# Each usable line holds "<english>\t<french>"; at most `num_samples` lines are read.

input_texts = []
target_texts = []
input_characters = set()
target_characters = set()

with open(data_path, 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')
for line in lines[: min(num_samples, len(lines) - 1)]:
    # Use only the first two tab-separated fields instead of a strict
    # two-value unpack, so a line carrying extra columns (some distributions
    # of this corpus reportedly append an attribution field) no longer raises
    # ValueError. Lines without any tab (e.g. blank lines) are skipped.
    fields = line.split('\t')
    if len(fields) < 2:
        continue
    input_text, target_text = fields[0], fields[1]
    input_text = input_text[::-1] # reverse input sequence

    # We use "tab" as the "start sequence" character
    # for the targets, and "\n" as "end sequence" character.
    target_text = '\t' + target_text + '\n'
    input_texts.append(input_text)
    target_texts.append(target_text)
    # A set ignores duplicates, so no membership test is needed.
    input_characters.update(input_text)
    target_characters.update(target_text)

# From here on the two names hold sorted lists; a character's position in the
# list is used as its token id further down.
input_characters = sorted(input_characters)
target_characters = sorted(target_characters)
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max(len(txt) for txt in input_texts)
max_decoder_seq_length = max(len(txt) for txt in target_texts)

# PRINT DATA DETAILS

print('Number of samples:', len(input_texts))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)

# SET UP DATA

# Character -> token id, following the order of the sorted character lists.
input_token_index = {char: idx for idx, char in enumerate(input_characters)}
target_token_index = {char: idx for idx, char in enumerate(target_characters)}

# Zero-initialised one-hot tensors; timesteps past the end of a sentence stay all-zero.
_encoder_shape = (len(input_texts), max_encoder_seq_length, num_encoder_tokens)
_decoder_shape = (len(input_texts), max_decoder_seq_length, num_decoder_tokens)

encoder_input_data = np.zeros(_encoder_shape, dtype='float32')
decoder_input_data = np.zeros(_decoder_shape, dtype='float32')
decoder_target_data = np.zeros(_decoder_shape, dtype='float32')

for row, (source, target) in enumerate(zip(input_texts, target_texts)):
    for step, ch in enumerate(source):
        encoder_input_data[row, step, input_token_index[ch]] = 1.
    # The decoder input is the full target, start character included.
    for step, ch in enumerate(target):
        decoder_input_data[row, step, target_token_index[ch]] = 1.
    # The decoder target is the same text shifted one step earlier,
    # i.e. without the start character.
    for step, ch in enumerate(target[1:]):
        decoder_target_data[row, step, target_token_index[ch]] = 1.

# REVERSE LOOKUP TOKENS

# Token id -> character, used to turn predicted ids back into text.
reverse_input_char_index = {idx: char for char, idx in input_token_index.items()}
reverse_target_char_index = {idx: char for char, idx in target_token_index.items()}

Number of samples: 10000
Number of unique input tokens: 71
Number of unique output tokens: 94
Max sequence length for inputs: 16
Max sequence length for outputs: 59


**################################################################**

### LSTM BASED MODEL

**################################################################**

In [None]:
import keras # 2.2.4
from keras.models import Model, load_model
from keras.layers import Input, LSTM, GRU, Dense
from keras.layers import Bidirectional, Concatenate
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.optimizers import RMSprop
from keras.layers import Lambda
from keras import backend as K
from keras.initializers import RandomUniform
import nltk

#### Model

In [None]:
# # Define an input sequence and process it.
# encoder_inputs = Input(shape=(None, num_encoder_tokens))
# encoder = LSTM(latent_dim, return_state=True)
# encoder_outputs, state_h, state_c = encoder(encoder_inputs)

# # We discard `encoder_outputs` and only keep the states.
# encoder_states = [state_h, state_c]

# # Set up the decoder, using `encoder_states` as initial state.
# decoder_inputs = Input(shape=(None, num_decoder_tokens))
# # We set up our decoder to return full output sequences,
# # and to return internal states as well. We don't use the
# # return states in the training model, but we will use them in inference.
# decoder = LSTM(latent_dim, return_sequences=True, return_state=True)
# decoder_outputs, _, _ = decoder(decoder_inputs, initial_state=encoder_states)
# decoder_dense = Dense(num_decoder_tokens, activation='softmax')# ,kernel_regularizer=rglz.l2(0.01))
# decoder_outputs = decoder_dense(decoder_outputs)

# # Define the model that will turn
# # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
# model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

#######################################
### Stacked Bidirectional LSTM model
#######################################

# Define all layers.
def _lstm_uniform_init():
    """Return a new RandomUniform(-0.08, 0.08) kernel initializer (one per layer)."""
    return RandomUniform(minval=-0.08, maxval=0.08, seed=None)


def _bilstm_layer():
    """Build one bidirectional LSTM encoder layer returning sequences and states."""
    return Bidirectional(
        LSTM(latent_dim,
             return_state=True,
             return_sequences=True,
             kernel_initializer=_lstm_uniform_init()),
        merge_mode='concat')


def _lstm_layer():
    """Build one decoder LSTM layer with latent_dim*2 units (matches the concatenated encoder states)."""
    return LSTM(latent_dim*2,
                return_sequences=True,
                return_state=True,
                kernel_initializer=_lstm_uniform_init())


def _merge_lstm_states(forward_state, backward_state):
    """Concatenate the forward and backward state of one bidirectional layer."""
    return Concatenate()([forward_state, backward_state])


encoder_inputs = Input(shape=(None, num_encoder_tokens))

encoder_1, encoder_2, encoder_3 = _bilstm_layer(), _bilstm_layer(), _bilstm_layer()

decoder_inputs = Input(shape=(None, num_decoder_tokens))

decoder_1, decoder_2, decoder_3 = _lstm_layer(), _lstm_layer(), _lstm_layer()

decoder_dense = Dense(num_decoder_tokens, activation='softmax')

# Encoder: three stacked bidirectional layers, each fed the previous layer's sequence output.
encoder_outputs, ehf1, ecf1, ehb1, ecb1 = encoder_1(encoder_inputs)
encoder_outputs, ehf2, ecf2, ehb2, ecb2 = encoder_2(encoder_outputs)
encoder_outputs, ehf3, ecf3, ehb3, ecb3 = encoder_3(encoder_outputs)

# Per layer, join the forward/backward hidden (h) and cell (c) states.
eh1 = _merge_lstm_states(ehf1, ehb1)
ec1 = _merge_lstm_states(ecf1, ecb1)
eh2 = _merge_lstm_states(ehf2, ehb2)
ec2 = _merge_lstm_states(ecf2, ecb2)
eh3 = _merge_lstm_states(ehf3, ehb3)
ec3 = _merge_lstm_states(ecf3, ecb3)

encoder_states = [eh1, ec1, eh2, ec2, eh3, ec3]

# Decoder: layer k starts from the merged states of encoder layer k.
# Only the sequence output is kept here; the returned states are not used for training.
decoder_outputs = decoder_1(decoder_inputs, initial_state=[eh1, ec1])[0]
decoder_outputs = decoder_2(decoder_outputs, initial_state=[eh2, ec2])[0]
decoder_outputs = decoder_3(decoder_outputs, initial_state=[eh3, ec3])[0]

decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)



Instructions for updating:
Colocations handled automatically by placer.


#### Training Phase - Run only when training model. 

In [None]:
# from keras import optimizers
# sgd = optimizers.SGD(lr=0.7, clipnorm=5.)
# model.compile(optimizer=sgd, loss='categorical_crossentropy')

# Train the stacked bidirectional LSTM model with teacher forcing:
# the inputs are [encoder one-hots, decoder one-hots] and the target is the
# decoder sequence shifted by one timestep.
model.compile(optimizer=RMSprop(), loss='categorical_crossentropy')

model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          # Use the shared `epochs` setting (30) rather than a hard-coded
          # literal, so this cell follows the configuration like the GRU cell.
          epochs=epochs,
          # Both callbacks run with their default monitored quantity.
          # Note that 10e-6 is the same number as 1e-5.
          callbacks=[EarlyStopping(patience=3, verbose=1), ReduceLROnPlateau(patience=1, min_lr=10e-6, verbose=1)],
          # The last 20% of the samples are held out for validation.
          validation_split=0.2)

model.save_weights('my_trained_model_BiLSTM_rev_noDropout.h5')

####  Inference phase

1. Encode input and retrieve initial decoder state
2. Run one step of decoder with this initial state and a "start of sequence" token as target. Output will be the next target token
3. Repeat with the current target token and current states

In [None]:
# model = load_model('my_trained_model_LSTM.h5')
# model = load_model('my_trained_model_BiLSTM.h5')
# Load saved weights from 'BiLSTM_rev.h5' into the current `model`.
# NOTE(review): the training cell saves to
# 'my_trained_model_BiLSTM_rev_noDropout.h5', not to this file name — confirm
# which checkpoint is meant to be evaluated here.
model.load_weights('BiLSTM_rev.h5')

In [None]:
# def decode_sequence(input_seq, encoder_model, decoder_model):
#     # Encode the input as state vectors.
#     states_value = encoder_model.predict(input_seq)

#     # Generate empty target sequence of length 1.
#     target_seq = np.zeros((1, 1, num_decoder_tokens))
#     # Populate the first character of target sequence with the start character.
#     target_seq[0, 0, target_token_index['\t']] = 1.

#     # Sampling loop for a batch of sequences
#     # (to simplify, here we assume a batch of size 1).
#     stop_condition = False
#     decoded_sentence = ''
#     while not stop_condition:
#         output_tokens, h, c = decoder_model.predict(
#             [target_seq] + states_value)

#         # Sample a token
#         sampled_token_index = np.argmax(output_tokens[0, -1, :])
#         sampled_char = reverse_target_char_index[sampled_token_index]
#         decoded_sentence += sampled_char

#         # Exit condition: either hit max length
#         # or find stop character.
#         if (sampled_char == '\n' or
#            len(decoded_sentence) > max_decoder_seq_length):
#             stop_condition = True

#         # Update the target sequence (of length 1).
#         target_seq = np.zeros((1, 1, num_decoder_tokens))
#         target_seq[0, 0, sampled_token_index] = 1.

#         # Update states
#         states_value = [h, c]

#     return decoded_sentence


##############################################
### STACKED DECODE SEQUENCE BIDIRECTIONAL
##############################################

def decode_sequence(input_seq, encoder_model, decoder_model):
    """Greedily decode one encoded input sequence into a target string.

    The encoder is run once to obtain the initial decoder states. The decoder
    is then run one character at a time, starting from the '\\t' start
    character and feeding back the most probable character each step.

    The number of recurrent states is not fixed: whatever the decoder returns
    after the token probabilities is passed back in on the next step. This
    covers the six-state stacked LSTM decoder as well as decoders with a
    different number of states.

    Args:
        input_seq: one-hot encoded input for a single sentence (batch of
            size 1), as accepted by ``encoder_model.predict``.
        encoder_model: object whose ``predict(input_seq)`` returns the initial
            decoder state(s), either a list of arrays or a single array.
        decoder_model: object whose ``predict([target_seq] + states)`` returns
            ``[output_tokens, *new_states]``.

    Returns:
        The decoded string. It ends with '\\n' when the end character was
        sampled; otherwise decoding stops once the string is longer than
        ``max_decoder_seq_length`` characters.
    """
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)
    # A single-output encoder yields a bare array; normalise to a list so it
    # can be appended to the decoder inputs below.
    if isinstance(states_value, (list, tuple)):
        states_value = list(states_value)
    else:
        states_value = [states_value]

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0, target_token_index['\t']] = 1.

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        # First output: token probabilities; remaining outputs: updated states.
        output_tokens, *states_value = decoder_model.predict(
            [target_seq] + states_value)

        # Sample a token (greedy: highest probability at the last timestep).
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char

        # Exit condition: either hit max length
        # or find stop character.
        if (sampled_char == '\n' or
           len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

    return decoded_sentence

In [None]:
# Define sampling models

# encoder_model = Model(encoder_inputs, encoder_states)

# print(decoder_inputs.shape)

# decoder_state_input_h = Input(shape=(latent_dim,))
# decoder_state_input_c = Input(shape=(latent_dim,))
# decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
# print(decoder_states_inputs[0].shape, decoder_states_inputs[1].shape)

# decoder_outputs, state_h, state_c = decoder(
#     decoder_inputs, initial_state=decoder_states_inputs)
# print(decoder_outputs.shape)

# decoder_states = [state_h, state_c]
# print(decoder_states[0].shape, decoder_states[1].shape)

# decoder_outputs = decoder_dense(decoder_outputs)
# print(decoder_outputs.shape)

# decoder_model = Model(
#     [decoder_inputs] + decoder_states_inputs,
#     [decoder_outputs] + decoder_states)


# #######################################################
# ### STACKED LSTM INFERENCE
# #######################################################

# encoder_model = Model(encoder_inputs, encoder_states)

# decoder_state_input_h1 = Input(shape=(latent_dim,))
# decoder_state_input_c1 = Input(shape=(latent_dim,))
# decoder_state_input_h2 = Input(shape=(latent_dim,))
# decoder_state_input_c2 = Input(shape=(latent_dim,))
# decoder_state_input_h3 = Input(shape=(latent_dim,))
# decoder_state_input_c3 = Input(shape=(latent_dim,))

# decoder_states_inputs = [decoder_state_input_h1, decoder_state_input_c1,
#                         decoder_state_input_h2, decoder_state_input_c2,
#                         decoder_state_input_h3, decoder_state_input_c3]


# decoder_outputs, dh1, dc1 = decoder_1(decoder_inputs, initial_state=decoder_states_inputs[0:2])
# decoder_outputs, dh2, dc2 = decoder_2(decoder_outputs, initial_state=decoder_states_inputs[2:4])
# decoder_outputs, dh3, dc3 = decoder_3(decoder_outputs, initial_state=decoder_states_inputs[4:])                            

# decoder_states = [dh1, dc1, dh2, dc2, dh3, dc3]


# decoder_outputs = decoder_dense(decoder_outputs)

# decoder_model = Model(
#     [decoder_inputs] + decoder_states_inputs,
#     [decoder_outputs] + decoder_states)


#######################################################
### STACKED LSTM INFERENCE BIDIRECTIONAL
#######################################################

# Encoder used at inference time: input sequence -> the six merged states.
encoder_model = Model(encoder_inputs, encoder_states)

# One placeholder per decoder state, created in (h, c) order for layers 1..3.
# Each has latent_dim*2 features, matching the concatenated encoder states.
_lstm_state_shape = (latent_dim*2,)
decoder_states_inputs = [Input(shape=_lstm_state_shape) for _ in range(6)]
(decoder_state_input_h1, decoder_state_input_c1,
 decoder_state_input_h2, decoder_state_input_c2,
 decoder_state_input_h3, decoder_state_input_c3) = decoder_states_inputs

# Re-apply the trained decoder layers, this time also keeping the states they
# return so they can be passed back in on the next decoding step.
decoder_outputs, dh1, dc1 = decoder_1(
    decoder_inputs,
    initial_state=[decoder_state_input_h1, decoder_state_input_c1])
decoder_outputs, dh2, dc2 = decoder_2(
    decoder_outputs,
    initial_state=[decoder_state_input_h2, decoder_state_input_c2])
decoder_outputs, dh3, dc3 = decoder_3(
    decoder_outputs,
    initial_state=[decoder_state_input_h3, decoder_state_input_c3])

decoder_states = [dh1, dc1, dh2, dc2, dh3, dc3]

decoder_outputs = decoder_dense(decoder_outputs)

# Inputs: [one-step target, six states]; outputs: [token probabilities, six new states].
decoder_model = Model(
    inputs=[decoder_inputs] + decoder_states_inputs,
    outputs=[decoder_outputs] + decoder_states)

####  Test sequence

In [None]:
# Evaluation set: three hand-written sentences followed by a random draw from
# the loaded corpus. Input texts are written reversed because the corpus
# inputs were reversed when they were loaded.
# The draw is capped at the corpus size so that loading fewer than 1000 pairs
# does not make random.sample raise ValueError.
sample_idx = random.sample(range(len(input_texts)), min(1000, len(input_texts)))
test_texts = ['.sknahT', '.olleH', '?uoy era woH'] + [input_texts[i] for i in sample_idx]
test_outputs = ['Merci.', 'Bonjour.', 'Comment vas-tu?'] + [target_texts[i] for i in sample_idx]

def encode_texts_to_1hot_seq(input_texts):
    """One-hot encode a list of strings for the encoder.

    Args:
        input_texts: sequence of strings; every character must be a key of
            ``input_token_index``.

    Returns:
        float32 array of shape
        ``(len(input_texts), max_encoder_seq_length, num_encoder_tokens)``.
        Position ``[i, t, k]`` is 1 when character ``t`` of text ``i`` has
        token id ``k``; all other entries are 0.
    """
    one_hot = np.zeros(
        (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
        dtype='float32')
    for row, sentence in enumerate(input_texts):
        # Token id of every character, then one paired assignment per sentence.
        token_ids = np.fromiter(
            (input_token_index[ch] for ch in sentence), dtype=np.intp)
        one_hot[row, np.arange(len(token_ids)), token_ids] = 1.
    return one_hot

# Average sentence-level BLEU (unigram and bigram, equally weighted) over the
# evaluation sentences.
i=0
BLEUscore=0

for test_text, test_output in zip(test_texts, test_outputs):

    # Take one sequence (part of the training set) for trying out decoding.
    input_seq = encode_texts_to_1hot_seq([test_text])
    decoded_sentence = decode_sequence(input_seq, encoder_model, decoder_model)

    # BLEU score
    # sentence_bleu takes (references, hypothesis): the ground-truth
    # translation is the reference and the decoded text is the hypothesis.
    # Both strings are stripped first so the '\t' start marker and '\n' end
    # marker do not stay attached to the first / last word and spoil matches.
    reference_tokens = test_output.strip().split(" ")
    hypothesis_tokens = decoded_sentence.strip().split(" ")
    BLEUscore += nltk.translate.bleu_score.sentence_bleu([reference_tokens], hypothesis_tokens, weights = [0.5, 0.5, 0, 0]) #(0.5, 0.5)
    i+=1

#     print('---------------------------------------------------------')
#     print('Input sentence: ', test_text[::-1])
#     print('Decoded sentence: ', decoded_sentence)
#     print('Actual translation: ', test_output)


print("The BLEU score for this dataset is: ", BLEUscore*1.0/i)

**################################################################**

### GRU BASED MODEL

**################################################################**

In [None]:
import keras # 2.2.4
from keras.models import Model, load_model
from keras.layers import Input, LSTM, GRU, Dense
from keras.layers import Bidirectional, Concatenate
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.optimizers import RMSprop
from keras.layers import Lambda
from keras import backend as K
from keras.initializers import RandomUniform
import nltk

#### Model

In [None]:
#######################################
### Stacked Bidirectional GRU model
#######################################

# Define all layers.
def _gru_uniform_init():
    """Return a new RandomUniform(-0.08, 0.08) kernel initializer (one per layer)."""
    return RandomUniform(minval=-0.08, maxval=0.08, seed=None)


def _bigru_layer():
    """Build one bidirectional GRU encoder layer returning sequences and states."""
    return Bidirectional(
        GRU(latent_dim,
            return_state=True,
            return_sequences=True,
            kernel_initializer=_gru_uniform_init()),
        merge_mode='concat')


def _gru_layer():
    """Build one decoder GRU layer with latent_dim*2 units (matches the concatenated encoder states)."""
    return GRU(latent_dim*2,
               return_sequences=True,
               return_state=True,
               kernel_initializer=_gru_uniform_init())


def _merge_gru_states(forward_state, backward_state):
    """Concatenate the forward and backward hidden state of one bidirectional layer."""
    return Concatenate()([forward_state, backward_state])


encoder_inputs = Input(shape=(None, num_encoder_tokens))

encoder_1, encoder_2, encoder_3 = _bigru_layer(), _bigru_layer(), _bigru_layer()

decoder_inputs = Input(shape=(None, num_decoder_tokens))

decoder_1, decoder_2, decoder_3 = _gru_layer(), _gru_layer(), _gru_layer()

decoder_dense = Dense(num_decoder_tokens, activation='softmax')

# Encoder: three stacked bidirectional layers; a GRU carries a single hidden
# state per direction, so each layer returns (sequence, forward h, backward h).
encoder_outputs, ehf1, ehb1 = encoder_1(encoder_inputs)
encoder_outputs, ehf2, ehb2 = encoder_2(encoder_outputs)
encoder_outputs, ehf3, ehb3 = encoder_3(encoder_outputs)

eh1 = _merge_gru_states(ehf1, ehb1)
eh2 = _merge_gru_states(ehf2, ehb2)
eh3 = _merge_gru_states(ehf3, ehb3)

encoder_states = [eh1, eh2, eh3]


# Decoder: layer k starts from the merged state of encoder layer k.
# Only the sequence output is kept here; the returned state is not used for training.
decoder_outputs = decoder_1(decoder_inputs, initial_state=eh1)[0]
decoder_outputs = decoder_2(decoder_outputs, initial_state=eh2)[0]
decoder_outputs = decoder_3(decoder_outputs, initial_state=eh3)[0]

decoder_outputs = decoder_dense(decoder_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)


#### Training Phase - Run only when training model. 

In [None]:
# Train the stacked bidirectional GRU model with teacher forcing and persist
# the resulting weights.
model.compile(loss='categorical_crossentropy', optimizer=RMSprop())

# Stop after 3 epochs without improvement; cut the learning rate after 1
# (never below min_lr). Both callbacks use their default monitored quantity.
_gru_training_callbacks = [
    EarlyStopping(patience=3, verbose=1),
    ReduceLROnPlateau(patience=1, min_lr=10e-6, verbose=1),
]

model.fit(
    [encoder_input_data, decoder_input_data],
    decoder_target_data,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=_gru_training_callbacks,
    validation_split=0.2,
)

model.save_weights('BiGRU_rev.h5')

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 8000 samples, validate on 2000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/30
Epoch 10/30

Epoch 00010: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 11/30

Epoch 00011: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 12/30
Epoch 00012: early stopping


#### Inference phase

1. Encode input and retrieve initial decoder state
2. Run one step of decoder with this initial state and a "start of sequence" token as target. Output will be the next target token
3. Repeat with the current target token and current states

In [None]:
# Load the GRU weights from 'BiGRU_rev.h5' (the file name the GRU training
# cell saves to) into the current `model`, so inference can run without
# retraining.
model.load_weights('BiGRU_rev.h5')

In [None]:
def decode_sequence_gru(input_seq, encoder_model, decoder_model):
    """Greedily decode one encoded input sequence with the three-layer GRU decoder.

    Args:
        input_seq: one-hot encoded input for a single sentence (batch of
            size 1), as accepted by ``encoder_model.predict``.
        encoder_model: object whose ``predict(input_seq)`` returns the list of
            initial decoder states.
        decoder_model: object whose ``predict([target_seq] + states)`` returns
            ``[output_tokens, h1, h2, h3]``.

    Returns:
        The decoded string. It ends with '\\n' when the end character was
        sampled; otherwise decoding stops once the string is longer than
        ``max_decoder_seq_length`` characters.
    """
    def _one_hot_step(token_id):
        # Length-1 target sequence with a single character switched on.
        step = np.zeros((1, 1, num_decoder_tokens))
        step[0, 0, token_id] = 1.
        return step

    # Initial decoder states come from the encoder.
    hidden_states = encoder_model.predict(input_seq)

    # Decoding starts from the start-of-sequence character.
    target_seq = _one_hot_step(target_token_index['\t'])

    # Batch size 1 is assumed throughout.
    decoded_sentence = ''
    while True:
        output_tokens, h1, h2, h3 = decoder_model.predict(
            [target_seq] + hidden_states)

        # Greedy choice: most probable token at the last timestep.
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char

        # Feed the sampled character and the new states into the next step.
        target_seq = _one_hot_step(sampled_token_index)
        hidden_states = [h1, h2, h3]

        # Finish on the end character or once the length limit is exceeded.
        if sampled_char == '\n' or len(decoded_sentence) > max_decoder_seq_length:
            return decoded_sentence

In [None]:
# Encoder used at inference time: input sequence -> the three merged hidden states.
encoder_model = Model(encoder_inputs, encoder_states)

# One placeholder per decoder layer state, each with latent_dim*2 features.
decoder_states_inputs = [Input(shape=(latent_dim*2,)) for _ in range(3)]
(decoder_state_input_h1,
 decoder_state_input_h2,
 decoder_state_input_h3) = decoder_states_inputs

# Re-apply the trained decoder layers, keeping the returned states so they can
# be passed back in on the next decoding step.
decoder_outputs, dh1 = decoder_1(decoder_inputs, initial_state=decoder_state_input_h1)
decoder_outputs, dh2 = decoder_2(decoder_outputs, initial_state=decoder_state_input_h2)
decoder_outputs, dh3 = decoder_3(decoder_outputs, initial_state=decoder_state_input_h3)

decoder_states = [dh1, dh2, dh3]

decoder_outputs = decoder_dense(decoder_outputs)

# Inputs: [one-step target, three states]; outputs: [token probabilities, three new states].
decoder_model = Model(
    inputs=[decoder_inputs] + decoder_states_inputs,
    outputs=[decoder_outputs] + decoder_states)

#### Test Sequences

In [None]:
# Evaluation set: three hand-written sentences followed by a random draw from
# the loaded corpus. Input texts are written reversed because the corpus
# inputs were reversed when they were loaded.
# The draw is capped at the corpus size so that loading fewer than 1000 pairs
# does not make random.sample raise ValueError.
sample_idx = random.sample(range(len(input_texts)), min(1000, len(input_texts)))
# '.olleH' is "Hello." reversed (the previous '.oellH' was a typo).
test_texts = ['.sknahT', '.olleH', '?uoy era woH'] + [input_texts[i] for i in sample_idx]
test_outputs = ['Merci.', 'Bonjour.', 'Comment vas-tu?'] + [target_texts[i] for i in sample_idx]

def encode_texts_to_1hot_seq(input_texts):
    """One-hot encode a list of strings for the encoder.

    Args:
        input_texts: sequence of strings; every character must be a key of
            ``input_token_index``.

    Returns:
        float32 array of shape
        ``(len(input_texts), max_encoder_seq_length, num_encoder_tokens)``
        with a 1 at ``[text index, character position, token id]`` for every
        character and 0 elsewhere.
    """
    batch_shape = (len(input_texts), max_encoder_seq_length, num_encoder_tokens)
    encoded = np.zeros(batch_shape, dtype='float32')
    for text_no, sentence in enumerate(input_texts):
        for position in range(len(sentence)):
            token_id = input_token_index[sentence[position]]
            encoded[text_no, position, token_id] = 1.
    return encoded

# Average sentence-level BLEU (unigram and bigram, equally weighted) over the
# evaluation sentences.
i=0
BLEUscore=0

for test_text, test_output in zip(test_texts, test_outputs):

    # Take one sequence (part of the training set) for trying out decoding.
    input_seq = encode_texts_to_1hot_seq([test_text])
    decoded_sentence = decode_sequence_gru(input_seq, encoder_model, decoder_model)

    # BLEU score
    # sentence_bleu takes (references, hypothesis): the ground-truth
    # translation is the reference and the decoded text is the hypothesis.
    # Both strings are stripped first so the '\t' start marker and '\n' end
    # marker do not stay attached to the first / last word and spoil matches.
    reference_tokens = test_output.strip().split(" ")
    hypothesis_tokens = decoded_sentence.strip().split(" ")
    BLEUscore += nltk.translate.bleu_score.sentence_bleu([reference_tokens], hypothesis_tokens, weights = [0.5, 0.5, 0, 0]) #(0.5, 0.5)
    i+=1

#     print('---------------------------------------------------------')
#     print('Input sentence: ', test_text[::-1])
#     print('Decoded sentence: ', decoded_sentence)
#     print('Actual translation: ', test_output)
#     print('BLEU score is: ', BLEUscore)


print("The BLEU score for this dataset is: ", BLEUscore*1.0/i)