## Import packages
* numpy - package for scientific computing with Python

In [1]:
import numpy as np

## Import keras packages
* Model - Import the functional API to define layers as functions
* Input - the input layer for the models defined using functional api
* LSTM - the Long Short-Term Memory RNN layer
* plot_model - module provides utility functions to plot a Keras model
* TensorBoard - TensorBoard is a visualization tool provided with TensorFlow.
* ModelCheckpoint - Saves the model after every epoch.

In [2]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras.utils import plot_model
from keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Define model related constants
* Batch size for training.
* Number of epochs to train for.
* Latent dimensionality of the encoding space.
* Number of samples to train on.
* Path to the data txt file on disk. (Downloadable from http://www.manythings.org/anki/)

In [3]:
batch_size = 64  

In [4]:
epochs = 100  

In [5]:
latent_dimension = 256  

In [6]:
num_samples = 10000  

In [7]:
data_path = 'fra-eng/fra.txt'

## Vectorize the input
* Define lists and sets to hold the input and target words and characters respectively.
* Open the file from the data path and read the contents into a list, splitting each line at new line.
* Iterate over the contents of the list and split each line into input and target text using tab as the start sequence and \n as the end sequence.
* If num_samples < length of lines, we iterate until the end of the list. Otherwise over the first 10,000 lines.
* For each character in the input and target text, add them to the sets we defined unless they alread exist.
* Sort the input and target characters alphabetically.The sorted() method sorts the elements of a given iterable in a specific order.

In [8]:
input_texts = []
target_texts = []

In [9]:
input_characters = set()
target_characters = set()

In [10]:
with open(data_path, 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')

In [11]:
for line in lines[: min(num_samples, len(lines) - 1)]:
    
    input_text, target_text = line.split('\t')
    target_text = '\t' + target_text + '\n'
    
    input_texts.append(input_text)
    target_texts.append(target_text)
    
    for char in input_text:
        if char not in input_characters:
            input_characters.add(char)
            
    for char in target_text:
        if char not in target_characters:
            target_characters.add(char)

In [12]:
len(input_texts)

10000

In [13]:
input_texts

['Go.',
 'Run!',
 'Run!',
 'Wow!',
 'Fire!',
 'Help!',
 'Jump.',
 'Stop!',
 'Stop!',
 'Stop!',
 'Wait!',
 'Wait!',
 'Go on.',
 'Go on.',
 'Go on.',
 'I see.',
 'I try.',
 'I won!',
 'I won!',
 'Oh no!',
 'Attack!',
 'Attack!',
 'Cheers!',
 'Cheers!',
 'Cheers!',
 'Cheers!',
 'Get up.',
 'Go now.',
 'Go now.',
 'Go now.',
 'Got it!',
 'Got it!',
 'Got it?',
 'Got it?',
 'Got it?',
 'Hop in.',
 'Hop in.',
 'Hug me.',
 'Hug me.',
 'I fell.',
 'I fell.',
 'I know.',
 'I left.',
 'I left.',
 'I lost.',
 "I'm 19.",
 "I'm OK.",
 "I'm OK.",
 'Listen.',
 'No way!',
 'No way!',
 'No way!',
 'No way!',
 'No way!',
 'No way!',
 'No way!',
 'No way!',
 'No way!',
 'Really?',
 'Really?',
 'Really?',
 'Thanks.',
 'We try.',
 'We won.',
 'We won.',
 'We won.',
 'We won.',
 'Ask Tom.',
 'Awesome!',
 'Be calm.',
 'Be calm.',
 'Be calm.',
 'Be cool.',
 'Be fair.',
 'Be fair.',
 'Be fair.',
 'Be fair.',
 'Be fair.',
 'Be fair.',
 'Be kind.',
 'Be nice.',
 'Be nice.',
 'Be nice.',
 'Be nice.',
 'Be nice.',

In [14]:
len(target_texts)

10000

In [15]:
target_texts

['\tVa !\n',
 '\tCours\u202f!\n',
 '\tCourez\u202f!\n',
 '\tÇa alors\u202f!\n',
 '\tAu feu !\n',
 "\tÀ l'aide\u202f!\n",
 '\tSaute.\n',
 '\tÇa suffit\u202f!\n',
 '\tStop\u202f!\n',
 '\tArrête-toi !\n',
 '\tAttends !\n',
 '\tAttendez !\n',
 '\tPoursuis.\n',
 '\tContinuez.\n',
 '\tPoursuivez.\n',
 '\tJe comprends.\n',
 "\tJ'essaye.\n",
 "\tJ'ai gagné !\n",
 "\tJe l'ai emporté !\n",
 '\tOh non !\n',
 '\tAttaque !\n',
 '\tAttaquez !\n',
 '\tSanté !\n',
 '\tÀ votre santé !\n',
 '\tMerci !\n',
 '\tTchin-tchin !\n',
 '\tLève-toi.\n',
 '\tVa, maintenant.\n',
 '\tAllez-y maintenant.\n',
 '\tVas-y maintenant.\n',
 "\tJ'ai pigé !\n",
 '\tCompris !\n',
 '\tPigé\u202f?\n',
 '\tCompris\u202f?\n',
 "\tT'as capté\u202f?\n",
 '\tMonte.\n',
 '\tMontez.\n',
 '\tSerre-moi dans tes bras !\n',
 '\tSerrez-moi dans vos bras !\n',
 '\tJe suis tombée.\n',
 '\tJe suis tombé.\n',
 '\tJe sais.\n',
 '\tJe suis parti.\n',
 '\tJe suis partie.\n',
 "\tJ'ai perdu.\n",
 "\tJ'ai 19 ans.\n",
 '\tJe vais bien.\n',
 '\tÇa v

In [16]:
len(input_characters)

71

In [17]:
input_characters = sorted(list(input_characters))

In [18]:
input_characters

[' ',
 '!',
 '$',
 '%',
 '&',
 "'",
 ',',
 '-',
 '.',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'Y',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [19]:
len(target_characters)

94

In [20]:
target_characters = sorted(list(target_characters))

In [21]:
target_characters

['\t',
 '\n',
 ' ',
 '!',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '0',
 '1',
 '3',
 '5',
 '6',
 '8',
 '9',
 ':',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'Y',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '\xa0',
 '«',
 '»',
 'À',
 'Ç',
 'É',
 'Ê',
 'à',
 'â',
 'ç',
 'è',
 'é',
 'ê',
 'ë',
 'î',
 'ï',
 'ô',
 'ù',
 'û',
 'œ',
 '\u2009',
 '’',
 '\u202f']

## Define input related constants
* Set the number of unique input tokens as the length of input_characters
* Set the number of unique output tokens as the length of target_characters
* Set the maximum length of each input sequence as the length of the longest line in the input texts
* Set the maximum length of each output sequence as the length of the longest line in the target texts

In [22]:
num_encoder_tokens = len(input_characters)

In [23]:
num_decoder_tokens = len(target_characters)

In [24]:
max_encoder_seq_length = max([len(txt) for txt in input_texts])

In [25]:
max_decoder_seq_length = max([len(txt) for txt in target_texts])

In [26]:
print('Max sequence length for inputs:', max_encoder_seq_length)

Max sequence length for inputs: 16


In [27]:
print('Max sequence length for outputs:', max_decoder_seq_length)

Max sequence length for outputs: 59


## Input preprocessing
* Index the input and output characters in two dictionaries respectively
* Initialize two numpy arrays with zeroes of dimensions [num_samples, max_seq_length, num_tokens] for input and output respectively.
* Turn the sentences into 3 Numpy arrays, encoder_input_data, decoder_input_data, decoder_target_data:
    * encoder_input_data is a 3D array of shape (num_pairs, max_english_sentence_length, num_english_characters) containing a one-hot vectorization of the English sentences.
    * decoder_input_data is a 3D array of shape (num_pairs, max_french_sentence_length, num_french_characters) containg a one-hot vectorization of the French sentences.
    * decoder_target_data is the same as decoder_input_data but offset by one timestep. decoder_target_data[:, t, :] will be the same as decoder_input_data[:, t + 1, :].
* This is done as follows:
    * Use zip to map the contents of input_texts to target_texts and use enumerate to index them. 
    * Iterate over each character in the input/target text to perform one hot encoding in the postions of the character in the np arrays. 

In [28]:
input_token_index = dict(
    [(char, i) for i, char in enumerate(input_characters)])

In [29]:
input_token_index

{' ': 0,
 '!': 1,
 '$': 2,
 '%': 3,
 '&': 4,
 "'": 5,
 ',': 6,
 '-': 7,
 '.': 8,
 '0': 9,
 '1': 10,
 '2': 11,
 '3': 12,
 '4': 13,
 '5': 14,
 '6': 15,
 '7': 16,
 '8': 17,
 '9': 18,
 ':': 19,
 '?': 20,
 'A': 21,
 'B': 22,
 'C': 23,
 'D': 24,
 'E': 25,
 'F': 26,
 'G': 27,
 'H': 28,
 'I': 29,
 'J': 30,
 'K': 31,
 'L': 32,
 'M': 33,
 'N': 34,
 'O': 35,
 'P': 36,
 'Q': 37,
 'R': 38,
 'S': 39,
 'T': 40,
 'U': 41,
 'V': 42,
 'W': 43,
 'Y': 44,
 'a': 45,
 'b': 46,
 'c': 47,
 'd': 48,
 'e': 49,
 'f': 50,
 'g': 51,
 'h': 52,
 'i': 53,
 'j': 54,
 'k': 55,
 'l': 56,
 'm': 57,
 'n': 58,
 'o': 59,
 'p': 60,
 'q': 61,
 'r': 62,
 's': 63,
 't': 64,
 'u': 65,
 'v': 66,
 'w': 67,
 'x': 68,
 'y': 69,
 'z': 70}

In [30]:
target_token_index = dict(
    [(char, i) for i, char in enumerate(target_characters)])

In [31]:
target_token_index

{'\t': 0,
 '\n': 1,
 ' ': 2,
 '!': 3,
 '$': 4,
 '%': 5,
 '&': 6,
 "'": 7,
 '(': 8,
 ')': 9,
 ',': 10,
 '-': 11,
 '.': 12,
 '0': 13,
 '1': 14,
 '3': 15,
 '5': 16,
 '6': 17,
 '8': 18,
 '9': 19,
 ':': 20,
 '?': 21,
 'A': 22,
 'B': 23,
 'C': 24,
 'D': 25,
 'E': 26,
 'F': 27,
 'G': 28,
 'H': 29,
 'I': 30,
 'J': 31,
 'K': 32,
 'L': 33,
 'M': 34,
 'N': 35,
 'O': 36,
 'P': 37,
 'Q': 38,
 'R': 39,
 'S': 40,
 'T': 41,
 'U': 42,
 'V': 43,
 'Y': 44,
 'a': 45,
 'b': 46,
 'c': 47,
 'd': 48,
 'e': 49,
 'f': 50,
 'g': 51,
 'h': 52,
 'i': 53,
 'j': 54,
 'k': 55,
 'l': 56,
 'm': 57,
 'n': 58,
 'o': 59,
 'p': 60,
 'q': 61,
 'r': 62,
 's': 63,
 't': 64,
 'u': 65,
 'v': 66,
 'w': 67,
 'x': 68,
 'y': 69,
 'z': 70,
 '\xa0': 71,
 '«': 72,
 '»': 73,
 'À': 74,
 'Ç': 75,
 'É': 76,
 'Ê': 77,
 'à': 78,
 'â': 79,
 'ç': 80,
 'è': 81,
 'é': 82,
 'ê': 83,
 'ë': 84,
 'î': 85,
 'ï': 86,
 'ô': 87,
 'ù': 88,
 'û': 89,
 'œ': 90,
 '\u2009': 91,
 '’': 92,
 '\u202f': 93}

In [32]:
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
    dtype='float32')

In [33]:
encoder_input_data.shape

(10000, 16, 71)

In [34]:
encoder_input_data

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [35]:
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')

In [36]:
decoder_input_data.shape

(10000, 59, 94)

In [37]:
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')

In [38]:
decoder_target_data.shape

(10000, 59, 94)

In [39]:
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.
    for t, char in enumerate(target_text):
        decoder_input_data[i, t, target_token_index[char]] = 1.
        if t > 0:
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.
            
# print("Sample values of i, input_text, target_text : ",i,input_text, target_text)
# print("Sample values of t, char, input_token_index[char] : ", t,char,input_token_index[char] )

In [40]:
print(encoder_input_data)

[[[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[0. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]


In [41]:
print(decoder_input_data)

[[[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]


In [42]:
print(decoder_target_data)

[[[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]


## Define the encoder
* Define the input layer. The input to the encoder is a sequence of characters, each encoded as one-hot vectors with length of num_encoder_tokens.
* Define an LSTM layer with the return_state argument set to True. This returns the hidden state output returned by LSTM layers generally, as well as the hidden and cell state for all cells in the layer. These are used when defining the decoder.
* Discard `encoder_outputs` and only keep the states.

In [43]:
encoder_inputs = Input(shape=(None, num_encoder_tokens))

In [44]:
encoder = LSTM(latent_dimension, return_state=True)

In [45]:
encoder_outputs, state_h, state_c = encoder(encoder_inputs)

In [46]:
encoder_states = [state_h, state_c]

## Define the decoder
* The decoder input is defined as a sequence of French character one-hot encoded to binary vectors with a length of num_decoder_tokens.
* The LSTM layer is defined to both return sequences and state. The final hidden and cell states are ignored and only the output sequence of hidden states is referenced.
* The final hidden and cell state from the encoder is used to initialize the state of the decoder. This means every time that the encoder model encodes an input sequence, the final internal states of the encoder model are used as the starting point for outputting the first character in the output sequence. This also means that the encoder and decoder LSTM layers must have the same number of cells, in this case, 256.
* A Dense output layer is used to predict each character. This Dense is used to produce each character in the output sequence in a one-shot manner, rather than recursively, at least during training. This is because the entire target sequence required for input to the model is known during training.

In [47]:
decoder_inputs = Input(shape=(None, num_decoder_tokens))

In [48]:
decoder_lstm = LSTM(latent_dimension, return_sequences=True, return_state=True)

In [49]:
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)

In [50]:
decoder_dense = Dense(num_decoder_tokens, activation='softmax')

In [51]:
decoder_outputs = decoder_dense(decoder_outputs)

## Define the model
* Define the model with inputs for the encoder and the decoder and the output target sequence.
* Compile the model with rmsprop as optimizer which is the usual choice for recurrent neural networks and categorical_crossentropy as the loss funciton as the targets are categorical.
* Define a checkpoint. The model checkpoints will be saved with the epoch number and the validation loss in the filename.
* start tensorboard from the terminal using the command 'tensorboard --logdir=/tmp/autoencoder'
* Fit the model.Set the validation_split argument in model.fit to 0.25, then the validation data used will be the last  25% of the data.
* Pass in the checkpoint callback and the tensorboard callback to the callbacks argument.

In [52]:
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [53]:
plot_model(model, to_file='model.png', show_shapes=True)

In [54]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

In [55]:
filepath="saved_models/weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')

In [56]:
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2,
          callbacks=[TensorBoard(log_dir='/tmp/autoencoder'),checkpoint])

Train on 8000 samples, validate on 2000 samples
Epoch 1/100

Epoch 00001: loss improved from inf to 0.92424, saving model to saved_models/weights.best.hdf5
Epoch 2/100


  str(node.arguments) + '. They will not be included '



Epoch 00002: loss improved from 0.92424 to 0.73313, saving model to saved_models/weights.best.hdf5
Epoch 3/100

Epoch 00003: loss improved from 0.73313 to 0.61879, saving model to saved_models/weights.best.hdf5
Epoch 4/100

Epoch 00004: loss improved from 0.61879 to 0.56188, saving model to saved_models/weights.best.hdf5
Epoch 5/100

Epoch 00005: loss improved from 0.56188 to 0.52227, saving model to saved_models/weights.best.hdf5
Epoch 6/100

Epoch 00006: loss improved from 0.52227 to 0.48879, saving model to saved_models/weights.best.hdf5
Epoch 7/100

Epoch 00007: loss improved from 0.48879 to 0.46229, saving model to saved_models/weights.best.hdf5
Epoch 8/100

Epoch 00008: loss improved from 0.46229 to 0.43954, saving model to saved_models/weights.best.hdf5
Epoch 9/100

Epoch 00009: loss improved from 0.43954 to 0.41947, saving model to saved_models/weights.best.hdf5
Epoch 10/100

Epoch 00010: loss improved from 0.41947 to 0.40179, saving model to saved_models/weights.best.hdf5
Epo


Epoch 00042: loss improved from 0.15469 to 0.15162, saving model to saved_models/weights.best.hdf5
Epoch 43/100

Epoch 00043: loss improved from 0.15162 to 0.14810, saving model to saved_models/weights.best.hdf5
Epoch 44/100

Epoch 00044: loss improved from 0.14810 to 0.14472, saving model to saved_models/weights.best.hdf5
Epoch 45/100

Epoch 00045: loss improved from 0.14472 to 0.14132, saving model to saved_models/weights.best.hdf5
Epoch 46/100

Epoch 00046: loss improved from 0.14132 to 0.13832, saving model to saved_models/weights.best.hdf5
Epoch 47/100

Epoch 00047: loss improved from 0.13832 to 0.13525, saving model to saved_models/weights.best.hdf5
Epoch 48/100

Epoch 00048: loss improved from 0.13525 to 0.13280, saving model to saved_models/weights.best.hdf5
Epoch 49/100

Epoch 00049: loss improved from 0.13280 to 0.12972, saving model to saved_models/weights.best.hdf5
Epoch 50/100

Epoch 00050: loss improved from 0.12972 to 0.12702, saving model to saved_models/weights.best.h


Epoch 00082: loss improved from 0.07547 to 0.07448, saving model to saved_models/weights.best.hdf5
Epoch 83/100

Epoch 00083: loss improved from 0.07448 to 0.07374, saving model to saved_models/weights.best.hdf5
Epoch 84/100

Epoch 00084: loss improved from 0.07374 to 0.07263, saving model to saved_models/weights.best.hdf5
Epoch 85/100

Epoch 00085: loss improved from 0.07263 to 0.07164, saving model to saved_models/weights.best.hdf5
Epoch 86/100

Epoch 00086: loss improved from 0.07164 to 0.07044, saving model to saved_models/weights.best.hdf5
Epoch 87/100

Epoch 00087: loss improved from 0.07044 to 0.06952, saving model to saved_models/weights.best.hdf5
Epoch 88/100

Epoch 00088: loss improved from 0.06952 to 0.06848, saving model to saved_models/weights.best.hdf5
Epoch 89/100

Epoch 00089: loss improved from 0.06848 to 0.06775, saving model to saved_models/weights.best.hdf5
Epoch 90/100

Epoch 00090: loss improved from 0.06775 to 0.06641, saving model to saved_models/weights.best.h

<keras.callbacks.History at 0x1c3d6b1f28>

## Define the encoder model for sampling
* Because the training process and inference process (decoding sentences) are quite different, we use different models for both, albeit they all leverage the same inner layers.
* The encoder model is defined as taking the input layer from the encoder in the trained model (encoder_inputs) and outputting the hidden and cell state tensors (encoder_states).

In [57]:
encoder_model = Model(encoder_inputs, encoder_states)

In [58]:
plot_model(encoder_model, to_file='encoder_model.png', show_shapes=True)

## Define the decoder model for sampling
* The decoder requires the hidden and cell states from the encoder as the initial state of the newly defined encoder model. 
* Because the decoder is a separate standalone model, these states will be provided as input to the model, and therefore must first be defined as inputs.
* They can then be specified for use as the initial state of the decoder LSTM layer.
* Both the encoder and decoder will be called recursively for each character that is to be generated in the translated sequence.
* On the first call, the hidden and cell states from the encoder will be used to initialize the decoder LSTM layer, provided as input to the model directly.
* On subsequent recursive calls to the decoder, the last hidden and cell state must be provided to the model. 
* Therefore, the decoder must output the hidden and cell states along with the predicted character on each call, so that these states can be assigned to a variable and used on each subsequent recursive call for a given input sequence of English text to be translated.

In [59]:
decoder_state_input_h = Input(shape=(latent_dimension,))

In [60]:
decoder_state_input_c = Input(shape=(latent_dimension,))

In [61]:
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

In [62]:
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)

In [63]:
decoder_states = [state_h, state_c]

In [64]:
decoder_outputs = decoder_dense(decoder_outputs)

In [65]:
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

In [66]:
plot_model(decoder_model, to_file='decoder_model.png', show_shapes=True)

## Sampling
* Reverse-lookup token index to decode sequences back to something readable.
* Define method to test the inference model:
    * Encode the input as state vectors and retrieve initial decoder state.
    * Generate empty target sequence of length 1.
    * Populate the first character of target sequence with the start character(tab).
    * Set stop condition to false and initalise an empty string for the output.
    * Feed the state vectors and 1-char target sequence to the decoder to produce predictions for the next character.
    * Sample the next character using these predictions (we simply use argmax).
    * Append the sampled character to the target sequence
    * Repeat until we generate the end-of-sequence character or we hit the character limit.
* Pass a subset of training set to this method for trying out decoding.

In [67]:
reverse_input_char_index = dict(
    (i, char) for char, i in input_token_index.items())

In [68]:
reverse_input_char_index

{0: ' ',
 1: '!',
 2: '$',
 3: '%',
 4: '&',
 5: "'",
 6: ',',
 7: '-',
 8: '.',
 9: '0',
 10: '1',
 11: '2',
 12: '3',
 13: '4',
 14: '5',
 15: '6',
 16: '7',
 17: '8',
 18: '9',
 19: ':',
 20: '?',
 21: 'A',
 22: 'B',
 23: 'C',
 24: 'D',
 25: 'E',
 26: 'F',
 27: 'G',
 28: 'H',
 29: 'I',
 30: 'J',
 31: 'K',
 32: 'L',
 33: 'M',
 34: 'N',
 35: 'O',
 36: 'P',
 37: 'Q',
 38: 'R',
 39: 'S',
 40: 'T',
 41: 'U',
 42: 'V',
 43: 'W',
 44: 'Y',
 45: 'a',
 46: 'b',
 47: 'c',
 48: 'd',
 49: 'e',
 50: 'f',
 51: 'g',
 52: 'h',
 53: 'i',
 54: 'j',
 55: 'k',
 56: 'l',
 57: 'm',
 58: 'n',
 59: 'o',
 60: 'p',
 61: 'q',
 62: 'r',
 63: 's',
 64: 't',
 65: 'u',
 66: 'v',
 67: 'w',
 68: 'x',
 69: 'y',
 70: 'z'}

In [69]:
reverse_target_char_index = dict(
    (i, char) for char, i in target_token_index.items())

In [None]:
reverse_target_char_index

In [70]:
def decode_sequence(input_seq):

    states_value = encoder_model.predict(input_seq)

    target_seq = np.zeros((1, 1, num_decoder_tokens))

    target_seq[0, 0, target_token_index['\t']] = 1

    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)
        
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char


        if (sampled_char == '\n' or
           len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True


        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

        states_value = [h, c]

    return decoded_sentence

In [71]:
for seq_index in range(100):
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    
    decoded_sentence = decode_sequence(input_seq)
    
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)

-
Input sentence: Go.
Decoded sentence: Va !

-
Input sentence: Run!
Decoded sentence: Cours !

-
Input sentence: Run!
Decoded sentence: Cours !

-
Input sentence: Wow!
Decoded sentence: Ça arrez pour qui con lif.

-
Input sentence: Fire!
Decoded sentence: Au feu !

-
Input sentence: Help!
Decoded sentence: À l'aide !

-
Input sentence: Jump.
Decoded sentence: Saute.

-
Input sentence: Stop!
Decoded sentence: Arrête-toi !

-
Input sentence: Stop!
Decoded sentence: Arrête-toi !

-
Input sentence: Stop!
Decoded sentence: Arrête-toi !

-
Input sentence: Wait!
Decoded sentence: Attendez !

-
Input sentence: Wait!
Decoded sentence: Attendez !

-
Input sentence: Go on.
Decoded sentence: Poursuivez.

-
Input sentence: Go on.
Decoded sentence: Poursuivez.

-
Input sentence: Go on.
Decoded sentence: Poursuivez.

-
Input sentence: I see.
Decoded sentence: Je comprends.

-
Input sentence: I try.
Decoded sentence: J'essaye.

-
Input sentence: I won!
Decoded sentence: Je n'ais pas la mais en avance