Reference: Keras character-level LSTM sequence-to-sequence example — https://keras.io/examples/nlp/lstm_seq2seq/

In [4]:
import tensorflow as tf
import numpy as np

In [5]:
# Display the installed TensorFlow version so the environment used for this run is recorded.
tf.__version__

'2.0.0'

In [3]:
# Read the tab-separated English/French sentence pairs, one pair per line.
# The corpus contains accented characters, so decode explicitly as UTF-8
# instead of relying on the platform's default encoding.
with open('fra-eng/fra.txt', 'r', encoding='utf-8') as f:
    text_data = f.read().split('\n')

# Drop blank lines (e.g. the empty string left after a trailing newline), which
# cannot be split into a sentence pair, then keep only the first 1000 pairs.
text_data = [line for line in text_data if line][:1000]

In [4]:
# Sentences in file order, plus the set of distinct characters seen per language.
english_sentences = []
french_sentences = []
english_characters = set()
french_characters = set()

for val in text_data:
    # Columns are: English text, French text, then any extra columns, which
    # are ignored. Star-unpacking accepts lines with or without extra columns.
    eng_data, fre_data, *_ = val.split('\t')
    english_sentences.append(eng_data)
    english_characters.update(eng_data)

    # Prefix with '\t' and suffix with '\n' so every decoder sequence has an
    # explicit first and last character.
    fre_data = '\t' + fre_data + '\n'
    french_sentences.append(fre_data)
    french_characters.update(fre_data)

In [5]:
# Sorted character lists: the position of a character in these lists is what
# gets enumerated into the lookup tables.
encoder_chars = sorted(english_characters)
decoder_chars = sorted(french_characters)

# Number of distinct characters per language (last axis of the one-hot arrays).
encoder_char_len = len(english_characters)
decoder_char_len = len(french_characters)

# Longest sentence per language, in characters (timestep axis of the one-hot arrays).
max_encoder_sequence_len = max(map(len, english_sentences))
max_decoder_sequence_len = max(map(len, french_sentences))

In [6]:
# Zero-initialised float32 arrays indexed as (sentence, timestep, character).
input_encoder_data = np.zeros(
    shape=(len(english_sentences), max_encoder_sequence_len, encoder_char_len),
    dtype=np.float32,
)
input_decoder_data = np.zeros(
    shape=(len(french_sentences), max_decoder_sequence_len, decoder_char_len),
    dtype=np.float32,
)
target_decoder_data = np.zeros(
    shape=(len(french_sentences), max_decoder_sequence_len, decoder_char_len),
    dtype=np.float32,
)

In [7]:
# Map each character to its position in the sorted character list.
encoder_input_index = {char: idx for idx, char in enumerate(encoder_chars)}
decoder_input_index = {char: idx for idx, char in enumerate(decoder_chars)}

In [8]:
# Sanity check: the two lists are zipped together when filling the one-hot arrays,
# and zip() would silently truncate to the shorter one if the lengths differed.
assert len(english_sentences) == len(french_sentences)

In [9]:
# Character whose one-hot slot is written for the timesteps after the end of a sentence.
sentence_break_space = ' '

In [10]:
# Fill the one-hot arrays: one row per sentence pair, one timestep per character.
# Timesteps past the end of a sentence are padded with the space character.
for row, (encoder_data, decoder_data) in enumerate(zip(english_sentences, french_sentences)):

    for col, char in enumerate(encoder_data):
        input_encoder_data[row, col, encoder_input_index[char]] = 1.0
    # Pad with 1.0, matching the decoder arrays below. Writing 0 here was a
    # no-op on a zero-initialised array and left the padding steps all-zero.
    # len() is used instead of the leaked loop variable so an empty sentence
    # cannot reuse a stale `col` from the previous row.
    input_encoder_data[row, len(encoder_data):, encoder_input_index[sentence_break_space]] = 1.0

    for col, char in enumerate(decoder_data):
        input_decoder_data[row, col, decoder_input_index[char]] = 1.0

        if col > 0:
            # The target is the decoder input shifted one timestep earlier,
            # so it never contains the first character of the sequence.
            target_decoder_data[row, col - 1, decoder_input_index[char]] = 1.0

    # The target is one step shorter than the input, so its padding starts one
    # timestep earlier.
    input_decoder_data[row, len(decoder_data):, decoder_input_index[sentence_break_space]] = 1.0
    target_decoder_data[row, len(decoder_data) - 1:, decoder_input_index[sentence_break_space]] = 1.0

In [11]:
# Encoder: consumes a variable-length sequence of one-hot English characters.
# return_state=True makes the LSTM return its output plus the final hidden and
# cell states; only the two states are kept.
encoder_inputs = tf.keras.Input(shape=(None, encoder_char_len))
encoder_lstm = tf.keras.layers.LSTM(256, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]

# Decoder: consumes one-hot French characters and is initialised with the
# encoder's final states. It has the same number of units (256) as the encoder.
# return_sequences=True yields an output for every timestep; the returned states
# are discarded here.
decoder_inputs = tf.keras.Input(shape=(None, decoder_char_len))
decoder_lstm = tf.keras.layers.LSTM(256, return_state=True, return_sequences=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state= encoder_states)

# Softmax over the French character set at every decoder timestep.
decoder_dense = tf.keras.layers.Dense(decoder_char_len, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: [encoder input, decoder input] -> per-timestep character distribution.
model = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, 58)]   0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None, 74)]   0                                            
__________________________________________________________________________________________________
lstm (LSTM)                     [(None, 256), (None, 322560      input_1[0][0]                    
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, None, 256),  338944      input_2[0][0]                    
                                                                 lstm[0][1]                   

In [None]:
# Configure training: RMSprop optimizer, categorical cross-entropy on the
# one-hot targets, and accuracy reported as a metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on (encoder input, decoder input) -> target, holding out 30% of the
# samples for validation.
model.fit(
    x=[input_encoder_data, input_decoder_data],
    y=target_decoder_data,
    batch_size=64,
    epochs=50,
    validation_split=0.3,
)

# Persist the trained model under the path 'seq2seq_lstm'.
model.save('seq2seq_lstm')

Train on 700 samples, validate on 300 samples
Epoch 1/50
