In [2]:
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping

In [7]:
# Sample data - English to French translation
english_sentences = ['hello', 'how are you', 'good morning', 'good night', 'thank you']
french_sentences = ['bonjour', 'comment ça va', 'bonjour', 'bonne nuit', 'merci']

In [8]:
# Add START and END tokens to the French sentences
french_sentences = ['starttoken ' + sentence + ' endtoken' 
                    for sentence in french_sentences]

# Hyperparameters
batch_size = 2
epochs = 100
latent_dim = 256  # Dimensionality of the encoding space

In [9]:
french_sentences

['starttoken bonjour endtoken',
 'starttoken comment ça va endtoken',
 'starttoken bonjour endtoken',
 'starttoken bonne nuit endtoken',
 'starttoken merci endtoken']

In [10]:
# Initialize the Tokenizer for both source and target languages
eng_tokenizer = Tokenizer(char_level=False)
fra_tokenizer = Tokenizer(char_level=False)

# Fit the Tokenizer on the sentences
eng_tokenizer.fit_on_texts(english_sentences)
fra_tokenizer.fit_on_texts(french_sentences)

In [12]:
eng_tokenizer.word_index

{'you': 1,
 'good': 2,
 'hello': 3,
 'how': 4,
 'are': 5,
 'morning': 6,
 'night': 7,
 'thank': 8}

In [13]:
fra_tokenizer.word_index

{'starttoken': 1,
 'endtoken': 2,
 'bonjour': 3,
 'comment': 4,
 'ça': 5,
 'va': 6,
 'bonne': 7,
 'nuit': 8,
 'merci': 9}

In [14]:
# Convert the sentences into sequences of integers
encoder_input_data = eng_tokenizer.texts_to_sequences(english_sentences)
decoder_input_data = fra_tokenizer.texts_to_sequences(french_sentences)

In [15]:
# Pad the sequences to ensure uniform length
max_encoder_seq_length = max([len(seq) for seq in encoder_input_data])
max_decoder_seq_length = max([len(seq) for seq in decoder_input_data])

In [16]:
max_encoder_seq_length

3

In [17]:
max_decoder_seq_length

5

In [22]:
encoder_input_data = pad_sequences(encoder_input_data, 
                                   maxlen=max_encoder_seq_length, padding='post')
decoder_input_data = pad_sequences(decoder_input_data, 
                                   maxlen=max_decoder_seq_length, padding='post')

In [23]:
encoder_input_data

array([[3, 0, 0],
       [4, 5, 1],
       [2, 6, 0],
       [2, 7, 0],
       [8, 1, 0]], dtype=int32)

In [24]:
decoder_input_data

array([[1, 3, 2, 0, 0],
       [1, 4, 5, 6, 2],
       [1, 3, 2, 0, 0],
       [1, 7, 8, 2, 0],
       [1, 9, 2, 0, 0]], dtype=int32)

In [25]:
# Prepare the target data (decoder output, shifted by one time step)
decoder_output_data = np.zeros_like(decoder_input_data)
decoder_output_data[:, 0:-1] = decoder_input_data[:, 1:]

In [26]:
decoder_output_data

array([[3, 2, 0, 0, 0],
       [4, 5, 6, 2, 0],
       [3, 2, 0, 0, 0],
       [7, 8, 2, 0, 0],
       [9, 2, 0, 0, 0]], dtype=int32)

In [27]:
decoder_input_data

array([[1, 3, 2, 0, 0],
       [1, 4, 5, 6, 2],
       [1, 3, 2, 0, 0],
       [1, 7, 8, 2, 0],
       [1, 9, 2, 0, 0]], dtype=int32)

In [28]:
# Define the vocabulary size (total unique tokens)
num_encoder_tokens = len(eng_tokenizer.word_index) + 1
num_decoder_tokens = len(fra_tokenizer.word_index) + 1

In [29]:
num_encoder_tokens

9

In [31]:
num_decoder_tokens

10

In [32]:
# Define the input sequence and output sequence for the Seq2Seq model
encoder_inputs = Input(shape=(None,))
decoder_inputs = Input(shape=(None,))

In [33]:
encoder_inputs

<KerasTensor shape=(None, None), dtype=float32, sparse=False, name=keras_tensor>

In [35]:
# Encoder
encoder_embedding = Embedding(input_dim=num_encoder_tokens, 
                              output_dim=latent_dim, 
                              input_length=max_encoder_seq_length)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)

In [41]:
# Decoder
decoder_embedding = Embedding(input_dim=num_decoder_tokens, 
                              output_dim=latent_dim, 
                              input_length=max_decoder_seq_length)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, 
                                     initial_state=[state_h, state_c])


In [42]:
# Dense layer for generating predictions
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [43]:
# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [44]:
model.summary()

In [46]:
# Compile the model
model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

In [48]:
# Train the model
model.fit([encoder_input_data, decoder_input_data], 
          np.expand_dims(decoder_output_data, -1),
          batch_size=batch_size, epochs=epochs)

Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 1.0000 - loss: 0.0010
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 1.0000 - loss: 9.9400e-04
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 1.0000 - loss: 0.0011
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 1.0000 - loss: 8.3423e-04
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 1.0000 - loss: 0.0011
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 1.0000 - loss: 9.0958e-04
Epoch 7/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 1.0000 - loss: 0.0011
Epoch 8/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 1.0000 - loss: 8.4908e-04
Epoch 9/100
[1m3/3[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x72d025fb3bb0>

In [49]:
# Create inference models for the encoder and decoder for prediction
# Encoder model
encoder_model = Model(encoder_inputs, [encoder_outputs, state_h, state_c])

In [51]:
# Decoder model (for inference)
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_lstm_inf = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_output_inf, state_h_inf, state_c_inf = decoder_lstm_inf(
    decoder_embedding, initial_state=[decoder_state_input_h, decoder_state_input_c])
decoder_output_inf = decoder_dense(decoder_output_inf)
decoder_model = Model([decoder_inputs, decoder_state_input_h, 
                       decoder_state_input_c],
                      [decoder_output_inf, state_h_inf, state_c_inf])

In [55]:
# Function to decode the sequence
def decode_sequence(input_seq):
    # Get the encoder states
    states_value = encoder_model.predict(input_seq)

    # Generate an initial target sequence (the start token)
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = fra_tokenizer.word_index['starttoken']  # start token index

    # Sample output tokens
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        # Correct the prediction input to match expected inputs
        output_tokens, h, c = decoder_model.predict(
            [target_seq, states_value[1], states_value[2]])

        # Sample the next token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_token = fra_tokenizer.index_word[sampled_token_index]

        decoded_sentence += ' ' + sampled_token

        # Stop if we hit the end token or reach max length
        if sampled_token == 'endtoken' or len(decoded_sentence) > max_decoder_seq_length:
            stop_condition = True

        # Update the target sequence
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index

        # Update the states
        states_value = [h, c]

    return decoded_sentence

In [74]:
# Test the decoder with a sample sentence
input_seq = encoder_input_data[3:4]  # Sample input sentence

In [75]:
input_seq

array([[2, 7, 0]], dtype=int32)

In [76]:
encoder_input_data

array([[3, 0, 0],
       [4, 5, 1],
       [2, 6, 0],
       [2, 7, 0],
       [8, 1, 0]], dtype=int32)

In [77]:
decoded_sentence = decode_sequence(input_seq)
print("Decoded Sentence:", decoded_sentence)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Decoded Sentence:  bonne


In [73]:
new = 'good night'