In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense,Embedding
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

Step 2: Dataset Definition

This is the dataset, where each tuple consists of a simple English phrase and its French translation. It is a small toy dataset for the purpose of demonstration.

In [None]:
# Toy parallel corpus: the i-th English phrase is paired with the i-th French
# translation, giving a list of (english, french) tuples.
data = list(
    zip(
        ["hello", "how are you", "thank you", "good morning", "good night", "see you later", "I love you"],
        ["bonjour", "comment ça va", "merci", "bonjour", "bonne nuit", "à plus tard", "je t'aime"],
    )
)

Step 3: Text Preparation

zip(*data): Separates the data tuples into two separate sequences: one for input_texts (English) and one for target_texts (French).

In [None]:
# Column-wise view of the corpus: all English phrases and all French phrases
# as two parallel tuples.
input_texts = tuple(english for english, _ in data)
target_texts = tuple(french for _, french in data)

**Step 4 : Tokenization**

**Tokenizer():** Creates a tokenizer that will convert text into sequences of integers.

**fit_on_texts():** This method creates a vocabulary from the input_texts and target_texts and assigns a unique integer to each word.

In [None]:
# Each language gets its own tokenizer, fitted only on that language's phrases,
# so the English and French vocabularies are indexed independently.
input_tokenizer = Tokenizer()
input_tokenizer.fit_on_texts(input_texts)

target_tokenizer = Tokenizer()
target_tokenizer.fit_on_texts(target_texts)

**texts_to_sequences():** Converts each text(sentences) into a sequence of integers.Each word in the text is replaced by its corresponding integer from the vocabulary

In [None]:
# Replace every word by its vocabulary id: one list of ints per phrase.
input_sequences, target_sequences = (
    input_tokenizer.texts_to_sequences(input_texts),
    target_tokenizer.texts_to_sequences(target_texts),
)

**Step 5 : Vocabulary and Sequence Length Calculations**

**word_index:**
This dictionary holds the integer mapping for each word. Word indices start at 1, so we add 1 to reserve index 0, which is used for padding.

**input_vocab_size and target_vocab_size:**
Store the size of the vocabulary for the input and target languages.

In [None]:
# word_index ids start at 1; the extra slot accounts for id 0, which the
# padded sequences use as filler.
input_vocab_size = 1 + len(input_tokenizer.word_index)
target_vocab_size = 1 + len(target_tokenizer.word_index)

**max_input_len and max_target_len :**
Store the maximum length of sequences in the input and target languages,respectively.This helps with padding the sequences to a uniform length

In [None]:
# Longest phrase (in tokens) on each side; used as the padding length.
max_input_len = max(map(len, input_sequences))
max_target_len = max(map(len, target_sequences))

**Step 6 : Padding Sequences pad_sequences() :**
Pads each sequence to ensure that all sequences have the same length.Padding is applied to the end of the sequences(padding="post")

In [None]:
# Append zeros to the end of every sequence so that all rows of a language
# share the same length.
encoder_input_data = pad_sequences(
    sequences=input_sequences,
    maxlen=max_input_len,
    padding="post",
)
decoder_input_data = pad_sequences(
    sequences=target_sequences,
    maxlen=max_target_len,
    padding="post",
)

Step 7: One-Hot Encoding Target Sequences

np.zeros(): Creates a zero matrix where each row corresponds to a sentence and each column corresponds to a time step in the sequence. The depth corresponds to the size of the vocabulary (for one hot encoding)
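for loop: Loops over the target sequences and creates one-hot encoded vectors where only the index corresponding to the word is 1. The shift by one means that the target at time step t is the word at position t+1 of the same sentence, so at every step the decoder learns to predict the next word. Note that the target sentences here carry no start/end markers, so a one-word target such as "merci" has nothing left after the shift and its target rows stay all zeros (such samples contribute zero loss, which likely explains a validation loss of 0 when they land in the validation split).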

In [None]:
# One-hot decoder targets with axes (sentence, time step, vocabulary id).
decoder_target_data = np.zeros(
    shape=(len(target_sequences), max_target_len, target_vocab_size),
    dtype="float32",
)
# The target at step t is the token found at step t + 1 of the same sentence,
# so the first token of every sequence is skipped and the rest moves one step
# to the left. Rows that receive no token stay all zeros.
for i, seq in enumerate(target_sequences):
    for step, token_id in enumerate(seq[1:]):
        decoder_target_data[i, step, token_id] = 1.0

Step 8: Splitting the data

train_test_split(): Splits the input data (encoder and decoder inputs) and target data into training and testing sets.

test_size=0.2 means 20% of data is used for testing and 80% for training.

In [None]:
# Hold out 20% of the sentence pairs for validation. The three arrays are split
# with the same shuffled indices, so encoder input, decoder input and decoder
# target of a sentence always land in the same subset. The fixed seed makes the
# split (and therefore the training log) reproducible on "Restart & Run All".
(
    X_train, X_test,
    y_train, y_test,
    decoder_input_train, decoder_input_test,
) = train_test_split(
    encoder_input_data,
    decoder_target_data,
    decoder_input_data,
    test_size=0.2,
    random_state=42,
)

Step 9: Model Architecture

In [None]:
# Model hyperparameters.
latent_dim = 128     # number of units (state size) of the encoder and decoder LSTMs
embedding_dim = 128  # length of each word-embedding vector; 50, 100 or 300 are other typical choices

Input(shape=(max_input_len,)): Defines the input shape for the encoder (input sentence length).
Embedding(): Maps the input word indices to dense vectors of size embedding_dim.
LSTM(): The LSTM layer processes the input embeddings and returns two things the final hidden state (state_h) and cell state (state_c). These states will be passed to the decoder.

In [None]:
# Encoder: padded token ids -> embeddings -> LSTM.
# Only the final hidden/cell states are used downstream; they become the
# decoder's initial state.
encoder_inputs = Input(shape=(max_input_len,))
encoder_embedding = Embedding(
    input_dim=input_vocab_size,
    output_dim=embedding_dim,
)(encoder_inputs)
encoder_lstm = LSTM(units=latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)

Similar to the encoder, the decoder also has an embedding layer followed by an LSTM. The LSTM receives the encoder's final states (state_h, state_c) as initial states for the decoding process.

return_sequences = True ensures that the decoder produces a sequence of outputs rather than just the last output.

In [None]:
# Decoder: token ids -> embeddings -> LSTM that starts from the encoder's
# final states and returns an output for every time step.
# The time dimension is left open (None) because the same layers are fed whole
# padded sentences (max_target_len steps) during training and a single token
# per call during step-by-step decoding; a fixed length would not match the
# (1, 1) arrays used at inference time.
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(target_vocab_size, embedding_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
# The decoder's own final states are not needed while training.
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=[state_h, state_c])

**Dense Layer**

Dense(): A fully connected layer that outputs a probability distribution over the target vocabulary (for each word in the sequence).

softmax: Ensures the output is a probability distribution.

In [None]:
# Per-time-step classifier: projects each decoder LSTM output onto the target
# vocabulary and normalises it with softmax. The layer object is kept in
# `decoder_dense` so the inference decoder can reuse its weights.
decoder_dense = Dense(units=target_vocab_size, activation="softmax")
decoder_outputs = decoder_dense(decoder_outputs)

Step 10: Defining the Model

In [None]:
# Training model: (source sentence, decoder input sentence) -> next-word
# distribution at every decoder time step.
model = Model(
    inputs=[encoder_inputs, decoder_inputs],
    outputs=decoder_outputs,
)
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",  # targets are one-hot vectors
    metrics=["accuracy"],
)

In [None]:
# Train the model; the held-out pairs are evaluated after every epoch.
model.fit(
    x=[X_train, decoder_input_train],
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=([X_test, decoder_input_test], y_test),
)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8s/step - accuracy: 0.0667 - loss: 1.0267 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.2667 - loss: 1.0209 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - accuracy: 0.2667 - loss: 1.0151 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.3333 - loss: 1.0092 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.3333 - loss: 1.0031 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.3333 - loss: 0.9967 - val_accuracy: 0.0000e+00 - val_loss: 0.00

<keras.src.callbacks.history.History at 0x7f8865ede860>

In [None]:
# Purpose of Inference Models

#After the model has been trained, we need to define the inference process to actually generate translations.
#In the training process, both the encoder and decoder receive complete sequences. However, during inference (prediction), we only have the input sentence,
#and the decoder must generate the output word by word, one step at a time.
#Thus, we create two separate models for Inference:
#Encoder model: Converts the input sentence into internal states (hidden and cell states)
#that are passed to the decoder.
#Decoder model: Takes the encoder's internal states and generates the output sequence word by word
#Define Inference models for translation

# Encoder model: maps a padded source sequence to the encoder LSTM's final
# hidden and cell states (reuses the trained encoder layers).
encoder_model = Model(inputs=encoder_inputs, outputs=[state_h, state_c])

#Purpose: The encoder processes the input sequence and outputs its final internal states
#(hidden state state_h and cell state state_c).
#These states will be passed to the decoder during inference.
#encoder_inputs: The input sequence for the encoder (which is padded).
#[state_h, state_c]: The encoder's final states that the decoder will use to start
#generating the output sequence.

# Decoder model
# During inference the decoder is run one step at a time, so its LSTM state is
# not wired to the encoder graph; it is supplied through two extra inputs and
# returned again after every step.
decoder_state_input_h = Input(shape=(latent_dim,))  # hidden state fed in
decoder_state_input_c = Input(shape=(latent_dim,))  # cell state fed in

# Reuse the trained decoder LSTM, starting from the supplied states.
decoder_lstm_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
    decoder_embedding,
    initial_state=[decoder_state_input_h, decoder_state_input_c],
)
# NOTE: this rebinds `decoder_outputs`, which until here held the output of the
# training graph.
decoder_outputs = decoder_dense(decoder_lstm_outputs)
decoder_model = Model(
    inputs=[decoder_inputs, decoder_state_input_h, decoder_state_input_c],
    outputs=[decoder_outputs, decoder_state_h, decoder_state_c],
)

#The decoder LSTM takes in the current word (embedded using the decoder embedding layer)
#along with the hidden and cell states (decoder_state_input_h and decoder_state_input_c)
#as initial states.
#decoder_lstm_outputs: The LSTM output for the current time step
#(decoder_dense turns it into the probabilities for each word in the vocabulary).
#decoder_state_h, decoder_state_c: The updated hidden and cell states after
#processing the current word. These states will be passed back into the LSTM for
#the next time step.

#Function to decode a sequence using the trained model
#The function takes an input sequence (from a source language, for example)
#and uses an encoder-decoder model to generate a translated sequence (target language).
#It performs this in an iterative manner, predicting one word at a time,
#until it either predicts the end-of-sequence token or reaches a specified maximum length.

def decode_sequence(input_seq):
    """Greedily decode one padded source sequence into a target-language string.

    The encoder turns `input_seq` into its final LSTM states. The decoder is
    then run one token at a time, feeding each predicted token and the updated
    states back in, until an "<end>" word is predicted or `max_target_len`
    decoding steps have been made.

    Args:
        input_seq: Array of source token ids with one row, padded to
            `max_input_len` (this is what `translate` passes in).

    Returns:
        The predicted words joined by single spaces. The end marker and empty
        (padding/unknown) predictions are not included.

    Notes:
        The length cap counts decoding steps (words), not characters of the
        output string, so up to `max_target_len` words can be produced.
        NOTE(review): the training pairs defined in this notebook contain no
        "<start>"/"<end>" markers, so with that data the decoder is seeded with
        the padding id 0 and only the length cap ends the loop.
    """
    # Encoder states [h, c] become the decoder's initial state. list() keeps the
    # concatenation below valid whatever sequence type predict() returns.
    states_value = list(encoder_model.predict(input_seq, verbose=0))

    # 1x1 array holding the token fed to the decoder at the current step.
    # Seed it with "<start>" when the target vocabulary has one, else id 0.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_tokenizer.word_index.get("<start>", 0)

    decoded_words = []
    for _ in range(max_target_len):
        # One decoder step: next-word probabilities plus updated states.
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value, verbose=0)

        # Greedy choice: most probable id at the last (only) time step.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        # .get() because id 0 (padding) has no entry in index_word.
        sampled_word = target_tokenizer.index_word.get(sampled_token_index, "")

        if sampled_word == "<end>":
            break
        if sampled_word:
            decoded_words.append(sampled_word)

        # The token just predicted is the decoder input of the next step, and
        # the updated states carry the context forward.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return " ".join(decoded_words)


def translate(sentence):
    """Translate a source-language sentence with the trained encoder-decoder.

    The sentence is converted to token ids with `input_tokenizer`, post-padded
    (or truncated) to `max_input_len`, and handed to `decode_sequence`.

    Args:
        sentence: Source-language text.

    Returns:
        The decoded target-language string returned by `decode_sequence`.
    """
    # A one-element batch: texts_to_sequences expects a list of texts.
    sequence = input_tokenizer.texts_to_sequences([sentence])
    # Bring the sequence to the fixed length the encoder was built with.
    sequence = pad_sequences(sequence, maxlen=max_input_len, padding='post')
    return decode_sequence(sequence)


# Example usage: translate a phrase that is part of the toy training data.
translated_sentence = translate("hello")
print("Translated sentence:", translated_sentence, sep=" ")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Translated sentence: nuit 


In [None]:
# Purpose of Inference models
# After the model has been trained, we need to define the inference process to actually generate translation
# In this training process, both the encoder and decoder receive complete sequences.
# However, during inference (prediction), we only have the input sentence, and the decoder must generate the output word by word, one step at a time.
# Thus, we create two separate models for inference:

# Encoder model: Converts the input sentence into internal states (hidden and cell states)
# that are passed to the decoder.
# Decoder model: Takes the encoder's internal states and generates the output sequence word by word.

# Define inference models for translation

# Encoder model: padded source sequence -> final hidden and cell states of the
# trained encoder LSTM.
# NOTE(review): this repeats the encoder_model definition of the previous
# cell; one of the two copies can be dropped.
encoder_model = Model(inputs=encoder_inputs, outputs=[state_h, state_c])

# Purpose: The encoder processes the input sequence and outputs its final internal states
# (hidden state state_h and cell state state_c).
# These states will be passed to the decoder during inference.
# encoder_inputs: The input sequence for the encoder (which is padded).
# [state_h, state_c]: The encoder's final states that the decoder will use to start
# generating the output sequence.

# Decoder model
# For step-by-step decoding the LSTM state is passed in explicitly (two extra
# inputs) and handed back after each step, instead of being wired to the
# encoder graph as in the training model.
decoder_state_input_h = Input(shape=(latent_dim,))  # hidden state fed in
decoder_state_input_c = Input(shape=(latent_dim,))  # cell state fed in

# Reuse the trained decoder LSTM and output layer.
decoder_lstm_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
    decoder_embedding,
    initial_state=[decoder_state_input_h, decoder_state_input_c],
)
decoder_outputs = decoder_dense(decoder_lstm_outputs)

decoder_model = Model(
    inputs=[decoder_inputs, decoder_state_input_h, decoder_state_input_c],
    outputs=[decoder_outputs, decoder_state_h, decoder_state_c],
)
# The decoder LSTM takes in the current word (embedded using the decoder embedding layer)
# along with the hidden and cell states (decoder_state_input_h and decoder_state_input_c)
# as initial states.
# decoder_lstm_outputs: The LSTM output for the current time step
# (decoder_dense turns it into the probabilities for each word in the vocabulary).
# decoder_state_h, decoder_state_c: The updated hidden and cell states after processing the current word.
# These states will be passed back into the LSTM for the next step.
# decoder_outputs: The output probabilities for each word in the vocabulary.




# Function to decode a sequence using the trained model
# The function takes an input sequence (from a source language, for example)
# and uses an encoder-decoder model to generate a translated sequence (target language).
# It performs this in an iterative manner, predicting one word at a time,
# until it either predicts the end-of-sequence token or reaches a specified maximum length.
def decode_sequence(input_seq):
    """Greedily decode one padded source sequence into a target-language string.

    The encoder turns `input_seq` into its final LSTM states. The decoder is
    then run one token at a time, feeding each predicted token and the updated
    states back in, until an "<end>" word is predicted or `max_target_len`
    decoding steps have been made.

    Args:
        input_seq: Array of source token ids with one row, padded to
            `max_input_len` (this is what `translate` passes in).

    Returns:
        The predicted words joined by single spaces. The end marker and empty
        (padding/unknown) predictions are not included.

    Notes:
        The length cap counts decoding steps (words), not characters of the
        output string, so up to `max_target_len` words can be produced.
        NOTE(review): the training pairs defined in this notebook contain no
        "<start>"/"<end>" markers, so with that data the decoder is seeded with
        the padding id 0 and only the length cap ends the loop.
    """
    # Encoder states [h, c] become the decoder's initial state. list() keeps the
    # concatenation below valid whatever sequence type predict() returns.
    states_value = list(encoder_model.predict(input_seq, verbose=0))

    # 1x1 array holding the token fed to the decoder at the current step.
    # Seed it with "<start>" when the target vocabulary has one, else id 0.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_tokenizer.word_index.get("<start>", 0)

    decoded_words = []
    for _ in range(max_target_len):
        # One decoder step: next-word probabilities plus updated states.
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value, verbose=0)

        # output_tokens has axes (batch, time step, vocabulary id); take the
        # most probable id at the last (only) time step of the single row.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        # .get() because id 0 (padding) has no entry in index_word.
        sampled_word = target_tokenizer.index_word.get(sampled_token_index, "")

        if sampled_word == "<end>":
            break
        if sampled_word:
            decoded_words.append(sampled_word)

        # The token just predicted is the decoder input of the next step, and
        # the updated states carry the context forward.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return " ".join(decoded_words)

# translate(sentence): This function translates a given sentence.

# input_tokenizer.texts_to_sequences([sentence]): Converts the input sentence into a sequence of tokens.
# pad_sequences(): Pads the input sequence to the maximum length (since the model expects inputs to be of uniform length).
# decode_sequence(): Calls the decoding function to generate the translation for the given input sequence.

# Translate a sentence
def translate(sentence):
    """Translate a source-language sentence with the trained encoder-decoder.

    Steps: tokenize the sentence with `input_tokenizer`, post-pad (or truncate)
    the ids to `max_input_len`, then let `decode_sequence` generate the
    target-language words.

    Args:
        sentence: Source-language text.

    Returns:
        The decoded string returned by `decode_sequence`.
    """
    # texts_to_sequences works on a list of texts, hence the one-element list;
    # the result is a batch containing a single id sequence.
    token_ids = input_tokenizer.texts_to_sequences([sentence])

    # Zeros are appended at the end so the row has the length the encoder was
    # built with, e.g. [[1, 2]] -> [[1, 2, 0]].
    padded_ids = pad_sequences(token_ids, maxlen=max_input_len, padding="post")

    return decode_sequence(padded_ids)

# Example usage: "hello" is in the training phrases, "world" is not.
translated_sentence = translate("hello world")
print("Translated sentence:", translated_sentence, sep=" ")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 305ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step
Translated sentence: nuit 
