In [1]:
# ===============================
# IMPORT LIBRARIES
# ===============================
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# ===============================
# SAMPLE TRANSLATION DATASET
# ===============================
# English -> French (Toy Dataset)
input_texts = [
    "i am a student",
    "i love machine learning",
    "this is a book",
    "how are you",
    "i like python"
]

target_texts = [
    "je suis un etudiant",
    "j aime l apprentissage automatique",
    "ceci est un livre",
    "comment allez vous",
    "j aime python"
]

# Add start and end tokens
target_texts = ["<start> " + txt + " <end>" for txt in target_texts]

# ===============================
# TOKENIZATION
# ===============================
input_tokenizer = Tokenizer()
target_tokenizer = Tokenizer()

input_tokenizer.fit_on_texts(input_texts)
target_tokenizer.fit_on_texts(target_texts)

input_sequences = input_tokenizer.texts_to_sequences(input_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)

# Padding sequences
max_input_len = max(len(seq) for seq in input_sequences)
max_target_len = max(len(seq) for seq in target_sequences)

encoder_input_data = pad_sequences(
    input_sequences, maxlen=max_input_len, padding="post"
)

decoder_input_data = pad_sequences(
    target_sequences, maxlen=max_target_len, padding="post"
)

# Decoder target data (shifted)
decoder_target_data = np.zeros_like(decoder_input_data)
decoder_target_data[:, :-1] = decoder_input_data[:, 1:]

# Expand dimensions for sparse categorical crossentropy
decoder_target_data = np.expand_dims(decoder_target_data, -1)

# ===============================
# MODEL PARAMETERS
# ===============================
input_vocab_size = len(input_tokenizer.word_index) + 1
target_vocab_size = len(target_tokenizer.word_index) + 1

embedding_dim = 64
latent_dim = 128

# ===============================
# ENCODER
# ===============================
encoder_inputs = Input(shape=(None,), name="encoder_inputs")
encoder_embedding = Embedding(
    input_vocab_size, embedding_dim, name="encoder_embedding"
)(encoder_inputs)

encoder_lstm = LSTM(
    latent_dim, return_state=True, name="encoder_lstm"
)

_, state_h, state_c = encoder_lstm(encoder_embedding)

encoder_states = [state_h, state_c]

# ===============================
# DECODER (NO ATTENTION)
# ===============================
decoder_inputs = Input(shape=(None,), name="decoder_inputs")
decoder_embedding = Embedding(
    target_vocab_size, embedding_dim, name="decoder_embedding"
)(decoder_inputs)

decoder_lstm = LSTM(
    latent_dim,
    return_sequences=True,
    return_state=True,
    name="decoder_lstm"
)

decoder_outputs, _, _ = decoder_lstm(
    decoder_embedding,
    initial_state=encoder_states
)

decoder_dense = Dense(
    target_vocab_size, activation="softmax", name="decoder_dense"
)

decoder_outputs = decoder_dense(decoder_outputs)

# ===============================
# BUILD & COMPILE MODEL
# ===============================
model = Model(
    [encoder_inputs, decoder_inputs],
    decoder_outputs
)

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()

# ===============================
# TRAIN MODEL
# ===============================
model.fit(
    [encoder_input_data, decoder_input_data],
    decoder_target_data,
    batch_size=2,
    epochs=300,
    verbose=1
)



Epoch 1/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 26ms/step - accuracy: 0.1161 - loss: 2.9431
Epoch 2/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.3000 - loss: 2.9186
Epoch 3/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.3357 - loss: 2.8835
Epoch 4/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.3268 - loss: 2.8417
Epoch 5/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.3179 - loss: 2.7762
Epoch 6/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.3268 - loss: 2.6423
Epoch 7/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.3089 - loss: 2.4113
Epoch 8/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.3357 - loss: 2.1071
Epoch 9/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x7bdcd036d220>