In [None]:
# Install required libraries
!pip install tensorflow
!pip install sentencepiece
!pip install pandas
!pip install numpy

# Verify TensorFlow and GPU
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("GPU Available: ", tf.config.list_physical_devices('GPU'))

# Enable memory growth for GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

TensorFlow version: 2.18.0
GPU Available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
# Mount Google Drive at /content/drive; the later cells read the corpus from
# and write tokenizers/models to /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

# Import all necessary libraries
import pandas as pd
import tensorflow as tf
import sentencepiece as spm
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
import gc
import os

# Make sure the Drive folders used for model artifacts and corpus files exist
# (no error if they are already there).
for drive_dir in (
    '/content/drive/MyDrive/seq2seq_models',
    '/content/drive/MyDrive/seq2seq_data',
):
    os.makedirs(drive_dir, exist_ok=True)

Mounted at /content/drive


In [None]:
# Load large dataset in chunks
def load_parallel_corpus(amh_file, eng_file, chunk_size=10000):
    """Load an aligned Amharic/English corpus from two line-parallel text files.

    Line ``i`` of ``amh_file`` is paired with line ``i`` of ``eng_file``. Both
    sides are stripped of surrounding whitespace, and a pair is kept only when
    both sides are non-empty, so the two returned lists always have the same
    length and stay aligned index by index.

    The files are streamed line by line. Reading stops at the end of the
    shorter file, so no empty filler sentences are appended to the result and
    a blank line in the middle of a file does not end loading early.

    Args:
        amh_file: Path to the UTF-8 text file with one Amharic sentence per line.
        eng_file: Path to the UTF-8 text file with one English sentence per line.
        chunk_size: Retained for backward compatibility with existing callers;
            lines are streamed one at a time, so the value has no effect.

    Returns:
        A tuple ``(amharic_sentences, english_sentences)`` of two equally long
        lists of strings.

    Raises:
        OSError: If either file cannot be opened.
    """
    amharic_sentences = []
    english_sentences = []

    with open(amh_file, 'r', encoding='utf-8') as f_amh, open(eng_file, 'r', encoding='utf-8') as f_eng:
        # zip() stops at the shorter file, so an unmatched tail is ignored
        # rather than being paired with empty strings.
        for amh_line, eng_line in zip(f_amh, f_eng):
            amh_sentence = amh_line.strip()
            eng_sentence = eng_line.strip()

            # Skip pairs with a blank side; they carry no training signal.
            if not amh_sentence or not eng_sentence:
                continue

            amharic_sentences.append(amh_sentence)
            english_sentences.append(eng_sentence)

    return amharic_sentences, english_sentences

def tokenize_in_chunks(sentences, tokenizer, chunk_size=1000):
    """Encode sentences to token-id lists, working through them slice by slice.

    Args:
        sentences: Sequence of sentences to encode.
        tokenizer: Object exposing ``encode_as_ids(sentence)``.
        chunk_size: Number of sentences handled between garbage-collection passes.

    Returns:
        A list with one ``encode_as_ids`` result per input sentence, in input order.
    """
    token_ids = []
    total = len(sentences)
    for start in range(0, total, chunk_size):
        stop = start + chunk_size
        for sentence in sentences[start:stop]:
            token_ids.append(tokenizer.encode_as_ids(sentence))
        # Run the garbage collector after every slice.
        gc.collect()
    return token_ids

In [None]:
# Move the uploaded files to Google Drive
!mv amh.txt /content/drive/MyDrive/seq2seq_data/
!mv eng.txt /content/drive/MyDrive/seq2seq_data/

# Load data
print("Loading data...")
amharic_sentences, english_sentences = load_parallel_corpus(
    "/content/drive/MyDrive/seq2seq_data/amh.txt",
    "/content/drive/MyDrive/seq2seq_data/eng.txt"
)

# Train SentencePiece tokenizers
print("Training tokenizers...")
for lang, sentences in [("amh", amharic_sentences), ("eng", english_sentences)]:
    with open(f"/content/drive/MyDrive/seq2seq_data/{lang}.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(sentences))
    spm.SentencePieceTrainer.train(
        input=f"/content/drive/MyDrive/seq2seq_data/{lang}.txt",
        model_prefix=f"/content/drive/MyDrive/seq2seq_models/tokenizer_{lang}",
        vocab_size=4000,
        model_type="unigram",
        character_coverage=1.0,
        pad_id=0,
        unk_id=1,
        bos_id=2,
        eos_id=3
    )

Loading data...
Training tokenizers...


In [None]:
# Load tokenizers
print("Loading tokenizers...")
sp_amh = spm.SentencePieceProcessor(model_file="/content/drive/MyDrive/seq2seq_models/tokenizer_amh.model")
sp_eng = spm.SentencePieceProcessor(model_file="/content/drive/MyDrive/seq2seq_models/tokenizer_eng.model")

print("Tokenizing data...")
amh_encoded = tokenize_in_chunks(amharic_sentences, sp_amh)
eng_encoded = tokenize_in_chunks(english_sentences, sp_eng)

# Clear memory
del amharic_sentences, english_sentences
gc.collect()

# Drop pairs in which either side encoded to nothing: after padding they are
# all-zero rows that only inflate the loss/accuracy figures.
kept_pairs = [(amh_ids, eng_ids) for amh_ids, eng_ids in zip(amh_encoded, eng_encoded) if amh_ids and eng_ids]
if not kept_pairs:
    raise ValueError("No non-empty sentence pairs are left after tokenization.")

# Wrap every target sequence in BOS ... EOS. Training feeds eng[:, :-1] to the
# decoder and predicts eng[:, 1:], and inference starts the decoder from BOS
# and stops on EOS, so both markers must be present in the training targets.
bos_id = sp_eng.bos_id()
eos_id = sp_eng.eos_id()
amh_encoded = [amh_ids for amh_ids, _ in kept_pairs]
eng_encoded = [[bos_id] + list(eng_ids) + [eos_id] for _, eng_ids in kept_pairs]
del kept_pairs

# Pad sequences (post-padding with id 0, the PAD id of both tokenizers)
print("Padding sequences...")
max_len_amh = max(len(x) for x in amh_encoded)
max_len_eng = max(len(x) for x in eng_encoded)
amh_padded = tf.keras.preprocessing.sequence.pad_sequences(amh_encoded, maxlen=max_len_amh, padding="post")
eng_padded = tf.keras.preprocessing.sequence.pad_sequences(eng_encoded, maxlen=max_len_eng, padding="post")

# Clear memory
del amh_encoded, eng_encoded
gc.collect()

# Split data into train and validation
print("Splitting data...")
val_split = 0.1
val_size = int(len(amh_padded) * val_split)
# Slice with an explicit boundary: with val_size == 0 the negative-index form
# `[:-val_size]` would return an empty training set.
split_at = len(amh_padded) - val_size
train_amh = amh_padded[:split_at]
train_eng = eng_padded[:split_at]
val_amh = amh_padded[split_at:]
val_eng = eng_padded[split_at:]

# Clear memory
del amh_padded, eng_padded
gc.collect()

Loading tokenizers...
Tokenizing data...
Padding sequences...
Splitting data...


0

In [None]:
# Hyper-parameters of the encoder-decoder network
embedding_dim = 128  # width of the token embeddings
lstm_units = 256     # size of the LSTM hidden/cell state
vocab_size = 4000    # must equal the vocab_size the tokenizers were trained with

print("Building model...")
# NOTE(review): neither Embedding layer sets mask_zero, so padded positions
# (id 0) are treated like ordinary tokens by the loss and the accuracy metric.

# Encoder: embed the source ids and keep only the final LSTM states.
encoder_inputs = Input(shape=(None,))
encoder_embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim)
encoder_embedding = encoder_embedding_layer(encoder_inputs)
encoder_lstm = LSTM(lstm_units, return_state=True)
_, state_h, state_c = encoder_lstm(encoder_embedding)

# Decoder: embed the target ids and run an LSTM that starts from the encoder
# states, emitting one output vector per time step.
decoder_inputs = Input(shape=(None,))
decoder_embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim)
decoder_embedding = decoder_embedding_layer(decoder_inputs)
decoder_lstm = LSTM(lstm_units, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=[state_h, state_c])
# Softmax over the target vocabulary at every decoder step.
decoder_dense = Dense(vocab_size, activation="softmax")(decoder_outputs)

# Two-input training model: (source ids, shifted target ids) -> token probabilities
model = Model([encoder_inputs, decoder_inputs], decoder_dense)

# Integer targets, hence the sparse variant of categorical cross-entropy.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=adam,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

Building model...


In [None]:
# Stop once val_loss has not improved for 3 epochs and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True
)
# Keep only the checkpoint with the lowest val_loss on Drive.
best_checkpoint = ModelCheckpoint(
    "/content/drive/MyDrive/seq2seq_models/seq2seq_model.h5",
    monitor="val_loss",
    save_best_only=True
)
callbacks = [early_stopping, best_checkpoint]

# Teacher forcing: the decoder reads the target without its last position and
# is trained to predict the target shifted left by one position.
print("Starting training...")
history = model.fit(
    x=[train_amh, train_eng[:, :-1]],
    y=tf.expand_dims(train_eng[:, 1:], -1),
    validation_data=(
        [val_amh, val_eng[:, :-1]],
        tf.expand_dims(val_eng[:, 1:], -1)
    ),
    batch_size=64,
    epochs=50,
    callbacks=callbacks,
    verbose=1
)

Starting training...
Epoch 1/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step - accuracy: 0.9312 - loss: 0.4715



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 159ms/step - accuracy: 0.9312 - loss: 0.4715 - val_accuracy: 1.0000 - val_loss: 9.2231e-04
Epoch 2/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - accuracy: 0.9332 - loss: 0.4529



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 170ms/step - accuracy: 0.9332 - loss: 0.4528 - val_accuracy: 1.0000 - val_loss: 4.9197e-04
Epoch 3/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step - accuracy: 0.9339 - loss: 0.4339



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 172ms/step - accuracy: 0.9339 - loss: 0.4339 - val_accuracy: 1.0000 - val_loss: 3.0912e-04
Epoch 4/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step - accuracy: 0.9365 - loss: 0.4069



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 169ms/step - accuracy: 0.9365 - loss: 0.4069 - val_accuracy: 1.0000 - val_loss: 2.0901e-04
Epoch 5/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9375 - loss: 0.3944



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 173ms/step - accuracy: 0.9375 - loss: 0.3943 - val_accuracy: 1.0000 - val_loss: 1.5236e-04
Epoch 6/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9395 - loss: 0.3751



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9395 - loss: 0.3751 - val_accuracy: 1.0000 - val_loss: 1.1201e-04
Epoch 7/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9413 - loss: 0.3564



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 172ms/step - accuracy: 0.9413 - loss: 0.3564 - val_accuracy: 1.0000 - val_loss: 8.9001e-05
Epoch 8/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9412 - loss: 0.3511



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 172ms/step - accuracy: 0.9412 - loss: 0.3510 - val_accuracy: 1.0000 - val_loss: 6.7463e-05
Epoch 9/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9421 - loss: 0.3401



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 172ms/step - accuracy: 0.9421 - loss: 0.3401 - val_accuracy: 1.0000 - val_loss: 5.1754e-05
Epoch 10/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9423 - loss: 0.3339



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9423 - loss: 0.3339 - val_accuracy: 1.0000 - val_loss: 4.3334e-05
Epoch 11/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step - accuracy: 0.9438 - loss: 0.3206



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 171ms/step - accuracy: 0.9438 - loss: 0.3206 - val_accuracy: 1.0000 - val_loss: 3.8839e-05
Epoch 12/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9436 - loss: 0.3173



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 173ms/step - accuracy: 0.9436 - loss: 0.3173 - val_accuracy: 1.0000 - val_loss: 2.9798e-05
Epoch 13/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9452 - loss: 0.3048



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9452 - loss: 0.3048 - val_accuracy: 1.0000 - val_loss: 2.3841e-05
Epoch 14/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9456 - loss: 0.2987



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9456 - loss: 0.2987 - val_accuracy: 1.0000 - val_loss: 1.9953e-05
Epoch 15/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9468 - loss: 0.2890



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9468 - loss: 0.2890 - val_accuracy: 1.0000 - val_loss: 1.6618e-05
Epoch 16/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9465 - loss: 0.2875



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9465 - loss: 0.2875 - val_accuracy: 1.0000 - val_loss: 1.4044e-05
Epoch 17/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step - accuracy: 0.9475 - loss: 0.2792



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 172ms/step - accuracy: 0.9475 - loss: 0.2792 - val_accuracy: 1.0000 - val_loss: 1.1898e-05
Epoch 18/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9491 - loss: 0.2684



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 179ms/step - accuracy: 0.9491 - loss: 0.2684 - val_accuracy: 1.0000 - val_loss: 1.0983e-05
Epoch 19/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9487 - loss: 0.2678



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 173ms/step - accuracy: 0.9487 - loss: 0.2678 - val_accuracy: 1.0000 - val_loss: 8.1081e-06
Epoch 20/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - accuracy: 0.9498 - loss: 0.2603



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 171ms/step - accuracy: 0.9498 - loss: 0.2603 - val_accuracy: 1.0000 - val_loss: 7.2628e-06
Epoch 21/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9505 - loss: 0.2548



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 174ms/step - accuracy: 0.9505 - loss: 0.2548 - val_accuracy: 1.0000 - val_loss: 6.5286e-06
Epoch 22/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - accuracy: 0.9511 - loss: 0.2503



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 170ms/step - accuracy: 0.9511 - loss: 0.2503 - val_accuracy: 1.0000 - val_loss: 5.2158e-06
Epoch 23/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9521 - loss: 0.2436



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 173ms/step - accuracy: 0.9521 - loss: 0.2436 - val_accuracy: 1.0000 - val_loss: 5.1653e-06
Epoch 24/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9535 - loss: 0.2351



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9535 - loss: 0.2351 - val_accuracy: 1.0000 - val_loss: 4.2297e-06
Epoch 25/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9537 - loss: 0.2331



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9537 - loss: 0.2331 - val_accuracy: 1.0000 - val_loss: 3.8234e-06
Epoch 26/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9552 - loss: 0.2251



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9551 - loss: 0.2251 - val_accuracy: 1.0000 - val_loss: 3.0400e-06
Epoch 27/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9553 - loss: 0.2232



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 175ms/step - accuracy: 0.9553 - loss: 0.2232 - val_accuracy: 1.0000 - val_loss: 2.7731e-06
Epoch 28/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9564 - loss: 0.2168



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 172ms/step - accuracy: 0.9564 - loss: 0.2168 - val_accuracy: 1.0000 - val_loss: 2.2308e-06
Epoch 29/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9569 - loss: 0.2139



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 174ms/step - accuracy: 0.9569 - loss: 0.2139 - val_accuracy: 1.0000 - val_loss: 1.9766e-06
Epoch 30/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9576 - loss: 0.2097



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9576 - loss: 0.2097 - val_accuracy: 1.0000 - val_loss: 1.9675e-06
Epoch 31/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9582 - loss: 0.2066



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 173ms/step - accuracy: 0.9582 - loss: 0.2066 - val_accuracy: 1.0000 - val_loss: 1.2408e-06
Epoch 32/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9602 - loss: 0.1956



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9602 - loss: 0.1956 - val_accuracy: 1.0000 - val_loss: 9.5554e-07
Epoch 33/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 170ms/step - accuracy: 0.9598 - loss: 0.1976 - val_accuracy: 1.0000 - val_loss: 1.1417e-06
Epoch 34/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9608 - loss: 0.1925



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 173ms/step - accuracy: 0.9608 - loss: 0.1925 - val_accuracy: 1.0000 - val_loss: 6.3700e-07
Epoch 35/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 172ms/step - accuracy: 0.9617 - loss: 0.1868 - val_accuracy: 1.0000 - val_loss: 6.7033e-07
Epoch 36/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9618 - loss: 0.1862



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 174ms/step - accuracy: 0.9618 - loss: 0.1862 - val_accuracy: 1.0000 - val_loss: 4.2859e-07
Epoch 37/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9622 - loss: 0.1847



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 174ms/step - accuracy: 0.9622 - loss: 0.1847 - val_accuracy: 1.0000 - val_loss: 4.1132e-07
Epoch 38/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 172ms/step - accuracy: 0.9629 - loss: 0.1804 - val_accuracy: 1.0000 - val_loss: 6.4317e-07
Epoch 39/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 172ms/step - accuracy: 0.9633 - loss: 0.1784 - val_accuracy: 1.0000 - val_loss: 5.2159e-07
Epoch 40/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9647 - loss: 0.1715



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 174ms/step - accuracy: 0.9647 - loss: 0.1715 - val_accuracy: 1.0000 - val_loss: 2.9416e-07
Epoch 41/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9650 - loss: 0.1698



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.9650 - loss: 0.1698 - val_accuracy: 1.0000 - val_loss: 2.5816e-07
Epoch 42/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 172ms/step - accuracy: 0.9660 - loss: 0.1648 - val_accuracy: 1.0000 - val_loss: 2.5963e-07
Epoch 43/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.9659 - loss: 0.1643



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 175ms/step - accuracy: 0.9659 - loss: 0.1643 - val_accuracy: 1.0000 - val_loss: 2.3881e-07
Epoch 44/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 172ms/step - accuracy: 0.9672 - loss: 0.1582 - val_accuracy: 1.0000 - val_loss: 2.4395e-07
Epoch 45/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step - accuracy: 0.9673 - loss: 0.1574



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 171ms/step - accuracy: 0.9673 - loss: 0.1574 - val_accuracy: 1.0000 - val_loss: 1.8055e-07
Epoch 46/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9674 - loss: 0.1568



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 174ms/step - accuracy: 0.9674 - loss: 0.1568 - val_accuracy: 1.0000 - val_loss: 1.2142e-07
Epoch 47/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.9673 - loss: 0.1570



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 174ms/step - accuracy: 0.9673 - loss: 0.1570 - val_accuracy: 1.0000 - val_loss: 1.0502e-07
Epoch 48/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 171ms/step - accuracy: 0.9688 - loss: 0.1506 - val_accuracy: 1.0000 - val_loss: 2.3293e-07
Epoch 49/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 169ms/step - accuracy: 0.9689 - loss: 0.1496 - val_accuracy: 1.0000 - val_loss: 1.3205e-07
Epoch 50/50
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9693 - loss: 0.1473



[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 174ms/step - accuracy: 0.9693 - loss: 0.1473 - val_accuracy: 1.0000 - val_loss: 8.6595e-08


In [None]:

# Cell 7.5 - Model Evaluation with BLEU Score
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu
from nltk.tokenize import word_tokenize
import nltk
# word_tokenize needs the Punkt sentence-tokenizer data. Recent NLTK releases
# look for the 'punkt_tab' resource while older ones use 'punkt', so fetch
# both; nltk.download reports a missing resource without raising.
nltk.download('punkt')
nltk.download('punkt_tab')

def translate_sentence(model, input_sequence, sp_amh, sp_eng, max_length=50):
    """Greedily translate one Amharic sentence into English.

    The sentence is encoded with ``sp_amh`` and the full encoder-decoder
    ``model`` is then called repeatedly: each call receives the source ids
    plus the target ids generated so far (starting from BOS), and the most
    probable id at the last decoder position is taken as the next token.
    Decoding stops at EOS, at PAD, or after ``max_length`` generated tokens.

    Args:
        model: Two-input model exposing ``predict([encoder_ids, decoder_ids],
            verbose=0)`` and returning per-position vocabulary scores of shape
            ``(1, decoder_len, vocab)``.
        input_sequence: Source sentence as text.
        sp_amh: Source tokenizer exposing ``encode_as_ids``.
        sp_eng: Target tokenizer exposing ``decode``.
        max_length: Upper bound on the number of generated tokens.

    Returns:
        The decoded English text. An empty string is returned when the source
        encodes to no tokens or when decoding the generated ids fails.
    """
    # Special-token ids as assigned when the tokenizers are trained in this notebook.
    pad_token, bos_token, eos_token = 0, 2, 3

    source_ids = [int(token) for token in sp_amh.encode_as_ids(input_sequence)]
    if not source_ids:
        # Nothing to translate; a zero-length encoder input is not meaningful.
        return ""

    encoder_input_sequence = np.array([source_ids], dtype="int32")

    # Target ids fed to the decoder so far; grows by one id per step.
    decoder_ids = [bos_token]
    translated_tokens = []

    for _ in range(max_length):
        decoder_input_sequence = np.array([decoder_ids], dtype="int32")
        output = model.predict([encoder_input_sequence, decoder_input_sequence], verbose=0)

        # Only the scores at the last decoder position describe the next token.
        predicted_token = int(np.argmax(output[0, -1, :]))

        if predicted_token == eos_token or predicted_token == pad_token:
            break

        translated_tokens.append(predicted_token)
        decoder_ids.append(predicted_token)

    try:
        english_text = sp_eng.decode(translated_tokens)
    except Exception as e:
        # Best effort: report the problem and return an empty translation.
        print(f"Error during decoding: {e}")
        english_text = ""

    return english_text

print("Evaluating model with BLEU score...")


def _decode_ids(tokenizer, padded_ids, skip_ids=(0,)):
    """Decode one padded id row to text, leaving out the ids in ``skip_ids``."""
    # Cast to built-in ints: the padded rows hold NumPy integers, while the
    # SentencePiece decoder is given plain Python ints everywhere else.
    kept_ids = [int(token) for token in padded_ids if int(token) not in skip_ids]
    return tokenizer.decode(kept_ids)


# Get a sample of validation data for evaluation
num_samples = min(100, len(val_amh))  # Evaluate on up to 100 samples
sample_indices = np.random.choice(len(val_amh), num_samples, replace=False)

# Reference translations, model predictions, and a few triples kept for display
references = []
hypotheses = []
examples = []
num_examples = 5

for idx in sample_indices:
    # Recover the source text from the padded ids (0 is PAD).
    amh_text = _decode_ids(sp_amh, val_amh[idx])

    # Recover the reference text, dropping PAD (0), BOS (2) and EOS (3).
    eng_text = _decode_ids(sp_eng, val_eng[idx], skip_ids=(0, 2, 3))

    # Get model's translation
    predicted_text = translate_sentence(model, amh_text, sp_amh, sp_eng)

    # corpus_bleu expects, per sample, a list of tokenised references and one
    # tokenised hypothesis.
    references.append([word_tokenize(eng_text.lower())])
    hypotheses.append(word_tokenize(predicted_text.lower()))

    # Remember the first few triples so they can be printed below without
    # translating the same sentences a second time.
    if len(examples) < num_examples:
        examples.append((amh_text, eng_text, predicted_text))

# Calculate BLEU score (defined as 0.0 when there is nothing to evaluate)
if hypotheses:
    bleu_score = float(corpus_bleu(references, hypotheses))
else:
    bleu_score = 0.0
print(f"BLEU Score: {bleu_score:.4f}")

# Print some example translations
print("\nExample Translations:")
print("-" * 60)
for amh_text_display, eng_text_display, predicted_text_display in examples:
    print(f"Amharic: {amh_text_display}")
    print(f"Reference: {eng_text_display}")
    print(f"Predicted: {predicted_text_display}")
    print("-" * 60)

# Save BLEU score to history so it is written out with the training metrics
if not hasattr(history, 'history'):
    history.history = {}
history.history['bleu_score'] = bleu_score

In [None]:
# Save final model
print("Saving model...")
model.save("/content/drive/MyDrive/seq2seq_models/seq2seq_model_final.h5")

# Save training history
import json
with open("/content/drive/MyDrive/seq2seq_models/training_history.json", "w") as f:
    json.dump(history.history, f)

print("Training completed!")

# Create a zip file with all the necessary files
!zip -r /content/drive/MyDrive/seq2seq_models.zip /content/drive/MyDrive/seq2seq_models/

print("\nInstructions for downloading the models:")
print("1. Go to your Google Drive")
print("2. Find the seq2seq_models.zip file")
print("3. Download it")
print("4. Extract the contents to your local 'models' directory")



Saving model...
Training completed!
  adding: content/drive/MyDrive/seq2seq_models/ (stored 0%)
  adding: content/drive/MyDrive/seq2seq_models/tokenizer_amh.model (deflated 46%)
  adding: content/drive/MyDrive/seq2seq_models/tokenizer_amh.vocab (deflated 66%)
  adding: content/drive/MyDrive/seq2seq_models/tokenizer_eng.model (deflated 44%)
  adding: content/drive/MyDrive/seq2seq_models/tokenizer_eng.vocab (deflated 64%)
  adding: content/drive/MyDrive/seq2seq_models/seq2seq_model.h5 (deflated 7%)
  adding: content/drive/MyDrive/seq2seq_models/seq2seq_model_final.h5 (deflated 7%)
  adding: content/drive/MyDrive/seq2seq_models/training_history.json (deflated 56%)

Instructions for downloading the models:
1. Go to your Google Drive
2. Find the seq2seq_models.zip file
3. Download it
4. Extract the contents to your local 'models' directory
