In [None]:
!pip install GPUtil

In [None]:
# ===== Set up project ======
import os
import shutil

# Mirror the project sources from the Kaggle input dataset into the working directory
src_path = "/kaggle/input/machine-translation/"
dst_path = "/kaggle/working/machine-translation/"

# dirs_exist_ok=True allows the copy to proceed when the destination already exists
shutil.copytree(src=src_path, dst=dst_path, dirs_exist_ok=True)

In [None]:
# Add root directory to PYTHON path
import sys
from pathlib import Path

# === LOCAL === (uncomment to run outside Kaggle)
# root_dir = str(Path.cwd().parent.parent.absolute())

# === KAGGLE ===
root_dir = "/kaggle/working/machine-translation"

# Prepend the project root exactly once so repeated runs of this cell
# do not keep growing sys.path.
if root_dir not in sys.path:
    sys.path.insert(0, root_dir)

# **Model Training - LSTM & BiLSTM**

In [None]:
import matplotlib.pyplot as plt

from config import Config
from src.utils.gpu_utils import GPUMemoryManager
from src.data.preprocessing import DataPreprocessor
from src.models.bilstm_attention import BiLSTMAttentionModel
from src.models.lstm_attention import LSTMAttentionModel
from src.training.trainer import ModelTrainer
from src.utils.helpers import save_tokenizer

In [None]:
# Copy outputs from the previous notebook version into the working directory
# (used to continue training the model when the allowed session time runs out).
# Sorted so that the chosen directory is deterministic if several names match.
input_dirs = sorted(
    d for d in os.listdir('/kaggle/input/') if 'machine-translation-model-training' in d
)

if input_dirs:
    input_dir = f"/kaggle/input/{input_dirs[0]}/"
    work_dir = "/kaggle/working/machine-translation/"

    # `ignore` must be a callable taking (directory, names); a plain list raises
    # TypeError inside copytree. shutil.ignore_patterns("src") builds a callable
    # that skips every entry named "src" while the rest of the tree is merged
    # into the working directory.
    shutil.copytree(
        input_dir,
        work_dir,
        ignore=shutil.ignore_patterns("src"),
        dirs_exist_ok=True
    )

    print("Loaded outputs from previous version")
else:
    print("No previous outputs found. Starting fresh.")

## **1. GPU Setup**

In [None]:
# Clear any existing session first, then configure the GPU with the memory limit
# and growth settings taken from the project Config.
GPUMemoryManager.clear_session()
GPUMemoryManager.setup_gpu(
    memory_limit_mb=Config.GPU_MEMORY_LIMIT,
    allow_growth=Config.GPU_MEMORY_GROWTH
)
# Mixed precision is opt-in through Config.USE_MIXED_PRECISION.
if Config.USE_MIXED_PRECISION:
    GPUMemoryManager.enable_mixed_precision()
    # NOTE(review): get_memory_info() is only reached when mixed precision is enabled,
    # and any value it returns is discarded because it is not the cell's last top-level
    # expression — confirm this indentation is intended.
    GPUMemoryManager.get_memory_info()

## **2. Configuration**

In [None]:
def _print_entries(mapping):
    """Print one indented ``key: value`` line for every entry of ``mapping``."""
    for name, setting in mapping.items():
        print(f"   {name}: {setting}")


# Dict snapshot of the project configuration; later cells pass it to the
# model builders and trainers.
config = Config.to_dict()

print("Configuration:")
_print_entries(config)

print("\nMemory Estimate:")
_print_entries(Config.estimate_memory())

## **3. Data Preprocessing**

In [None]:
# Directory where the fitted tokenizers are stored
tokenizer_path = f"{Config.ARTIFACT_PATH}/tokenizers"

# The target vocabulary cap must be MAX_VOCAB_SIZE_TRG (the same value the model
# builders receive as vocab_size_trg), not the target sequence length.
preprocessor = DataPreprocessor(
    max_vocab_src=Config.MAX_VOCAB_SIZE_SRC,
    max_vocab_trg=Config.MAX_VOCAB_SIZE_TRG,
    min_frequency=Config.MIN_WORD_FREQUENCY,
    name_logger="data_preprocessing",
    filename_logger=f"{Config.LOG_DIR}/data_preprocessing.log"
)

# Load the parallel corpus (English source, Vietnamese target)
df = preprocessor.load_data(
    src_path=f"{Config.DATA_PATH}/raw/en.txt",
    trg_path=f"{Config.DATA_PATH}/raw/vi.txt",
    max_length_src=Config.MAX_LENGTH_SRC,
    max_length_trg=Config.MAX_LENGTH_TRG
)
print(f"Dataset: {df.shape}")

# Split into train / validation / test frames
train_df, val_df, test_df = preprocessor.split_data(df)
print(f"Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")

# Build the tokenizers from the training split only
tokenizer_en, tokenizer_vi = preprocessor.build_tokenizers(train_df)

# Save both tokenizers next to the other artifacts
os.makedirs(tokenizer_path, exist_ok=True)
save_tokenizer(tokenizer_en, f'{tokenizer_path}/tokenizer_en.pkl')
save_tokenizer(tokenizer_vi, f'{tokenizer_path}/tokenizer_vi.pkl')

# Prepare the sequences for training and validation: each call yields the source
# sequences plus the target input/output sequences
en_train, vi_in_train, vi_out_train = preprocessor.prepare_sequences(
    train_df, Config.MAX_LENGTH_SRC, Config.MAX_LENGTH_TRG
)
en_val, vi_in_val, vi_out_val = preprocessor.prepare_sequences(
    val_df, Config.MAX_LENGTH_SRC, Config.MAX_LENGTH_TRG
)

print(f"Training sequences: {en_train.shape}")

## **4. Build BiLSTM Model**

In [None]:
# Builder for the BiLSTM attention model, logging to its own file
bilstm_log_file = f"{Config.LOG_DIR}/bilstm_attention.log"
model_builder = BiLSTMAttentionModel(
    config=config,
    name_logger="bilstm_attention",
    filename_logger=bilstm_log_file,
)

# Vocabulary sizes and maximum sequence lengths come straight from Config
bilstm_dims = {
    "vocab_size_src": Config.MAX_VOCAB_SIZE_SRC,
    "vocab_size_trg": Config.MAX_VOCAB_SIZE_TRG,
    "max_len_src": Config.MAX_LENGTH_SRC,
    "max_len_trg": Config.MAX_LENGTH_TRG,
}
bilstm_model = model_builder.build(**bilstm_dims)

bilstm_model.summary()

In [None]:
# Show dtype and compute_dtype for the first five layers of the BiLSTM model
first_layers = bilstm_model.layers[:5]
for layer in first_layers:
    print("{}: dtype={}, compute_dtype={}".format(layer.name, layer.dtype, layer.compute_dtype))

## **5. Train BiLSTM**

In [None]:
# Floor division: a trailing partial batch is not counted as a step
steps_per_epoch = len(en_train) // Config.BATCH_SIZE
Config.TOTAL_STEPS = steps_per_epoch * Config.EPOCHS

# `config` was created from Config.to_dict() before TOTAL_STEPS was assigned, so the
# dict handed to the trainer would not see the new value. Take a fresh snapshot.
# NOTE(review): assumes to_dict() reads the current class attributes — confirm.
config = Config.to_dict()
model_name = "bilstm"

print(f"Total training steps: {Config.TOTAL_STEPS}")
print(f"Steps per epoch: {steps_per_epoch}")
print(f"Batch size (global): {Config.BATCH_SIZE}")

# Trainer shares the logger name and log file used when the BiLSTM model was built
trainer = ModelTrainer(
    model=bilstm_model,
    config=config,
    model_name=model_name,
    logger_name="bilstm_attention",
    logger_file=f"{Config.LOG_DIR}/bilstm_attention.log"
)

# resume=False is passed explicitly to the trainer
bilstm_history = trainer.train(
    train_data=(en_train, vi_in_train, vi_out_train),
    val_data=(en_val, vi_in_val, vi_out_val),
    resume=False
)

In [None]:
# Save the trained BiLSTM model under <ARTIFACT_PATH>/<model_name>/
model_path = f"{Config.ARTIFACT_PATH}/{model_name}"
os.makedirs(model_path, exist_ok=True)

bilstm_model_file = f"{model_path}/bilstm_model.h5"
trainer.save_model(bilstm_model_file)

## **6. Build LSTM Model**

In [None]:
# Clear the session before building the second model
GPUMemoryManager.clear_session()

# Builder for the LSTM attention model, logging to its own file
lstm_log_file = f"{Config.LOG_DIR}/lstm_attention.log"
lstm_builder = LSTMAttentionModel(
    config=config,
    name_logger="lstm_attention",
    filename_logger=lstm_log_file,
)

# Same vocabulary sizes and sequence lengths as the BiLSTM model
lstm_dims = {
    "vocab_size_src": Config.MAX_VOCAB_SIZE_SRC,
    "vocab_size_trg": Config.MAX_VOCAB_SIZE_TRG,
    "max_len_src": Config.MAX_LENGTH_SRC,
    "max_len_trg": Config.MAX_LENGTH_TRG,
}
lstm_model = lstm_builder.build(**lstm_dims)

lstm_model.summary()

## **7. Train LSTM**

In [None]:
model_name = "lstm"

# Trainer for the LSTM model, logging to the LSTM log file
lstm_log_file = f"{Config.LOG_DIR}/lstm_attention.log"
lstm_trainer = ModelTrainer(
    model=lstm_model,
    config=config,
    model_name=model_name,
    logger_name="lstm_attention",
    logger_file=lstm_log_file,
)

# Same training and validation sequences that were used for the BiLSTM run
lstm_train_data = (en_train, vi_in_train, vi_out_train)
lstm_val_data = (en_val, vi_in_val, vi_out_val)

lstm_history = lstm_trainer.train(
    train_data=lstm_train_data,
    val_data=lstm_val_data,
    resume=False,
)

In [None]:
# Save the trained LSTM model under <ARTIFACT_PATH>/<model_name>/
model_path = f"{Config.ARTIFACT_PATH}/{model_name}"
os.makedirs(model_path, exist_ok=True)

lstm_model_file = f"{model_path}/lstm_model.h5"
lstm_trainer.save_model(lstm_model_file)

## **8. Compare Results**

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (history object, legend prefix, extra line style) per model, in plotting order
runs = [
    (bilstm_history, 'BiLSTM', {}),
    (lstm_history, 'LSTM', {'linestyle': '--'}),
]
# (key in history.history, axis label) per panel: left = loss, right = accuracy
panels = [('loss', 'Loss'), ('accuracy', 'Accuracy')]

for ax, (metric, metric_label) in zip(axes, panels):
    for run_history, model_label, line_style in runs:
        ax.plot(run_history.history[metric], label=f'{model_label} Train', **line_style)
        ax.plot(run_history.history[f'val_{metric}'], label=f'{model_label} Val', **line_style)
    ax.set_title(f'Model {metric_label} Comparison')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_label)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()

# Create the assets directory first, as the other cells do before writing files;
# savefig does not create missing parent directories.
assets_dir = config['assets_path']
if assets_dir:
    os.makedirs(assets_dir, exist_ok=True)
plt.savefig(f"{assets_dir}/comparison.png", dpi=300)
plt.show()

In [None]:
# Report the last recorded validation loss of each model
for model_label, run_history in (("BiLSTM", bilstm_history), ("LSTM", lstm_history)):
    final_val_loss = run_history.history['val_loss'][-1]
    print(f"{model_label} - Final Val Loss: {final_val_loss:.4f}")