In [1]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

In [2]:
from scripts.utils import load_config
config = load_config('../config.json')

Configuration loaded successfully from ../config.json


In [3]:
from scripts.data_service import DataService

BATCH_SIZE = config["BATCH_SIZE"]
SRC_LANGUAGE = config["SRC_LANGUAGE"]
TGT_LANGUAGE = config["TGT_LANGUAGE"]

data_service = DataService(src_language=SRC_LANGUAGE,
                           tgt_language=TGT_LANGUAGE, batch_size=BATCH_SIZE)



In [4]:
import torch

from scripts.model_service import ModelService

MODEL_SAVE_PATH = "../" + config["MODEL_SAVE_PATH"]

src_vocab, tgt_vocab = data_service.get_vocabularies()
src_vocab_size, tgt_vocab_size = len(src_vocab), len(tgt_vocab)

EMBED_DIM = config["EMBED_DIM"]
NUM_HEADS = config["NUM_HEADS"]
FF_DIM = config["FF_DIM"]
NUM_LAYERS = config["NUM_LAYERS"]
DROPOUT = config["DROPOUT"]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_service = ModelService()
model = model_service.load_model(
    model_path=MODEL_SAVE_PATH,
    src_vocab_size=src_vocab_size,
    tgt_vocab_size=tgt_vocab_size,
    embed_dim=EMBED_DIM,
    num_heads=NUM_HEADS,
    ff_dim=FF_DIM,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT,
    device=device
)



Model loaded from .././transformer_model.pth


In [None]:
from tqdm import tqdm
from torch import nn
from torchtext.data.metrics import bleu_score


def evaluate_model(model, valid_loader, criterion, tgt_vocab, device):
    """
    Evaluate the Transformer model on the validation dataset.

    Parameters:
    - model: The trained Transformer model.
    - valid_loader: DataLoader for the validation dataset.
    - criterion: Loss function (e.g., CrossEntropyLoss).
    - tgt_vocab: Target vocabulary (to map indices to tokens).
    - device: The device to perform computation on ('cuda' or 'cpu').

    Returns:
    - avg_loss: The average loss over the validation dataset.
    - bleu: The BLEU score for the validation dataset.
    """
    model.eval()
    total_loss = 0
    translated_sentences = []
    reference_sentences = []

    with torch.no_grad():
        for src, tgt in tqdm(valid_loader, desc="Evaluating", unit="batch"):
            src, tgt = src.to(device), tgt.to(device)
            tgt_input = tgt[:-1, :]
            tgt_output = tgt[1:, :]

            output = model(src, tgt_input)
            output = output.view(-1, output.size(-1))
            tgt_output = tgt_output.view(-1)

            loss = criterion(output, tgt_output)
            total_loss += loss.item()

            output_tokens = output.argmax(
                dim=-1).view(tgt.size(0) - 1, tgt.size(1))
            for i in range(output_tokens.size(1)):
                translated_sentence = [
                    tgt_vocab.get_itos()[token] for token in output_tokens[:, i].tolist()
                    if token not in {data_service.PAD_IDX, data_service.BOS_IDX, data_service.EOS_IDX}
                ]
                reference_sentence = [
                    tgt_vocab.get_itos()[token] for token in tgt[1:, i].tolist()
                    if token not in {data_service.PAD_IDX, data_service.BOS_IDX, data_service.EOS_IDX}
                ]
                translated_sentences.append(translated_sentence)
                reference_sentences.append([reference_sentence])

    avg_loss = total_loss / len(valid_loader)
    bleu = bleu_score(translated_sentences, reference_sentences)

    print(f"Validation Loss: {avg_loss:.4f}")
    print(f"BLEU Score: {bleu:.4f}")

    return avg_loss, bleu


criterion = nn.CrossEntropyLoss(ignore_index=data_service.PAD_IDX)
valid_loader = data_service.get_valid_loader()

valid_loss, valid_bleu = evaluate_model(
    model, valid_loader, criterion, tgt_vocab, device)

Evaluating: 100%|██████████| 32/32 [01:24<00:00,  2.63s/batch]


Validation Loss: 1.5938
BLEU Score: 0.2283
