In [1]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

In [2]:
from scripts.utils import load_config
config = load_config('../config.json')

Configuration loaded successfully from ../config.json


In [3]:
from scripts.data_service import DataService

BATCH_SIZE = config["BATCH_SIZE"]
SRC_LANGUAGE = config["SRC_LANGUAGE"]
TGT_LANGUAGE = config["TGT_LANGUAGE"]

data_service = DataService(src_language=SRC_LANGUAGE,
                           tgt_language=TGT_LANGUAGE, batch_size=BATCH_SIZE)




In [4]:
import torch

from scripts.model_service import ModelService

MODEL_SAVE_PATH = "../" + config["MODEL_SAVE_PATH"]

src_vocab, tgt_vocab = data_service.get_vocabularies()
src_vocab_size, tgt_vocab_size = len(src_vocab), len(tgt_vocab)

EMBED_DIM = config["EMBED_DIM"]
NUM_HEADS = config["NUM_HEADS"]
FF_DIM = config["FF_DIM"]
NUM_LAYERS = config["NUM_LAYERS"]
DROPOUT = config["DROPOUT"]



Model loaded from .././transformer_model.pth


In [None]:
import torch
from torchtext.data.utils import get_tokenizer


def run_inference(
    model,
    sentence: str,
    src_tokenizer,
    tgt_vocab,
    src_vocab,
    device,
    max_len: int = 50,
    bos_idx: int = 1,
    eos_idx: int = 2
):
    """
    Translate a German sentence to English using the trained Transformer model.

    Args:
        model: The trained Transformer model.
        sentence: Input German sentence (str).
        src_tokenizer: Tokenizer for the source language.
        tgt_vocab: Target vocabulary.
        src_vocab: Source vocabulary.
        device: Device to perform computation ('cuda' or 'cpu').
        max_len: Maximum length of the generated translation.

    Returns:
        Translated sentence in English (str).
    """
    model.eval()

    tokens = [bos_idx] + [src_vocab[token]
                          for token in src_tokenizer(sentence)] + [eos_idx]
    src_tensor = torch.tensor(tokens, dtype=torch.long).unsqueeze(
        1).to(device)  # Shape: (seq_len, 1)

    tgt_indices = [bos_idx]

    for _ in range(max_len):
        tgt_tensor = torch.tensor(
            tgt_indices, dtype=torch.long).unsqueeze(1).to(device)

        output = model(src_tensor, tgt_tensor)

        next_token = output[-1, 0].argmax(dim=-1).item()
        tgt_indices.append(next_token)

        if next_token == eos_idx:
            break

    translated_tokens = [tgt_vocab.get_itos()[idx]
                         for idx in tgt_indices[1:-1]]
    translated_sentence = " ".join(translated_tokens)
    return translated_sentence

In [5]:

model_service = ModelService()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model_service.load_model(
    model_path="../transformer_model.pth",
    src_vocab_size=src_vocab_size,
    tgt_vocab_size=tgt_vocab_size,
    embed_dim=EMBED_DIM,
    num_heads=NUM_HEADS,
    ff_dim=FF_DIM,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT,
    device=device
)

data_service = DataService()
src_vocab, tgt_vocab = data_service.get_vocabularies()
src_tokenizer = get_tokenizer("spacy", language="de_core_news_sm")

Model loaded from ../transformer_model.pth


In [6]:

german_sentence = "Hallo?"
english_translation = run_inference(
    model=model,
    sentence=german_sentence,
    src_tokenizer=src_tokenizer,
    tgt_vocab=tgt_vocab,
    src_vocab=src_vocab,
    device=device
)

print(f"Input (German): {german_sentence}")
print(f"Output (English): {english_translation}")

Input (German): Hallo?
Output (English): <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos>
