In [1]:
from translation_datapipe import create_translation_datapipe_train, create_translation_datapipe_val
from util import load_config, load_tokenizers, get_tokenizer_params
import os
import time
import torch
from model import Transformer

In [2]:
# Inference setup: pick the device, read the config, load tokenizers and data
# pipes, then restore the trained Transformer weights from a checkpoint.

# Set the device (GPU when available); it is also installed as torch's default
# device for subsequently created tensors/modules.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device}")
torch.set_default_device(device)

# Load config
config = load_config("config.yaml")

# Load the tokenizers
source_tokenizer, target_tokenizer = load_tokenizers(**config["tokenizer"])

# Load the training and validation pipe
train_pipe = create_translation_datapipe_train(
    source_tokenizer=source_tokenizer,
    target_tokenizer=target_tokenizer,
    **config["datapipe_train"]
)
val_pipe = create_translation_datapipe_val(
    source_tokenizer=source_tokenizer,
    target_tokenizer=target_tokenizer,
    max_generation_length=config["max_generation_length"],
    **config["datapipe_val"]
)

# Checkpoint location. The default is the path of the original training run;
# set the TRANSFORMER_CHECKPOINT environment variable to run the notebook on
# another machine without editing this cell.
CHECKPOINT_PATH = os.environ.get(
    "TRANSFORMER_CHECKPOINT",
    "/home/nils/Informatik/SS23/nlp/transformer/logs/run_final/checkpoint_100000",
)

# Load the model's state_dict. Tensors are mapped to the CPU while reading the file.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
model_state_dict = torch.load(CHECKPOINT_PATH, map_location=torch.device('cpu'))["model_state_dict"]

# Create the model
transformer = Transformer(
    **get_tokenizer_params(source_tokenizer, target_tokenizer),
    **config["transformer_params"]
)
transformer.load_state_dict(model_state_dict)
# Switch to evaluation mode for inference.
transformer.eval()
print()

Using cpu



In [3]:
def generate_translation(sentence):
    """Translate one source sentence with the loaded ``transformer``.

    The sentence is lower-cased, its subword pieces are printed for
    inspection, and the token ids (encoded with ``add_bos=True`` and
    ``add_eos=True``) are handed to ``transformer.generate``.

    Args:
        sentence: Source-language text to translate.

    Returns:
        The first element of ``target_tokenizer.decode`` applied to the
        generated ids (``predictions.tolist()``).
    """
    sentence = sentence.lower()
    print(f"Tokenized sentence: {source_tokenizer.encode_as_pieces(sentence)}")
    token_ids = source_tokenizer.encode(sentence, add_bos=True, add_eos=True)
    # torch.tensor (unlike the legacy torch.IntTensor constructor) follows the
    # default device configured via torch.set_default_device; dtype stays int32.
    token_tensor = torch.tensor(token_ids, dtype=torch.int32)
    # Pure inference: no autograd graph needs to be recorded.
    with torch.no_grad():
        predictions = transformer.generate(token_tensor)
    return target_tokenizer.decode(predictions.tolist())[0]

In [4]:
# Example: short declarative sentence with regular capitalisation
# (generate_translation lower-cases its input before tokenizing).
sentence = "Deutschland ist meine Heimat."
translation = generate_translation(sentence)
print(translation)

Tokenized sentence: ['▁deutschland', '▁ist', '▁meine', '▁heimat', '.']
my country is germany.


In [5]:
# Example: sentence with a modal verb; the noun "angelegenheit" is typed in
# lower case, which makes no difference because the input is lower-cased anyway.
sentence = "Wir müssen diese angelegenheit diskutieren."
translation = generate_translation(sentence)
print(translation)

Tokenized sentence: ['▁wir', '▁müssen', '▁diese', '▁angelegenheit', '▁diskutieren', '.']
we must discuss this issue.


In [6]:
# Example: very short everyday sentence, typed entirely in lower case.
sentence = "ich liebe kuchen."
translation = generate_translation(sentence)
print(translation)

Tokenized sentence: ['▁ich', '▁liebe', '▁k', 'uchen', '.']
i will be talking about my own.


In [7]:
# Example: longer, more formal phrasing of a simple statement about food.
sentence = "ich finde den genuss von lebensmitteln zufriedenstellend."
translation = generate_translation(sentence)
print(translation)

Tokenized sentence: ['▁ich', '▁finde', '▁den', '▁genuss', '▁von', '▁lebensmitteln', '▁zufrieden', 'stellend', '.']
i believe that food is a good thing.


In [8]:
# Example: sentence with a subordinate clause (", dass ...") that ends in an
# exclamation mark instead of a full stop.
sentence = "ich möchte hiermit verkünden, dass dieses projekt sehr zufriedenstellend war!"
translation = generate_translation(sentence)
print(translation)

Tokenized sentence: ['▁ich', '▁möchte', '▁hiermit', '▁verk', 'ünden', ',', '▁dass', '▁dieses', '▁projekt', '▁sehr', '▁zufrieden', 'stellend', '▁war', '!']
i would like to say that i am very pleased with the project.


In [9]:
# Example: input without terminal punctuation; note "mochte" (no umlaut),
# as opposed to "möchte" used in an earlier example.
sentence = "ich mochte dieses projekt sehr"
translation = generate_translation(sentence)
print(translation)

Tokenized sentence: ['▁ich', '▁m', 'och', 'te', '▁dieses', '▁projekt', '▁sehr']
i was a very successful commissioner.
