In [None]:
import torch
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu, SmoothingFunction
import nltk
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Download NLTK's 'punkt' resource when this cell runs.
# NOTE(review): nothing in the code visible here uses it directly -- presumably
# a tokenizer defined elsewhere depends on it; confirm before removing.
nltk.download('punkt')

def translate(model, sentence, SRC, TARGET, myTokenizerDE, max_length=50):
    """Greedily decode a translation of ``sentence`` with ``model``.

    The sentence is tokenized with ``myTokenizerDE`` and numericalized with
    ``SRC.process``. Decoding starts from ``<sos>``; at every step the whole
    target prefix is fed back into the model and the highest-scoring next
    token is appended, until ``<eos>`` is predicted or ``max_length`` steps
    have been taken. Tensors are placed on the module-level ``device``.

    Args:
        model: Callable as ``model(src, trg)``. It is switched to eval mode
            here and left in that mode. Its output is indexed as if shaped
            ``(trg_len, batch, vocab)`` -- TODO confirm against the model.
        sentence: Raw source sentence.
        SRC: Source field; only ``SRC.process`` is used.
        TARGET: Target field; ``TARGET.vocab.stoi`` and ``TARGET.vocab.itos``
            are used.
        myTokenizerDE: Callable turning the source sentence into tokens.
        max_length: Maximum number of decoding steps.

    Returns:
        The predicted tokens joined by single spaces, with every ``<sos>``,
        ``<eos>`` and ``<pad>`` token removed.
    """
    model.eval()
    src_tokens = SRC.process([myTokenizerDE(sentence)]).to(device)

    # Resolve the special-token ids once rather than on every decoding step
    # and again for every token in the final filter.
    stoi = TARGET.vocab.stoi
    sos_id = stoi["<sos>"]
    eos_id = stoi["<eos>"]
    pad_id = stoi["<pad>"]
    special_ids = (sos_id, eos_id, pad_id)

    trg_tokens = [sos_id]

    with torch.no_grad():
        for _ in range(max_length):
            # Column vector holding the prefix decoded so far.
            trg_tensor = torch.tensor(
                trg_tokens, dtype=torch.long, device=device
            ).unsqueeze(1)
            output = model(src_tokens, trg_tensor)

            # Prediction for the last position of the prefix.
            predicted_token_id = output.argmax(2)[-1].item()
            if predicted_token_id == eos_id:
                break

            trg_tokens.append(predicted_token_id)

    itos = TARGET.vocab.itos
    return " ".join(
        itos[token_id] for token_id in trg_tokens if token_id not in special_ids
    )

# Marker strings that must not take part in translation input or BLEU scoring.
# These are the same literals translate() looks up on the target vocabulary;
# the source field is assumed to use the same strings -- TODO confirm.
_SPECIAL_TOKENS = frozenset(("<sos>", "<eos>", "<pad>"))


def _column_tokens(column, itos):
    """Map a 1-D tensor of token ids to token strings, dropping special markers."""
    tokens = (itos[token_id] for token_id in column.tolist())
    return [token for token in tokens if token not in _SPECIAL_TOKENS]


def evaluate_model(model, test_iterator, SRC, TARGET, myTokenizerDE):
    """Return the corpus-level BLEU score of ``model`` over ``test_iterator``.

    Each batch's ``de`` tensor is read column by column; every column is
    turned back into a source sentence, translated with ``translate`` and
    scored against the matching column of the batch's ``en`` tensor.

    Args:
        model: Translation model, passed through to ``translate``.
        test_iterator: Iterable of batches exposing ``de`` and ``en`` tensors
            whose second dimension indexes the sentences of the batch.
        SRC: Source field; ``SRC.vocab.itos`` is used here and the field is
            passed on to ``translate``.
        TARGET: Target field; ``TARGET.vocab.itos`` is used here and the
            field is passed on to ``translate``.
        myTokenizerDE: Source tokenizer, passed through to ``translate``.

    Returns:
        The 4-gram BLEU score (uniform weights, ``method3`` smoothing), or
        ``0.0`` when the iterator yields no sentences.
    """
    hypotheses = []
    references = []

    for batch in test_iterator:
        src = batch.de
        trg = batch.en

        for i in range(src.shape[1]):
            # Strip the markers before re-translating so they are not
            # re-tokenized as ordinary words of the source sentence.
            source_sentence = ' '.join(_column_tokens(src[:, i], SRC.vocab.itos))
            translation = translate(model, source_sentence, SRC, TARGET, myTokenizerDE)

            # corpus_bleu works on token lists: a plain string would be
            # scored character by character.
            hypotheses.append(translation.split())

            # One reference per hypothesis. Row 0 is skipped (presumably the
            # <sos> row -- confirm); trailing <eos>/<pad> entries are dropped
            # so the reference matches what translate() returns.
            references.append([_column_tokens(trg[1:, i], TARGET.vocab.itos)])

    # No sentences to score.
    if not hypotheses:
        return 0.0

    # Calculate BLEU score
    smoothing = SmoothingFunction().method3
    return corpus_bleu(
        references,
        hypotheses,
        smoothing_function=smoothing,
        weights=(0.25, 0.25, 0.25, 0.25),
    )