In [None]:
import pandas as pd

def _as_text_list(frame, column):
    """Return the values of `column` in `frame` as a list of strings.

    Empty CSV cells are parsed by pandas as float NaN, and purely numeric
    cells as int/float; calling `.lower()` on either raises AttributeError.
    Missing cells are mapped to '' and everything else goes through str(),
    so downstream lowercasing and tokenization always receive strings.
    """
    return ['' if pd.isna(value) else str(value) for value in frame[column]]


# Load the cleaned Spanish -> English dataset (semicolon-separated, UTF-8).
df = pd.read_csv('../datasets/cleaned_es_en_dataset.csv', delimiter=';', encoding='utf-8')
inputs = df['input_text'].tolist()
references = _as_text_list(df, 'reference_translation')
references_lower = [s.lower() for s in references]

# Load the translated text dataset (same CSV dialect as above).
tr_df = pd.read_csv('../translated-datasets/mbart-large-50-many-to-many-mmt-translated_es_en_dataset.csv', delimiter=';', encoding='utf-8')
translations = _as_text_list(tr_df, 'translated_text')
translations_lower = [s.lower() for s in translations]

In [None]:
import nltk
from nltk.translate.meteor_score import meteor_score
# Fetch the NLTK resources used below: the Punkt tokenizer data for
# nltk.word_tokenize ('punkt' on older NLTK releases, 'punkt_tab' on newer
# ones) and WordNet for METEOR's synonym matching. Requesting an id the
# installed NLTK index does not know only reports an error; it does not raise.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

# meteor_score expects pre-tokenized input, so tokenize both sides first.
tokenized_translations = [nltk.word_tokenize(trans) for trans in translations_lower]
tokenized_references = [nltk.word_tokenize(ref) for ref in references_lower]

# zip() would silently drop the surplus rows and pair up misaligned sentences,
# so refuse to score when the two files disagree on the number of rows.
if len(tokenized_references) != len(tokenized_translations):
    raise ValueError(
        f'Row count mismatch: {len(tokenized_references)} references vs '
        f'{len(tokenized_translations)} translations'
    )

# One score per sentence pair; each translation is compared against its
# single reference (meteor_score takes a list of references).
meteor_scores = [meteor_score([ref], trans) for ref, trans in zip(tokenized_references, tokenized_translations)]

# Corpus-level figure is the unweighted mean of the per-sentence scores.
# NOTE(review): the original comment recorded "0.25" here, presumably the
# value observed on an earlier run -- confirm before quoting it.
# An empty dataset yields NaN instead of a ZeroDivisionError.
average_meteor = sum(meteor_scores) / len(meteor_scores) if meteor_scores else float('nan')
print(f'Average METEOR score: {average_meteor:.4f}')
print('===================================================')

# Print individual translations and their METEOR scores
for input_text, ref, trans, meteor in zip(inputs, references_lower, translations_lower, meteor_scores):
    print(f'Input: {input_text}')
    print(f'Reference: {ref}')
    print(f'Translation: {trans}')
    print(f'METEOR score: {meteor:.4f}\n')