In [6]:
import pandas as pd
from transformers import MarianMTModel, MarianTokenizer
import sacrebleu
from rouge_score import rouge_scorer
from nltk.translate.meteor_score import meteor_score

def read_tsv(file_path):
    data = pd.read_csv(file_path, sep="\t", header=None, names=["source", "reference"])
    return data["source"].tolist(), data["reference"].tolist()

def translate_texts(texts, model_name="Helsinki-NLP/opus-mt-en-vi", batch_size=16):
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name).to("cuda")

    # Chia dữ liệu thành các batch nhỏ hơn
    translated_texts = []
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i:i+batch_size]
        inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True).to("cuda")
        translated_tokens = model.generate(**inputs)
        translated_texts.extend([tokenizer.decode(t, skip_special_tokens=True) for t in translated_tokens])
    
    return translated_texts

def calculate_bleu(translated_texts, reference_texts):
    bleu = sacrebleu.corpus_bleu(translated_texts, [reference_texts])
    return bleu.score/100

def calculate_rouge(translated_texts, reference_texts):
    scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
    scores = [scorer.score(ref, trans) for ref, trans in zip(reference_texts, translated_texts)]
    avg_scores = {
        "rouge1": sum([score["rouge1"].fmeasure for score in scores]) / len(scores),
        "rouge2": sum([score["rouge2"].fmeasure for score in scores]) / len(scores),
        "rougeL": sum([score["rougeL"].fmeasure for score in scores]) / len(scores),
    }
    return avg_scores

def calculate_meteor(translated_texts, reference_texts):
    scores = [meteor_score([ref], trans) for ref, trans in zip(reference_texts, translated_texts)]
    return sum(scores) / len(scores)

def save_results(file_path, source_texts, translated_texts, reference_texts):
    output_data = pd.DataFrame({
        "source": source_texts,
        "translated": translated_texts,
        "reference": reference_texts
    })
    output_data.to_csv(file_path, sep="\t", index=False, encoding="utf-8")

def main_pipeline(tsv_path, output_path):
    source_texts, reference_texts = read_tsv(tsv_path)

    translated_texts = translate_texts(source_texts)

    bleu_score = calculate_bleu(translated_texts, reference_texts)
    print(f"BLEU score: {bleu_score}")

    rouge_scores = calculate_rouge(translated_texts, reference_texts)
    print(f"ROUGE scores: {rouge_scores}")

    meteor_score_avg = calculate_meteor(translated_texts, reference_texts)
    print(f"Average METEOR score: {meteor_score_avg}")

    save_results(output_path, source_texts, translated_texts, reference_texts)
    print(f"Results saved to {output_path}")

In [7]:
if __name__ == "__main__":
    tsv_path = "../../data/OurDataEn-Vi/test.txt"
    output_path = "results.tsv"
    main_pipeline(tsv_path, output_path)



BLEU score: 0.3726958828584083
ROUGE scores: {'rouge1': 0.7453659338693679, 'rouge2': 0.5548548123860473, 'rougeL': 0.6957350950510507}
Average METEOR score: 0.599388257827987
Results saved to results.tsv
