In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Directory of the fine-tuned checkpoint; both the tokenizer and the model
# weights are read from it.
model_path = "./mbart_finetuned_2e-4_16/checkpoint-326"

# Tokenizer saved alongside the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the seq2seq weights first, then move the model onto the GPU.
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
model = model.to("cuda")

def translate(text, target_lang="<chv_XX>", num_beams=5):
    """Translate ``text`` with the module-level ``model`` and ``tokenizer``.

    Args:
        text: Source text to translate.
        target_lang: Language-tag token that generation is forced to start
            with. Defaults to ``"<chv_XX>"``, the tag this notebook used.
        num_beams: Beam width passed to ``model.generate``. Defaults to 5.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # Send the inputs to whatever device the model actually lives on instead
    # of assuming "cuda", so the two can never end up on different devices.
    # truncation=True caps the input at the tokenizer's configured maximum
    # length rather than passing an over-long sequence to the model.
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(model.device)

    outputs = model.generate(
        **inputs,
        num_beams=num_beams,
        # Force the first generated token to be the target-language tag.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_lang),
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [None]:
import pandas as pd
import sacrebleu

# Load and read test data
# NOTE(review): latin1 decoding never raises, so a file that is really UTF-8
# would be read as mojibake without any error. Confirm the file's encoding.
test_file = "test1.csv"
df = pd.read_csv(test_file, encoding="latin1")

# Fail fast on a wrong schema instead of reporting the same KeyError once per row.
missing_columns = {"source", "target"} - set(df.columns)
if missing_columns:
    raise KeyError(f"{test_file} is missing required column(s): {sorted(missing_columns)}")

hypotheses = []
references = []

for index, row in df.iterrows():
    source_text = row['source']
    target_text = row['target']

    # Empty CSV cells are read as NaN (a float). A NaN reference would only
    # fail later, inside the BLEU computation, after every row was translated.
    if pd.isna(source_text) or pd.isna(target_text):
        print(f"Skipping row {index}: missing source or target text")
        continue

    # Only the model call is best-effort; other errors should surface.
    try:
        generated_text = translate(str(source_text))
    except Exception as e:
        print(f"Error generating translation for row {index}: {e}")
        continue

    print("Input: ", source_text)
    print("Translation: ", generated_text)

    # Hypotheses and references must stay index-aligned, so append both
    # together and only for rows that were translated successfully.
    hypotheses.append(generated_text)
    references.append(str(target_text))

# The score only covers rows that were translated; make that visible.
print(f"Scored {len(hypotheses)} of {len(df)} rows")
if not hypotheses:
    raise ValueError("No translations were produced; cannot compute a BLEU score")

# Calculate sacreBLEU score
bleu_score = sacrebleu.corpus_bleu(hypotheses, [references])  # references needs to be a list of lists

In [None]:
# Render the value held in bleu_score into a single line, then show it.
bleu_report = f"BLEU score: {bleu_score}"
print(bleu_report)

In [None]:
from transformers import pipeline
import pandas as pd
import sacrebleu

# Load the fine-tuned model as a translation pipeline
translator = pipeline('translation', model='./helsinki_finetuned_1e-4_16/checkpoint-574')

# Load and read the evaluation data (this cell scores the validation file)
# NOTE(review): latin1 decoding never raises, so a file that is really UTF-8
# would be read as mojibake without any error. Confirm the file's encoding.
test_file = "validation1.csv"
df = pd.read_csv(test_file, encoding="latin1")

# Fail fast on a wrong schema instead of reporting the same KeyError once per row.
missing_columns = {"source", "target"} - set(df.columns)
if missing_columns:
    raise KeyError(f"{test_file} is missing required column(s): {sorted(missing_columns)}")

hypotheses = []
references = []

for index, row in df.iterrows():
    source_text = row['source']
    target_text = row['target']

    # Empty CSV cells are read as NaN (a float). A NaN reference would only
    # fail later, inside the BLEU computation, after every row was translated.
    if pd.isna(source_text) or pd.isna(target_text):
        print(f"Skipping row {index}: missing source or target text")
        continue

    # Only the model call is best-effort; other errors should surface.
    try:
        # Strict key lookup: a result without 'translation_text' is reported
        # for this row instead of a None being added to the hypotheses.
        generated_text = translator(str(source_text))[0]['translation_text']
    except Exception as e:
        print(f"Error generating translation for row {index}: {e}")
        continue

    print("Input: ", source_text)
    print("Translation: ", generated_text)

    # Hypotheses and references must stay index-aligned, so append both
    # together and only for rows that were translated successfully.
    hypotheses.append(generated_text)
    references.append(str(target_text))

# The score only covers rows that were translated; make that visible.
print(f"Scored {len(hypotheses)} of {len(df)} rows")
if not hypotheses:
    raise ValueError("No translations were produced; cannot compute a BLEU score")

# Calculate sacreBLEU score
bleu_score = sacrebleu.corpus_bleu(hypotheses, [references])  # references needs to be a list of lists

In [None]:
# Render the value held in bleu_score into a single line, then show it.
bleu_report = f"BLEU score: {bleu_score}"
print(bleu_report)