In [7]:
def read_data_from_file(file_path):
    """Return the entire contents of the text file at ``file_path`` as one string.

    The file is opened read-only and decoded as UTF-8; it is closed again
    before the function returns.
    """
    with open(file_path, mode='r', encoding='utf-8') as source:
        return source.read()

In [26]:
import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize
import numpy as np

In [27]:
from sentence_transformers import SentenceTransformer, util
import torch

# Load the LaBSE sentence-embedding model once at module level;
# parse_and_calculate_bleu reads this global `model` to encode texts.
model = SentenceTransformer('sentence-transformers/LaBSE')

In [9]:
import re

def split_and_concatenate_text_by_language(data):
    """Gather the text that follows each ``Ukrainian:`` / ``Hutsul:`` label, per language.

    Runs of digits and em dashes are deleted from ``data`` first. Each labelled
    chunk extends to the next label (or to the end of the input), is stripped of
    surrounding whitespace, and chunks of the same language are joined with a
    single space in their order of appearance.

    Returns a dict with exactly the keys ``'Ukrainian'`` and ``'Hutsul'``; a
    language that never appears maps to the empty string.
    """
    without_noise = re.sub(r'\d+|—', '', data)
    labelled_chunk = re.compile(r"(Ukrainian|Hutsul):(.*?)(?=(Ukrainian|Hutsul):|$)", re.DOTALL)

    chunks_by_language = {'Ukrainian': [], 'Hutsul': []}
    for found in labelled_chunk.finditer(without_noise):
        # group(1) is the label, group(2) the text up to the next label.
        chunks_by_language[found.group(1)].append(found.group(2).strip())

    # The outer strip() drops the padding that empty chunks would leave at the edges.
    return {
        language: " ".join(chunks).strip()
        for language, chunks in chunks_by_language.items()
    }


In [38]:
def get_avg_scores(scores):
    """Print the median BLEU and LaBSE scores and return their means.

    ``scores`` is a sequence of records in which index 2 holds the BLEU score
    and index 3 the LaBSE cosine similarity (the tuple layout produced by
    ``parse_and_calculate_bleu``).

    Returns ``(mean_bleu, mean_labse)``. The medians are printed, not returned.
    """
    bleu_column = [record[2] for record in scores]
    labse_column = [record[3] for record in scores]

    bleu_mean, bleu_median = np.mean(bleu_column), np.median(bleu_column)
    labse_mean, labse_median = np.mean(labse_column), np.median(labse_column)

    print(f"Median BLEU Score: {bleu_median}")
    print(f"Median LaBSE Score: {labse_median}")

    return bleu_mean, labse_mean

In [53]:
def _score_translation_pair(reference_text, candidate_text, smoothing_function=None):
    """Score ``candidate_text`` against ``reference_text`` with BLEU and LaBSE similarity.

    Both texts are encoded with the module-level ``model`` and compared by cosine
    similarity; BLEU is computed on their ``word_tokenize`` tokens with
    ``reference_text`` as the single reference.

    Returns ``(reference_text, candidate_text, bleu, cosine_similarity)``.
    """
    reference_embedding = model.encode(reference_text, convert_to_tensor=True)
    candidate_embedding = model.encode(candidate_text, convert_to_tensor=True)
    cosine_similarity = util.pytorch_cos_sim(reference_embedding, candidate_embedding)

    bleu = sentence_bleu(
        [word_tokenize(reference_text)],
        word_tokenize(candidate_text),
        smoothing_function=smoothing_function,
    )
    return reference_text, candidate_text, bleu, cosine_similarity.item()


def parse_and_calculate_bleu(data, smoothing_function=None):
    """Score every evaluation segment in ``data`` with BLEU and LaBSE cosine similarity.

    ``data`` is split into segments on the dashed separator line; blank segments
    are skipped. Each remaining segment is split on ``'!!!'`` into two halves,
    and the Ukrainian and Hutsul text of each half is extracted with
    ``split_and_concatenate_text_by_language``. Any halves after the second are
    ignored.

    Two pairs are scored per segment:

    * Hutsul: the second half's Hutsul text is the reference ("original") and
      the first half's Hutsul text is the candidate ("translation").
    * Ukrainian: the first half's Ukrainian text is the reference and the
      second half's Ukrainian text is the candidate.

    NOTE(review): presumably the first half holds a Ukrainian source with a
    model-produced Hutsul translation and the second half the reverse direction
    -- confirm against the evaluation-file format.

    Args:
        data: Full text of an evaluation file.
        smoothing_function: Optional value forwarded to ``sentence_bleu`` as its
            ``smoothing_function`` argument. The default ``None`` keeps NLTK's
            default behaviour, so existing callers get unchanged scores. Passing
            e.g. ``nltk.translate.bleu_score.SmoothingFunction().method1`` is
            the usual remedy when pairs with no higher-order n-gram overlap
            produce near-zero BLEU values.

    Returns:
        ``(hutsul_scores, ukrainian_scores)``: two lists of
        ``(original, translation, bleu, cosine_similarity)`` tuples, one entry
        per non-blank segment, in file order.

    Raises:
        IndexError: if a non-blank segment contains no ``'!!!'`` separator.
    """
    hutsul_scores = []
    ukrainian_scores = []

    segments = data.split('-------------------------------')
    for position, segment in enumerate(segments, start=1):
        if not segment.strip():
            continue

        parts = segment.split('!!!')
        if len(parts) < 2:
            # Same exception type the bare parts[1] lookup used to raise, but
            # with enough context to locate the malformed segment.
            raise IndexError(
                f"segment {position} has no '!!!' separator; expected two halves"
            )

        first_half = split_and_concatenate_text_by_language(parts[0])
        second_half = split_and_concatenate_text_by_language(parts[1])

        hutsul_scores.append(
            _score_translation_pair(
                second_half['Hutsul'], first_half['Hutsul'], smoothing_function
            )
        )
        ukrainian_scores.append(
            _score_translation_pair(
                first_half['Ukrainian'], second_half['Ukrainian'], smoothing_function
            )
        )

    return hutsul_scores, ukrainian_scores

In [23]:
# Read the first evaluation file into `data` as a single string (path is relative to the working directory).
data = read_data_from_file('llama-3-hutsul-finetune-combined_eval_post.txt')

In [None]:
# Score every segment of `data`: `scores` holds the Hutsul pairs, `scores2` the Ukrainian pairs.
scores, scores2 = parse_and_calculate_bleu(data)
# Print each Hutsul pair together with its BLEU score and LaBSE cosine similarity.
for original, translation, score, cosine_similarity in scores:
    print(f"Original: {original}\nTranslation: {translation}\nBLEU Score: {score}\nLaBSE Score: {cosine_similarity}\n")

In [None]:
# Print each Ukrainian pair from `scores2` with its BLEU score and LaBSE cosine similarity.
for original, translation, score, cosine_similarity in scores2:
    print(f"Original: {original}\nTranslation: {translation}\nBLEU Score: {score}\nLaBSE Score: {cosine_similarity}\n")

In [39]:
# get_avg_scores prints the medians itself; the outer print shows its returned (mean BLEU, mean LaBSE) tuple.
print(get_avg_scores(scores))
print('---------------------')
print(get_avg_scores(scores2))

Median BLEU Score: 2.66669249540442e-78
Median LaBSE Score: 0.7877578735351562
(0.1713403423750469, 0.6732441560462391)
---------------------
Median BLEU Score: 0.24384183193426084
Median LaBSE Score: 0.882727861404419
(0.27870532035242085, 0.8437750962065773)


In [42]:
# Rebind `data` to the contents of the next evaluation file.
data = read_data_from_file('m_eval_done.txt')

In [43]:
# Score whatever `data` currently holds; `m_scores` gets the Hutsul pairs, `m_scores2` the Ukrainian pairs.
m_scores, m_scores2 = parse_and_calculate_bleu(data)

In [44]:
# Medians are printed inside get_avg_scores; the outer print shows the returned (mean BLEU, mean LaBSE) tuple.
print(get_avg_scores(m_scores))
print('---------------------')
print(get_avg_scores(m_scores2))

Median BLEU Score: 4.0622028886850106e-78
Median LaBSE Score: 0.7612000703811646
(0.15998786146580393, 0.7133132157723109)
---------------------
Median BLEU Score: 0.17376436413676968
Median LaBSE Score: 0.8063327074050903
(0.21484042900157818, 0.7642296598354975)


In [45]:
# Rebind `data` to the next evaluation file and score it in the same cell.
data = read_data_from_file('chatGpt-3.5_preprocessed_eval.txt')
# `gpt_scores` gets the Hutsul pairs, `gpt_scores2` the Ukrainian pairs.
gpt_scores, gpt_scores2 = parse_and_calculate_bleu(data)

In [46]:
# Medians are printed inside get_avg_scores; the outer print shows the returned (mean BLEU, mean LaBSE) tuple.
print(get_avg_scores(gpt_scores))
print('---------------------')
print(get_avg_scores(gpt_scores2))

Median BLEU Score: 4.2077067846993234e-78
Median LaBSE Score: 0.8610229194164276
(0.12099316710994074, 0.8314545747637748)
---------------------
Median BLEU Score: 0.28582555866764475
Median LaBSE Score: 0.932696521282196
(0.3095447983505376, 0.9078365230560302)


In [54]:
# Load, score and summarise one more evaluation file in a single cell.
data = read_data_from_file('mistral-hutsul-finetuned-v1_eval.txt')
# `v1_scores` gets the Hutsul pairs, `v1_scores2` the Ukrainian pairs.
v1_scores, v1_scores2 = parse_and_calculate_bleu(data)

# Medians are printed inside get_avg_scores; the outer print shows the returned (mean BLEU, mean LaBSE) tuple.
print(get_avg_scores(v1_scores))
print('---------------------')
print(get_avg_scores(v1_scores2))

Median BLEU Score: 3.1537317208636084e-78
Median LaBSE Score: 0.747868001461029
(0.14063703910833475, 0.6724886310005945)
---------------------
Median BLEU Score: 0.2102369368326755
Median LaBSE Score: 0.772828996181488
(0.22987647790345245, 0.720658637701519)
