In [2]:
from langchain_ollama import OllamaLLM
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from sacrebleu import sentence_bleu
from nltk.translate.meteor_score import meteor_score  # For METEOR score
from transformers import pipeline

In [3]:
import nltk
# WordNet corpus is fetched for nltk's meteor_score (imported above).
nltk.download('wordnet')

# Cap each generation at 25 tokens. OllamaLLM exposes this limit as
# `num_predict`; the previously used `max_tokens` keyword is not one of its
# fields, so the intended cap was never applied.
# NOTE(review): raise the limit if generated sentences come back cut off.
model = OllamaLLM(model="translator", num_predict=25)

df = pd.read_csv('data/production_train_test/English-German/balanced/English-German-train_production_balanced.csv')
# Uncomment for a quick smoke test on a handful of rows:
# df = df.head(10)

def generate_gpt_context(word):
    """Ask the LLM for a short German sentence that uses ``word``.

    Args:
        word: The word to place in context; it is interpolated verbatim
            into the prompt.

    Returns:
        The model's reply with leading and trailing whitespace removed.
    """
    prompt = f"Write a natural German very small sentence using the word '{word}' in context.nothing else."
    # Strip the reply, matching generate_reference_sentence: the sentence is
    # later embedded inside a quoted translation prompt and written to CSV,
    # where stray newlines/spaces are unwanted.
    return model.invoke(prompt).strip()


# German -> English translation pipeline; the evaluation loop below scores the
# LLM's translations against this pipeline's output.
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-de-en")

def calculate_bleu(reference, hypothesis):
    """Sentence-level BLEU of ``hypothesis`` against a single ``reference``.

    Args:
        reference: The reference sentence (string).
        hypothesis: The candidate sentence to score (string).

    Returns:
        The sacreBLEU sentence score divided by 100, i.e. normalized to [0, 1].
    """
    # sentence_bleu takes the hypothesis first, then a list of references.
    result = sentence_bleu(hypothesis, [reference])
    return result.score / 100

def calculate_meteor(reference, hypothesis):
    """METEOR score of ``hypothesis`` against a single ``reference``.

    Both sentences are tokenized by splitting on whitespace before being
    handed to nltk's ``meteor_score``.

    Args:
        reference: The reference sentence (string).
        hypothesis: The candidate sentence to score (string).

    Returns:
        Whatever ``meteor_score`` returns for the tokenized pair.
    """
    # meteor_score expects a list of tokenized references and one tokenized hypothesis.
    return meteor_score([reference.split()], hypothesis.split())

def generate_reference_sentence(sentence):
    """Have the LLM translate a German ``sentence`` into English.

    Note: despite the name, the evaluation loop in this notebook scores this
    function's output as the *hypothesis*; the reference comes from the
    ``translator`` pipeline.

    Args:
        sentence: German sentence, interpolated verbatim into the prompt.

    Returns:
        The model's reply with leading and trailing whitespace removed.
    """
    translation_prompt = (
        f"Translate this German sentence: '{sentence}' to English. "
        "Write only the translated sentence, nothing else."
    )
    return model.invoke(translation_prompt).strip()

print("Generating context sentences...")
# One LLM call per row, in DataFrame order; only the 'original_word' column is
# needed, so iterate over it directly.
context_sentences = [
    generate_gpt_context(word)
    for word in tqdm(df['original_word'], total=len(df))
]

df['generated_context'] = context_sentences

# Per-row results, appended in DataFrame row order.
translated_sentence = []  # LLM translation of the generated sentence (scored as hypothesis)
ref = []                  # pipeline translation of the generated sentence (scored as reference)
ref_word = []             # pipeline translation of the source word on its own
bleu_scores = []
meteor_scores = []

print("Translating and evaluating sentences...")
sentence_word_pairs = zip(df['generated_context'], df['original_word'])
for german_sentence, source_word in tqdm(sentence_word_pairs, total=len(df)):
    # Note: the word translated here comes from 'original_word', not 'loan_word'.
    llm_translation = generate_reference_sentence(german_sentence)
    translated_sentence.append(llm_translation)

    mt_translation = translator(german_sentence)[0]['translation_text']
    ref.append(mt_translation)

    ref_word.append(translator(source_word)[0]['translation_text'])

    # Both metrics compare the LLM output against the pipeline output.
    bleu_scores.append(calculate_bleu(mt_translation, llm_translation))
    meteor_scores.append(calculate_meteor(mt_translation, llm_translation))

# Attach the collected results as new columns.
df['reference_sentence'] = ref
df["translated_sentence"] = translated_sentence
df['bleu_score'] = bleu_scores
df['meteor_score'] = meteor_scores
df['reference_word'] = ref_word

output_path = 'score.csv'

# Columns kept in the exported file, in output order.
important_columns = [
    'loan_word',
    'original_word',
    'generated_context',
    'reference_sentence',
    'translated_sentence',
    "reference_word",
    'label',
    'bleu_score',
    'meteor_score',
]
df_important = df.loc[:, important_columns]

# The row index is not written to the CSV.
df_important.to_csv(output_path, index=False)
print(f"Saved with context and scores to: {output_path}")

[nltk_data] Downloading package wordnet to
[nltk_data]     /usr/local/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Device set to use cpu


Generating context sentences...


100%|██████████| 5294/5294 [1:55:32<00:00,  1.31s/it]  


Translating and evaluating sentences...


100%|██████████| 5294/5294 [2:35:12<00:00,  1.76s/it]  


Saved with context and scores to: score.csv


In [None]:
# Reload the scored results for inspection.
# NOTE(review): the export cell above writes 'score.csv' with index=False, but
# this cell reads 'score_data/score.csv', and the displayed frame carries an
# 'Unnamed: 0' index column. Presumably the file was moved or re-saved outside
# this notebook; confirm the path so a fresh Restart & Run All loads the file
# that was just written.
df = pd.read_csv('score_data/score.csv')
df

Unnamed: 0,loan_word,original_word,generated_context,reference_sentence,translated_sentence,reference_word,label,bleu_score,meteor_score
0,Mirth,Fröhlichkeit,"Die Frau lächelte plötzlich und sagte: ""Ich fü...","The woman smiled suddenly and said, ""I feel ve...","The woman smiled suddenly and said: ""I feel ve...",Cheerfulness,synonym,0.800320,0.905455
1,Schnorr,Chromatogramm,Der Chemiker studierte die Chromatogramme der ...,The chemist studied the chromatograms of the v...,The chemist studied the chromatograms of diffe...,Chromatogram,random,0.582823,0.771560
2,Zettelkasten,Zettelkasten,"Ich habe meine Zettelkasten vollgefüllt, um al...",I filled up my paperbox to write down all the ...,I have filled my notebook with all ideas and t...,Paper box,loan,0.431177,0.627753
3,Meiring,Meiring,"Der kleine Hund rannte durch den Wald, um nach...",The little dog ran through the forest to look ...,The little dog ran through the woods in search...,Meiring,loan,0.423118,0.721960
4,Speth,Speth,"Der Speth fuhr durch die Felder, um frische Ge...",The Speth drove through the fields to buy fres...,The farmer drove through the fields to buy fre...,Speth,loan,0.630807,0.750388
...,...,...,...,...,...,...,...,...,...
5289,Meisinger,Meisinger,"Der kleine Hund rannte durch den Wald, um nach...",The little dog ran through the forest to look ...,"Der kleine Hund rannte durch den Wald, um nach...",Meisinger,loan,0.024456,0.000000
5290,Frankenberger,bleiben lassen,"Der Hund bleibt lassen, wenn man ihn nicht füt...",Keep the dog if you don't feed it.,The dog will not leave if you do not feed him.,Keep,hard_negative,0.085166,0.537349
5291,esteemed,geehrt,Mein Vater gehert mich immer noch.,My father's still insinuating me.,My father still loves me.,Honored,synonym,0.193049,0.300000
5292,Meier,Kauffmann,Der kleine Kaufmann kaufte ein Stück Brot auf ...,The little merchant bought a piece of bread in...,The small merchant bought a loaf of bread on t...,Kauffmann,random,0.178275,0.576994
