In [2]:
from langchain_ollama import OllamaLLM
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from sacrebleu import sentence_bleu
from nltk.translate.meteor_score import meteor_score  # For METEOR score
from transformers import pipeline

In [25]:
import nltk

# --- Configuration: the values a reader is most likely to tune --------------
OLLAMA_MODEL_NAME = "translator"
TRAIN_CSV_PATH = 'data/production_train_test/English-German/balanced/English-German-train_production_balanced.csv'
N_SAMPLES = 10  # number of leading rows to process; set to None for the whole file

# WordNet corpus; NLTK's METEOR implementation relies on it for synonym matching.
nltk.download('wordnet')

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Ollama-served model used below for both sentence generation and translation.
model = OllamaLLM(model=OLLAMA_MODEL_NAME)

df = pd.read_csv(TRAIN_CSV_PATH)
if N_SAMPLES is not None:
    # New columns are assigned onto this frame later in the cell, so take an
    # explicit copy of the head slice rather than keeping a slice of the
    # full frame.
    df = df.head(N_SAMPLES).copy()

def generate_gpt_context(word):
    """Ask the LLM for a very short German sentence that uses ``word``.

    Args:
        word: The word to be embedded in the generated sentence; it is
            interpolated into the prompt inside single quotes.

    Returns:
        Whatever ``model.invoke`` returns for the prompt, unmodified.
    """
    return model.invoke(
        f"Write a natural German very small sentence using the word '{word}' in context.nothing else."
    )


# Hugging Face translation pipeline (opus-mt-de-en checkpoint); its output is
# used further down as the reference the LLM translation is scored against.
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-de-en")

def calculate_bleu(reference, hypothesis):
    """Sentence-level BLEU of ``hypothesis`` against a single ``reference``.

    Note the argument order: this function takes the reference first, whereas
    ``sentence_bleu`` is called with the hypothesis first and the references
    wrapped in a list.

    Args:
        reference: The reference sentence (string).
        hypothesis: The candidate sentence being scored (string).

    Returns:
        The ``score`` attribute of the sacrebleu result divided by 100,
        i.e. rescaled from the 0-100 range to [0, 1].
    """
    result = sentence_bleu(hypothesis, [reference])
    return result.score / 100

def calculate_meteor(reference, hypothesis):
    """METEOR score of ``hypothesis`` against a single ``reference``.

    Both sentences are tokenized by plain whitespace splitting before being
    handed to ``meteor_score``; the reference tokens are wrapped in a list
    because the scorer is given a list of references.

    Args:
        reference: The reference sentence (string).
        hypothesis: The candidate sentence being scored (string).

    Returns:
        The value returned by ``meteor_score``.
    """
    return meteor_score([reference.split()], hypothesis.split())

def generate_reference_sentence(sentence):
    """Ask the LLM to translate a German ``sentence`` into English.

    Despite the name, the caller in this notebook stores the result as the
    translated sentence and scores it as the hypothesis; the reference comes
    from the ``translator`` pipeline.

    Args:
        sentence: The German sentence; it is interpolated into the prompt
            inside single quotes.

    Returns:
        Whatever ``model.invoke`` returns for the prompt, unmodified.
    """
    return model.invoke(
        f"Translate this German sentence: '{sentence}' to English. Write only the translated sentence, nothing else."
    )

print("Generating context sentences...")
# One LLM call per value of the 'original_word' column; tqdm reports progress.
context_sentences = [
    generate_gpt_context(source_word)
    for source_word in tqdm(df['original_word'], total=len(df))
]

df['generated_context'] = context_sentences

# Per-row accumulators; each becomes one DataFrame column after the loop.
bleu_scores = []
meteor_scores = []
translated_sentence = []
ref = []
ref_word = []

print("Translating and evaluating sentences...")
# Only these two columns are needed per row, so iterate them directly.
sentence_word_pairs = zip(df['generated_context'], df['original_word'])
for german_sentence, german_word in tqdm(sentence_word_pairs, total=len(df)):
    # Hypothesis: the LLM's English translation of the generated sentence.
    llm_translation = generate_reference_sentence(german_sentence)
    translated_sentence.append(llm_translation)

    # Reference: the translation pipeline's output for the same sentence.
    pipeline_sentence = translator(german_sentence)[0]['translation_text']
    ref.append(pipeline_sentence)

    # The isolated word is also run through the pipeline, for inspection.
    pipeline_word = translator(german_word)[0]['translation_text']
    ref_word.append(pipeline_word)

    # Score the LLM translation against the pipeline reference.
    bleu_scores.append(calculate_bleu(pipeline_sentence, llm_translation))
    meteor_scores.append(calculate_meteor(pipeline_sentence, llm_translation))

# Attach results in a fixed order so the frame's column order is stable.
new_columns = {
    'reference_sentence': ref,
    "translated_sentence": translated_sentence,
    'bleu_score': bleu_scores,
    'meteor_score': meteor_scores,
    'reference_word': ref_word,
}
for column_name, column_values in new_columns.items():
    df[column_name] = column_values

# Columns written to disk, in output order.
important_columns = [
    'loan_word',
    'original_word',
    'generated_context',
    'reference_sentence',
    'translated_sentence',
    "reference_word",
    'label',
    'bleu_score',
    'meteor_score',
]
df_important = df.loc[:, important_columns]

# Write without the index so the file holds only the selected columns.
output_path = 'score.csv'
df_important.to_csv(output_path, index=False)
print(f"Saved with context and scores to: {output_path}")

[nltk_data] Downloading package wordnet to
[nltk_data]     /usr/local/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Device set to use cpu


Generating context sentences...


100%|██████████| 10/10 [00:12<00:00,  1.24s/it]


Translating and evaluating sentences...


100%|██████████| 10/10 [00:15<00:00,  1.56s/it]

Saved with context and scores to: score.csv





In [31]:
# NOTE(review): this file name differs from 'score.csv', which the cell above
# writes; presumably it comes from an earlier, larger run. Confirm which file
# is intended so the notebook reproduces under Restart & Run All.
df = pd.read_csv(filepath_or_buffer='loanwords_with_context_and_scores.csv')
df

Unnamed: 0,loan_word,original_word,generated_context,reference_sentence,translated_sentence,reference_word,label,bleu_score,meteor_score
0,Mirth,Fröhlichkeit,Die Kinder lachten fröhlich zusammen am Strand.,The children laughed happily together on the b...,The children laughed joyfully together at the ...,Cheerfulness,synonym,0.310202,0.703125
1,Schnorr,Chromatogramm,"Der Chemiker studierte die Chromatogramme, um ...",The chemist studied the chromatograms to detec...,The chemist studied the chromatograms in order...,Chromatogram,random,0.403250,0.587589
2,Zettelkasten,Zettelkasten,"Der Archivator griff sein Zettelkasten auf, um...",The archivist picked up his paper box to read ...,The archivist grabbed his filing cabinet to re...,Paper box,loan,0.310526,0.635239
3,Meiring,Meiring,Der Fußballspieler Meiring ist bekannt für sei...,The football player Meiring is known for his i...,"The German sentence translates to:\n\n""Meiring...",Meiring,loan,0.063369,0.239110
4,Speth,Speth,Der Vogel gefliegt schnell um den Baum mit ein...,The bird flies quickly around the tree with a ...,The bird flew quickly around the tree with a l...,Speth,loan,0.585906,0.813692
...,...,...,...,...,...,...,...,...,...
5289,Meisinger,Meisinger,Der kleine Junge kaufte eine Cola von Meisinge...,The little boy bought a Coke from Meisinger at...,The little boy bought a Meisinger cola at the ...,Meisinger,loan,0.634361,0.844064
5290,Frankenberger,bleiben lassen,"Sie müssen sich nicht wärmen, da das Zimmer se...",You don't have to warm yourself because the ro...,"She does not need to warm up, because the room...",Keep,hard_negative,0.173958,0.638448
5291,esteemed,geehrt,"Der alte Mann ruft ""Geheert!"" als die Sonne un...","The old man shouts, ""Goes!"" as the sun went down.","The old man cries ""Go away!"" as the sun is set...",Honored,synonym,0.347916,0.583058
5292,Meier,Kauffmann,"Der Kaufmann betonte, dass die neue Kaffee-Mas...",The merchant stressed that the new coffee mach...,"Der Kaufmann betonte, dass die neue Kaffee-Mas...",Kauffmann,random,0.024456,0.000000
