In [22]:
from datasets import load_dataset
import evaluate
import random

# Load 100 sample reference translations
dataset = load_dataset("colesimmons/SumTablets_English", split="train[:100]")
references = [ex["translation"] for ex in dataset]  

# Simple perturbation functions
def delete_word(text):
    words = text.split()
    if len(words) > 3:
        del words[random.randint(0, len(words)-1)]
    return " ".join(words)

def swap_words(text):
    words = text.split()
    if len(words) > 3:
        i = random.randint(0, len(words)-2)
        words[i], words[i+1] = words[i+1], words[i]
    return " ".join(words)

def replace_with_synonym(text):
    replacements = {"king": "monarch", "temple": "shrine", "city": "town", "god": "deity", "gave": "granted"}
    return " ".join([replacements.get(w, w) for w in text.split()])

perturbed = {
    "deleted_word": [delete_word(text) for text in references],
    "swapped_words": [swap_words(text) for text in references],
    "synonyms": [replace_with_synonym(text) for text in references],
}

meteor = evaluate.load("meteor")
chrf = evaluate.load("chrf")

for name, hypotheses in perturbed.items():
    meteor_score = meteor.compute(predictions=hypotheses, references=references)["meteor"]
    chrf_score = chrf.compute(predictions=hypotheses, references=references)["score"]
    
    print(f"\n{name.upper()}:")
    print(f"  METEOR: {meteor_score:.4f}")
    print(f"  chrF  : {chrf_score:.4f}")


[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/lucamartucci/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/lucamartucci/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/lucamartucci/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!



DELETED_WORD:
  METEOR: 0.9579
  chrF  : 98.3615

SWAPPED_WORDS:
  METEOR: 0.9950
  chrF  : 98.3496

SYNONYMS:
  METEOR: 0.9959
  chrF  : 98.9052


In [23]:
# Reference
ref = [
    "2 goats. On the 22nd day. From Abbasaga. Received by Lu-dingira. Month: “Festival of Šulgi,” Year: “Ša-ašru was destroyed;” Total: 2."
]

# Variants (Hypotheses)
hypotheses = {
    "deleted_phrase": "2 goats. From Abbasaga. Received by Lu-dingira. Month: “Festival of Šulgi,” Year: “Ša-ašru was destroyed;” Total: 2.",
    
    "reordered": "On the 22nd day. 2 goats. Received by Lu-dingira. From Abbasaga. Year: “Ša-ašru was destroyed;” Month: “Festival of Šulgi,” Total: 2.",
    
    "synonyms": "2 animals. On the 22nd day. Originating from Abbasaga. Handled by Lu-dingira. Month: “Šulgi's Festival,” Year: “Ša-ašru fell;” Total: 2.",
    
    "insertion": "2 goats. On the 22nd day. From Abbasaga. Received by Lu-dingira. Document was verified. Month: “Festival of Šulgi,” Year: “Ša-ašru was destroyed;” Total: 2.",
}

# Score each variant
for name, hyp in hypotheses.items():
    meteor_score = meteor.compute(predictions=[hyp], references=ref)["meteor"]
    chrf_score = chrf.compute(predictions=[hyp], references=ref)["score"]
    print(f"\n{name.upper()}:")
    print(f"  METEOR: {meteor_score:.4f}")
    print(f"  chrF  : {chrf_score:.4f}")



DELETED_PHRASE:
  METEOR: 0.8694
  chrF  : 88.7925

REORDERED:
  METEOR: 0.9522
  chrF  : 88.5518

SYNONYMS:
  METEOR: 0.8490
  chrF  : 64.0524

INSERTION:
  METEOR: 0.9828
  chrF  : 95.0308
