In [None]:
!pip install datasets transformers==4.51.3 nltk evaluate tqdm bert_score wandb
!pip install --upgrade datasets fsspec

Collecting transformers==4.51.3
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.0.0->bert_score)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.0.0->bert_score)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.0.0->bert_score)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.0.0->bert_score)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.

In [None]:
from tqdm import tqdm
import sys
import os
import nltk
nltk.download('punkt')
from nltk.translate.bleu_score import corpus_bleu
sys.path.append(os.path.abspath(os.path.join(os.path.dirname("blue_score.ipynb"), "..")))
from datasets import load_dataset
import random
import numpy as np

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
# Single seed value shared by the random number generators seeded below.
SEED_VALUE = 42


# Seed Python's `random` module (used by the noise functions in this notebook)
# and NumPy's global generator.
random.seed(SEED_VALUE)
np.random.seed(SEED_VALUE)

In [None]:
from nltk.corpus import wordnet
nltk.download('wordnet')
nltk.download('omw-1.4')

#GETS SYNONYM FOR A WORD

def get_synonym(word):
    """Return a WordNet synonym for ``word``, or ``None`` if none is found.

    Only the first synset returned by ``wordnet.synsets`` is consulted. Its
    lemma names are scanned in order (underscores turned into spaces) and the
    first one that differs from ``word`` ignoring case is returned.
    """
    synsets = wordnet.synsets(word)
    if not synsets:
        return None

    candidates = (lemma.name().replace("_", " ") for lemma in synsets[0].lemmas())
    # First candidate that is not just a re-casing of the input word.
    return next(
        (candidate for candidate in candidates if candidate.lower() != word.lower()),
        None,
    )


In [None]:
# Builds, for every letter key, the keys directly to its left and right on the same QWERTY row
def build_qwerty_neighbors():
    """Map each lowercase letter to its same-row neighbours on a QWERTY layout.

    Returns:
        dict[str, str]: for every letter of the three letter rows, a string
        holding the key to its left (if any) followed by the key to its right
        (if any). Keys on other rows are not considered neighbours.
    """
    rows = ("qwertyuiop", "asdfghjkl", "zxcvbnm")
    adjacency = {}

    for keys in rows:
        last = len(keys) - 1
        for pos, key in enumerate(keys):
            left = keys[pos - 1] if pos > 0 else ""
            right = keys[pos + 1] if pos < last else ""
            adjacency[key] = left + right

    return adjacency

QWERTY_NEIGHBORS = build_qwerty_neighbors()


In [None]:
#INTRODUCE TYPOS
def typo_char(c):
    """Replace ``c`` with a randomly chosen keyboard neighbour.

    The lookup in ``QWERTY_NEIGHBORS`` is done on the lowercase form of ``c``.
    Characters with no entry (digits, punctuation, non-ASCII letters, ...) are
    returned unchanged.

    The replacement keeps the case of the input: an uppercase letter yields an
    uppercase neighbour instead of being silently lowercased.
    """
    neighbors = QWERTY_NEIGHBORS.get(c.lower())
    if not neighbors:
        return c

    replacement = random.choice(neighbors)
    return replacement.upper() if c.isupper() else replacement

In [None]:
def add_noise(text, noise_fraction):
    """Corrupt a fraction of the whitespace-separated words in ``text``.

    Args:
        text: input sentence; it is split on whitespace into words.
        noise_fraction: share of the words to corrupt. Values <= 0 leave the
            text untouched; values above 1 are treated as "every word".

    Returns:
        The noised sentence with words re-joined by single spaces. When there
        is nothing to do (no words, or ``noise_fraction <= 0``) ``text`` is
        returned unchanged.

    For any positive fraction at least one word is corrupted. Each selected
    word receives exactly one of: deletion of the whole word, insertion of a
    punctuation mark at a random position, or a character-level corruption
    (random replacement of every letter, shuffle, deletion of one character,
    appended letter, or keyboard-neighbour typos via ``typo_char``).
    """
    words = text.split()
    # Empty input would make random.sample() raise, and a zero noise level
    # must really mean "no noise" so it can serve as a clean baseline.
    if not words or noise_fraction <= 0:
        return text

    n_total = len(words)
    # At least one word for any positive fraction, never more than we have.
    n_to_noise = min(n_total, max(1, int(noise_fraction * n_total)))
    noise_indices = random.sample(range(n_total), n_to_noise)

    noisy_words = list(words)
    alphabet = "abcdefghijklmnopqrstuvwxyz"

    for idx in noise_indices:
        word = noisy_words[idx]
        noise_type = random.choice(["delete_word", "punctuation_insert", "char_noise"])

        if noise_type == "delete_word":
            # Mark for removal; filtered out after the loop.
            noisy_words[idx] = ""
        elif noise_type == "punctuation_insert":
            punct = random.choice([".", ",", "!", "?", ";"])
            insert_pos = random.randint(0, len(word))
            noisy_words[idx] = word[:insert_pos] + punct + word[insert_pos:]
        elif noise_type == "char_noise":
            corruption_type = random.choice(
                ["replace", "shuffle", "delete_char", "add_char", "typo_char"]
            )
            if corruption_type == "replace":
                noisy_words[idx] = ''.join(random.choice(alphabet) for _ in word)
            elif corruption_type == "shuffle":
                chars = list(word)
                random.shuffle(chars)
                noisy_words[idx] = ''.join(chars)
            elif corruption_type == "delete_char":
                # Drop exactly one character at a random position. Comparing
                # every character against a freshly drawn random.choice(word)
                # removed an unpredictable number of characters instead.
                drop_pos = random.randrange(len(word))
                noisy_words[idx] = word[:drop_pos] + word[drop_pos + 1:]
            elif corruption_type == "add_char":
                noisy_words[idx] = word + random.choice(alphabet)
            elif corruption_type == "typo_char":
                noisy_words[idx] = ''.join(typo_char(char) for char in word)

    # Remove deleted words (and words emptied by a character deletion).
    return " ".join(w for w in noisy_words if w != "")

In [None]:
def retrieve_data(max_length, add_text_noise, noise_level,
                  n_raw=200000, n_train=50000, n_val=3000, n_test=3000):
    """Load WMT14 de-en and carve train/validation/test splits from its train split.

    Args:
        max_length: maximum number of whitespace-separated words allowed in
            both the German and the English side of a pair.
        add_text_noise: when true, the English side is passed through
            ``add_noise`` before splitting.
        noise_level: fraction handed to ``add_noise``.
        n_raw: number of leading rows of the WMT14 train split to consider
            (capped at the size of that split).
        n_train, n_val, n_test: sizes of the three consecutive slices taken
            from the filtered rows, in that order.

    Returns:
        dict with keys ``"train"``, ``"validation"`` and ``"test"``.

    Raises:
        ValueError: if fewer than ``n_train + n_val + n_test`` pairs survive
            the length filter.
    """
    dataset = load_dataset("wmt14", "de-en")

    train_split = dataset["train"]
    raw_subset = train_split.select(range(min(n_raw, len(train_split))))

    def is_short(example):
        return len(example["translation"]["de"].split()) <= max_length and len(example["translation"]["en"].split()) <= max_length

    filtered = raw_subset.filter(is_short)

    needed = n_train + n_val + n_test
    if len(filtered) < needed:
        raise ValueError(
            f"Only {len(filtered)} sentence pairs remain after filtering with "
            f"max_length={max_length}, but {needed} are required for the splits."
        )

    # Only the first `needed` rows end up in a split, so restrict the (slow)
    # noise pass to them instead of noising every filtered row.
    selected = filtered.select(range(needed))

    if add_text_noise:
        def apply_noise(example):
            example["translation"]["en"] = add_noise(example["translation"]["en"], noise_level)
            return example

        selected = selected.map(apply_noise)

    val_end = n_train + n_val
    return {
        "train": selected.select(range(n_train)),
        "validation": selected.select(range(n_train, val_end)),
        "test": selected.select(range(val_end, needed))
    }

In [None]:
def translated(n, model):
    """Translate ``n`` by delegating to ``model.translate_text``.

    Returns whatever ``model.translate_text(n)`` returns.
    """
    translate = model.translate_text
    return translate(n)

In [None]:
# pip install -U datasets fsspec huggingface_hub


In [None]:
data = retrieve_data(max_length=50,add_text_noise=False, noise_level=0.0)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/280M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/265M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/273M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/474k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/509k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4508785 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3003 [00:00<?, ? examples/s]

Filter:   0%|          | 0/200000 [00:00<?, ? examples/s]

In [None]:
print(data["train"].shape)
print(data["test"].shape)
print(data["validation"].shape)

(50000, 1)
(3000, 1)
(3000, 1)


In [None]:
data["validation"]["translation"][:10]

[{'de': 'Der Europäische Forschungsraum ...', 'en': 'So the area ...'},
 {'de': '(Der Präsident entzieht dem Redner das Wort.)',
  'en': '(The President cut the speaker off)'},
 {'de': 'Herr Präsident, ich möchte Frau Plooij-van Gorsel danken, denn sie hat wieder einmal einen ausgezeichneten Bericht vorgelegt.',
  'en': 'Mr President, I wish to thank Mrs Plooij-Van Gorsel for her usual very good report.'},
 {'de': 'Ich danke Herrn Busquin, der diese Debatte in Gang gesetzt und dafür den richtigen Zeitpunkt gewählt hat.',
  'en': 'I thank the Commissioner, whose timing is very good and so is his idea of launching this debate.'},
 {'de': 'Tatsächlich wächst das Unbehagen über die mangelnde Koordinierung im europäischen Wissenschafts- und Forschungsbereich.',
  'en': 'There is indeed a growing feeling of unease at the lack of coordination in European science and research.'},
 {'de': 'Die Reaktionen sind positiv, wie Herr Busquin aufgrund seiner Kontakte mit den entsprechenden Stellen in E

In [None]:
data["test"]["translation"][:10]

[{'de': 'In der französischen Version kommt der Begriff "délit " in der englischen dagegen der Begriff "crime " vor.',
  'en': 'The French version contains the word "délit" , whereas this is written as "crime" in the English version.'},
 {'de': 'Wie Sie wissen, hat der französische Begriff nicht dieselbe Bedeutung.',
  'en': 'As you know, Madam President, this does not have the same meaning in French.'},
 {'de': 'Mit dem Änderungsantrag soll diese Version berichtigt werden.',
  'en': 'The amendment seeks to correct this.'},
 {'de': 'Ich habe ihn zwar nicht eingereicht, aber ich denke, dies ist sein Zweck.',
  'en': 'I am not the one who tabled the amendment, but it does seem to me that this is the point of it.'},
 {'de': 'Darauf wollte ich hinweisen.', 'en': 'I wanted to point this out.'},
 {'de': 'Zum Änderungsantrag 5:', 'en': 'Relating to Amendment No 5'},
 {'de': 'Frau Präsidentin! In den Gesprächen mit Frau Martens von der PPE-Fraktion haben wir beschlossen, den Änderungsantrag zu

In [None]:
data["train"]["translation"][:10]

[{'de': 'Wiederaufnahme der Sitzungsperiode',
  'en': 'Resumption of the session'},
 {'de': 'Ich erkläre die am Freitag, dem 17. Dezember unterbrochene Sitzungsperiode des Europäischen Parlaments für wiederaufgenommen, wünsche Ihnen nochmals alles Gute zum Jahreswechsel und hoffe, daß Sie schöne Ferien hatten.',
  'en': 'I declare resumed the session of the European Parliament adjourned on Friday 17 December 1999, and I would like once again to wish you a happy new year in the hope that you enjoyed a pleasant festive period.'},
 {'de': 'Wie Sie feststellen konnten, ist der gefürchtete "Millenium-Bug " nicht eingetreten. Doch sind Bürger einiger unserer Mitgliedstaaten Opfer von schrecklichen Naturkatastrophen geworden.',
  'en': "Although, as you will have seen, the dreaded 'millennium bug' failed to materialise, still the people in a number of countries suffered a series of natural disasters that truly were dreadful."},
 {'de': 'Im Parlament besteht der Wunsch nach einer Aussprache im

In [None]:
from transformers import MarianMTModel, MarianTokenizer
import torch

# MarianMT checkpoint used for English -> German translation, with its matching tokenizer.
# Both are loaded from the same identifier via `from_pretrained`.
model_name1 = "Helsinki-NLP/opus-mt-en-de"
marian_tokenizer = MarianTokenizer.from_pretrained(model_name1)
marian_model = MarianMTModel.from_pretrained(model_name1)

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/768k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/797k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.27M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/298M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

In [None]:
from transformers import M2M100Model, M2M100Tokenizer, M2M100ForConditionalGeneration

# M2M100 checkpoint and its tokenizer. Both are loaded from `model_name2` so
# the tokenizer and the weights cannot drift apart if the identifier changes.
model_name2 = "facebook/m2m100_418M"
m2m100_tokenizer = M2M100Tokenizer.from_pretrained(model_name2)
m2m100_model = M2M100ForConditionalGeneration.from_pretrained(model_name2)

tokenizer_config.json:   0%|          | 0.00/298 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/298M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/3.71M [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/908 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

In [None]:
english = [n["en"] for n in data["train"]["translation"]]
german = [n["de"] for n in data["train"]["translation"]]

In [None]:
data

{'train': Dataset({
     features: ['translation'],
     num_rows: 50000
 }),
 'validation': Dataset({
     features: ['translation'],
     num_rows: 3000
 }),
 'test': Dataset({
     features: ['translation'],
     num_rows: 3000
 })}

In [None]:
def translate_with_model(model, tokenizer, src_texts, batch_size=16, device="cuda",
                         max_length=60, num_beams=4, **generate_kwargs):
    """Translate a list of source texts with a seq2seq model and its tokenizer.

    Args:
        model: model exposing ``to``, ``generate``, ``eval`` and ``train``.
        tokenizer: tokenizer matching ``model``.
        src_texts: list of source sentences.
        batch_size: number of sentences translated per forward pass.
        device: device to run on. A CUDA device is replaced by the CPU when
            CUDA is not available.
        max_length: token limit used both for truncating the inputs and for
            the generated outputs.
        num_beams: beam width for generation.
        **generate_kwargs: extra keyword arguments forwarded to
            ``model.generate``.

    Returns:
        list[str]: one decoded translation per input, in input order.

    Multilingual tokenizers that expose ``get_lang_id`` and have ``tgt_lang``
    set (M2M100) need the target language forced as the first generated
    token; this is done automatically unless the caller already supplies
    ``forced_bos_token_id``.
    """
    target_device = torch.device(device)
    if target_device.type == "cuda" and not torch.cuda.is_available():
        target_device = torch.device("cpu")
    model.to(target_device)

    if "forced_bos_token_id" not in generate_kwargs:
        get_lang_id = getattr(tokenizer, "get_lang_id", None)
        tgt_lang = getattr(tokenizer, "tgt_lang", None)
        if callable(get_lang_id) and tgt_lang:
            generate_kwargs["forced_bos_token_id"] = get_lang_id(tgt_lang)

    predictions = []
    # Run in eval mode, then restore whatever mode the caller had set.
    was_training = model.training
    model.eval()
    try:
        for start in tqdm(range(0, len(src_texts), batch_size), desc="Translating"):
            batch = src_texts[start:start + batch_size]
            inputs = tokenizer(batch, return_tensors="pt", padding=True,
                               truncation=True, max_length=max_length)
            inputs = {k: v.to(target_device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = model.generate(**inputs, num_beams=num_beams,
                                         max_length=max_length, early_stopping=True,
                                         **generate_kwargs)
            preds = tokenizer.batch_decode(outputs, skip_special_tokens=True,
                                           clean_up_tokenization_spaces=False)
            predictions.extend(preds)
    finally:
        model.train(was_training)

    return predictions

In [None]:
# Configure the M2M100 tokenizer's language attributes: English source, German target.
m2m100_tokenizer.src_lang = "en"
m2m100_tokenizer.tgt_lang = "de"

def preprocess_marian(batch):
    """Tokenize a batch of translation pairs for MarianMT training.

    Args:
        batch: batched examples whose ``"translation"`` entry is a list of
            dicts with ``"en"`` (source) and ``"de"`` (target) strings.

    Returns:
        The tokenizer output for the English sources with an added
        ``"labels"`` entry holding the German target ids. Sources and targets
        are truncated/padded to 40 tokens, and padding positions in the
        labels are replaced by -100.
    """
    pairs = batch["translation"]
    src_texts = [ex["en"] for ex in pairs]
    tgt_texts = [ex["de"] for ex in pairs]

    # `text_target` tokenizes the targets in target mode and stores them under
    # "labels"; it replaces the deprecated `as_target_tokenizer()` context.
    model_inputs_marian = marian_tokenizer(
        src_texts,
        text_target=tgt_texts,
        truncation=True,
        padding="max_length",
        max_length=40
    )

    pad_id = marian_tokenizer.pad_token_id
    model_inputs_marian["labels"] = [
        [(token if token != pad_id else -100) for token in seq]
        for seq in model_inputs_marian["labels"]
    ]
    return model_inputs_marian


In [None]:
def preprocess_m2m100(batch):
    """Tokenize a batch of translation pairs for M2M100 training.

    Args:
        batch: batched examples whose ``"translation"`` entry is a list of
            dicts with ``"en"`` (source) and ``"de"`` (target) strings.

    Returns:
        The tokenizer output for the English sources with an added
        ``"labels"`` entry holding the German target ids. Sources and targets
        are truncated/padded to 40 tokens, and padding positions in the
        labels are replaced by -100.
    """
    pairs = batch["translation"]
    src_texts = [ex["en"] for ex in pairs]
    tgt_texts = [ex["de"] for ex in pairs]

    # `text_target` tokenizes the targets in target mode and stores them under
    # "labels"; it replaces the deprecated `as_target_tokenizer()` context.
    model_inputs_m2m100 = m2m100_tokenizer(
        src_texts,
        text_target=tgt_texts,
        truncation=True,
        padding="max_length",
        max_length=40
    )

    pad_id = m2m100_tokenizer.pad_token_id
    model_inputs_m2m100["labels"] = [
        [(token if token != pad_id else -100) for token in seq]
        for seq in model_inputs_m2m100["labels"]
    ]
    return model_inputs_m2m100


In [None]:
# Tokenize the train and validation splits with the MarianMT preprocessing
# function, in batched mode. The test split is not tokenized here.
tokenized_data_marian = {
    "train": data["train"].map(preprocess_marian, batched = True),
    "validation": data["validation"].map(preprocess_marian, batched=True)
}


Map:   0%|          | 0/50000 [00:00<?, ? examples/s]



Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [None]:
# Tokenize the train and validation splits with the M2M100 preprocessing
# function, in batched mode. The test split is not tokenized here.
tokenized_data_m2m100 = {
    "train": data["train"].map(preprocess_m2m100, batched = True),
    "validation": data["validation"].map(preprocess_m2m100, batched=True)
}


Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [None]:
# Qualitative check: fully noise the first five test sentences and print what
# each model makes of them.

# Load the clean splits once; the result does not depend on the loop index.
data = retrieve_data(max_length=50, add_text_noise=False, noise_level=0.0)

# M2M100 setup that is identical for every sentence.
m2m100_tokenizer.src_lang = "en"
forced_lang_id = m2m100_tokenizer.get_lang_id("de")

# Fall back to the CPU when no GPU is present, and make sure the model lives
# on the same device the encoded inputs are sent to.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
m2m100_model.to(device)

for i in range(5):
    pair = data["test"][i]["translation"]
    original = pair["en"]
    reference = pair["de"]

    noised_input = add_noise(original, 1.0)

    encoded = m2m100_tokenizer(noised_input, return_tensors="pt", padding=True).to(device)
    with torch.no_grad():
        generated = m2m100_model.generate(
            **encoded,
            forced_bos_token_id=forced_lang_id
        )

    m2m_translation = m2m100_tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
    marian_translation = translate_with_model(marian_model, marian_tokenizer, [noised_input])[0]

    # Print results
    print(f"Original EN     : {original}")
    print(f"Noised EN       : {noised_input}")
    print(f"Target DE       : {reference}")
    print(f"MarianMT Output : {marian_translation}")
    print(f"M2M100 Output   : {m2m_translation}")
    print("-" * 60)

Translating: 100%|██████████| 1/1 [00:00<00:00,  3.46it/s]


Original EN     : The French version contains the word "délit" , whereas this is written as "crime" in the English version.
Noised EN       : nxk F.rench vers;ion contain!s yvis whereas; thi is ritten "crime". wmfkudg
Target DE       : In der französischen Version kommt der Begriff "délit " in der englischen dagegen der Begriff "crime " vor.
MarianMT Output : nxk F.rench vers; ion enthalten yvis, in der Erwägung, daß es sich hierbei um "Verbrechen" handelt.
M2M100 Output   : nxk F.rench vers;ion enthält!s yvis statt; thi ist ritten "Verbrechen". wmfkudg
------------------------------------------------------------


Translating: 100%|██████████| 1/1 [00:00<00:00,  7.84it/s]


Original EN     : As you know, Madam President, this does not have the same meaning in French.
Noised EN       : ;you know?, Preside?nt, do.es !not gsbw s;ame nrsbomh French.?
Target DE       : Wie Sie wissen, hat der französische Begriff nicht dieselbe Bedeutung.
MarianMT Output : Der Präsident. — Das Wort hat die Fraktion der Europäischen Demokraten.
M2M100 Output   : Du weißt?, Präsident?nt.es nicht gsbw s;ame nrsbomh französisch?
------------------------------------------------------------


Translating: 100%|██████████| 1/1 [00:00<00:00, 10.26it/s]


Original EN     : The amendment seeks to correct this.
Noised EN       : The am;endment to thi?s.
Target DE       : Mit dem Änderungsantrag soll diese Version berichtigt werden.
MarianMT Output : Die Am;endment zu Thi?s.
M2M100 Output   : Die am;endment zu thi?s.
------------------------------------------------------------


Translating: 100%|██████████| 1/1 [00:00<00:00,  3.13it/s]


Original EN     : I am not the one who tabled the amendment, but it does seem to me that this is the point of it.
Noised EN       : I no.t the; oneh who. tabledo amendment,w it? do!es seem! hu .me is theu point, pd i?t.
Target DE       : Ich habe ihn zwar nicht eingereicht, aber ich denke, dies ist sein Zweck.
MarianMT Output : Ich nicht, die; einh wer.Tabledo Änderung,w it? do!es scheinen! hu .me ist der Punkt, pd i?t.
M2M100 Output   : Ich bin nicht der; oneh wer. tabledo Änderung,w es? do!es scheint! hu.me ist dein Punkt, pd i?t.
------------------------------------------------------------


Translating: 100%|██████████| 1/1 [00:00<00:00, 23.28it/s]

Original EN     : I wanted to point this out.
Noised EN       : ated of points t?his
Target DE       : Darauf wollte ich hinweisen.
MarianMT Output : Anzahl der Punkte
M2M100 Output   : mit den Punkten t?his
------------------------------------------------------------





In [None]:
import evaluate
# Sources and references are both drawn from the same subset so they stay aligned.
test_subset = data["test"].select(range(min(3000, len(data["test"]))))
src_texts = [ex["translation"]["en"] for ex in test_subset]
# BLEU expects a list of reference lists: one single-element list per example.
references = [[ex["translation"]["de"]] for ex in test_subset]

marian_predictions = translate_with_model(marian_model, marian_tokenizer, src_texts)

m2m_predictions = translate_with_model(m2m100_model, m2m100_tokenizer, src_texts)

bleu = evaluate.load("bleu")

models = [marian_model, m2m100_model]
tokenizers = [marian_tokenizer, m2m100_tokenizer]

# Score the translations computed above instead of translating the whole test
# set a second time. `references` is built once and must not be re-wrapped
# inside the loop, otherwise the nesting grows with every iteration.
for i, predictions in enumerate([marian_predictions, m2m_predictions], start=1):
    bleu_score = bleu.compute(predictions=predictions, references=references)
    print(f"Model {i} BLEU Score: {bleu_score['bleu']:.4f}")

Translating:   2%|▏         | 3/188 [00:02<02:12,  1.40it/s]


KeyboardInterrupt: 

In [None]:
references[:10]

In [None]:
marian_predictions[:10]


In [None]:
m2m_predictions[:10]

In [None]:
import evaluate
# Load the METEOR and BERTScore metrics through the `evaluate` library.
meteor = evaluate.load("meteor")
bertscore = evaluate.load("bertscore")

In [None]:
# Score the MarianMT predictions. BLEU receives `references` as is; METEOR and
# BERTScore receive the first element of each reference entry.
bleu_score_marian= bleu.compute(predictions=marian_predictions, references=references)
meteor_score_marian = meteor.compute(predictions=marian_predictions, references=[r[0] for r in references])
bert_score_marian = bertscore.compute(predictions=marian_predictions, references=[r[0] for r in references], lang="de")


In [None]:
# Score the M2M100 predictions. BLEU receives `references` as is; METEOR and
# BERTScore receive the first element of each reference entry.
bleu_score_m2m100= bleu.compute(predictions=m2m_predictions, references=references)
meteor_score_m2m100 = meteor.compute(predictions=m2m_predictions, references=[r[0] for r in references])
bert_score_m2m100 = bertscore.compute(predictions=m2m_predictions, references=[r[0] for r in references], lang="de")

In [None]:
def get_scores(bleu_score, meteor_score, bert_score):
    """Flatten the three metric results into one summary dict.

    Args:
        bleu_score: mapping with a ``'bleu'`` entry.
        meteor_score: mapping with a ``'meteor'`` entry.
        bert_score: mapping whose ``'precision'``, ``'recall'`` and ``'f1'``
            entries are per-example sequences of numbers.

    Returns:
        dict with keys ``'BLEU'``, ``'METEOR'``, ``'Precision(BERT)'``,
        ``'Recall(BERT)'`` and ``'F1(BERT)'``; the BERT values are the
        arithmetic means of the per-example sequences.
    """
    def _mean_of(key):
        values = bert_score[key]
        return sum(values) / len(values)

    bert_means = {key: _mean_of(key) for key in ('precision', 'recall', 'f1')}

    summary = {}
    summary['BLEU'] = bleu_score['bleu']
    summary['METEOR'] = meteor_score['meteor']
    summary['Precision(BERT)'] = bert_means['precision']
    summary['Recall(BERT)'] = bert_means['recall']
    summary['F1(BERT)'] = bert_means['f1']
    return summary

marian_scores = get_scores(bleu_score_marian, meteor_score_marian, bert_score_marian)
m2m100_scores = get_scores(bleu_score_m2m100, meteor_score_m2m100, bert_score_m2m100)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# The score table gets its own name: `data` holds the dataset splits elsewhere
# in this notebook and must not be overwritten by a DataFrame.
metric_names = list(marian_scores.keys())
scores_df = pd.DataFrame({
    'Metric': metric_names,
    'MarianMT': [marian_scores[name] for name in metric_names],
    'M2M100': [m2m100_scores[name] for name in metric_names]
})

sns.set(style="white", context="talk")

plt.figure(figsize=(12, 6))
data_melted = scores_df.melt(id_vars='Metric', var_name='Model', value_name='Score')
ax = sns.barplot(x='Metric', y='Score', hue='Model', data=data_melted, palette='viridis')

# Label the bars through the bar containers rather than `ax.patches`, so that
# only real bars (and no auxiliary patches) receive a value label.
for container in ax.containers:
    ax.bar_label(container, fmt="%.4f", padding=3, fontsize=10)

# NOTE(review): the title hard-codes "20% noise"; confirm it matches the noise
# setting of the data the scores were actually computed on.
plt.title("Evaluation Metrics Comparison with 20% noise", fontsize=18, pad=20)
plt.ylim(0, 1.1)
plt.ylabel("Score", fontsize=14)
plt.xlabel("")
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
sns.despine()
ax.yaxis.grid(True, linestyle='--', alpha=0.7)
ax.set_axisbelow(True)
plt.tight_layout()
plt.legend(title="Model", fontsize=12)
plt.show()

In [None]:
import pandas as pd
# Sweep the noise level from 0% to 100% in 10-point steps and record the mean
# BERTScore F1 of MarianMT on the noised test split at each level.
noise_levels = np.linspace(0.0, 1.0, 11)
marian_results = []

for noise in noise_levels:
    print(f"MarianMT: Evaluating at {int(noise * 100)}% noise...")

    data = retrieve_data(max_length = 50, add_text_noise=True, noise_level=noise)

    src_texts = [ex["translation"]["en"] for ex in data["test"]]
    tgt_texts = [ex["translation"]["de"] for ex in data["test"]]

    marian_predictions = translate_with_model(marian_model, marian_tokenizer, src_texts)

    bert_score_marian = bertscore.compute(predictions=marian_predictions, references=tgt_texts, lang="de")
    bert_f1_marian = sum(bert_score_marian['f1']) / len(bert_score_marian['f1'])

    marian_results.append({'Noise Level': noise, 'F1(BERT)': bert_f1_marian})

# Build the summary table once, after all levels have been collected, rather
# than rebuilding it on every iteration.
marian_df = pd.DataFrame(marian_results)
marian_df["Noise %"] = marian_df["Noise Level"] * 100


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Line plot of MarianMT's mean BERTScore F1 against the noise percentage.
sns.set(style="whitegrid")
plt.figure(figsize=(8, 5))
sns.lineplot(data=marian_df, x="Noise %", y="F1(BERT)", marker="o", color="blue")
plt.title("MarianMT BERT F1 Score vs Noise Level")
plt.xlabel("Noise Level")
plt.ylabel("BERT F1 Score")
# Fixed y-range of 0 to 1.
plt.ylim(0, 1)
plt.show()


In [None]:
# Sweep the noise level from 0% to 100% in 10-point steps and record the mean
# BERTScore F1 of M2M100 on the noised test split at each level.
m2m_results = []
noise_levels = np.linspace(0.0, 1.0, 11)

for noise in noise_levels:
    print(f"M2M100: Evaluating at {int(noise * 100)}% noise...")

    # Rebuild the splits with the English side noised at the current level.
    data = retrieve_data(max_length = 50, add_text_noise=True, noise_level=noise)

    src_texts = [ex["translation"]["en"] for ex in data["test"]]
    tgt_texts = [ex["translation"]["de"] for ex in data["test"]]

    m2m_predictions = translate_with_model(m2m100_model, m2m100_tokenizer, src_texts)

    # Average the per-example F1 values into one number for this noise level.
    bert_score_m2m100 = bertscore.compute(predictions=m2m_predictions, references=tgt_texts, lang="de")
    bert_f1_m2m100 = sum(bert_score_m2m100['f1']) / len(bert_score_m2m100['f1'])

    m2m_results.append({'Noise Level': noise, 'F1(BERT)': bert_f1_m2m100})

# One row per noise level, plus a percentage column used as the plot's x-axis.
m2m_df = pd.DataFrame(m2m_results)
m2m_df["Noise %"] = m2m_df["Noise Level"] * 100


In [None]:
# Line plot of M2M100's mean BERTScore F1 against the noise percentage.
sns.set(style="whitegrid")
plt.figure(figsize=(8, 5))
sns.lineplot(data=m2m_df, x="Noise %", y="F1(BERT)", marker="o", color="blue")
plt.title("M2M100 BERT F1 Score vs Noise Level")
plt.xlabel("Noise Level")
plt.ylabel("BERT F1 Score")
# Fixed y-range of 0 to 1.
plt.ylim(0, 1)
plt.show()
