In [8]:
from transformers import MarianMTModel, MarianTokenizer

# Hugging Face checkpoint IDs for the two translation directions. Each ID is
# named once so that a tokenizer and its model are always loaded from the same
# checkpoint (previously the literal was repeated per from_pretrained call).
EN_TO_FR_CHECKPOINT = "Helsinki-NLP/opus-mt-en-fr"
FR_TO_EN_CHECKPOINT = "Helsinki-NLP/opus-mt-fr-en"

# English → French tokenizer/model pair
en_to_fr_tokenizer = MarianTokenizer.from_pretrained(EN_TO_FR_CHECKPOINT)
en_to_fr_model = MarianMTModel.from_pretrained(EN_TO_FR_CHECKPOINT)

# French → English tokenizer/model pair
fr_to_en_tokenizer = MarianTokenizer.from_pretrained(FR_TO_EN_CHECKPOINT)
fr_to_en_model = MarianMTModel.from_pretrained(FR_TO_EN_CHECKPOINT)

def translate(text, tokenizer, model, max_length=64, num_beams=4):
    """
    Translates one piece of text with the given tokenizer/model pair.

    Args:
        text (str): The text to translate.
        tokenizer: Tokenizer providing ``encode`` and ``decode``.
        model: Model providing ``generate``.
        max_length (int): Forwarded to ``model.generate`` as ``max_length``.
            Defaults to 64, the value this function previously hard-coded.
        num_beams (int): Forwarded to ``model.generate`` as ``num_beams``.
            Defaults to 4, the value this function previously hard-coded.

    Returns:
        str: The first generated sequence decoded with special tokens
        skipped, or an empty string when ``text`` is empty or whitespace-only.
    """
    # Nothing to translate: skip the model call instead of decoding whatever
    # the model would generate for an empty source sentence.
    if not text.strip():
        return ""

    # truncation=True asks the tokenizer to keep the input within its
    # configured maximum length rather than passing it through unbounded.
    inputs = tokenizer.encode(text, return_tensors="pt", truncation=True)
    outputs = model.generate(
        inputs,
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )
    # generate() returns a batch; a single text was encoded, so take item 0.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def back_translate(text, en_to_fr_tokenizer, en_to_fr_model, fr_to_en_tokenizer, fr_to_en_model):
    """
    Round-trips English text through French and returns the English result.

    Args:
        text (str): English text to send through the round trip.
        en_to_fr_tokenizer: Tokenizer used for the English → French step.
        en_to_fr_model: Model used for the English → French step.
        fr_to_en_tokenizer: Tokenizer used for the French → English step.
        fr_to_en_model: Model used for the French → English step.

    Returns:
        str: The text after translating to French and back to English.
    """
    # First hop: English → French.
    french = translate(text, tokenizer=en_to_fr_tokenizer, model=en_to_fr_model)
    # Second hop: French → English; its output is the final result.
    return translate(french, tokenizer=fr_to_en_tokenizer, model=fr_to_en_model)




In [9]:
# Sample meeting titles (short, lowercase English phrases). The list is fed,
# in this order, to back_translate in a later cell to see how each title
# survives the English → French → English round trip.
meetings = [
    "project backlog refinement",
    "planning sprint",
    "sprint retrospective session",
    "designing system architecture",
    "project discussion",
    "soda",
    "1:1 with manager",
    "attending a meeting",
    "developer catchup",
    "daily standup",
    "daily meeting",
    "internal meeting",
    "running daily standup",
    "meeting with mr colleague",
    "meet with mrs jane",
    "daily scrum meeting",
    "brainstorming session"
]

In [10]:
# Single-title check of the round trip; the French intermediate is kept in
# `fr_text` so it can be inspected separately.
text = "project backlog refinement"
fr_text = translate(text, tokenizer=en_to_fr_tokenizer, model=en_to_fr_model)  # English → French
back_to_en = translate(fr_text, tokenizer=fr_to_en_tokenizer, model=fr_to_en_model)  # French → English

print(back_to_en)

Improvement of project backlog


In [11]:
def back_translate(text, en_to_fr_tokenizer=None, en_to_fr_model=None,
                   fr_to_en_tokenizer=None, fr_to_en_model=None):
    """
    Performs back-translation from English to French and then back to English.

    This redefinition replaces the earlier ``back_translate`` that required
    the tokenizers and models as arguments. To keep calls written against
    either signature working, the four tokenizer/model parameters are
    optional: any that is omitted (or passed as ``None``) is looked up, at
    call time, from the notebook-level variable of the same name.

    Args:
        text (str): The input English text.
        en_to_fr_tokenizer: Optional tokenizer for English to French translation.
        en_to_fr_model: Optional model for English to French translation.
        fr_to_en_tokenizer: Optional tokenizer for French to English translation.
        fr_to_en_model: Optional model for French to English translation.

    Returns:
        str: The back-translated English text.

    Raises:
        KeyError: If an argument is omitted and no notebook-level variable of
            that name exists.
    """
    # The parameters shadow the notebook-level names, so the defaults have to
    # be read through globals() rather than referenced directly.
    notebook_scope = globals()
    if en_to_fr_tokenizer is None:
        en_to_fr_tokenizer = notebook_scope["en_to_fr_tokenizer"]
    if en_to_fr_model is None:
        en_to_fr_model = notebook_scope["en_to_fr_model"]
    if fr_to_en_tokenizer is None:
        fr_to_en_tokenizer = notebook_scope["fr_to_en_tokenizer"]
    if fr_to_en_model is None:
        fr_to_en_model = notebook_scope["fr_to_en_model"]

    fr_text = translate(text, en_to_fr_tokenizer, en_to_fr_model)
    back_to_en = translate(fr_text, fr_to_en_tokenizer, fr_to_en_model)
    return back_to_en

In [12]:
# Back-translate every meeting title, keeping the order of `meetings`.
back_translated_meetings = list(map(back_translate, meetings))
display(back_translated_meetings)

['Improvement of project backlog',
 'planning sprint',
 'retrospective sprint session',
 'design of the system architecture',
 'Discussion on the draft',
 'Soda',
 '1:1 with manager',
 'Participation in a meeting',
 "the developer's catch-up",
 'daily standup',
 'Daily meeting',
 'internal meeting',
 'holding daily standups',
 'meeting with Mr.',
 'meet Ms. Jane',
 'Daily meeting',
 'brainstorming session']