## Backtranslation for Data Augmentation

In [1]:
# Load the watermark extension; it provides the %watermark magic used on the next line.
%load_ext watermark
# Record the environment this notebook was run in: author name (-a),
# Python/IPython versions (-v), and the installed transformers version (-p).
%watermark -a 'Sebastian Raschka' -v -p transformers

Author: Sebastian Raschka

Python implementation: CPython
Python version       : 3.10.6
IPython version      : 8.12.0

transformers: 4.27.2



In [7]:
from transformers import MarianMTModel, MarianTokenizer

# Tokenizer/model pairs that have already been loaded, keyed by checkpoint name.
# Keeping them here means repeated back_translate() calls (e.g. when augmenting
# a whole dataset) pay the from_pretrained() cost only once per kernel session,
# at the price of holding the loaded models in memory.
_MARIAN_CACHE = {}


def _load_marian(model_name):
    """Return the (tokenizer, model) pair for `model_name`, loading it on first use only."""
    if model_name not in _MARIAN_CACHE:
        _MARIAN_CACHE[model_name] = (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )
    return _MARIAN_CACHE[model_name]


def _translate(text, model_name):
    """Translate one string with the MarianMT checkpoint `model_name` and return the decoded text."""
    tokenizer, model = _load_marian(model_name)
    # truncation=True asks the tokenizer to clip over-long inputs to its
    # configured maximum length instead of handing them to the model as-is.
    inputs = tokenizer([text], return_tensors="pt", truncation=True)
    generated_tokens = model.generate(**inputs)
    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)


def back_translate(text,
                   forward_model_name="Helsinki-NLP/opus-mt-en-de",
                   backward_model_name="Helsinki-NLP/opus-mt-de-en"):
    """Back-translate `text` through a pivot language.

    The text is translated with `forward_model_name` and the result is then
    translated with `backward_model_name`. With the default checkpoints this is
    an English -> German -> English round trip.

    Parameters
    ----------
    text : str
        The sentence to back-translate.
    forward_model_name : str, optional
        MarianMT checkpoint used for the first translation
        (default: English to German).
    backward_model_name : str, optional
        MarianMT checkpoint used to translate the intermediate text back
        (default: German to English).

    Returns
    -------
    tuple of (str, str)
        The intermediate (pivot-language) translation and the back-translated
        text, in that order.
    """
    translated_text = _translate(text, forward_model_name)
    translated_back_text = _translate(translated_text, backward_model_name)
    return translated_text, translated_back_text

In [8]:
# Sample sentence that is sent through the translation round trip.
text = (
    "Despite the intermittent rain showers, Amelia decided to "
    "venture outside with her new umbrella, hoping to enjoy "
    "the fresh air and perhaps bump into some old friends at "
    "the local café down the street."
)

translated_text, back_translated_text = back_translate(text)

# Print each stage under its own heading, followed by a separator rule.
separator = "--------------------------"
for heading, body in (
    ("Original text:", text),
    ("Translated text:", translated_text),
    ("Backtranslated text:", back_translated_text),
):
    print(heading)
    print(body)
    print(separator)

Original text:
Despite the intermittent rain showers, Amelia decided to venture outside with her new umbrella, hoping to enjoy the fresh air and perhaps bump into some old friends at the local café down the street.
--------------------------
Translated text:
Trotz der periodischen Regenschauer entschied sich Amelia, sich mit ihrem neuen Regenschirm nach draußen zu wagen, in der Hoffnung, die frische Luft zu genießen und vielleicht einige alte Freunde im örtlichen Café auf der Straße zu treffen.
--------------------------
Backtranslated text:
Despite the periodic rain showers, Amelia decided to venture outside with her new umbrella, hoping to enjoy the fresh air and perhaps meet some old friends in the local café on the street.
--------------------------


In [11]:
import difflib


# Word-level diff between the original sentence and its back-translation:
# both texts are split on whitespace and compared token by token.
original_words = text.split()
back_translated_words = back_translated_text.split()

d = difflib.Differ()
diff = d.compare(original_words, back_translated_words)

# One diff entry per line.
print(*diff, sep="\n")

  Despite
  the
- intermittent
+ periodic
  rain
  showers,
  Amelia
  decided
  to
  venture
  outside
  with
  her
  new
  umbrella,
  hoping
  to
  enjoy
  the
  fresh
  air
  and
  perhaps
+ meet
- bump
- into
  some
  old
  friends
- at
+ in
  the
  local
  café
- down
+ on
  the
  street.
