In [None]:
! pip install transformers
! pip install sentencepiece

In [13]:
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Single source of truth for the checkpoint so the model and its tokenizer
# are always loaded from the same repository.
MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"

# Both calls fetch the checkpoint on first use, so this cell can be slow.
model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_NAME)

In [14]:
def translator(input_sentence, input_lang_code, output_lang_code, model=model, tokenizer=tokenizer):
  """Translate text from one language into another.

  Args:
    input_sentence: Text to translate. A single string, or a list of strings
      that is translated as one padded batch.
    input_lang_code: Language code of the source text (e.g. 'en_XX').
    output_lang_code: Language code to translate into (e.g. 'hi_IN').
    model: Generation model. The default is the notebook-level `model` object
      as it existed when this function was defined; re-run this cell after
      reloading the model to pick up the new object.
    tokenizer: Tokenizer matching `model`. Same definition-time binding caveat
      as `model`.

  Returns:
    The list returned by `tokenizer.batch_decode`: one decoded string per
    generated sequence (a one-element list for a single input sentence).

  Raises:
    KeyError: If `output_lang_code` is not a key of
      `tokenizer.lang_code_to_id`.

  Side effects:
    Sets `tokenizer.src_lang` to `input_lang_code` and leaves it set.
  """
  # Resolve the target-language token first so that an unknown code fails
  # before any tokenizer state has been modified.
  target_lang_id = tokenizer.lang_code_to_id[output_lang_code]

  # Assign Input Language to the tokenizer
  tokenizer.src_lang = input_lang_code

  # Encode Input Sentence. padding=True is a no-op for a single string but
  # lets a list of sentences with different lengths be stacked into tensors.
  encoded_input = tokenizer(input_sentence, return_tensors="pt", padding=True)

  # Keep the inputs on the same device as the model so the function also
  # works after the model has been moved to an accelerator.
  device = getattr(model, "device", None)
  if device is not None:
    encoded_input = encoded_input.to(device)

  # Generate Output Tokens, forcing the first generated token to be the
  # target-language code.
  generated_tokens = model.generate(**encoded_input, forced_bos_token_id=target_lang_id)

  # Convert Tokens to Sequence
  return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

In [15]:
# Display name -> language code for the 25 languages offered in this notebook.
# Names are lookup keys used at runtime, so their spelling (including
# 'Gujrati') is kept exactly as-is.
lang_name_to_code = dict([
    ('Arabic', 'ar_AR'),
    ('Czech', 'cs_CZ'),
    ('German', 'de_DE'),
    ('English', 'en_XX'),
    ('Spanish', 'es_XX'),
    ('Estonian', 'et_EE'),
    ('Finnish', 'fi_FI'),
    ('French', 'fr_XX'),
    ('Gujrati', 'gu_IN'),
    ('Hindi', 'hi_IN'),
    ('Italian', 'it_IT'),
    ('Japanese', 'ja_XX'),
    ('Kazakh', 'kk_KZ'),
    ('Korean', 'ko_KR'),
    ('Lithuanian', 'lt_LT'),
    ('Latvian', 'lv_LV'),
    ('Burmese', 'my_MM'),
    ('Nepali', 'ne_NP'),
    ('Dutch', 'nl_XX'),
    ('Romanian', 'ro_RO'),
    ('Russian', 'ru_RU'),
    ('Sinhalese', 'si_LK'),
    ('Turkish', 'tr_TR'),
    ('Vietnamese', 'vi_VN'),
    ('Chinese', 'zh_CN'),
])

# Reverse lookup: language code -> display name.
lang_code_to_name = {code: name for name, code in lang_name_to_code.items()}

In [16]:
# Demo examples; each entry is [sentence, source language name, target language name].
# The language names must be keys of `lang_name_to_code`.
inputs = [
    ['Yo! How are you?', 'English', 'Hindi'],
    ['Comment ça va?', 'French', 'Chinese'],
    ['come va?', 'Italian', 'Turkish'],
    ['Қалайсыз?', 'Kazakh', 'Finnish'],
    ['잘 지내고 있나요?', 'Korean', 'German'],
]

In [17]:
# Translate every demo example and print the source next to the result.
for sent, input_ln, output_ln in inputs:
  # Map the human-readable language names to the codes `translator` expects.
  input_lc, output_lc = lang_name_to_code[input_ln], lang_name_to_code[output_ln]

  print(f'Translating from {input_ln} to {output_ln}')
  print('Input Sentence : ', sent)

  # `translator` returns a list, so the line below prints the list repr.
  output_sent = translator(sent, input_lc, output_lc)
  print('Output Sentence : ', output_sent)

Translating from English to Hindi
Input Sentence :  Yo! How are you?
Output Sentence :  ['तुम कैसे हो?']
Translating from French to Chinese
Input Sentence :  Comment ça va?
Output Sentence :  ['如何?']
Translating from Italian to Turkish
Input Sentence :  come va?
Output Sentence :  ['Ne var?']
Translating from Kazakh to Finnish
Input Sentence :  Қалайсыз?
Output Sentence :  ['Ei?']
Translating from Korean to German
Input Sentence :  잘 지내고 있나요?
Output Sentence :  ['Sind Sie gesund?']
