First, let's use a Helsinki-NLP model to translate into Ukrainian.

In [8]:
import functools

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

@functools.lru_cache(maxsize=4)
def _load_translation_model(model_name):
    """Load the seq2seq model and tokenizer for *model_name*, memoized per name."""
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


def translate(text, model_name="Helsinki-NLP/opus-mt-en-uk"):
    """Translate *text* with the pretrained seq2seq checkpoint *model_name*.

    Args:
        text: Source sentence to translate.
        model_name: Checkpoint identifier passed to ``from_pretrained``;
            defaults to the English-to-Ukrainian Helsinki-NLP model.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # Reuse the already-loaded model/tokenizer instead of reloading them on
    # every call; only the first call per model_name pays the loading cost.
    model, tokenizer = _load_translation_model(model_name)

    # truncation=True clips over-long inputs to the tokenizer's length limit.
    inputs = tokenizer.encode(text, return_tensors="pt", truncation=True)

    # Beam search with 4 beams; early_stopping ends the search once enough
    # finished candidates have been collected.
    translated = model.generate(inputs, num_beams=4, early_stopping=True)

    return tokenizer.decode(translated[0], skip_special_tokens=True)


In [9]:
# Sample English sentence, translated with the English-to-Ukrainian checkpoint.
text_to_translate = "The final courses task is finished!"
translated_text = translate(text_to_translate, model_name="Helsinki-NLP/opus-mt-en-uk")

In [10]:
# Display the Ukrainian translation computed in the previous cell.
print("Ukrainian Translation:", translated_text)

Ukrainian Translation: Остаточне завдання курсу завершено!


Now it would be interesting to try the T5 model to translate into Romanian and French.

In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load the t5-base checkpoint and its tokenizer once at module level;
# the translate() function defined below reads these two names as globals.
model_name = "t5-base"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

def translate(input_text, language, *, num_beams=4, max_length=300):
    """Translate English *input_text* into *language* using the module-level T5 model.

    The target language is selected through T5's text prefix
    ``"translate English to <language>: "``.
    NOTE(review): the stock T5 checkpoints are documented as supporting
    German, French and Romanian targets -- confirm before using other languages.

    Args:
        input_text: English sentence to translate.
        language: Target language name inserted into the task prefix
            (e.g. ``"French"``).
        num_beams: Number of beams for beam search.
        max_length: Upper bound on the generated sequence length, so longer
            translations are not cut off by the library's short default.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    prompt = f"translate English to {language}: {input_text}"
    # truncation=True keeps over-long inputs within the tokenizer's limit,
    # matching the Helsinki-NLP helper earlier in this notebook.
    input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True)

    # early_stopping only applies to beam search, so enable beams explicitly;
    # without num_beams > 1 the flag had no effect.
    translated = model.generate(
        input_ids,
        num_beams=num_beams,
        max_length=max_length,
        early_stopping=True,
    )

    return tokenizer.decode(translated[0], skip_special_tokens=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Translate the same English sentence into Romanian and French with T5.
english_text = "The final task of the courses is finished!"
romanian_translation = translate(english_text, "Romanian")
french_translation = translate(english_text, "French")

In [None]:
# Display both T5 translations computed in the previous cell.
print("Romanian Translation:", romanian_translation)
print("French Translation:", french_translation)

Romanian Translation: Sarcina finală a cursurilor s-a încheiat!
French Translation: La dernière tâche des cours est terminée!
