In [2]:
# STEP 1: Install required libraries
!pip install -q transformers sentencepiece

# STEP 2: Import libraries
from transformers import MarianMTModel, MarianTokenizer
import torch

# STEP 3: Define English input text
# The passage is a plain multi-paragraph string; paragraphs are separated by blank lines.
english_text = """
Good morning! My name is Alex, and I live in Toronto, Canada. I work as a software engineer, and I love creating new projects using Python and C#.
During the weekends, I like going for hikes and taking photographs of nature.

Canada is a large country with beautiful landscapes. From the Rocky Mountains to the Atlantic coast, there are many amazing places to visit.
In the winter, people often enjoy skiing, snowboarding, and ice skating.

Canadian culture is diverse and welcoming. People from all over the world come to live here, bringing their own traditions and food.
Maple syrup and poutine are two famous Canadian specialties that visitors love to try.

Thank you for reading this passage. Wishing you a wonderful day ahead!
"""

# Echo the source passage so it can be compared with the translation.
print("📥 Input English Text:\n")
print(english_text)

# STEP 4: Load the English-to-French translation model (ML-based Transformer)
# Both the tokenizer and the model come from the same checkpoint name;
# the tokenizer is loaded first, then the model.
model_name = "Helsinki-NLP/opus-mt-en-fr"
tokenizer, model = (
    loader.from_pretrained(model_name)
    for loader in (MarianTokenizer, MarianMTModel)
)

# STEP 5: Split long text into manageable chunks (for safety with long input)
def split_text(text, max_length=512):
    """Split ``text`` into chunks of whole sentences for piecewise translation.

    Sentences are delimited by ``'. '`` only; other terminators (``!``, ``?``,
    or a period followed by a newline) do not end a sentence here. Consecutive
    sentences are packed into one chunk while the running length stays below
    ``max_length``.

    Args:
        text: The text to split.
        max_length: Character budget per chunk (characters, not model tokens).
            A single sentence longer than this is kept whole as its own chunk,
            so such a chunk can exceed the budget.

    Returns:
        A list of non-empty chunks, each stripped of surrounding whitespace,
        in their original order. Blank input yields an empty list.
    """
    separator = '. '
    pieces = text.split(separator)
    last_index = len(pieces) - 1

    chunks, current_chunk = [], ''
    for index, piece in enumerate(pieces):
        # Restore the delimiter only where split() actually removed one; the
        # final piece was never followed by it, so adding it there would
        # invent a trailing period.
        sentence = piece + separator if index < last_index else piece

        if len(current_chunk + piece) >= max_length:
            # The current chunk is full: flush it, unless it is still blank
            # (an oversized first sentence must not emit an empty chunk).
            finished = current_chunk.strip()
            if finished:
                chunks.append(finished)
            current_chunk = ''
        current_chunk += sentence

    remainder = current_chunk.strip()
    if remainder:
        chunks.append(remainder)
    return chunks

chunks = split_text(english_text)

# STEP 6: Translate all chunks and combine into one final result
# Each chunk is tokenized, run through the model, and decoded on its own;
# gradient tracking is switched off for the whole loop since this is inference only.
translations = []
with torch.no_grad():
    for piece in chunks:
        encoded = tokenizer(piece, truncation=True, return_tensors="pt")
        output_ids = model.generate(**encoded)
        # One input sequence per call, so only the first generated sequence is decoded.
        translations.append(
            tokenizer.decode(output_ids[0], skip_special_tokens=True)
        )

# STEP 7: Combine all chunks and show only the final translation
# Translated chunks are joined with a blank line between them.
final_translation = "\n\n".join(translations)

print("\n🎯 Final Translated French Text:\n")
print(final_translation)


📥 Input English Text:


Good morning! My name is Alex, and I live in Toronto, Canada. I work as a software engineer, and I love creating new projects using Python and C#.
During the weekends, I like going for hikes and taking photographs of nature.

Canada is a large country with beautiful landscapes. From the Rocky Mountains to the Atlantic coast, there are many amazing places to visit.
In the winter, people often enjoy skiing, snowboarding, and ice skating.

Canadian culture is diverse and welcoming. People from all over the world come to live here, bringing their own traditions and food.
Maple syrup and poutine are two famous Canadian specialties that visitors love to try.

Thank you for reading this passage. Wishing you a wonderful day ahead!


🎯 Final Translated French Text:

Bonjour! Je m'appelle Alex, et je vis à Toronto, au Canada. Je travaille comme ingénieur logiciel, et j'adore créer de nouveaux projets en utilisant Python et C#. Pendant les week-ends, j'aime aller 