In [None]:
# Install the tools we need: AI model library, tokenizer, and web interface
!pip install transformers sentencepiece gradio




In [None]:
# Import necessary libraries:
# os → for system operations,
# transformers → to use the M2M100 AI translation model,
# gradio → to create a simple web interface
import os
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import gradio as gr


In [None]:
# Remove HUGGINGFACE_TOKEN from this process's environment if it is set; do nothing otherwise.
# Written as a statement rather than a bare `os.environ.pop(...)` expression: a notebook
# echoes the value of a cell's last expression, which would print the token into the
# saved cell output whenever the variable was set.
# NOTE(review): huggingface_hub documents HF_TOKEN (and the older HUGGING_FACE_HUB_TOKEN)
# as the variables it reads — confirm HUGGINGFACE_TOKEN is the name intended here.
if "HUGGINGFACE_TOKEN" in os.environ:
    del os.environ["HUGGINGFACE_TOKEN"]

In [None]:
# Translate text between two languages identified by tokenizer language codes.
def translate(text, src_lang, tgt_lang):
    """Translate ``text`` with the notebook-level ``tokenizer`` and ``model``.

    Both globals must already exist when this function is called.

    Args:
        text: Input handed to the tokenizer.
        src_lang: Value assigned to ``tokenizer.src_lang`` before encoding.
        tgt_lang: Value passed to ``tokenizer.get_lang_id``; the resulting id is
            forced as the first generated token so the output is in that language.

    Returns:
        The first entry of the decoded batch, with special tokens stripped.

    NOTE(review): a later cell in this notebook defines another ``translate`` that
    takes display names instead of codes; once that cell runs it replaces this one.
    """
    tokenizer.src_lang = src_lang

    # Encode the input as PyTorch tensors ("pt").
    model_inputs = tokenizer(text, return_tensors="pt")

    # The forced first token is what selects the output language.
    target_lang_id = tokenizer.get_lang_id(tgt_lang)
    output_ids = model.generate(**model_inputs, forced_bos_token_id=target_lang_id)

    # Decode the generated ids back to text and hand back the single result.
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]


In [None]:
import gradio as gr
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

# Load the tokenizer and the model for the checkpoint named in `model_name`.
# Both objects are module-level globals that `translate` reads at call time.
# NOTE: `from_pretrained` fetches the checkpoint from the Hugging Face Hub when it is
# not already cached locally — assumes network access on the first run.
model_name = "facebook/m2m100_418M"
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

def translate(text, src_lang, tgt_lang):
    """Translate ``text`` between two languages chosen by display name.

    Relies on the module-level ``src_langs`` map, ``tokenizer`` and ``model``.

    Args:
        text: Text to translate.
        src_lang: Display name of the source language; must be a key of
            ``src_langs`` (e.g. ``"English"``).
        tgt_lang: Display name of the target language; looked up in the same
            ``src_langs`` map.

    Returns:
        The first decoded sequence produced by ``model.generate`` with special
        tokens stripped, or ``""`` when ``text`` is ``None`` or blank.

    Raises:
        ValueError: If a display name is not a key of ``src_langs``, or if the
            tokenizer raises ``KeyError`` for one of the mapped language codes.
    """
    # Nothing to translate: skip the model call rather than generating from an empty prompt.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    def _code_for(display_name, role):
        # Direct dict lookup; a miss is reported as a readable ValueError.
        try:
            return src_langs[display_name]
        except KeyError:
            raise ValueError(f"Unknown {role} language: {display_name!r}") from None

    src_lang_abbr = _code_for(src_lang, "source")
    tgt_lang_abbr = _code_for(tgt_lang, "target")

    # Resolve both languages against the tokenizer before doing any encoding, so an
    # unsupported code fails fast with a message that names the user's selection.
    try:
        tokenizer.src_lang = src_lang_abbr
        target_lang_id = tokenizer.get_lang_id(tgt_lang_abbr)
    except KeyError as exc:
        raise ValueError(
            f"The loaded tokenizer does not support translating {src_lang!r} -> {tgt_lang!r}"
        ) from exc

    encoded = tokenizer(text, return_tensors="pt")
    # Forcing the first generated token to the target-language id selects the output language.
    generated_tokens = model.generate(**encoded, forced_bos_token_id=target_lang_id)
    translated = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return translated[0]

# Language map: display name shown in the dropdowns → language code handed to the tokenizer.
# Despite the name, this one map feeds both the source and the target dropdown, and
# `translate` looks both selections up here.
# NOTE(review): "te", "eu", "mt" and "nb" do not appear in the language list on the
# facebook/m2m100_418M model card as far as I can tell — verify against
# `tokenizer.lang_code_to_token` before relying on those entries.
src_langs = {
    "English": "en", "Hindi (हिंदी)": "hi", "French (français)": "fr", "German (Deutsch)": "de", "Spanish (español)": "es",
    "Chinese (中文)": "zh", "Japanese (日本語)": "ja", "Korean (한국어)": "ko", "Russian (русский)": "ru", "Arabic (العربية)": "ar",
    "Portuguese (português)": "pt", "Italian (italiano)": "it", "Dutch (Nederlands)": "nl", "Swedish (svenska)": "sv", "Norwegian (norsk)": "no",
    "Danish (dansk)": "da", "Finnish (suomi)": "fi", "Greek (Ελληνικά)": "el", "Turkish (Türkçe)": "tr", "Hebrew (עברית)": "he",
    "Polish (polski)": "pl", "Ukrainian (українська)": "uk", "Hungarian (magyar)": "hu", "Czech (čeština)": "cs", "Slovak (slovenčina)": "sk",
    "Vietnamese (Tiếng Việt)": "vi", "Thai (ไทย)": "th", "Indonesian (Bahasa Indonesia)": "id", "Malay (Bahasa Melayu)": "ms", "Filipino (Tagalog)": "tl",
    "Bengali (বাংলা)": "bn", "Gujarati (ગુજરાતી)": "gu", "Kannada (ಕನ್ನಡ)": "kn", "Malayalam (മലയാളം)": "ml", "Marathi (मराठी)": "mr",
    "Nepali (नेपाली)": "ne", "Oriya (ଓଡ଼ିଆ)": "or", "Punjabi (ਪੰਜਾਬੀ)": "pa", "Tamil (தமிழ்)": "ta", "Telugu (తెలుగు)": "te",
    "Urdu (اردو)": "ur", "Swahili (Kiswahili)": "sw", "Amharic (አማርኛ)": "am", "Yoruba (Yorùbá)": "yo", "Igbo (Igbo)": "ig",
    "Hausa (Hausa)": "ha", "Zulu (isiZulu)": "zu", "Xhosa (isiXhosa)": "xh", "Afrikaans (Afrikaans)": "af", "Albanian (Shqip)": "sq",
    "Armenian (Հայերեն)": "hy", "Azerbaijani (Azərbaycan dili)": "az", "Basque (Euskara)": "eu", "Belarusian (беларуская)": "be", "Bosnian (bosanski)": "bs",
    "Bulgarian (български)": "bg", "Catalan (català)": "ca", "Croatian (hrvatski)": "hr", "Estonian (eesti)": "et", "Georgian (ქართული)": "ka",
    "Irish (Gaeilge)": "ga", "Icelandic (Íslenska)": "is", "Latvian (latviešu)": "lv", "Lithuanian (lietuvių)": "lt", "Macedonian (македонски)": "mk",
    "Maltese (Malti)": "mt", "Norwegian Bokmål (bokmål)": "nb", "Persian (فارسی)": "fa", "Romanian (română)": "ro", "Serbian (српски)": "sr",
    "Slovenian (slovenščina)": "sl"
}

# Optional CSS styling, passed to `gr.Blocks(css=...)` when the interface is built.
#   #translator – selects the element whose id is "translator": grey background,
#                 padding, rounded corners and an Arial font stack.
#   h1          – centred, dark-grey headings.
#   label       – bold, mid-grey labels.
css = """
#translator {
    background-color: #f0f0f0;
    padding: 30px;
    border-radius: 15px;
    font-family: 'Arial', sans-serif;
}
h1 {
    text-align: center;
    color: #333;
}
label {
    font-weight: bold;
    color: #555;
}
"""

# Build the Gradio interface: a text box, source/target language dropdowns,
# a button that runs `translate`, and a text box for the result.
with gr.Blocks(css=css) as demo:
    # elem_id="translator" gives this container the id that the `#translator`
    # rule in `css` selects; without a matching id that rule styles nothing.
    with gr.Column(elem_id="translator"):
        gr.Markdown("# 🌍 Multi-Language Translator (M2M100)")
        with gr.Row():
            input_text = gr.Textbox(label="Enter text to translate", lines=3)
        with gr.Row():
            # Both dropdowns list the display names (keys) of `src_langs`.
            src = gr.Dropdown(choices=list(src_langs.keys()), value="English", label="Source Language")
            tgt = gr.Dropdown(choices=list(src_langs.keys()), value="Hindi (हिंदी)", label="Target Language")
        with gr.Row():
            translate_btn = gr.Button("Translate")
        output = gr.Textbox(label="Translation Output", lines=3)

    # Clicking the button calls translate(text, source name, target name) and writes
    # the return value to `output`; api_name registers the handler under the name "translate".
    translate_btn.click(
        translate,
        inputs=[input_text, src, tgt],
        outputs=output,
        api_name="translate"
    )

# Launch the app. share=True requests a temporary public URL for the interface,
# so anyone holding the link can use the translator while it is up.
# In Colab, add debug=True to have errors shown in the cell output.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f19f63e12d4fae6c3c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


