In [None]:
# Install the necessary libraries
!pip install --quiet transformers gradio sentencepiece

# Import the required libraries
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Languages offered in the UI, keyed by display name.
# Each value is the language-code token passed to the mBART tokenizer/model.
LANG_CODES = {
    "English": "en_XX",
    "French": "fr_XX",
    "German": "de_DE",
    "Spanish": "es_XX",
    "Italian": "it_IT",
    "Russian": "ru_RU",
    "Hindi": "hi_IN",
}

# Display names in declaration order; used as the dropdown choices.
LANGUAGES = list(LANG_CODES)

# Load the model and tokenizer once at module level so that every
# translation request reuses the same objects.
# NOTE: on a fresh runtime this downloads the checkpoint files (the recorded
# output below shows a ~2.44 GB `model.safetensors`), so this step can be slow.

model_name = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
print("Model and tokenizer loaded successfully!")

# Define the translation function
def translate_text_mbart(text_to_translate, source_lang, target_lang):
    """
    Translate text between two supported languages using the mBART model.

    Relies on the module-level ``tokenizer``, ``model`` and ``LANG_CODES``.

    Args:
        text_to_translate: The text to translate. ``None``, empty, or
            whitespace-only input yields an empty string without invoking
            the model.
        source_lang: Display name of the input language; must be a key of
            ``LANG_CODES`` (e.g. ``"English"``).
        target_lang: Display name of the output language; must be a key of
            ``LANG_CODES``.

    Returns:
        The translated text. When ``source_lang`` equals ``target_lang`` the
        input is returned unchanged.

    Raises:
        KeyError: If either language name is not a key of ``LANG_CODES``.
    """
    if source_lang == target_lang:
        return text_to_translate

    # Nothing to translate: skip the (expensive) model call, which would
    # otherwise fail on None or produce meaningless output for blank input.
    if not text_to_translate or not text_to_translate.strip():
        return ""

    # Map the UI display names to the model's language-code tokens.
    src_code = LANG_CODES[source_lang]
    tgt_code = LANG_CODES[target_lang]

    # The tokenizer uses `src_lang` to tag the encoded input with the
    # source-language token.
    tokenizer.src_lang = src_code

    # Truncate over-long input to the tokenizer's configured maximum length
    # instead of handing the model a sequence longer than it supports.
    encoded_text = tokenizer(
        text_to_translate,
        return_tensors="pt",
        truncation=True,
    )

    # `forced_bos_token_id` tells the model which language to translate to.
    # The language code is itself a vocabulary token, so the generic
    # `convert_tokens_to_ids` lookup is used rather than the tokenizer-specific
    # `lang_code_to_id` mapping.
    forced_bos_token_id = tokenizer.convert_tokens_to_ids(tgt_code)
    generated_tokens = model.generate(
        **encoded_text,
        forced_bos_token_id=forced_bos_token_id,
    )

    # Decode the generated ids; a single input yields a single output string.
    translated_text = tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )

    return translated_text[0]

# Create and launch the Gradio interface.
# The guard only builds the UI when a module-level name `model` exists, i.e.
# when the model-loading step has run in this session.
if 'model' in locals():
    demo = gr.Interface(
        fn=translate_text_mbart,
        inputs=[
            gr.Textbox(lines=5, label="Input Text"),
            gr.Dropdown(choices=LANGUAGES, value="English", label="Source Language"),
            gr.Dropdown(choices=LANGUAGES, value="French", label="Target Language")
        ],
        outputs=[
            gr.Textbox(lines=5, label="Translated Text")
        ],
        title="Multi-to-Multi Language Translator (mBART-50)",
        # Advertise only the languages actually offered in the dropdowns
        # rather than a hard-coded count.
        description=(
            f"Translate text between {len(LANGUAGES)} languages "
            "using the powerful mBART-50 model."
        )
    )

    # The `share=True` parameter creates a public URL for your app
    demo.launch(share=True)
else:
    print("Gradio interface not created because the model failed to load.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/529 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/649 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/261 [00:00<?, ?B/s]

Model and tokenizer loaded successfully!
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://94ca0ade67ab20a069.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
