In [1]:
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Model Setup

In [2]:
# Hugging Face checkpoint shared by the model and its tokenizer. Naming it once
# keeps the two from_pretrained calls from drifting apart when it is changed.
MODEL_CHECKPOINT = 'facebook/nllb-200-distilled-600M'

# Load the seq2seq model and its matching tokenizer from the same checkpoint.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Free-text field holding the sentence to translate (spans the full width).
text_input = widgets.Text(
    description='Input:',
    value='My name is Tanish and i am from chhatisgarh.',
    layout=widgets.Layout(width='100%'),
)

# Target-language picker; each option is a (display label, language code) pair
# and the selected code is what gets passed to translate() as target_lang.
lang_dropdown = widgets.Dropdown(
    description='Language:',
    options=[('Hindi', 'hin_Deva'), ('Maithili', 'mai_Deva')],
    value='hin_Deva',
)

# Area that receives the printed translation result.
output = widgets.Output()

# Button whose click handler runs the translation.
button = widgets.Button(description="Translate")

def on_button_clicked(b):
    """Translate the current input text and print the result into ``output``.

    Click handler registered on the Translate button. ``translate`` is looked
    up when the button is clicked, so the cell that defines it must have been
    run before the button is used.

    Args:
        b: The argument supplied by ``button.on_click`` (unused here).
    """
    # Read the widget state once so the printed label always corresponds to
    # the language code that was actually sent to translate().
    text = text_input.value
    target_code = lang_dropdown.value
    target_label = lang_dropdown.label

    output.clear_output()
    with output:
        if not text.strip():
            # Nothing to translate: skip the model call and tell the user.
            print("Please enter some text to translate.")
            return
        translated = translate(text, target_lang=target_code)
        print(f"Translated Text ({target_label}):\n{translated}")

# Hook the click handler up to the Translate button.
button.on_click(on_button_clicked)

# Render the UI in reading order: input, language picker, button, result area.
display(*(text_input, lang_dropdown, button, output))


Text(value='My name is Tanish and i am from chhatisgarh.', description='Input:', layout=Layout(width='100%'))

Dropdown(description='Language:', options=(('Hindi', 'hin_Deva'), ('Maithili', 'mai_Deva')), value='hin_Deva')

Button(description='Translate', style=ButtonStyle())

Output()

In [4]:
from langdetect import detect

# langdetect language codes -> NLLB language codes.
# Built once at definition time instead of on every translate() call.
_LANGDETECT_TO_NLLB = {
    'en': 'eng_Latn',
    'hi': 'hin_Deva',
    'es': 'spa_Latn',
    'fr': 'fra_Latn',
    'de': 'deu_Latn',
    # NOTE(review): langdetect may never emit 'mai'; entry kept as-is —
    # confirm against langdetect's list of supported languages.
    'mai': 'mai_Deva',
    # Add more mappings as needed
}

# Source language assumed when detection fails or yields an unmapped code.
_FALLBACK_SRC_LANG = 'eng_Latn'


def _detect_src_lang(text):
    """Return the NLLB code for the detected language of ``text``, or the fallback."""
    try:
        detected_lang = detect(text)
    except LangDetectException:
        # Detection could not classify the text; use the same default that is
        # applied to unmapped languages rather than aborting the translation.
        return _FALLBACK_SRC_LANG
    return _LANGDETECT_TO_NLLB.get(detected_lang, _FALLBACK_SRC_LANG)


def translate(input_text, src_lang=None, target_lang='hin_Deva'):
    """Translate ``input_text`` into ``target_lang`` with the loaded NLLB model.

    Args:
        input_text: Text to translate.
        src_lang: NLLB code of the source language (e.g. ``'eng_Latn'``).
            When ``None`` the language is auto-detected with ``langdetect``;
            undetectable text and languages missing from the mapping are
            treated as ``'eng_Latn'``.
        target_lang: NLLB code of the language to translate into.

    Returns:
        The translated text. A blank (empty or whitespace-only) string is
        returned as ``''`` without running detection or the model.
    """
    # Blank strings carry nothing to detect or translate.
    if isinstance(input_text, str) and not input_text.strip():
        return ''

    # Auto-detect the source language if the caller did not provide one.
    if src_lang is None:
        src_lang = _detect_src_lang(input_text)

    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=src_lang,
        tgt_lang=target_lang,
        max_length=400,
    )
    return translator(input_text)[0]['translation_text']

# Translation

In [5]:
# Sample sentence; src_lang is not passed, so translate() auto-detects it.
input_text = "Gracias."

# Translate the same sentence into each target language.
hindi = translate(target_lang='hin_Deva', input_text=input_text)
maithili = translate(target_lang='mai_Deva', input_text=input_text)

Device set to use mps:0
Device set to use mps:0


# Output

In [6]:
# Show both translations, each followed by a blank line.
print(
    f"Hindi translation: {hindi}\n",
    f"Maithili translation: {maithili}\n",
    sep="\n",
)

Hindi translation: बहुत बहुत धन्यवाद

Maithili translation: आभारी छी।

