In [None]:
!pip install transformers sentencepiece



In [None]:
#!pip install transformers sentencepiece --quiet

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Checkpoint name passed to both from_pretrained() calls below.
model_name = "facebook/nllb-200-distilled-600M"

# use_fast=False requests the slow (non-"fast") tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Lowercase language name (as typed by the user) -> NLLB language code token.
lang_code_map = dict(
    telugu='tel_Telu',
    tamil='tam_Taml',
)

def translate_nllb(text, target_language):
    """Translate an English sentence into a language listed in ``lang_code_map``.

    Args:
        text: English source sentence (a single string).
        target_language: Key of ``lang_code_map`` (e.g. ``'telugu'``).
            Surrounding whitespace and letter case are ignored.

    Returns:
        The translated string. Returns ``"Unsupported language"`` when
        ``target_language`` is not a key of ``lang_code_map``, and ``""``
        when ``text`` is empty or whitespace-only.
    """
    # Normalize so that inputs such as " Telugu " still match the map keys.
    if isinstance(target_language, str):
        target_language = target_language.strip().lower()
    if target_language not in lang_code_map:
        return "Unsupported language"

    # Nothing to translate: skip generation, which would otherwise be forced
    # by min_length to produce tokens for an empty source.
    if not text or not text.strip():
        return ""

    src_lang = "eng_Latn"
    tgt_lang = lang_code_map[target_language]

    # The tokenizer reads src_lang to tag the source sentence.
    tokenizer.src_lang = src_lang

    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    # Keep the inputs on the same device as the model (no-op when both are on CPU).
    encoded = encoded.to(model.device)

    # The target-language code token is forced as the first generated token.
    bos_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)

    generated_tokens = model.generate(
        **encoded,
        forced_bos_token_id=bos_token_id,
        max_length=512,
        min_length=5,  # NOTE(review): may pad very short translations - confirm this is wanted
        num_beams=4,
        early_stopping=True,
        do_sample=False,  # deterministic beam search; no sampling temperature needed
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    translated = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

    # If the output echoes the source sentence at the start, drop that echo.
    if translated.startswith(text):
        translated = translated[len(text):].strip()

    return translated


# Interactive driver: read a sentence and a target language, then print the result.
english_input = input("Enter English sentence: ")
# Strip as well as lower-case, so a stray space (e.g. "telugu ") is not
# reported as an unsupported language.
language_choice = input("Translate to (telugu/tamil): ").strip().lower()


translated_output = translate_nllb(english_input, language_choice)
print(f"\nOriginal (English): {english_input}")
print(f"Translated ({language_choice}): {translated_output}")



Enter English sentence: where are you?
Translate to (telugu/tamil): telugu

Original (English): where are you?
Translated (telugu): మీరు ఎక్కడ ఉన్నారు?
