<a href="https://colab.research.google.com/github/nithyasri1009/NLP/blob/main/AutoTrans.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:

!pip install translators
!pip install langdetect
!pip install

from functools import lru_cache
from typing import Optional

import gradio as gr
import translators as ts
from langdetect import detect
from transformers import MarianMTModel, MarianTokenizer

# Supported language pairs for MarianMT.
# Every model id follows the pattern "Helsinki-NLP/opus-mt-<source>-<target>",
# and every supported pair has English on one side.
_MARIAN_PREFIX = "Helsinki-NLP/opus-mt"
_ENGLISH_PARTNERS = ("es", "fr", "de", "hi", "ru", "zh", "ja", "ar")

SUPPORTED_PAIRS = {
    # English to Others
    "en": {
        partner: f"{_MARIAN_PREFIX}-en-{partner}"
        for partner in _ENGLISH_PARTNERS
    },
    # Others to English
    **{
        partner: {"en": f"{_MARIAN_PREFIX}-{partner}-en"}
        for partner in _ENGLISH_PARTNERS
    },
}

# Mapping between langdetect codes and our supported codes.
# langdetect reports Chinese as either 'zh-cn' (Simplified) or 'zh-tw'
# (Traditional); both are folded onto the single 'zh' code used by
# SUPPORTED_PAIRS and the UI dropdowns.
LANG_MAPPING = {
    'en': 'en',
    'es': 'es',
    'fr': 'fr',
    'de': 'de',
    'hi': 'hi',
    'ru': 'ru',
    'zh-cn': 'zh',
    'zh-tw': 'zh',
    'ja': 'ja',
    'ar': 'ar',
}

def detect_language(text: str) -> str:
    """Detect the language of *text* with langdetect.

    Args:
        text: The text whose language should be identified.

    Returns:
        The detected code translated through ``LANG_MAPPING`` when an entry
        exists. A detected language without an entry is returned as the raw
        langdetect code, so callers can tell "unsupported language" apart
        from English instead of treating the text as English. ``'en'`` is
        returned only when detection itself fails.
    """
    try:
        lang = detect(text)
    except Exception:
        # Best-effort fallback to English when detection fails. ``Exception``
        # (not a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        return 'en'
    return LANG_MAPPING.get(lang, lang)

@lru_cache(maxsize=4)
def _load_marian(model_name: str):
    """Load the tokenizer and model for *model_name*, memoizing the pair.

    The cache is bounded so that only a few models are kept in memory at
    once; a failed load raises and is therefore not cached.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_with_marian(text: str, source_lang: str, target_lang: str) -> Optional[str]:
    """Translate using MarianMT if the language pair is supported.

    Args:
        text: Text to translate.
        source_lang: Source language code (a key of ``SUPPORTED_PAIRS``).
        target_lang: Target language code.

    Returns:
        The decoded translation, or ``None`` when the pair has no entry in
        ``SUPPORTED_PAIRS`` or when loading/generation raises (the error is
        printed). Callers treat ``None`` as "fall back to another backend".
    """
    model_name = SUPPORTED_PAIRS.get(source_lang, {}).get(target_lang)
    if model_name is None:
        return None

    try:
        # Reuse an already-loaded model instead of reloading it per request.
        tokenizer, model = _load_marian(model_name)
        # truncation=True: input longer than the tokenizer's limit is cut.
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        outputs = model.generate(**inputs)
        # A single input string is passed, so only the first sequence is decoded.
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        print(f"MarianMT error: {e}")
        return None

def translate_text(text: str, source_lang: str, target_lang: str) -> str:
    """Translate *text*, preferring MarianMT and falling back to an online service.

    Args:
        text: Text to translate. ``None`` or whitespace-only input yields a
            prompt message instead of a translation.
        source_lang: Source language code, or ``"auto"`` to detect it with
            ``detect_language``.
        target_lang: Target language code.

    Returns:
        A user-facing string: the translation prefixed with a label naming
        the backend used (and the detected language in auto mode), or an
        explanatory message when nothing was translated. Errors from the
        online backend are reported in the returned string, not raised.
    """
    if not text or not text.strip():
        return "Please enter some text to translate"

    if source_lang == "auto":
        effective_source = detect_language(text)
        print(f"Detected language: {effective_source}")
        marian_label = f"Detected {effective_source.upper()} → 🤖 MarianMT:"
        online_label = f"Detected {effective_source.upper()} → 🌐 Online:"
    else:
        effective_source = source_lang
        marian_label = "🤖 MarianMT Translation:"
        online_label = "🌐 Online Translation:"

    # Applies to manual selection as well as auto-detection: translating a
    # language into itself is pointless and no Marian pair exists for it.
    if effective_source == target_lang:
        return "Source and target languages appear to be the same"

    # translate_with_marian returns None for unsupported pairs or on failure.
    translation = translate_with_marian(text, effective_source, target_lang)
    if translation:
        return f"{marian_label}\n{translation}"

    # Fallback to online translation. In auto mode ``source_lang`` is still
    # "auto", so the service does its own detection; with a manual choice the
    # user's selection is forwarded instead of being silently ignored.
    try:
        translation = ts.translate_text(
            text, from_language=source_lang, to_language=target_lang
        )
    except Exception as e:
        return f"❌ Translation failed: {str(e)}"
    return f"{online_label}\n{translation}"

# Gradio Interface
# Language codes offered as translation targets; the source picker offers the
# same codes preceded by "auto".
_TARGET_CODES = ["en", "es", "fr", "de", "hi", "ru", "zh", "ja", "ar"]

_text_input = gr.Textbox(label="Text", placeholder="Enter text to translate...")
_source_picker = gr.Dropdown(
    ["auto", *_TARGET_CODES],
    label="Source Language",
    value="auto",
)
_target_picker = gr.Dropdown(
    list(_TARGET_CODES),
    label="Target Language",
    value="en",
)
_result_box = gr.Textbox(label="Translation Result", lines=5)

# Each example row is [text, source language, target language].
_EXAMPLE_ROWS = [
    ["Hello world", "auto", "es"],
    ["Bonjour le monde", "auto", "en"],
    ["自动检测语言", "auto", "fr"],
    ["Привет мир", "auto", "de"],
]

iface = gr.Interface(
    fn=translate_text,
    inputs=[_text_input, _source_picker, _target_picker],
    outputs=_result_box,
    title="🌍 Universal Translator (Working Auto-Detect)",
    description="Accurate language auto-detection with MarianMT and online fallback",
    examples=_EXAMPLE_ROWS,
)

# Start the web UI only when executed as a script / notebook cell.
if __name__ == "__main__":
    iface.launch()

Collecting translators
  Downloading translators-6.0.1-py3-none-any.whl.metadata (70 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/70.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting niquests>=3.14.0 (from translators)
  Downloading niquests-3.14.1-py3-none-any.whl.metadata (16 kB)
Collecting exejs>=0.0.4 (from translators)
  Downloading exejs-0.0.4-py3-none-any.whl.metadata (5.0 kB)
Collecting pathos>=0.3.4 (from translators)
  Downloading pathos-0.3.4-py3-none-any.whl.metadata (11 kB)
Collecting cloudscraper>=1.2.71 (from translators)
  Downloading cloudscraper-1.2.71-py2.py3-none-any.whl.metadata (19 kB)
Collecting urllib3-future<3,>=2.12.900 (from niquests>=3.14.0->translators)
  Downloading urllib3_future-2.13.901-py3-none-any.whl.metadata (15 kB)
Collecting wassima<2,>=1.0.1 (from niquests>=3.14.0->translators)
  Downloading wassim