In [4]:
!pip install transformers sentence-transformers langdetect chromadb sentencepiece torch babel nltk

Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/981.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m286.7/981.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m972.8/981.5 kB[0m [31m17.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting chromadb
  Downloading chromadb-1.1.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Down

In [7]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
"""
multilingual_chatbot.py

Purpose:
- Detect user language
- Translate between user's language and internal (English) workflow using MarianMT
- Use multilingual embeddings for RAG retrieval
- Provide culturally appropriate responses (greetings/date formatting)
- Expose process_user_message(...) which returns the final localized reply

Notes:
- Replace call_llm(...) stub with your LLM provider (OpenAI, Google Gemini, etc.)
- MarianMT translation models are from Helsinki-NLP (Hugging Face). Quality varies by pair.
- To add languages: add mapping entries in TRANSLATION_MODELS and CULTURE_MAP.
"""

import os
import time
from typing import Optional, Dict, Any, Tuple, List
from dataclasses import dataclass

# NLP & Model libs
from langdetect import detect, DetectorFactory
DetectorFactory.seed = 0  # stable results

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from sentence_transformers import SentenceTransformer
import numpy as np

# For formatting / culturalization
from babel.dates import format_datetime
from datetime import datetime

# For tokenization / sentence splitting
import nltk
nltk.download("punkt")
nltk.download("stopwords")
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords

# Placeholder for RAG retrieval — expects you to provide a retrieval function that returns context chunks.
# e.g., query_knowledge_base(query, top_k)
# from dynamic_rag_updater import query_knowledge_base  # if using previous module

# ---------- Configuration ----------
# Supported languages and display names
SUPPORTED_LANGUAGES = {
    "en": "English",
    "es": "Spanish",
    "hi": "Hindi",
    "zh": "Chinese (Simplified)"
}

# Choose internal language for reasoning / LLM context (English)
INTERNAL_LANG = "en"

# Multilingual embedding model (good for retrieval across languages)
EMBEDDING_MODEL_NAME = "paraphrase-multilingual-MiniLM-L12-v2"

# Translation model mapping (Helsinki/Marian style models)
# Keys are (source_lang, target_lang) -> model name on HF
# We include only pairs we need: user_lang <-> en
TRANSLATION_MODELS = {
    ("es", "en"): "Helsinki-NLP/opus-mt-es-en",
    ("en", "es"): "Helsinki-NLP/opus-mt-en-es",
    ("hi", "en"): "Helsinki-NLP/opus-mt-hi-en",
    ("en", "hi"): "Helsinki-NLP/opus-mt-en-hi",
    ("zh", "en"): "Helsinki-NLP/opus-mt-zh-en",
    ("en", "zh"): "Helsinki-NLP/opus-mt-en-zh",
    # Add more as needed
}

# Culturally-appropriate greeting templates and formats (can be extended)
CULTURE_MAP = {
    "en": {"greeting": "Hello", "locale": "en_US"},
    "es": {"greeting": "Hola", "locale": "es_ES"},
    "hi": {"greeting": "नमस्ते", "locale": "hi_IN"},
    "zh": {"greeting": "你好", "locale": "zh_CN"},
}

# Default device for models ("cpu" or "cuda")
DEVICE = "cuda" if (os.environ.get("CUDA_VISIBLE_DEVICES") or False) else "cpu"

# ---------- Helper dataclasses ----------
@dataclass
class TranslationPipeline:
    src: str
    tgt: str
    model_name: str
    tokenizer: Any
    model: Any
    pipeline: Any

# ---------- Global caches for loaded pipelines ----------
_translation_pipelines: Dict[Tuple[str, str], TranslationPipeline] = {}
_embedding_model = None

# ---------- Utilities ----------
def detect_language(text: str) -> str:
    """Detect language code (ISO 639-1). Falls back to English on failure."""
    try:
        lang = detect(text)
        if lang in SUPPORTED_LANGUAGES:
            return lang
        # Some detections may return 'zh-cn' or 'zh-tw' variants — normalize
        if lang.startswith("zh"):
            return "zh"
        # default fallback
        return "en"
    except Exception:
        return "en"

def load_translation_pipeline(src: str, tgt: str) -> TranslationPipeline:
    """Load or return cached MarianMT pipeline for src->tgt translation."""
    key = (src, tgt)
    if key in _translation_pipelines:
        return _translation_pipelines[key]

    model_name = TRANSLATION_MODELS.get(key)
    if not model_name:
        raise ValueError(f"No translation model configured for {src} -> {tgt}")

    print(f"[INFO] Loading translation model {model_name} for {src}->{tgt} (this may take a while)...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    pipe = pipeline("translation", model=model, tokenizer=tokenizer, device=0 if DEVICE.startswith("cuda") else -1)
    tp = TranslationPipeline(src=src, tgt=tgt, model_name=model_name, tokenizer=tokenizer, model=model, pipeline=pipe)
    _translation_pipelines[key] = tp
    return tp

def translate_text(text: str, src: str, tgt: str) -> str:
    """Translate text from src to tgt using Marian pipelines."""
    if src == tgt:
        return text
    tp = load_translation_pipeline(src, tgt)
    # pipeline returns list of dicts with 'translation_text'
    # chunk long text into sentence batches to avoid token limits
    sentences = sent_tokenize(text)
    # group sentences into chunks of about ~500 tokens (rough heuristic: 20 sentences)
    out = []
    batch = []
    for s in sentences:
        batch.append(s)
        if len(batch) >= 20:
            joined = " ".join(batch)
            res = tp.pipeline(joined, max_length=1000)
            out.append(res[0]["translation_text"])
            batch = []
    if batch:
        res = tp.pipeline(" ".join(batch), max_length=1000)
        out.append(res[0]["translation_text"])
    return " ".join(out)

def get_embedding_model():
    global _embedding_model
    if _embedding_model is None:
        print(f"[INFO] Loading embedding model {EMBEDDING_MODEL_NAME}...")
        _embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME, device=DEVICE)
    return _embedding_model

def preprocess_text_for_language(text: str, lang: str) -> str:
    """Language-specific preprocessing: basic normalization and stopword removal."""
    text = text.strip()
    # lowercasing works for many languages but skip for scripts where case not used
    if lang in ["en", "es"]:
        text = text.lower()
    # remove repeated whitespace
    text = " ".join(text.split())
    # remove stopwords for certain languages (optional)
    try:
        if lang in ["en", "es"]:
            sw = set(stopwords.words("english") if lang == "en" else stopwords.words("spanish"))
            tokens = text.split()
            tokens = [t for t in tokens if t not in sw]
            return " ".join(tokens)
    except Exception:
        # fallback
        return text
    return text

def culturalize_reply(reply_en: str, user_lang: str) -> str:
    """Take an English reply and adapt small aspects (greeting, date formatting) for user locale and translate back."""
    culture = CULTURE_MAP.get(user_lang, CULTURE_MAP["en"])
    greeting = culture["greeting"]
    locale = culture["locale"]
    # Example: replace placeholders for dates with locale formatted date
    now = datetime.utcnow()
    date_str = format_datetime(now, locale=locale)
    # Inject greeting + localized date into reply
    reply_with_meta = f"{greeting}! ({date_str})\n\n{reply_en}"
    return reply_with_meta

# ---------- RAG retrieval integration placeholder ----------
# The user should provide a retrieval function here. Example signature:
# def query_knowledge_base(query_en:str, top_k:int) -> List[{"document": "...", "metadata": {...}}]
# For example, reuse the `query_knowledge_base` from your dynamic_rag_updater module.

def query_knowledge_base_stub(query_en: str, top_k: int = 5):
    """Stub retrieval: in production replace with actual retrieval from your vector DB."""
    # Example: return empty context
    return []

# ---------- LLM call placeholder ----------
def call_llm(prompt_en: str, context_chunks: List[Dict], user_lang: str) -> str:
    """
    Replace this with your LLM call. Expected behavior:
      - take prompt_en + the concatenated context_chunks
      - return an English response (string)
    """
    # Simple fallback stub: echo prompt and show retrieved context
    ctx = "\n\n".join([c.get("document", "") for c in context_chunks])
    reply = f"Answering (in English):\n\nUser asked: {prompt_en}\n\nContext:\n{ctx}\n\n(Replace call_llm with real LLM integration.)"
    return reply

# ---------- High-level pipeline ----------
def process_user_message(user_text: str,
                         user_lang: Optional[str] = None,
                         top_k: int = 5,
                         retrieval_fn = None,
                         llm_fn = None) -> Dict[str, Any]:
    """
    Main entrypoint for multilingual processing.
    - user_text: text as provided by user
    - user_lang: optional language code override; if None, auto-detect
    - retrieval_fn: function(query_en, top_k) -> list of docs (defaults to stub)
    - llm_fn: function(prompt_en, context_chunks, user_lang) -> reply_en (defaults to stub)

    Returns a dict:
      {
         "detected_language": "es",
         "translated_user_text": "...",  # english text used internally
         "retrieved_context": [...],
         "reply_translated": "...",      # reply translated back to user's language
         "reply_localized": "..."        # culturally adjusted reply (in user's language)
      }
    """
    # 1. Detect language
    lang = user_lang or detect_language(user_text)
    if lang not in SUPPORTED_LANGUAGES:
        # fallback to English
        lang = "en"

    # 2. Translate incoming text to INTERNAL_LANG if needed
    if lang != INTERNAL_LANG:
        try:
            user_text_en = translate_text(user_text, src=lang, tgt=INTERNAL_LANG)
        except Exception as e:
            # graceful fallback: if translation fails, assume original is English-ish
            print(f"[WARN] translation {lang}->{INTERNAL_LANG} failed: {e}")
            user_text_en = user_text
    else:
        user_text_en = user_text

    # 3. Preprocess
    user_text_en_pp = preprocess_text_for_language(user_text_en, INTERNAL_LANG)

    # 4. Retrieve context from KB (RAG)
    retrieval_fn = retrieval_fn or query_knowledge_base_stub
    context_chunks = retrieval_fn(user_text_en_pp, top_k=top_k) if retrieval_fn else []

    # 5. Build prompt for LLM and call
    llm_fn = llm_fn or call_llm
    reply_en = llm_fn(user_text_en_pp, context_chunks, user_lang=lang)

    # 6. Culturalize reply (add greeting/date)
    reply_en_cultur = culturalize_reply(reply_en, lang)

    # 7. Translate reply back to user's language if needed
    if lang != INTERNAL_LANG:
        try:
            reply_user_lang = translate_text(reply_en_cultur, src=INTERNAL_LANG, tgt=lang)
        except Exception as e:
            print(f"[WARN] translation {INTERNAL_LANG}->{lang} failed: {e}")
            reply_user_lang = reply_en_cultur
    else:
        reply_user_lang = reply_en_cultur

    return {
        "detected_language": lang,
        "translated_user_text": user_text_en,
        "preprocessed_text": user_text_en_pp,
        "retrieved_context": context_chunks,
        "reply_in_internal_language": reply_en,
        "reply_localized_internal": reply_en_cultur,
        "reply_translated": reply_user_lang
    }

# ---------- Example / quick demo ----------
if __name__ == "__main__":
    # load embedding model (for use with real retrieval)
    emb = get_embedding_model()

    # Demo queries in multiple languages
    samples = [
        ("Hello, can you summarize recent progress in machine learning?", None),
        ("¿Puedes resumir los últimos avances en aprendizaje automático?", None),
        ("क्या आप मशीन लर्निंग में हाल की प्रगति का सार दे सकते हैं?", None),
        ("帮我总结一下机器学习的最新进展。", None),
    ]

    # Use process_user_message with stub retrieval and LLM
    for text, _ in samples:
        out = process_user_message(text)
        print("="*40)
        print(f"User Input: {text}")
        print(f"Detected: {out['detected_language']}")
        print("Final reply (localized):")
        print(out['reply_translated'])
        print("\n")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


[INFO] Loading embedding model paraphrase-multilingual-MiniLM-L12-v2...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  now = datetime.utcnow()


User Input: Hello, can you summarize recent progress in machine learning?
Detected: en
Final reply (localized):
Hello! (Oct 12, 2025, 4:56:10 PM)

Answering (in English):

User asked: hello, summarize recent progress machine learning?

Context:


(Replace call_llm with real LLM integration.)


[INFO] Loading translation model Helsinki-NLP/opus-mt-es-en for es->en (this may take a while)...


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

source.spm:   0%|          | 0.00/826k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]



pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Device set to use cpu


[WARN] translation es->en failed: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

[INFO] Loading translation model Helsinki-NLP/opus-mt-en-es for en->es (this may take a while)...


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/826k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Device set to use cpu


[WARN] translation en->es failed: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

User Input: ¿Puedes resumir los últimos avances en aprendizaje automático?
Detected: es
Final reply (localized):
Hola! (12 oct 2025, 16:56:19)

Answering (in English):

User asked: ¿puedes resumir los últimos avances en aprendizaje automático?

Context:


(Replace call_llm with real LLM integratio

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

source.spm:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/813k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/304M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/304M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Device set to use cpu


[WARN] translation hi->en failed: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

[INFO] Loading translation model Helsinki-NLP/opus-mt-en-hi for en->hi (this may take a while)...


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Device set to use cpu


[WARN] translation en->hi failed: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

User Input: क्या आप मशीन लर्निंग में हाल की प्रगति का सार दे सकते हैं?
Detected: hi
Final reply (localized):
नमस्ते! (12 अक्तू॰ 2025, 4:56:51 pm)

Answering (in English):

User asked: क्या आप मशीन लर्निंग में हाल की प्रगति का सार दे सकते हैं?

Context:


(Replace call_llm with real LLM integration

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

source.spm:   0%|          | 0.00/805k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/807k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Device set to use cpu


[WARN] translation zh->en failed: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

[INFO] Loading translation model Helsinki-NLP/opus-mt-en-zh for en->zh (this may take a while)...


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

source.spm:   0%|          | 0.00/806k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/805k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Device set to use cpu


[WARN] translation en->zh failed: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

User Input: 帮我总结一下机器学习的最新进展。
Detected: zh
Final reply (localized):
你好! (2025年10月12日 16:57:13)

Answering (in English):

User asked: 帮我总结一下机器学习的最新进展。

Context:


(Replace call_llm with real LLM integration.)


