In [None]:
import os
import tempfile

import gradio as gr
from deep_translator import GoogleTranslator
from gtts import gTTS
from PIL import Image
from ultralytics import YOLO

In [None]:
# Load YOLOv8 once at module level; detect_main_object() reuses this instance.
model = YOLO("yolov8n.pt")  # pick the smallest model so inference is fast

# Languages offered in the UI: display name -> language code handed to the
# translator and the text-to-speech helper. Insertion order is the order in
# which the per-language widgets are created and filled.
languages = {
    "Thai": "th",
    "English": "en",
    "Korean": "ko",
    "Japanese": "ja",
    "Chinese": "zh-CN",
    "Russian": "ru",
    "French": "fr",
    "Vietnamese": "vi",
    "Spanish": "es",
    "Portuguese": "pt",
}


In [None]:
# Detect the main object in an image with YOLO
def detect_main_object(image):
    """Return the class label of the highest-confidence YOLO detection.

    Args:
        image: Image to analyse (the UI supplies a PIL image). ``None`` is
            treated as "no image supplied".

    Returns:
        The class-name string of the detection with the highest confidence,
        or ``None`` when no image was given or nothing was detected.
    """
    # Ultralytics substitutes its bundled sample images when the source is
    # None, which would report objects the user never uploaded.
    if image is None:
        return None

    results = model(image)[0]
    boxes = results.boxes
    if boxes is None or len(boxes) == 0:
        return None

    # Confidence and class live in one-element tensors; move them to the CPU
    # before converting to plain Python numbers.
    best_box = max(boxes, key=lambda box: box.conf.cpu().item())
    class_id = int(best_box.cls.cpu().item())
    # Look the name up in the table attached to this result instead of
    # reaching into the wrapped network object.
    return results.names[class_id]

# Translate a word
def translate_word(word, lang_code):
    """Translate ``word`` into the language identified by ``lang_code``.

    The source language is auto-detected by the translator. Any exception
    raised while translating is printed and a fixed placeholder string is
    returned instead, so callers always receive a value.
    """
    try:
        translator = GoogleTranslator(source="auto", target=lang_code)
        translated = translator.translate(word)
    except Exception as err:
        print("Translation error:", err)
        return "(‡πÅ‡∏õ‡∏•‡πÑ‡∏°‡πà‡πÑ‡∏î‡πâ)"
    return translated

# Generate speech audio
def speak_word(text, lang_code):
    """Synthesise ``text`` with gTTS and return the path of the saved MP3.

    Args:
        text: Text to speak.
        lang_code: Language code passed to gTTS as ``lang``.

    Returns:
        Path of a temporary ``.mp3`` file, or ``None`` when synthesis fails.
        The file is deliberately left on disk so the UI can play it later.
    """
    audio_path = None
    try:
        tts = gTTS(text=text, lang=lang_code)
        # Only reserve a unique file name here; the handle is closed before
        # gTTS opens the same path for writing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        print("TTS error:", e)
        # delete=False means nothing else will remove the file, so a failed
        # synthesis must clean up its own empty or partial output.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass  # best effort: the file may already be gone
        return None

# Predict from the image
def predict(image, selected_langs):
    """Detect the main object and translate/speak its label per language.

    Args:
        image: Uploaded image, or ``None`` when nothing was uploaded.
        selected_langs: Display names (keys of ``languages``) to translate
            into. ``None`` is treated as an empty selection.

    Returns:
        A flat list matching the click handler's outputs: the detected label
        (or the "no object" message), then one translated text per entry in
        ``languages``, then one ``gr.update`` per entry for its audio player.
        Languages that were not selected get an empty text and a hidden,
        cleared audio player.
    """
    chosen = selected_langs or []

    # Skip detection entirely without an image: the detector would otherwise
    # be asked to run on None.
    pred_label = detect_main_object(image) if image is not None else None
    if not pred_label:
        return (
            ["‡πÑ‡∏°‡πà‡∏û‡∏ö‡∏ß‡∏±‡∏ï‡∏ñ‡∏∏‡πÄ‡∏î‡πà‡∏ô‡πÉ‡∏ô‡∏†‡∏≤‡∏û"]
            + ["" for _ in languages]
            # Clear as well as hide, so audio from a previous run is dropped.
            + [gr.update(value=None, visible=False) for _ in languages]
        )

    texts = []
    audio_updates = []
    for lang, code in languages.items():
        if lang in chosen:
            translated = translate_word(pred_label, code)
            audio_path = speak_word(translated, code)
        else:
            translated, audio_path = "", None
        texts.append(translated)
        # Show the player only when there is actually a file to play.
        audio_updates.append(gr.update(value=audio_path, visible=bool(audio_path)))

    return [pred_label] + texts + audio_updates

In [None]:
# UI: an image upload and a language picker as inputs; one text box for the
# detected label plus one text box and one audio player per language as outputs.
with gr.Blocks() as app:
    gr.Markdown("""
    # üì∏ SnapTranslate by Pawit
    üì≤ ‡∏≠‡∏±‡∏õ‡πÇ‡∏´‡∏•‡∏î‡∏†‡∏≤‡∏û ‚Üí ‡∏ó‡∏≥‡∏ô‡∏≤‡∏¢‡∏ß‡∏±‡∏ï‡∏ñ‡∏∏‡∏´‡∏•‡∏±‡∏Å‡∏î‡πâ‡∏ß‡∏¢ YOLO ‚Üí ‡πÅ‡∏õ‡∏•‡∏´‡∏•‡∏≤‡∏¢‡∏†‡∏≤‡∏©‡∏≤ ‚Üí ‡∏ü‡∏±‡∏á‡πÄ‡∏™‡∏µ‡∏¢‡∏á
    """)

    # Inputs: the image arrives in predict() as a PIL image (type="pil"); the
    # checkbox choices are the display names used as keys of `languages`.
    with gr.Row():
        image_input = gr.Image(type="pil", label="üìÑ ‡∏≠‡∏±‡∏õ‡πÇ‡∏´‡∏•‡∏î‡∏£‡∏π‡∏õ‡∏†‡∏≤‡∏û")
        lang_select = gr.CheckboxGroup(choices=list(languages.keys()), label="üåê ‡πÄ‡∏•‡∏∑‡∏≠‡∏Å‡∏†‡∏≤‡∏©‡∏≤‡∏ó‡∏µ‡πà‡∏ï‡πâ‡∏≠‡∏á‡∏Å‡∏≤‡∏£‡πÅ‡∏õ‡∏•")

    predict_btn = gr.Button("üßê ‡πÅ‡∏õ‡∏•‡∏†‡∏≤‡∏û")
    prediction_text = gr.Textbox(label="üîç ‡∏ß‡∏±‡∏ï‡∏ñ‡∏∏‡∏´‡∏•‡∏±‡∏Å‡πÉ‡∏ô‡∏†‡∏≤‡∏û (‡∏†‡∏≤‡∏©‡∏≤‡∏≠‡∏±‡∏á‡∏Å‡∏§‡∏©)")

    # Per-language output widgets, keyed by display name. Both dicts are
    # filled in `languages` order, which the click wiring below relies on.
    output_texts = {}
    audio_outputs = {}

    for lang in languages:
        with gr.Row():
            output_texts[lang] = gr.Textbox(label=f"{lang}:", visible=True)
            # Players start hidden; predict() makes one visible only when it
            # returns an audio file for that language.
            audio_outputs[lang] = gr.Audio(label="", visible=False)

    # The outputs list must line up with the list predict() returns:
    # detected label, then every text box, then every audio player.
    predict_btn.click(
        fn=predict,
        inputs=[image_input, lang_select],
        outputs=[prediction_text] + list(output_texts.values()) + list(audio_outputs.values())
    )

# NOTE(review): per Gradio's launch() options, share=True requests a publicly
# reachable link and debug=True keeps the cell blocking while the server
# runs — confirm both are intended outside a hosted-notebook environment.
app.launch(debug=True, share=True)