<a href="https://colab.research.google.com/github/theerayutbo/gradioMed/blob/main/gradioMed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Library Installation

In [1]:
!pip install -q transformers accelerate bitsandbytes gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m109.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m72.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m49.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

2. โหลดโมเดล MedGemma 27B (instruction-tuned text-only)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Checkpoint id of the instruction-tuned, text-only MedGemma 27B model.
model_id = "google/medgemma-27b-text-it"

# 4-bit NF4 quantization settings (bfloat16 compute dtype), intended for
# running on Colab Free/Pro GPUs such as a Tesla T4.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the quantized model (device placement is left to device_map="auto"),
# then the tokenizer that belongs to the same checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(model_id)


3. สร้างฟังก์ชันสำหรับ Chat


In [None]:
import gradio as gr

def chat_with_medgemma(user_message, history):
    """Generate the model's reply to ``user_message`` and extend the chat history.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        user_message: Text the user just submitted.
        history: Previous turns as ``(user_text, assistant_text)`` pairs.
            ``None`` is treated as an empty history. The list passed in is
            not modified; a new list is returned instead.

    Returns:
        A 2-tuple ``(history, history)`` holding the updated history twice,
        one entry per output the caller wires up. A blank ``user_message``
        yields the history unchanged without calling the model.
    """
    turns = list(history or [])

    # Nothing to answer: skip the expensive generate call on blank input.
    if not user_message or not user_message.strip():
        return turns, turns

    # Build the conversation in chat-template message format.
    messages = [{"role":"system", "content":"You are a helpful medical assistant."}]
    for past_user, past_reply in turns:
        messages.append({"role": "user", "content": past_user})
        messages.append({"role": "assistant", "content": past_reply})
    messages.append({"role": "user", "content": user_message})

    # return_dict=True is required here: the result is indexed by
    # "input_ids" and unpacked with ** below, so it must be a mapping
    # rather than a bare tensor of token ids.
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=True,
        return_dict=True, return_tensors="pt",
    ).to(model.device)
    input_len = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        gen = model.generate(**inputs, max_new_tokens=200, do_sample=False)
    # Drop the prompt tokens so only the newly generated text is decoded.
    reply = tokenizer.decode(gen[0][input_len:], skip_special_tokens=True)

    turns.append((user_message, reply))
    return turns, turns


4. สร้าง Gradio UI


In [None]:
# Chat UI: a chatbot pane, a one-line input box, and a State holding the history.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="พิมพ์ข้อความของคุณที่นี่…")
    state = gr.State([])  # holds the chat history

    # Pressing Enter sends the message plus the stored history to the model;
    # once that call has succeeded, the input box is cleared so the sent
    # message does not linger (on failure the text is kept for a retry).
    user_input.submit(
        chat_with_medgemma,
        inputs=[user_input, state],
        outputs=[chatbot, state]
    ).success(lambda: "", inputs=None, outputs=user_input)

# NOTE(review): share=True requests a public share link and
# server_name="0.0.0.0" listens on all interfaces — confirm this exposure
# is intended for a medical assistant demo.
demo.launch(server_name="0.0.0.0", share=True)