<a href="https://colab.research.google.com/github/petertseng0517/REG-TZUCHI-Library/blob/main/%E3%80%90REG_b%E3%80%91RAG02_%E6%89%93%E9%80%A0_RAG_%E7%B3%BB%E7%B5%B1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 0. 讀入你打造好的 vector dataset

In [None]:
# Install gdown quietly (-q); it is the tool used below to fetch the archive from Google Drive.
!pip -q install gdown

In [None]:
# Public Google Drive share link; it is handed to gdown in the next cell and saved as faiss_db.zip.
GDRIVE_PUBLIC_URL = "https://drive.google.com/file/d/1LANvgJndrJWrbJ_eFr5vmUgUeRtEYAzz/view?usp=sharing"

In [None]:
# Download the shared file to faiss_db.zip. {GDRIVE_PUBLIC_URL} is interpolated from the Python
# variable; --fuzzy lets gdown accept a full share link instead of a bare file id.
!gdown --fuzzy -O faiss_db.zip "{GDRIVE_PUBLIC_URL}"

In [None]:
# Extract the archive. -o overwrites existing files without asking, so re-running this cell
# (or Run All on a warm runtime) does not stall on unzip's interactive "replace?" prompt.
!unzip -o faiss_db.zip

### 1. 安裝並引入必要套件

In [None]:
# Keep LangChain on the 0.3 line: this notebook imports `langchain.chains`, which is not
# available in LangChain 1.x, so an unpinned `-U` install breaks the import cell below.
# NOTE(review): loosen the pins once the imports are migrated to the 1.x package layout.
!pip install -U "langchain<1.0" "langchain-community<0.4" faiss-cpu transformers sentence-transformers huggingface_hub requests==2.32.4
# aisuite provides the provider-agnostic chat client used later in the notebook.
!pip -q install "aisuite[all]"

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain_community.embeddings import HuggingFaceEmbeddings

In [None]:
from openai import OpenAI
import gradio as gr

### 2. 自訂 EmbeddingGemma embedding 類別

In [None]:
import os
from google.colab import userdata

In [None]:
# Read the Hugging Face access token from the Colab secret named 'HuggingFace'.
hf_token = userdata.get('HuggingFace')

In [None]:
from huggingface_hub import login
# Authenticate this session with the Hugging Face Hub using the token read above.
# NOTE(review): presumably needed because google/embeddinggemma-300m is a gated model — confirm.
login(token=hf_token)

In [None]:
from langchain_core.embeddings import Embeddings
from langchain_community.embeddings import HuggingFaceEmbeddings # Import HuggingFaceEmbeddings

class EmbeddingGemmaEmbeddings(HuggingFaceEmbeddings):
    """HuggingFaceEmbeddings preconfigured for ``google/embeddinggemma-300m``.

    Only the default model name is set here; ``embed_documents`` and
    ``embed_query`` are inherited unchanged from ``HuggingFaceEmbeddings``.

    NOTE(review): the index in ``faiss_db`` was presumably built with this same
    model — confirm, since vectors from a different model are not comparable.
    """

    def __init__(self, **kwargs):
        """Create the embedding wrapper.

        Args:
            **kwargs: Forwarded to ``HuggingFaceEmbeddings.__init__``. An
                explicit ``model_name`` overrides the EmbeddingGemma default.
        """
        # setdefault (rather than a hard-coded keyword) avoids
        # "TypeError: got multiple values for keyword argument 'model_name'"
        # when a caller passes model_name explicitly.
        kwargs.setdefault("model_name", "google/embeddinggemma-300m")
        super().__init__(**kwargs)

### 3. 載入 `faiss_db`

In [None]:
# Sanity check: list the extracted folder before trying to load it.
!ls faiss_db

In [None]:
# Build the embedding wrapper and load the saved FAISS index from the local "faiss_db" folder.
embedding_model = EmbeddingGemmaEmbeddings()
# SECURITY: allow_dangerous_deserialization=True opts in to loading pickled data, which can
# execute arbitrary code. NOTE(review): only acceptable if the downloaded archive is one you
# produced yourself — confirm the Drive file is trusted before running.
vectorstore = FAISS.load_local(
    "faiss_db",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True
)

In [None]:
# Expose the vector store as a retriever; k=4 requests four results per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

### 4. 設定好我們要的 LLM

如之前，我們會用 OpenAI 相容的 API 來呼叫 LLM，這次是透過 `aisuite` 套件。這裡使用 Groq 服務，可改成你要的服務。

In [None]:
import aisuite as ai

In [None]:
# Read the Groq API key from the Colab secret named 'Groq'.
api_key = userdata.get('Groq')

In [None]:
# Export the key as GROQ_API_KEY for this process.
# NOTE(review): presumably this is where aisuite's Groq provider looks for it — confirm.
os.environ['GROQ_API_KEY']=api_key

這裡的模型是用 Groq：`aisuite` 以模型名稱前面的 `groq:` 來指定服務，因此不需要另外設定 `base_url`。如果用其他服務請自行修改。

In [None]:
# Model identifier passed to client.chat.completions.create in chat_with_rag.
# NOTE(review): the "groq:" prefix presumably selects the provider in aisuite — confirm.
model = "groq:openai/gpt-oss-120b"
#base_url="https://api.groq.com/openai/v1"

In [None]:
# aisuite client used for the chat-completion call in chat_with_rag.
client = ai.Client()

### 5. `prompt` 設計

In [None]:
# System message sent with every request: sets the assistant's persona and tone.
system_prompt = "你是慈濟醫院圖書館館員，請根據資料來回應員工的問題。請親切、簡潔並附帶具體建議。請用台灣習慣的中文回應。"

# User-message template. chat_with_rag fills {retrieved_chunks} and {question} via str.format,
# so any literal braces added to this template must be doubled ({{ }}).
prompt_template = """
根據下列資料：
{retrieved_chunks}

回答使用者的問題：{question}

請根據資料內容回覆，若資料不足請告訴同仁可以請教圖書館的陳小姐。
"""

### 6. 使用 RAG 來回應

搜尋與使用者問題相關的資訊，根據我們的 prompt 樣版去讓 LLM 回應。

In [None]:
# Module-level log of (question, answer) pairs. It is only appended to here;
# it is not fed back into the prompt, so each call is answered independently.
chat_history = []

def chat_with_rag(user_input):
    """Answer a question with retrieval-augmented generation.

    Retrieves the chunks most relevant to ``user_input``, inserts them into
    ``prompt_template`` and asks the LLM (through the aisuite ``client``) to
    answer using ``system_prompt`` as the system message.

    Args:
        user_input: The user's question. Blank or ``None`` input is answered
            with a short reminder, without retrieval or an LLM call.

    Returns:
        The reply text: the model's message content, or the reminder string
        for blank input.

    Side effects:
        Appends ``(user_input, answer)`` to the module-level ``chat_history``
        for every question that reaches the LLM.
    """
    # Guard: a blank question would otherwise cost an embedding lookup and a
    # paid LLM call only to produce a meaningless answer.
    if not user_input or not user_input.strip():
        return "請先輸入您的問題，我才能為您查詢喔！"

    # Retrieve the relevant chunks. `invoke` is the supported retriever entry
    # point; `get_relevant_documents` is deprecated.
    docs = retriever.invoke(user_input)
    retrieved_chunks = "\n\n".join(doc.page_content for doc in docs)

    # Fill the custom prompt template with the retrieved context and question.
    final_prompt = prompt_template.format(retrieved_chunks=retrieved_chunks, question=user_input)

    # Call the language model through aisuite.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": final_prompt},
        ],
    )
    answer = response.choices[0].message.content

    # Mutating the list needs no `global` declaration.
    chat_history.append((user_input, answer))
    return answer

### 7. 用 Gradio 打造 Web App

In [41]:
# Minimal chat UI: a title, a chat transcript and a single textbox.
with gr.Blocks() as demo:
    gr.Markdown("# 🎓 慈濟醫院圖書館AI館員")
    # NOTE(review): history is handled as (user, bot) tuples below; newer Gradio
    # releases prefer a role/content message format — confirm against the installed version.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="請輸入你的問題...")

    def respond(message, chat_history_local):
        """Handle one textbox submit.

        Args:
            message: Text the user submitted.
            chat_history_local: The transcript currently shown in the Chatbot
                component (a list of (user, bot) pairs), or None if empty.

        Returns:
            A pair ``("", updated_history)``: the empty string clears the
            textbox, and the history is written back to the Chatbot.
        """
        # Tolerate a missing transcript so .append below cannot fail on None.
        chat_history_local = chat_history_local or []
        # Ignore blank submits instead of spending a retrieval + LLM call on them.
        if not message or not message.strip():
            return "", chat_history_local
        response = chat_with_rag(message)
        chat_history_local.append((message, response))
        return "", chat_history_local

    # Pressing Enter in the textbox runs respond and updates both components.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# debug=True keeps this cell running (and prints errors) until it is interrupted.
demo.launch(debug=True)

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://cf603747e2541c4683.gradio.live




In [None]:
# Smoke test of the RAG pipeline without the UI. Because the Gradio cell above is launched
# with debug=True, this cell only runs after that cell has been interrupted.
user_query = "請問圖書館的開放時間？"
response = chat_with_rag(user_query)
print(response)