In [None]:
# ====================================
# 📌 Step 0. Install the required packages
# ====================================
# Notebook shell command; `-q` keeps pip's output quiet.
!pip install -q transformers sentence-transformers chromadb pypdf huggingface_hub

# ====================================
# 📌 Step 1. 匯入套件
# ====================================
import os
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.utils import embedding_functions
from huggingface_hub import InferenceClient

# ====================================
# 📌 Step 2. Configure the Hugging Face API
# ====================================
# Get a free token at https://huggingface.co/settings/tokens
# Prefer supplying it through the HF_TOKEN environment variable so the secret
# is not saved inside the notebook; pasting it as the fallback string below
# still works.
HF_TOKEN = os.environ.get("HF_TOKEN", "")   # or fill in your own token here
# An empty string would be sent as the credential; pass None instead so that
# huggingface_hub can fall back to its own token lookup (presumably a cached
# login or Colab secret -- confirm against the InferenceClient docs).
client = InferenceClient(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token=HF_TOKEN or None,
)

# ====================================
# 📌 Step 3. Upload a PDF (Colab UI)
# ====================================
from google.colab import files

uploaded = files.upload()  # pick your own PDF (only PDF is parsed below)
if not uploaded:
    # Fail with a readable message instead of an IndexError further down.
    raise ValueError("No file was uploaded; please select a PDF file.")

# Only the first uploaded file is used.
pdf_path = next(iter(uploaded))
reader = PdfReader(pdf_path)

# Extract each page exactly once and keep only pages that yielded real text
# (extract_text() may return an empty or whitespace-only string).
texts = [
    page_text
    for page_text in (page.extract_text() for page in reader.pages)
    if page_text and page_text.strip()
]

# ====================================
# 📌 Step 4. Build the vector database (ChromaDB)
# ====================================
embedder = SentenceTransformer("all-MiniLM-L6-v2")

chroma_client = chromadb.Client()

# Drop a stale "docs" collection so re-running the cell starts clean.
# Depending on the chromadb version, list_collections() yields either
# Collection objects or plain name strings; handle both.
existing_names = [
    getattr(entry, "name", entry) for entry in chroma_client.list_collections()
]
if "docs" in existing_names:
    chroma_client.delete_collection("docs")

collection = chroma_client.create_collection("docs")

# Encode all pages in one batch and insert them with a single add() call.
# Skip the call entirely when no text was extracted, since there is nothing
# to index. NOTE(review): very large PDFs may exceed Chroma's maximum batch
# size for one add() -- confirm if thousands of pages are expected.
if texts:
    collection.add(
        documents=texts,
        ids=[str(page_index) for page_index in range(len(texts))],
        embeddings=embedder.encode(texts).tolist(),
    )
# ====================================
# 📌 Step 5. RAG 查詢流程 (修正版)
# ====================================
def rag_query(question, top_k=3, max_tokens=300, temperature=None):
    """Answer ``question`` using retrieved document text as context.

    The question is embedded with the module-level ``embedder``, the
    ``top_k`` closest entries are fetched from the module-level
    ``collection``, and their text is sent together with the question to
    the chat model behind the module-level ``client``.

    Args:
        question: The user's question.
        top_k: Number of documents to retrieve from the collection.
        max_tokens: Upper bound on generated tokens (default 300, the value
            that used to be hard-coded).
        temperature: Sampling temperature forwarded to the model; ``None``
            leaves the choice to the inference backend. Exposed so the RAG
            and no-RAG baselines can be run with matching settings.

    Returns:
        The content of the first choice in the chat completion response.
    """
    # 1. Retrieve the relevant documents. The embedding is converted to a
    #    plain list of floats before it is handed to the collection.
    query_vector = embedder.encode(question).tolist()
    results = collection.query(
        query_embeddings=[query_vector],
        n_results=top_k,
    )
    # results["documents"] holds one list per query; only one query is sent.
    context = " ".join(results["documents"][0])

    # 2. Build the chat messages in the role/content format.
    messages = [
        {"role": "system", "content": "你是一個文件助理，請根據提供的內容回答問題"},
        {"role": "user", "content": f"內容：\n{context}\n\n問題：{question}"},
    ]

    # 3. Call the LLM.
    response = client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response.choices[0].message["content"]

# ====================================
# 📌 Step 6. Try a sample question
# ====================================
question = "這份文件在講什麼？"
rag_answer = rag_query(question)
print(rag_answer)


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m61.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m54.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.3/103.3 kB[0m [31m6.3 MB/s[0m eta [36m0:0



Saving 114-1 MGMT Syllabus.pdf to 114-1 MGMT Syllabus.pdf


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

這份文件是課程說明書，描述了「管理學」課程（IM2002）的內容、規則和要求。課程的目的是為學生介紹管理的基本概念、策略和原則，並鼓勵學生在日常生活中應用管理知識。


In [None]:
# ====================================
# 📌 Step 驗證沒有RAG. 純 LLM（without RAG）基準線
#    - 不做檢索，不送 context
#    - 僅把使用者問題丟給模型
# ====================================
def llm_only_query(question, max_tokens=300, temperature=0.7):
    """Send ``question`` straight to the chat model with no retrieved context.

    Args:
        question: The user's question, used verbatim as the user message.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        The content of the first choice in the chat completion response.
    """
    system_turn = {"role": "system", "content": "你是一個有幫助的助理，請用繁體中文作答。"}
    user_turn = {"role": "user", "content": question}

    completion = client.chat_completion(
        messages=[system_turn, user_turn],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]

# Test: reuse the same `question` as the RAG version so the answers can be compared.
baseline_answer = llm_only_query(question)
print("Without RAG: " + baseline_answer)


Without RAG: 你沒有提供任何文件，我不清楚你想查詢什麼內容。若你能提供更多資訊或文件的內容，我會盡量幫助你。
