In [None]:
import chromadb
from sentence_transformers import SentenceTransformer

# Load the Hugging Face model for local inference (no API key required).
model = SentenceTransformer("intfloat/multilingual-e5-large")

# Open the ChromaDB database persisted on local disk, then fetch the
# collection, creating it if it does not exist yet.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
my_collection = chroma_client.get_or_create_collection(name="my_collection")

In [None]:
# Convert the texts into embedding vectors with the local model.
texts = ["今天天氣很好！", "下雨天適合喝熱茶。"]
# Convert to a plain list, the format ChromaDB requires for embeddings.
embeddings = model.encode(texts).tolist()

# Store every record in a single batched upsert instead of one call per
# document; ids, documents and embeddings are parallel lists, so position i
# in each list describes the same record ("id_0", "id_1", ...).
my_collection.upsert(
    ids=[f"id_{i}" for i in range(len(texts))],
    documents=texts,
    embeddings=embeddings,
)
print("✅ 已儲存本地計算的嵌入向量！")

✅ 已儲存本地計算的嵌入向量！


In [None]:
# Note: because the vector is computed locally, it must be passed through
# the `query_embeddings` parameter.
query_embedding = model.encode(["今天天氣很好！"]).tolist()
my_collection.query(n_results=1, query_embeddings=query_embedding)

{'ids': [['id_0']],
 'embeddings': None,
 'documents': [['it a nice day！']],
 'uris': None,
 'data': None,
 'metadatas': [[None]],
 'distances': [[0.26763425693677245]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [None]:
import os

import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# Read the Hugging Face token from the environment rather than hardcoding it
# in the notebook, so the secret never ends up in version control or in a
# shared copy. The token needs the "Make calls to inference providers"
# permission enabled.
hf_api_key = os.environ.get("HF_TOKEN")
if not hf_api_key:
    raise RuntimeError(
        "HF_TOKEN is not set; export a Hugging Face access token with the "
        "'Make calls to inference providers' permission before running this cell."
    )

# Configure the Hugging Face embedding function (requires an API key).
huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
    api_key=hf_api_key,
    model_name="intfloat/multilingual-e5-large"
)

# Initialise ChromaDB against the database persisted on local disk.
chroma_client = chromadb.PersistentClient(path="./chroma_db")

# Get or create the collection that stores the vectors; documents added
# without explicit embeddings are embedded through `huggingface_ef`.
mycollection = chroma_client.get_or_create_collection(
    name="mycollection",
    embedding_function=huggingface_ef
)

# Add some text documents; no embeddings are passed here, so the collection's
# embedding function computes them.
mycollection.upsert(
    ids=["1", "2"],
    documents=["今天天氣很好！", "下雨天適合喝熱茶。"]
)

In [None]:
# Query by raw text. `query_texts` is embedded by the embedding function the
# collection was created with (here the Hugging Face API function), so no
# locally computed vector is needed in this cell.
mycollection.query(
    query_texts=["今天天氣很好！"],
    n_results=2
)

{'ids': [['1', '2']],
 'embeddings': None,
 'documents': [['今天天氣很好！', '下雨天適合喝熱茶。']],
 'uris': None,
 'data': None,
 'metadatas': [[None, None]],
 'distances': [[2.5161756403231264e-12, 0.2676620101520762]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}