In [2]:
# Load the knowledge base: one entry per line of the text file.
# 'utf-8-sig' decodes plain UTF-8 and drops a leading BOM if the file has one.
with open('cat-facts.txt', 'r', encoding='utf-8-sig') as file:
    # Strip the surrounding whitespace (readlines() would keep the trailing
    # newline on every entry, which then shows up in the printed results and
    # in the prompt context) and skip blank lines so that no empty chunk is
    # ever embedded.
    dataset = [line.strip() for line in file if line.strip()]
print(f'Loaded {len(dataset)} entries')


Loaded 150 entries


In [2]:
import ollama

# Ollama model identifiers: EMBEDDING_MODEL is passed to ollama.embed() and
# LANGUAGE_MODEL is passed to ollama.chat() elsewhere in this notebook.
EMBEDDING_MODEL = 'hf.co/CompendiumLabs/bge-base-en-v1.5-gguf'
LANGUAGE_MODEL = 'hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF'

# In-memory vector store.
# Each element in the VECTOR_DB will be a tuple (chunk, embedding)
# The embedding is a list of floats, for example: [0.1, 0.04, -0.34, 0.21, ...]
VECTOR_DB = []

def add_chunk_to_database(chunk):
  """Embed ``chunk`` and append it to the module-level ``VECTOR_DB``.

  The text is sent to ``ollama.embed`` using ``EMBEDDING_MODEL``; the first
  vector of the response's ``'embeddings'`` field is stored together with the
  original text as a ``(chunk, embedding)`` tuple.

  Args:
    chunk: The text to embed and store.
  """
  response = ollama.embed(model=EMBEDDING_MODEL, input=chunk)
  vector = response['embeddings'][0]
  VECTOR_DB.append((chunk, vector))


In [3]:
# Start from an empty store so that re-running this cell rebuilds the database
# instead of appending a second copy of every chunk (which would make the
# retrieval step return duplicated results).
VECTOR_DB.clear()
for i, chunk in enumerate(dataset):
  add_chunk_to_database(chunk)
  print(f'Added chunk {i+1}/{len(dataset)} to the database')


Added chunk 1/150 to the database
Added chunk 2/150 to the database
Added chunk 3/150 to the database
Added chunk 4/150 to the database
Added chunk 5/150 to the database
Added chunk 6/150 to the database
Added chunk 7/150 to the database
Added chunk 8/150 to the database
Added chunk 9/150 to the database
Added chunk 10/150 to the database
Added chunk 11/150 to the database
Added chunk 12/150 to the database
Added chunk 13/150 to the database
Added chunk 14/150 to the database
Added chunk 15/150 to the database
Added chunk 16/150 to the database
Added chunk 17/150 to the database
Added chunk 18/150 to the database
Added chunk 19/150 to the database
Added chunk 20/150 to the database
Added chunk 21/150 to the database
Added chunk 22/150 to the database
Added chunk 23/150 to the database
Added chunk 24/150 to the database
Added chunk 25/150 to the database
Added chunk 26/150 to the database
Added chunk 27/150 to the database
Added chunk 28/150 to the database
Added chunk 29/150 to the dat

In [4]:
def cosine_similarity(a, b):
  """Return the cosine similarity between two numeric vectors.

  Args:
    a: First vector, an iterable of numbers that can be traversed twice
      (e.g. a list).
    b: Second vector, same requirements as ``a``.

  Returns:
    The dot product of ``a`` and ``b`` divided by the product of their
    Euclidean norms. If either vector has zero norm (all zeros or empty) the
    similarity is undefined, and ``0.0`` is returned instead of raising
    ``ZeroDivisionError``.
  """
  # Generator expressions avoid building throwaway lists just to sum them.
  dot_product = sum(x * y for x, y in zip(a, b))
  norm_a = sum(x ** 2 for x in a) ** 0.5
  norm_b = sum(x ** 2 for x in b) ** 0.5
  denominator = norm_a * norm_b
  if denominator == 0:
    # A zero vector has no direction; report "no similarity".
    return 0.0
  return dot_product / denominator


In [5]:
def retrieve(query, top_n=3):
  """Return the ``top_n`` stored chunks most similar to ``query``.

  The query is embedded with ``EMBEDDING_MODEL`` through ``ollama.embed`` and
  compared to every entry of ``VECTOR_DB`` using ``cosine_similarity``.

  Args:
    query: The question text to search for.
    top_n: Maximum number of results to return.

  Returns:
    A list of ``(chunk, similarity)`` tuples ordered from most to least
    similar.
  """
  response = ollama.embed(model=EMBEDDING_MODEL, input=query)
  query_embedding = response['embeddings'][0]
  # Score every stored chunk against the query.
  scored = [
    (chunk, cosine_similarity(query_embedding, embedding))
    for chunk, embedding in VECTOR_DB
  ]
  # Higher similarity means more relevant, so rank in descending order.
  ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
  return ranked[:top_n]


In [10]:
input_query = input('Ask me a question: ')
retrieved_knowledge = retrieve(input_query)

# Show every retrieved chunk together with its similarity score.
print('Retrieved knowledge:')
for chunk, similarity in retrieved_knowledge:
    print(f' - (similarity: {similarity:.2f}) {chunk}')

# Build the context string first: one " - " bullet per retrieved chunk.
context = "\n".join(f" - {chunk}" for chunk, _ in retrieved_knowledge)

# Then assemble the system prompt: fixed instructions followed by the context.
instruction_prompt = "".join([
    "You are a helpful chatbot.\n",
    "Use only the following pieces of context to answer the question. ",
    "Don't make up any new information:\n",
    f"{context}",
])

print(instruction_prompt)


Retrieved knowledge:
 - (similarity: 0.76) In ancient Egypt, mummies were made of cats, and embalmed mice were placed with them in their tombs. In one ancient city, over 300,000 cat mummies were found.

 - (similarity: 0.72) In 1888, more than 300,000 mummified cats were found an Egyptian cemetery. They were stripped of their wrappings and carted off to be used by farmers in England and the U.S. for fertilizer.

 - (similarity: 0.65) When a family cat died in ancient Egypt, family members would mourn by shaving off their eyebrows. They also held elaborate funerals during which they drank wine and beat their breasts. The cat was embalmed with a sculpted wooden mask and the tiny mummy was placed in the family tomb or in a pet cemetery with tiny mummies of mice.

You are a helpful chatbot.
Use only the following pieces of context to answer the question. Don't make up any new information:
 - In ancient Egypt, mummies were made of cats, and embalmed mice were placed with them in their tombs

In [11]:
# Ask the language model, using the instruction prompt as the system message
# and the user's question as the user message. The result is iterated below.
stream = ollama.chat(
  model=LANGUAGE_MODEL,
  stream=True,
  messages=[
    {'role': 'system', 'content': instruction_prompt},
    {'role': 'user', 'content': input_query},
  ],
)

# Print each piece of the reply as soon as it arrives, without adding line
# breaks between the pieces.
print('Chatbot response:')
for chunk in stream:
  piece = chunk['message']['content']
  print(piece, end='', flush=True)


Chatbot response:
The ancient Egyptians believed that cats were sacred animals, and they often mummified them as part of their funerary rites. The mummified cats were placed in tombs or cemeteries with other family members who had died, and were sometimes even used for fertilizer.

One notable example is the discovery of over 300,000 cat mummies in a single Egyptian cemetery, which was found in one ancient city. The mummies were typically buried with small tokens, such as tiny figurines or trinkets, that represented the deceased cat's status within their family and community.

In addition to being used for funerary purposes, the cat mummified animals could also be used by farmers in ancient Egypt who needed a source of fertilizer. This practice was known as "dung farming," where animal manure was collected from farms and used as a natural fertilizer.

In [1]:
# pip install faiss-cpu sentence-transformers
import numpy as np, faiss
from sentence_transformers import SentenceTransformer

docs = ["段落1...", "段落2...", "段落3..."]
model = SentenceTransformer("all-MiniLM-L6-v2")
# Normalise the embeddings first so that inner product equals cosine similarity.
emb = model.encode(docs, normalize_embeddings=True).astype("float32")
d = emb.shape[1]

# Inner-product index wrapped in an ID map so each vector carries an explicit id.
index = faiss.IndexIDMap(faiss.IndexFlatIP(d))
ids = np.arange(len(docs)).astype("int64")
index.add_with_ids(emb, ids)

q = model.encode(["我的問題是..."], normalize_embeddings=True).astype("float32")
D, I = index.search(q, k=3)  # take the top 3
# When k exceeds the number of indexed vectors FAISS pads the result with id -1;
# docs[-1] would silently return the last document, so drop those slots.
print([docs[i] for i in I[0] if i != -1])

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


['段落3...', '段落1...', '段落2...']


In [None]:
# pip install faiss-cpu sentence-transformers  # 先裝好（即便用 Ollama，也建議裝供日後切換）

import csv, numpy as np, faiss, os
from textwrap import dedent

# ==== 0) Switch: which embedding backend produces the vectors? ====
# NOTE(review): both branches rebind EMBEDDING_MODEL, the same global that
# add_chunk_to_database() and retrieve() read at call time, so running this
# cell changes the model those earlier functions use — confirm this is intended.
USE_OLLAMA = False   # True = keep using the Ollama bge-base-zh model; False = switch to sentence-transformers

if USE_OLLAMA:
    import ollama
    EMBEDDING_MODEL = 'hf.co/CompendiumLabs/bge-base-zh-v1.5-gguf'  # the original Ollama embedding model
else:
    from sentence_transformers import SentenceTransformer
    # A Chinese / multilingual model is recommended (all-MiniLM-L6-v2 leans towards English)
    EMBEDDING_MODEL = 'BAAI/bge-m3'  # or 'paraphrase-multilingual-MiniLM-L12-v2'
    st_model = SentenceTransformer(EMBEDDING_MODEL)

# ==== 1) Load the table ====
ROWS = []
# 'utf-8-sig' reads plain UTF-8 unchanged but strips a leading BOM. With plain
# 'utf-8' a BOM stays glued to the first header name, so the 'InvoicePayment'
# lookup below would silently come back empty for every row.
with open('records.csv', newline='', encoding='utf-8-sig') as f:
    for r in csv.DictReader(f):
        # A missing column or a None value collapses to '' before stripping.
        record = {
            column: (r.get(column, '') or '').strip()
            for column in ('InvoicePayment', 'AccountingCategory', 'Summary')
        }
        # Skip rows in which all three fields are empty.
        if not any(record.values()):
            continue
        ROWS.append(record)

print(f'Loaded {len(ROWS)} rows')

# ==== 2) Build the text to embed (merge the columns of each row into one passage) ====
DOCS = [
    (
        f"InvoicePayment: {row['InvoicePayment']}. "
        f"AccountingCategory: {row['AccountingCategory']}. "
        f"Summary: {row['Summary']}"
    )
    for row in ROWS
]

# ==== 3) Produce the vectors ====
def embed_texts(texts):
    """Embed a list of strings and return row-normalised float32 vectors.

    The backend is chosen by the module-level ``USE_OLLAMA`` flag:
    ``ollama.embed`` is called once per text when it is true, otherwise
    ``st_model.encode`` encodes the whole list in one call.

    Args:
        texts: List of strings to embed.

    Returns:
        A float32 NumPy array with one row per text. Each row is divided by
        its L2 norm (plus 1e-12 to avoid dividing by zero) so that an inner
        product between rows can be used as cosine similarity.
    """
    if not USE_OLLAMA:
        # sentence-transformers encodes the batch at once and already
        # normalises the vectors; normalising again below leaves them unchanged.
        matrix = st_model.encode(texts, normalize_embeddings=True).astype('float32')
    else:
        # Ollama is called one text at a time; the calls could be batched to
        # reduce overhead.
        matrix = np.array(
            [
                ollama.embed(model=EMBEDDING_MODEL, input=text)['embeddings'][0]
                for text in texts
            ],
            dtype='float32',
        )
    # L2-normalise every row (needed for the Ollama vectors) so that inner
    # product can serve as cosine similarity.
    lengths = np.linalg.norm(matrix, axis=1, keepdims=True) + 1e-12
    return matrix / lengths

EMB = embed_texts(DOCS).astype('float32')
d = EMB.shape[1]

# ==== 4) Build the FAISS index (inner product, equivalent to cosine on the
#         normalised vectors; explicit ids are kept for lookup) ====
ids = np.arange(len(DOCS), dtype='int64')
index = faiss.IndexIDMap(faiss.IndexFlatIP(d))
index.add_with_ids(EMB, ids)
print("index.ntotal =", index.ntotal)

# Lookup tables from vector id to the original row and to the embedded text,
# so that search results can return the source fields.
ID2META = dict(enumerate(ROWS))
ID2TEXT = dict(enumerate(DOCS))

# ==== 5) Query function (question string in; top-k hits with score and original fields out) ====
def search(query_text, k=5):
    """Return up to ``k`` indexed documents most similar to ``query_text``.

    Args:
        query_text: The query string; it is embedded with ``embed_texts``.
        k: Number of neighbours requested from the FAISS index.

    Returns:
        A list of dicts in the order returned by the index, each with
        ``'score'`` (the inner-product score as a float, i.e. cosine
        similarity on the normalised vectors), ``'text'`` (the embedded
        passage) and ``'meta'`` (the original row). Slots that FAISS fills
        with id -1, which can happen when ``k`` exceeds the amount of indexed
        data, are left out.
    """
    query_vec = embed_texts([query_text]).astype('float32')
    scores, labels = index.search(query_vec, k)
    return [
        {
            'score': float(score),
            'text': ID2TEXT[int(label)],
            'meta': ID2META[int(label)],
        }
        for score, label in zip(scores[0], labels[0])
        if label != -1
    ]




Loaded 265 rows
index.ntotal = 265
0.6713 | InvoicePayment: received. AccountingCategory: 資本. Summary: 張功爾 | {'InvoicePayment': 'received', 'AccountingCategory': '資本', 'Summary': '張功爾'}
0.6537 | InvoicePayment: paid. AccountingCategory: 薪資支出. Summary: 12月薪資張功爾 | {'InvoicePayment': 'paid', 'AccountingCategory': '薪資支出', 'Summary': '12月薪資張功爾'}
0.6531 | InvoicePayment: paid. AccountingCategory: 銀行存款. Summary: 張功爾 | {'InvoicePayment': 'paid', 'AccountingCategory': '銀行存款', 'Summary': '張功爾'}


In [10]:
# ==== 6) Quick smoke test ====
if __name__ == "__main__":
    # Run one sample query and print every hit as "score | text | metadata".
    for r in search("張功爾 paid", k=3):
        print(f"{r['score']:.4f} | {r['text']} | {r['meta']}")

    # (Optional) persist the index to disk.
    faiss.write_index(index, "records.faiss")
    # To load it back: index = faiss.read_index("records.faiss")

0.6713 | InvoicePayment: received. AccountingCategory: 資本. Summary: 張功爾 | {'InvoicePayment': 'received', 'AccountingCategory': '資本', 'Summary': '張功爾'}
0.6537 | InvoicePayment: paid. AccountingCategory: 薪資支出. Summary: 12月薪資張功爾 | {'InvoicePayment': 'paid', 'AccountingCategory': '薪資支出', 'Summary': '12月薪資張功爾'}
0.6531 | InvoicePayment: paid. AccountingCategory: 銀行存款. Summary: 張功爾 | {'InvoicePayment': 'paid', 'AccountingCategory': '銀行存款', 'Summary': '張功爾'}
