In [None]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import tqdm as notebook_tqdm

class PromptedQWEN(HuggingFaceEmbeddings):
    """Embedding wrapper that prepends a retrieval instruction to each input.

    Documents and queries receive different instruction prefixes; the
    prefixed text is then embedded by the parent ``HuggingFaceEmbeddings``.
    """

    def embed_documents(self, texts):
        """Embed every item of ``texts`` after adding the document prefix."""
        prompted = []
        for t in texts:
            prompted.append(f"Represent this document for retrieval: {t}")
        return super().embed_documents(prompted)

    def embed_query(self, text):
        """Embed a single query string after adding the query prefix."""
        prompted = f"Represent this query for retrieval: {text}"
        return super().embed_query(prompted)


def load_vectorstore(vectordb_path="./vectorDB"):
    """Load the FAISS index saved under ``vectordb_path`` and return it.

    A ``PromptedQWEN`` embedder is created for the store, and a progress
    line plus the number of vectors in the index are printed.
    """
    print(f"Loading FAISS index from {vectordb_path}...")

    # Embedding model used by the store to embed incoming queries.
    embedder = PromptedQWEN(model_name="Qwen/Qwen3-Embedding-8B")

    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # serialized data that is unsafe for untrusted input -- only point this
    # at index directories you created yourself.
    store = FAISS.load_local(
        vectordb_path,
        embedder,
        allow_dangerous_deserialization=True,
    )

    vector_count = store.index.ntotal
    print(f"   Total vectors: {vector_count}")

    return store

def load_retriever(vectordb_path="./vectorDB", vectorstore=None):
    """Return a top-5 similarity retriever over a vector store.

    The store is resolved in this order:

    1. the ``vectorstore`` argument, when given;
    2. a module-level ``vectorstore`` variable, when one exists (this is
       what the function relied on before, so existing callers that set
       the global first keep working without loading the index twice);
    3. a fresh load from ``vectordb_path`` via ``load_vectorstore``.

    Previously the function raised ``NameError`` when no global
    ``vectorstore`` existed and never used ``vectordb_path``.

    Args:
        vectordb_path: Directory of the saved FAISS index; used only when
            no store is supplied or already available.
        vectorstore: Optional already-loaded vector store to wrap.

    Returns:
        The object returned by ``vectorstore.as_retriever`` configured
        with ``search_type="similarity"`` and ``k=5``.
    """
    if vectorstore is None:
        # Backward-compatible fallback to the module-level store, if any.
        vectorstore = globals().get("vectorstore")
    if vectorstore is None:
        vectorstore = load_vectorstore(vectordb_path)

    return vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},
    )

def test_queries(vectorstore, queries, k=5):
    """Run each query against ``vectorstore`` and print the top ``k`` hits.

    For every hit the title and URL from its metadata ('N/A' when the key
    is absent) and the first 300 characters of its content are printed,
    followed by an 80-dash divider.
    """
    divider = "-" * 80

    for query_no, query in enumerate(queries, start=1):
        print(f"Query {query_no}: {query}")

        hits = vectorstore.similarity_search(query, k=k)

        for rank, hit in enumerate(hits, start=1):
            print(f"\n[Result {rank}]")
            print(f"Title: {hit.metadata.get('title', 'N/A')}")
            print(f"URL: {hit.metadata.get('url', 'N/A')}")
            print(f"Content preview: {hit.page_content[:300]}...")
            print(divider)


def test_with_scores(vectorstore, query, k=5):
    """Search ``vectorstore`` for ``query`` and print the top ``k`` hits with scores.

    A banner with the query is printed first; each hit then shows its
    score (4 decimal places), the title and URL from its metadata ('N/A'
    when the key is absent), and the first 200 characters of its content.
    """
    banner = "=" * 80
    divider = "-" * 80

    print(f"\n{banner}")
    print(f"Query with scores: {query}")
    print(f"{banner}")

    scored_hits = vectorstore.similarity_search_with_score(query, k=k)

    for rank, (hit, score) in enumerate(scored_hits, start=1):
        print(f"\n[Result {rank}] Score: {score:.4f}")
        print(f"Title: {hit.metadata.get('title', 'N/A')}")
        print(f"URL: {hit.metadata.get('url', 'N/A')}")
        print(f"Content preview: {hit.page_content[:200]}...")
        print(divider)


if __name__ == "__main__":
    # Directory holding the saved vector DB
    VECTORDB_PATH = "./vectorDB"

    # Load the FAISS index
    vectorstore = load_vectorstore(vectordb_path=VECTORDB_PATH)

    # Sample queries used for the smoke test
    test_queries_list = [
        "What is artificial intelligence?",
        "Who is Albert Einstein?",
        "How does photosynthesis work?",
        "What is the capital of France?",
        "Explain quantum mechanics",
    ]

    # Plain similarity search (top-5 results per query)
    test_queries(vectorstore, queries=test_queries_list, k=5)

    # Similarity search that also reports scores
    test_with_scores(vectorstore, query="What is machine learning?", k=3)

    # Closing banner: a blank line followed by two 80-character rules
    print(f"\n{'=' * 80}")
    print("=" * 80)


Loading FAISS index from ./vectorDB...


Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

---

BASELINE RAG

In [None]:
# Load the vector store and build the retriever. The results are bound to
# names so later cells can use them (the bare calls discarded both values).
vectorstore = load_vectorstore()
retriever = load_retriever()

# Prompt template (body still to be written)
prompt_template = """

"""




NameError: name 'load_vectorstore' is not defined