In [None]:
import sys
import os

# Make the parent of the current working directory importable so the
# `src.*` project modules used in later cells can be resolved.
project_root = os.path.abspath("..")
# Only append when missing: re-running this cell would otherwise add a
# duplicate sys.path entry on every execution.
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
# Load API keys from a .env file into the process environment.
from dotenv import load_dotenv

# load_dotenv() returns False when it found nothing to load (for example the
# .env file is missing or empty). Report that here so the situation is
# noticed in this cell rather than several cells later.
if not load_dotenv():
    print(
        "Warning: no variables were loaded from a .env file; "
        "API keys must already be set in the environment."
    )

In [None]:
# Setup: Import all modules
from src.chunking import load_and_chunk

from src.embedding import get_embedding_model
from src.vector_store import build_vectorstore, save_vectorstore, load_vectorstore
from src.query_api import build_qa_chain


In [None]:
# Step 1: Load the PDF and split it into chunks
pdf_path = "../data/study_plan.pdf"  # change path if needed
chunks = load_and_chunk(pdf_path)
print(f"Total chunks: {len(chunks)}")
# Preview the first chunk only when there is one: indexing an empty result
# would raise IndexError and obscure the actual problem (nothing was chunked).
if len(chunks) > 0:
    print(chunks[0])
else:
    print(f"No chunks were produced from {pdf_path}; check the file before continuing.")


In [None]:
# Step 2: Build embedding model
# get_embedding_model is imported from src.embedding and is called without
# arguments, so the choice of model is made there rather than in this notebook.
# The resulting object is passed both when the vector store is built and when
# it is loaded back from disk.
embedding_model = get_embedding_model()

In [None]:
# Step 3: Index the chunks in a vector store
# Pull the text and the metadata out of every chunk into two lists that
# share the same ordering, then hand both to the store builder.
texts = [chunk.page_content for chunk in chunks]
metadatas = [chunk.metadata for chunk in chunks]
vectorstore = build_vectorstore(
    texts,
    metadatas,
    embedding_model=embedding_model,
)


In [None]:
# Step 4: Save vector store locally
# store_path is a relative path (presumably resolved against the notebook's
# working directory -- confirm in save_vectorstore). The same variable is
# reused when the store is loaded back in the following step.
store_path = "../faiss_store"
save_vectorstore(vectorstore, path=store_path)

In [None]:
# Step 5: Load the saved vector store back from store_path
# (the QA chain itself is built from `vs` in the next cell).
# The same embedding_model used to build the store is passed to the loader.
vs = load_vectorstore(path=store_path, embedding_model=embedding_model)


In [None]:
# Step 6: Build the QA chain on the loaded store, ask a question, and
# print the answer followed by the metadata of each source document.
qa_chain = build_qa_chain(vs)

query = "What is this document about?"  # replace with your own
# Debugging aid, left disabled: prints the prompt rendered for this query.
# NOTE(review): assumes qa_chain exposes a `.prompt` attribute -- unverified.
#print(qa_chain.prompt.format_prompt(question=query, context="...").to_string())

result = qa_chain(query)

answer = result["result"]
print("Answer:\n", answer)

print("\nSources:")
sources = result["source_documents"]
for doc in sources:
    print(" -", doc.metadata)