In [1]:
# project setup

import sys
import os
from pathlib import Path

# Resolve the project root only once per kernel session. The root is taken to
# be the parent of the directory the kernel started in. Because this cell also
# changes the working directory, recomputing the root on a re-run would climb
# one directory higher every time, so an already-defined value is reused.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = Path().resolve().parent

# Relative paths used later in the notebook are resolved against the root.
os.chdir(PROJECT_ROOT)

# Keep the root at the front of sys.path so project modules can be imported,
# without stacking a duplicate entry each time the cell is re-run.
if sys.path[:1] != [str(PROJECT_ROOT)]:
    sys.path.insert(0, str(PROJECT_ROOT))

print("✅ PROJECT_ROOT:", PROJECT_ROOT)


✅ PROJECT_ROOT: C:\Users\rajku\Desktop\enterprise_knowledge_assistant


In [2]:
# import 

from pathlib import Path
import config

from src.ingestion.pdf_loader import ingest_pdf
from src.chunking.text_chunker import chunk_documents
from src.embeddings.embedding_utils import (
    load_embedding_model,
    generate_embeddings
)
from src.vector_store.faiss_store import (
    load_vector_store,
    add_to_faiss_index,
    save_vector_store
)


In [3]:
# Load the persisted vector store (FAISS index plus its companion documents
# object) from the path configured in config.VECTOR_STORE_PATH.
# NOTE(review): whether load_vector_store creates an empty store when the file
# is missing is not visible from this notebook — confirm in faiss_store.py.
# NOTE(review): the recorded output shows the store path ends in .pkl; if it is
# read with pickle, only load store files that come from a trusted source.

faiss_index, documents = load_vector_store(config.VECTOR_STORE_PATH)

# Report how many vectors the index holds before anything new is added.
print(f"📊 Existing vectors: {faiss_index.ntotal}")


✅ Vector store loaded from:
C:\Users\rajku\Desktop\enterprise_knowledge_assistant\data\processed\vector_store.pkl
📊 Existing vectors: 1737


In [5]:
# ingest new PDF documents
# The path is relative to the working directory, which the setup cell has
# switched to PROJECT_ROOT.

NEW_PDF = Path("data/documents/Kaushik2020.pdf")

# Fail fast with a clear message so a typo in the filename is caught here
# instead of surfacing somewhere inside the loader.
if not NEW_PDF.is_file():
    raise FileNotFoundError(f"PDF not found: {NEW_PDF.resolve()}")

new_documents = ingest_pdf(
    NEW_PDF,
    show_preview=False,
    verbose=True,
)


✅ Loading PDF: Kaushik2020.pdf
📄 Total pages loaded: 15


##### ⚠️ Change the filename in `NEW_PDF` (cell above) when adding a new PDF

In [6]:
# chunk new documents

# Chunking parameters, named so they are easy to find and tune.
# NOTE(review): these presumably need to match the settings used when the
# existing vector store was built — confirm before changing them.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

new_chunks = chunk_documents(
    new_documents,
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    preview_chunks=0,
    verbose=True,
)

print(f"✅ New chunks created: {len(new_chunks)}")


✂️ Chunking started
Chunk size     : 1000
Chunk overlap  : 200
Total docs in  : 15
📦 Total chunks created: 60
✅ New chunks created: 60


In [7]:
# Embed the freshly created chunks.
# The model is selected by the name configured in config.EMBEDDING_MODEL_NAME.

embedding_model = load_embedding_model(config.EMBEDDING_MODEL_NAME)

# Run the model over new_chunks with the progress bar switched on.
new_embeddings = generate_embeddings(model=embedding_model, documents=new_chunks, show_progress=True)


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
# Append the new embeddings and their chunks to the store loaded earlier,
# instead of rebuilding the index from scratch.
# NOTE(review): the recorded output (1737 -> 1797 vectors) indicates the index
# is updated in place; re-running this cell without reloading the store would
# presumably add the same vectors a second time — confirm and reload first.

add_to_faiss_index(
    new_embeddings=new_embeddings,
    new_documents=new_chunks,
    index=faiss_index,
    documents=documents,
)


➕ Added 60 new vectors to FAISS index
📊 Total vectors now: 1797


In [9]:
# Persist the updated index and documents back to the configured store path
# (config.VECTOR_STORE_PATH — the same path the store was loaded from).

save_vector_store(save_path=config.VECTOR_STORE_PATH, index=faiss_index, documents=documents)


💾 Vector store saved at:
C:\Users\rajku\Desktop\enterprise_knowledge_assistant\data\processed\vector_store.pkl


#### 🎉 New PDF added to the vector store without rebuilding it.