In [19]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS 
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import glob

In [20]:
Base_dir = os.getcwd()
DATA_DIR = os.path.join(Base_dir, '..', 'data', 'PDF')
DATA_DIR = os.path.abspath(DATA_DIR)

In [21]:

# Global variables
vector_store = None
bm25_retriever = None
pdf_texts = {}


In [22]:

def setup_fast_rag():
    """Ultra-fast RAG setup - loads once, stores in memory"""
    global vector_store, bm25_retriever, pdf_texts
    
    if not os.path.exists(DATA_DIR):
        print(f"⚠️ PDF directory not found: {DATA_DIR}")
        print("Creating directory...")
        os.makedirs(DATA_DIR, exist_ok=True)
        print("Please add PDF files to this directory and restart.")
        return False
    
    pdf_files = glob.glob(os.path.join(DATA_DIR, "*.pdf"))
    if not pdf_files:
        print(f"⚠️ No PDF files found in {DATA_DIR}")
        print("Please add PDF files to this directory for knowledge base functionality.")
        return False
    
    print(f"📚 Loading {len(pdf_files)} PDF files into memory...")
    all_docs = []
    
    for pdf_file in pdf_files:
        try:
            print(f"  📄 Loading: {os.path.basename(pdf_file)}")
            loader = PyPDFLoader(pdf_file)
            docs = loader.load()
            
            full_text = " ".join([doc.page_content for doc in docs])
            pdf_texts[pdf_file] = full_text
            
            all_docs.extend(docs)
        except Exception as e:
            print(f"⚠️ Error loading {pdf_file}: {e}")
            continue
    
    if not all_docs:
        print("⚠️ No documents could be loaded")
        return False
    
    try:
        print("🔄 Processing documents...")
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_documents(all_docs)
        
        print("🔄 Creating embeddings...")
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vector_store = FAISS.from_documents(chunks, embeddings)
        
        print("🔄 Setting up BM25 retriever...")
        bm25_retriever = BM25Retriever.from_documents(chunks)
        bm25_retriever.k = 15
        
        save_path = os.path.join(DATA_DIR, "vector_store_faiss")
        vector_store.save_local(save_path)
        print(f"✅ FAISS index saved at {save_path}")
        return True
        
    except Exception as e:
        print(f"❌ Error setting up RAG system: {e}")
        return False


In [23]:
setup_fast_rag()

📚 Loading 4 PDF files into memory...
  📄 Loading: car_knwoledge.pdf
  📄 Loading: Car_Maintencance_Guide.pdf
  📄 Loading: Crawfords_Auto_Repair_Guide.pdf
  📄 Loading: TheDriversGuidetoAutomotiveMaintenance.pdf
🔄 Processing documents...
🔄 Creating embeddings...


  from .autonotebook import tqdm as notebook_tqdm
  return forward_call(*args, **kwargs)


🔄 Setting up BM25 retriever...
✅ FAISS index saved at c:\Users\ibrah\Desktop\Agentic_Car\data\PDF\vector_store_faiss


True

In [24]:
def load_vector_store():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    save_path = os.path.join(DATA_DIR, "vector_store_faiss")
    if os.path.exists(save_path):
        print(f"📂 Loading existing FAISS index from {save_path}")
        return FAISS.load_local(save_path, embeddings, allow_dangerous_deserialization=True)
    else:
        print("⚠️ No saved index found, please run setup_fast_rag() once to build it.")
        return None

In [25]:
load_vector_store()

📂 Loading existing FAISS index from c:\Users\ibrah\Desktop\Agentic_Car\data\PDF\vector_store_faiss


<langchain_community.vectorstores.faiss.FAISS at 0x211d253bc50>

In [26]:
def debug_retriever(query, top_k=5):
    global vector_store
    if not vector_store:
        return "❌ Vector store not loaded."

    retriever = vector_store.as_retriever(search_kwargs={"k": top_k})
    results = retriever.get_relevant_documents(query)

    print(f"🔍 Query: {query}")
    print(f"✅ Retrieved {len(results)} chunks")

    for i, doc in enumerate(results, 1):
        src = os.path.basename(doc.metadata.get("source", "unknown.pdf"))
        page = doc.metadata.get("page", "N/A")
        snippet = doc.page_content[:300].replace("\n", " ") + "..."
        print(f"\n[{i}] {src} (page {page})")
        print(f"👉 {snippet}")

    return results


In [27]:
debug_retriever("how to change a tire")

🔍 Query: how to change a tire
✅ Retrieved 5 chunks

[1] Crawfords_Auto_Repair_Guide.pdf (page 44)
👉 Every vehicle has instructions for using the jack and other tools as necessary to change  the tire. Some of the details can vary by make and model, such as the storage location  for the jack and spare tire. Consult the owners manual of the vehicle. Also, there will  often be a sticker with instruct...

[2] Car_Maintencance_Guide.pdf (page 1)
👉 Step 1 Step 2 Step 3 Step 4 Locate the tire’s valve stem. Using a tire pressure gauge, hold the gauge down on the valve stem to get the  tire’s pressure reading. Perform this step for all of the tires on your vehicle. Compare the tire pressure to the information on the sticker located on the  driver...

[3] Car_Maintencance_Guide.pdf (page 7)
👉 Teen Driver Car Maintenance and Repair Guide Vehicle Maintenance and Repair Instructions 2. How to change a flat tire For help, view pictures of how to change a flat tire. A flat tire can occur anywhere and

  results = retriever.get_relevant_documents(query)
  return forward_call(*args, **kwargs)


[Document(id='6954e1cc-aef0-46c1-9b3b-b182b5548628', metadata={'producer': 'RAD PDF 2.20.2.0 - http://www.radpdf.com', 'creator': 'RAD PDF', 'creationdate': '2014-02-19T13:47:01-07:00', 'moddate': '2014-03-05T16:45:35-08:00', 'source': 'c:\\Users\\ibrah\\Desktop\\Agentic_Car\\data\\PDF\\Crawfords_Auto_Repair_Guide.pdf', 'total_pages': 84, 'page': 44, 'page_label': '45'}, page_content='Every vehicle has instructions for using the jack and other tools as necessary to change \nthe tire. Some of the details can vary by make and model, such as the storage location \nfor the jack and spare tire. Consult the owner\x01s manual of the vehicle. Also, there will \noften be a sticker with instructions located with the jack and spare tire.'),
 Document(id='10d56e99-4422-4420-a6fe-e74d125be88e', metadata={'producer': 'Adobe PDF Library 9.9', 'creator': 'Adobe InDesign CS5 (7.0)', 'creationdate': '2011-09-13T12:26:50-07:00', 'moddate': '2011-09-13T12:27:13-07:00', 'trapped': '/False', 'source': 'c:\\

In [29]:
def fast_rag_search(query, top_k=5, debug=True):
    """Hybrid RAG search with semantic + keyword results and debug mode"""
    global vector_store, bm25_retriever
    
    print(f"\n🔍 Searching PDF knowledge base for: '{query}'")
    
    if vector_store is None or bm25_retriever is None:
        return "❌ RAG system not initialized. Please check if PDF files are loaded correctly."
    
    try:
        faiss_retriever = vector_store.as_retriever(search_kwargs={"k": top_k})
        ensemble = EnsembleRetriever(
            retrievers=[bm25_retriever, faiss_retriever],
            weights=[0.4, 0.6]
        )
        
        docs = ensemble.get_relevant_documents(query)
        if not docs:
            print("❌ No relevant information found in PDFs")
            return "The PDF documents do not contain specific information about this topic."
        
        print(f"✅ Found {len(docs)} relevant chunks, showing top {min(3, len(docs))}")
        
        snippets = []
        for i, doc in enumerate(docs[:3], 1):
            source = os.path.basename(doc.metadata.get('source', 'unknown.pdf'))
            page = doc.metadata.get('page', 'N/A')
            content = doc.page_content.strip().replace("\n", " ")
            
            if debug:
                print(f"\n--- Chunk {i} ---")
                print(f"Source: {source}, Page: {page}")
                print(f"Content Preview: {content[:300]}...")  # only show first 300 chars
            
            snippets.append(f"📄 [{source} - page {page}]\n{content}")
        
        return "\n\n".join(snippets)
        
    except Exception as e:
        return f"❌ Error searching documents: {str(e)}"


In [30]:
print(fast_rag_search("how to change a tire", debug=True))



🔍 Searching PDF knowledge base for: 'how to change a tire'
✅ Found 18 relevant chunks, showing top 3

--- Chunk 1 ---
Source: Car_Maintencance_Guide.pdf, Page: 7
Content Preview: Teen Driver Car Maintenance and Repair Guide Vehicle Maintenance and Repair Instructions 2. How to change a flat tire For help, view pictures of how to change a flat tire. A flat tire can occur anywhere and at anytime, so knowing how to change a flat tire is  essential part of your vehicle repair an...

--- Chunk 2 ---
Source: Crawfords_Auto_Repair_Guide.pdf, Page: 45
Content Preview: Crawfords Guide to Beginners Auto Maintenance & Repair  www.CrawfordsAutoService.com  41  Standard spare tire kit with spare tire, lug wrench, and tire jack. This particular kit does not include chocks  for the other tires. Notice how the spare tire is smaller than the regular tire. It is only mean...

--- Chunk 3 ---
Source: Crawfords_Auto_Repair_Guide.pdf, Page: 44
Content Preview: Every vehicle has instructions for using the

  return forward_call(*args, **kwargs)
