In [17]:
# notebooks/3_rag_pipeline_testing.ipynb

import os
import sys

# --- Environment Setup & Diagnostics (Crucial for ModuleNotFoundError) ---
# This block ensures the notebook's working directory and Python path are correctly set.

# 1. Change the current working directory to the project root.
# The search walks upward from the current directory, so the notebook works
# whether the kernel was started in the project root, in 'notebooks/', or deeper.
def find_project_root(start_dir, root_name):
    """Return the nearest ancestor of ``start_dir`` whose folder is named ``root_name``.

    ``start_dir`` itself is checked first. Folder names are compared through
    ``os.path.normcase`` so a differently-cased folder still matches on Windows;
    on POSIX the comparison stays case-sensitive.

    Args:
        start_dir: Absolute directory to start from (normally ``os.getcwd()``).
        root_name: Name of the project's root folder.

    Returns:
        The path of the matching directory, or ``None`` if the top of the
        directory tree is reached without a match.
    """
    wanted = os.path.normcase(root_name)
    candidate = start_dir
    while True:
        if os.path.normcase(os.path.basename(candidate)) == wanted:
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # dirname() stops changing once the top of the tree is reached.
            return None
        candidate = parent


project_root_indicator = "my_rag_chatbot" # Your project's root folder name

try:
    current_dir = os.getcwd()
    detected_root = find_project_root(current_dir, project_root_indicator)

    if detected_root is not None:
        os.chdir(detected_root)
        print(f"Current Working Directory set to project root: {os.getcwd()}")
    else:
        print(f"Warning: Could not automatically set working directory to '{project_root_indicator}'. Current CWD: {os.getcwd()}")
        # Use the configured name so this hint stays correct if the folder is renamed.
        print(f"Please ensure you open VS Code from the '{project_root_indicator}' folder.")

except OSError as e:
    # getcwd()/chdir() report failures (deleted or inaccessible directory) as OSError.
    print(f"Error changing directory: {e}")

Current Working Directory set to project root: c:\Users\Saksham\Downloads\my_rag_chatbot


In [18]:

# 2. Make the project root importable.
# Python resolves 'src' and other top-level packages through sys.path, so the
# root is placed at the front of the list unless it is already listed.
project_root = os.path.abspath(os.curdir)
if sys.path.count(project_root) == 0:
    sys.path[:0] = [project_root]
    print(f"Added project root to sys.path: {project_root}")


In [19]:
# 3. Diagnostics: report the interpreter behind this kernel and every location
# it searches for modules, so a wrong environment is easy to spot.
print(f"Python Executable (Kernel): {sys.executable}")
print("\nPython System Path (sys.path):")
for path_entry in sys.path:
    print("-", path_entry)

# IMPORTANT: the executable above should live inside the project's '.venv'
# folder (its Scripts\python.exe on Windows). If it does not, the kernel is
# running in a different environment and the correct one has to be selected
# manually in VS Code's Jupyter interface (top-right of the notebook).
kernel_uses_venv = ".venv" in sys.executable
if not kernel_uses_venv:
    warning_lines = (
        "\n----------------------------------------------------------------------",
        "CRITICAL WARNING: The Jupyter kernel is NOT using your '.venv' environment!",
        f"The current kernel is: {sys.executable}",
        "You MUST manually change the kernel in VS Code.",
        "Steps:",
        "1. Click on the kernel selector in the top-right of this notebook (e.g., 'Python 3.10.x').",
        "2. From the dropdown, select 'Python Environments'.",
        "3. Find and explicitly select the entry that points to your virtual environment:",
        "   'Python 3.10.9 (.venv): C:\\Users\\Saksham\\Downloads\\my_rag_chatbot\\.venv\\Scripts\\python.exe'",
        "   (The Python version might vary slightly, but '.venv' and the path are key).",
        "4. Once selected, restart the kernel (click the circular arrow icon next to the kernel name).",
        "5. Run all cells again.",
        "----------------------------------------------------------------------",
    )
    for warning_line in warning_lines:
        print(warning_line)



Python Executable (Kernel): c:\Users\Saksham\Downloads\my_rag_chatbot\.venv\Scripts\python.exe

Python System Path (sys.path):
- c:\Users\Saksham\Downloads\my_rag_chatbot
- C:\Users\Saksham\AppData\Local\Programs\Python\Python310\python310.zip
- C:\Users\Saksham\AppData\Local\Programs\Python\Python310\DLLs
- C:\Users\Saksham\AppData\Local\Programs\Python\Python310\lib
- C:\Users\Saksham\AppData\Local\Programs\Python\Python310
- c:\Users\Saksham\Downloads\my_rag_chatbot\.venv
- 
- c:\Users\Saksham\Downloads\my_rag_chatbot\.venv\lib\site-packages
- c:\Users\Saksham\Downloads\my_rag_chatbot\.venv\lib\site-packages\win32
- c:\Users\Saksham\Downloads\my_rag_chatbot\.venv\lib\site-packages\win32\lib
- c:\Users\Saksham\Downloads\my_rag_chatbot\.venv\lib\site-packages\Pythonwin


In [20]:
# 4. (Optional, but recommended for stubborn ModuleNotFoundErrors):
# Directly install/re-verify langchain-community within this notebook's kernel.
# This is a last resort to ensure the package is present in the *active* kernel.
# You might see "Requirement already satisfied" if it's there.
print("\nAttempting to install/verify 'langchain-community' within the notebook kernel...")
# Removed the problematic comment from the shell command
!pip install langchain-community
!pip install --upgrade ipykernel # Ensure ipykernel is also up-to-date for the kernel
print("\nChecking installed 'langchain' related packages (if the correct kernel is active):")
# Use 'findstr' for Windows compatibility instead of 'grep'
!pip list | findstr /C:"langchain" /C:"sentence-transformers"


# --- End of Environment Setup & Diagnostics ---



Attempting to install/verify 'langchain-community' within the notebook kernel...

Checking installed 'langchain' related packages (if the correct kernel is active):


ERROR: Invalid requirement: '#': Expected package name at the start of dependency specifier
    #
    ^


langchain                 0.3.26
langchain-community       0.3.27
langchain-core            0.3.68
langchain-text-splitters  0.3.8
sentence-transformers     5.0.0


In [21]:
# --- Project and LangChain imports ---
# These rely on the project root being on sys.path (handled by the environment
# setup cells earlier in this notebook).
# Document is imported for type hinting and for creating dummy docs if needed.
from langchain_core.documents import Document
from src.generator import ResponseGenerator
from src.retriever import DocumentRetriever

print("\nSuccessfully imported DocumentRetriever, ResponseGenerator, and Document!")


# --- Placeholder for RAG Pipeline Testing Logic ---
# You can now add your actual RAG pipeline testing code here.

def run_rag_pipeline_test(
    query="What is the main topic of the documents?",
    question="Summarize the key information from the retrieved documents.",
    k=3,
    retriever_kwargs=None,
    generator_kwargs=None,
):
    """Smoke-test the RAG pipeline: retrieve documents, then generate a response.

    Placeholder test harness; replace or extend with your actual testing logic.
    Progress and results are printed. Any exception is caught, reported with a
    full traceback, and not re-raised, so the notebook keeps running.

    Args:
        query: Text passed to ``DocumentRetriever.retrieve_documents``.
        question: Text passed to ``ResponseGenerator.generate_response``.
        k: Number of documents requested from the retriever.
        retriever_kwargs: Keyword arguments for ``DocumentRetriever(...)``.
            Defaults to the original placeholder values.
        generator_kwargs: Keyword arguments for ``ResponseGenerator(...)``.
            Defaults to the original placeholder value.

    Returns:
        None.
    """
    import traceback

    # NOTE(review): the last recorded run failed with
    # "DocumentRetriever.__init__() got an unexpected keyword argument
    # 'embeddings_model_name'", so these placeholder defaults do not match the
    # current constructor. Pass `retriever_kwargs` that match src/retriever.py,
    # or update the defaults below once the real signature is confirmed.
    # Paths are relative to the project root (the expected working directory).
    if retriever_kwargs is None:
        retriever_kwargs = {
            "embeddings_model_name": "sentence-transformers/all-MiniLM-L6-v2",
            "faiss_index_path": "vectordb/faiss_index.faiss",
            "chunks_path": "chunks/processed_chunks.pkl",  # Assuming you save chunks as a pickle
        }
    if generator_kwargs is None:
        generator_kwargs = {"llm_model_name": "gpt-3.5-turbo"}  # Or your local LLM

    print("\n--- Running RAG Pipeline Test ---")

    try:
        # Initialize the retriever (may load the FAISS index and embeddings model).
        retriever = DocumentRetriever(**retriever_kwargs)
        print("DocumentRetriever initialized successfully.")

        # Perform a retrieval. The result is materialized once because it is
        # iterated twice below (preview listing, then context assembly).
        retrieved_docs = list(retriever.retrieve_documents(query, k=k))
        print(f"\nRetrieved documents for query: '{query}'")
        for i, doc in enumerate(retrieved_docs):
            print(f"  Doc {i+1}: {doc.page_content[:100]}...") # Print first 100 chars
            print(f"  Source: {doc.metadata.get('source', 'N/A')}")

        # Initialize the generator.
        generator = ResponseGenerator(**generator_kwargs)
        print("\nResponseGenerator initialized successfully.")

        # Generate a response from the concatenated document contents.
        context = " ".join([doc.page_content for doc in retrieved_docs])
        response = generator.generate_response(question, context)
        print(f"\nGenerated Response for question: '{question}'")
        print(response)

    except Exception as e:
        print(f"\nAn error occurred during RAG pipeline testing: {e}")
        traceback.print_exc() # Print full traceback for detailed error info

# Run the test pipeline
if __name__ == "__main__":
    run_rag_pipeline_test()



Successfully imported DocumentRetriever, ResponseGenerator, and Document!

--- Running RAG Pipeline Test ---

An error occurred during RAG pipeline testing: DocumentRetriever.__init__() got an unexpected keyword argument 'embeddings_model_name'


Traceback (most recent call last):
  File "C:\Users\Saksham\AppData\Local\Temp\ipykernel_45552\3976097940.py", line 30, in run_rag_pipeline_test
    retriever = DocumentRetriever(
TypeError: DocumentRetriever.__init__() got an unexpected keyword argument 'embeddings_model_name'
