# RAG Question Answering Demonstration

This notebook allows you to ask a question against a knowledge base stored in ChromaDB.

**Functionality:**
1. Takes your question as input.
2. Automatically finds the correct ChromaDB collection based on parameters in `config.json` (language, chunk size, overlap size).
3. Retrieves the most relevant context chunks from the database.
4. Displays the retrieved context.
5. **Optionally:** Queries a specified LLM (from `config.json`) with the question and context.
6. **Optionally:** Evaluates the LLM's answer against an expected answer you provide.

**Instructions:**
1. **Run Location:** Make sure you run this notebook from the `p_llm_manual/RAG` directory so it can find the necessary modules (`utils`, `llm_connectors`, etc.) and the `config.json` file.
2. **Configure Below:** Modify the variables in the 'User Configuration' cell below (especially `your_question`).
3. **Run Cells:** Execute the cells sequentially.

In [40]:
# --- Imports and Path Setup ---
import os
import sys
import json
import chromadb
import pathlib
from typing import Optional, List, Dict, Any

# Add the project root directory (p_llm_manual/RAG) to the Python path
print(f"Original working directory: {os.getcwd()}")
project_root = pathlib.Path(os.getcwd()).resolve() # Assumes notebook is run from RAG directory
if project_root.name != 'RAG':
    print("WARNING: Notebook might not be running from the 'p_llm_manual/RAG' directory. Trying to adjust path...")
    # Attempt to find the RAG directory if nested
    current_path = project_root
    while current_path.name != 'RAG' and current_path.parent != current_path:
        current_path = current_path.parent
    if current_path.name == 'RAG':
        project_root = current_path
        print(f"Adjusted project root to: {project_root}")
    else:
        print(f"ERROR: Could not reliably determine the project root ('RAG' directory). Imports might fail.")
        # Fallback to assuming current dir's parent is RAG if structure is known
        # project_root = pathlib.Path(os.getcwd()).resolve().parent # Example adjustment

if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))
    print(f"Added '{project_root}' to sys.path")
else:
    print(f"'{project_root}' already in sys.path")

# --- Project Imports ---
try:
    from utils.config_loader import ConfigLoader
    from llm_connectors.llm_connector_manager import LLMConnectorManager
    from llm_connectors.base_llm_connector import BaseLLMConnector
    from retrieval_pipelines.embedding_retriever import EmbeddingRetriever
    from evaluation.evaluator import Evaluator
    print("Successfully imported project modules.")
except ImportError as e:
    print(f"ERROR: Failed to import necessary project modules: {e}")
    print(f"Project Root: {project_root}")
    print(f"Sys Path: {sys.path}")
    print("Ensure the notebook is run from the 'p_llm_manual/RAG' directory or the path setup is correct.")
    # Raise the error to stop execution if imports fail
    raise

# --- Helper Function (from ask_question.py) ---
def find_llm_type(
    model_name: str, llm_configs: Dict[str, Dict[str, Any]]
) -> Optional[str]:
    """Finds the type ('ollama', 'gemini', etc.) of a given model name from the config."""
    for type_key, models in llm_configs.items():
        if model_name in models:
            return type_key
    return None

# --- Constants ---
DEFAULT_CONFIG_NAME = "config.json"
DEFAULT_DB_DIR_NAME = "chroma_db"
DEFAULT_EVALUATOR_LLM_TYPE = "ollama" # Fallback

Original working directory: c:\Users\pc.WINS-22MAIN\p_llm_manual\RAG
'C:\Users\pc.WINS-22MAIN\p_llm_manual\RAG' already in sys.path
Successfully imported project modules.


In [None]:
# --- User Configuration ---

# 1. The question you want to ask
your_question = input("Enter your question: ") # Get question interactively
# Example: your_question = "What is the maximum pressure?"

# 2. (Optional) Specify the LLM to use for answering (must be a key in config.json -> llm_models)
# Set to None to only retrieve context without querying an LLM.
llm_model_to_use = "qwen2.5_7B-128k" # Example: Use qwen2.5_7B-128k
# llm_model_to_use = None # Example: Only retrieve context

# 3. (Optional) Set to True if you want to evaluate the LLM's answer (requires llm_model_to_use to be set)
evaluate_answer_flag = True # Example: Evaluate the answer
# evaluate_answer_flag = False # Example: Do not evaluate

# 4. Configuration file name (relative to project root)
config_file_name = DEFAULT_CONFIG_NAME
# config_file_name = "config_fast.json" # If using a different config

# 5. ChromaDB directory name (relative to project root)
db_dir_name = DEFAULT_DB_DIR_NAME

# --- Print chosen configuration ---
print("--- Configuration Summary ---")
print(f"Question: {your_question}")
print(f"LLM Model for Answering: {llm_model_to_use if llm_model_to_use else 'None (Context Retrieval Only)'}")
print(f"Evaluate Answer: {evaluate_answer_flag if llm_model_to_use else 'N/A'}")
print(f"Config File: {config_file_name}")
print(f"Database Directory: {db_dir_name}")
print("---------------------------")

--- Configuration Summary ---
Question: What is the hopper weight without hopper cover in kg of the Exacta-TLX GEOSPREAD 3225?
LLM Model for Answering: qwen2.5_7B-128k
Evaluate Answer: True
Config File: config.json
Database Directory: chroma_db
---------------------------


In [42]:
# --- Load Configuration and Determine Parameters ---
print("[INFO] Loading configuration...")
config = None
rag_params = {}
language_configs = []
llm_models_config = {}
target_language = None
collection_base_name = None
chunk_size = None
overlap_size = None
top_k = 3 # Default top-k
dynamic_collection_name = None

config_path = project_root / config_file_name
db_path = project_root / db_dir_name

try:
    if not config_path.exists():
        raise FileNotFoundError(f"Configuration file not found: {config_path}")

    config_loader = ConfigLoader(str(config_path))
    config = config_loader.config
    rag_params = config_loader.get_rag_parameters()
    language_configs = config.get("language_configs", [])
    llm_models_config = config.get("llm_models", {})
    print(f"[INFO] Configuration loaded successfully from {config_path}")

    # Determine Language (using first in config as default)
    if language_configs:
        lang_config = language_configs[0] # Use the first language defined
        target_language = lang_config.get("language")
        collection_base_name = lang_config.get("collection_base_name")
        if not target_language or not collection_base_name:
            raise ValueError("First language_config entry is missing 'language' or 'collection_base_name'.")
        print(f"[INFO] Using language from config: '{target_language}'")
    else:
        raise ValueError("No 'language_configs' found in configuration.")

    # Determine Chunk Size (using first from config's rag_parameters)
    chunk_sizes_in_config = rag_params.get("chunk_sizes_to_test", [])
    if chunk_sizes_in_config:
        chunk_size = chunk_sizes_in_config[0]
        print(f"[INFO] Using chunk size from config: {chunk_size}")
    else:
        raise ValueError("'chunk_sizes_to_test' not found or empty in config's rag_parameters.")

    # Determine Overlap Size (using first from config's rag_parameters)
    overlap_sizes_in_config = rag_params.get("overlap_sizes_to_test", [])
    if overlap_sizes_in_config:
        overlap_size = overlap_sizes_in_config[0]
        print(f"[INFO] Using overlap size from config: {overlap_size}")
    else:
        raise ValueError("'overlap_sizes_to_test' not found or empty in config's rag_parameters.")

    # Determine Top K (using value from config's rag_parameters or default)
    top_k_config = rag_params.get("num_retrieved_docs")
    if top_k_config is not None:
        top_k = top_k_config
        print(f"[INFO] Using top-k from config: {top_k}")
    else:
        print(f"[INFO] 'num_retrieved_docs' not found in config, using default top-k: {top_k}")

    # Construct Collection Name
    dynamic_collection_name = f"{collection_base_name}_cs{chunk_size}_os{overlap_size}"
    print(f"[INFO] Target ChromaDB collection name: '{dynamic_collection_name}'")

except Exception as e:
    print(f"[ERROR] Failed to load configuration or determine parameters: {e}")
    # Stop execution if config fails
    raise

[INFO] Loading configuration...
[INFO] Configuration loaded successfully from C:\Users\pc.WINS-22MAIN\p_llm_manual\RAG\config.json
[INFO] Using language from config: 'english'
[INFO] Using chunk size from config: 200
[INFO] Using overlap size from config: 100
[INFO] Using top-k from config: 3
[INFO] Target ChromaDB collection name: 'english_manual_cs200_os100'


In [43]:
# --- Retrieve Context ---
print(f"[INFO] Attempting to connect to ChromaDB at: {db_path}")
retrieved_docs = []
retrieved_distances = []
context_string = ""
collection = None

try:
    if not db_path.exists() or not db_path.is_dir():
        raise FileNotFoundError(f"ChromaDB directory not found: {db_path}. Please ensure the database has been created.")

    # Initialize ChromaDB Client
    chroma_client = chromadb.PersistentClient(path=str(db_path))
    print("[INFO] ChromaDB client initialized.")

    # Try to get the collection directly - this is the check for existence
    print(f"[INFO] Attempting to get collection: '{dynamic_collection_name}'")
    try:
        collection = chroma_client.get_collection(name=dynamic_collection_name)
        print(f"[INFO] Successfully connected to collection '{dynamic_collection_name}'. It contains {collection.count()} items.")
    except Exception as get_collection_error:
        # Collection likely doesn't exist or another error occurred
        print(f"[DEBUG] Failed to get collection '{dynamic_collection_name}': {get_collection_error}") # Log the specific error
        # Try listing collections *only* for the error message
        available_collections_str = "(Could not list collections)"
        try:
            existing_collections_objects = chroma_client.list_collections()
            existing_collection_names = [col.name for col in existing_collections_objects]
            available_collections_str = ", ".join(existing_collection_names) if existing_collection_names else "None"
        except Exception as list_error:
            print(f"[WARNING] Also failed to list collections after failing to get one: {list_error}")
        
        # Raise a clear, user-friendly error including the original error
        raise ValueError(f"Collection '{dynamic_collection_name}' not found or could not be accessed in the database at {db_path}. Available collections: [{available_collections_str}]. Ensure it was created with the correct language, chunk size ({chunk_size}), and overlap ({overlap_size}). Original error: {get_collection_error}")

    # --- If collection was successfully retrieved, proceed --- 

    # Initialize Retriever and Vectorize Question
    print("[INFO] Initializing embedding retriever...")
    # TODO: Make embedding model configurable if needed, aligning with create_databases.py
    retriever = EmbeddingRetriever() # Uses default model
    print("[INFO] Vectorizing question...")
    question_embedding = retriever.vectorize_text(your_question)

    # Ensure correct embedding format for ChromaDB query (List[List[float]])
    if isinstance(question_embedding, list) and len(question_embedding) == 1 and isinstance(question_embedding[0], list):
        query_vector = question_embedding[0]
    else:
        # Should not happen with current EmbeddingRetriever, but good practice
        raise TypeError("Unexpected embedding format received from retriever.")
    print("[INFO] Question vectorized successfully.")

    # Retrieve Context
    print(f"[INFO] Retrieving top {top_k} relevant documents...")
    results = collection.query(
        query_embeddings=[query_vector], # Query expects List[List[float]]
        n_results=top_k,
        include=["documents", "distances"] # Include distances for info
    )
    print("[INFO] Retrieval complete.")

    # Extract results
    retrieved_docs = results.get("documents", [[]])[0]
    retrieved_distances = results.get("distances", [[]])[0]
    context_string = "\n\n".join(retrieved_docs)

except Exception as e:
    print(f"[ERROR] An error occurred during context retrieval: {e}")
    # Optionally re-raise to stop notebook execution
    # raise

# --- Display Context ---
print("\n" + "=" * 20 + " Retrieved Context " + "=" * 20)
if not retrieved_docs:
    print("No relevant documents found.")
else:
    for i, (doc, dist) in enumerate(zip(retrieved_docs, retrieved_distances)):
        print(f"\n--- Document {i + 1} (Distance: {dist:.4f}) ---")
        print(doc)
        print("-" * (len(f"--- Document {i + 1} (Distance: {dist:.4f}) ---")))
print("=" * 59) # Match length of header

[INFO] Attempting to connect to ChromaDB at: C:\Users\pc.WINS-22MAIN\p_llm_manual\RAG\chroma_db
[INFO] ChromaDB client initialized.
[INFO] Attempting to get collection: 'english_manual_cs200_os100'
[INFO] Successfully connected to collection 'english_manual_cs200_os100'. It contains 678 items.
[INFO] Initializing embedding retriever...


Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 241.53it/s]


[INFO] Vectorizing question...
[INFO] Question vectorized successfully.
[INFO] Retrieving top 3 relevant documents...
[INFO] Retrieval complete.


--- Document 1 (Distance: 0.6343) ---
Getting familiar with the machine
Technical specifications
                                Exacta-TLX GEOSPREAD                                                                                                1875                                                                                                2550                                                                                                3225                                                                                                3900
0                                            General                                                                                                                                                                                                                                                           

In [44]:
# --- (Optional) LLM Query ---
llm_answer = None

if llm_model_to_use:
    print(f"\n[INFO] Querying LLM: {llm_model_to_use}...")
    if not context_string:
        print("[WARNING] No context was retrieved. Querying LLM without context.")
        # context_string = "No context available." # Or handle as needed

    try:
        # Initialize LLM Manager
        llm_connector_manager = LLMConnectorManager(llm_models_config)

        # Determine LLM type
        llm_type = find_llm_type(llm_model_to_use, llm_models_config)
        if not llm_type:
            # Add more robust checking like in ask_question.py if needed
            raise ValueError(f"LLM '{llm_model_to_use}' not found in configuration under 'llm_models'.")
        print(f"[INFO] Determined LLM type: {llm_type}")

        # Get LLM Connector
        llm_connector = llm_connector_manager.get_connector(llm_type, llm_model_to_use)
        print(f"[INFO] LLM connector obtained for {llm_model_to_use}.")

        # Load Question Prompt
        question_prompt_template = config_loader.load_prompt_template("question_prompt")

        # Format Prompt
        formatted_prompt = question_prompt_template.format(
            context=context_string, question=your_question
        )
        # print(f"\n[DEBUG] Formatted Prompt:\n{formatted_prompt}\n") # Uncomment for debugging

        # Invoke LLM
        print("[INFO] Sending request to LLM...")
        llm_answer = llm_connector.invoke(formatted_prompt)
        print("[INFO] Received response from LLM.")

        # Display Answer
        print("\n--- LLM Answer ---")
        print(llm_answer)
        print("------------------")

    except FileNotFoundError as e:
        print(f"[ERROR] Prompt file error: {e}")
    except ValueError as e:
        print(f"[ERROR] Configuration or LLM setup error: {e}")
    except Exception as e:
        print(f"[ERROR] Error during LLM interaction: {e}")
        llm_answer = None # Ensure answer is None if error occurred
else:
    print("\n[INFO] LLM query skipped as no model was specified.")


[INFO] Querying LLM: qwen2.5_7B-128k...
[INFO] Determined LLM type: ollama
[INFO] LLM connector obtained for qwen2.5_7B-128k.
[INFO] Sending request to LLM...
[INFO] Received response from LLM.

--- LLM Answer ---
337.5 kg
------------------


In [45]:
# --- (Optional) Evaluate Answer ---
evaluation_result = None

if evaluate_answer_flag:
    if not llm_model_to_use:
        print("\n[INFO] Evaluation skipped: No LLM was used to generate an answer.")
    elif llm_answer is None:
        print("\n[INFO] Evaluation skipped: Failed to get an answer from the LLM.")
    else:
        print("\n--- Evaluating LLM Answer ---")
        try:
            expected_answer = input("Please provide the expected answer for evaluation: ")

            # Initialize Evaluator
            evaluator_model_name = config_loader.get_evaluator_model_name()
            if not evaluator_model_name:
                raise ValueError("Evaluator model name not found in config. Cannot evaluate.")

            evaluator_llm_type = find_llm_type(evaluator_model_name, llm_models_config)
            if not evaluator_llm_type:
                evaluator_llm_type = DEFAULT_EVALUATOR_LLM_TYPE
                print(f"[WARNING] Could not determine type for evaluator '{evaluator_model_name}'. Assuming '{evaluator_llm_type}'.")

            # Need LLM manager again if not created before (e.g., if LLM query cell was skipped but eval is true)
            if 'llm_connector_manager' not in locals():
                 llm_connector_manager = LLMConnectorManager(llm_models_config)

            evaluator_llm_connector = llm_connector_manager.get_connector(
                evaluator_llm_type, evaluator_model_name
            )
            evaluation_prompt_template = config_loader.load_prompt_template("evaluation_prompt")

            evaluator = Evaluator(evaluator_llm_connector, evaluation_prompt_template)
            print(f"[INFO] Evaluator initialized with model: {evaluator_model_name}")

            # Perform Evaluation
            print("[INFO] Sending request to Evaluator LLM...")
            evaluation_result = evaluator.evaluate_answer(
                question=your_question,
                model_answer=llm_answer,
                expected_answer=expected_answer,
            )
            print("[INFO] Received response from Evaluator LLM.")

            # Display Evaluation Result
            print(f"\n--- Evaluation Result (Expected: '{expected_answer}') ---")
            print(f"Judgment: {evaluation_result}")
            print("----------------------------------------------------")

        except FileNotFoundError as e:
            print(f"[ERROR] Evaluation prompt file error: {e}")
        except ValueError as e:
            print(f"[ERROR] Configuration or Evaluator setup error: {e}")
        except Exception as e:
            print(f"[ERROR] Error during evaluation: {e}")
else:
    print("\n[INFO] Evaluation skipped as requested.")


--- Evaluating LLM Answer ---
[INFO] Evaluator initialized with model: gemma3_12B-128k
[INFO] Sending request to Evaluator LLM...
[INFO] Received response from Evaluator LLM.

--- Evaluation Result (Expected: 'What is the hopper weight without hopper cover in kg of the Exacta-TLX GEOSPREAD 3225') ---
Judgment: no
----------------------------------------------------


## Notebook Finished

The script has completed its execution.