# BCG Multi-Agent & Multimodal AI Platform - RAG Implementation

This notebook demonstrates the Retrieval-Augmented Generation (RAG) components of the BCG Multi-Agent & Multimodal AI Platform, including:
1. Setting up advanced retrievers for document retrieval
2. Building effective prompts for the LLM
3. Generating accurate responses using the RAG pipeline
4. Testing various RAG use cases with BCG Sustainability Reports

In [None]:
import os
import sys
import logging
from pathlib import Path

# The notebook imports `configs` and `src` as top-level packages, so the
# parent of the current working directory has to be importable.
project_root = Path().resolve().parent
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.append(root_entry)

# Root logger: INFO and above, with timestamp, logger name and level.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

## 1. Setup and Configuration

In [None]:
from configs.config import (
    RAW_DATA_DIR,
    PROCESSED_DATA_DIR,
    EMBEDDINGS_DIR,
    VECTOR_STORE_PATH,
    LLM_MODEL,
    OLLAMA_BASE_URL,
)

# Report whether the persisted vector store directory exists and holds at
# least one entry. `is_dir()` guards `iterdir()`, which raises when the path
# exists but is a regular file.
print(f"Vector store path: {VECTOR_STORE_PATH}")
vector_store_exists = VECTOR_STORE_PATH.is_dir() and any(VECTOR_STORE_PATH.iterdir())
print(f"Vector store exists: {vector_store_exists}")

# Check if Ollama is available
import requests

# Seconds to wait for the Ollama health check. Without a timeout,
# `requests.get` can block indefinitely when the host is unreachable.
OLLAMA_CHECK_TIMEOUT = 5

try:
    response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=OLLAMA_CHECK_TIMEOUT)
    if response.status_code == 200:
        print(f"Ollama is available at {OLLAMA_BASE_URL}")
        models = response.json().get("models", [])
        if models:
            # `.get` keeps the listing alive if an entry has no "name" field.
            print(f"Available models: {[model.get('name', '<unknown>') for model in models]}")
        else:
            print("No models found in Ollama")
    else:
        print(f"Ollama API returned status code {response.status_code}")
except (requests.RequestException, ValueError) as e:
    # RequestException covers connection failures and timeouts; ValueError
    # covers a response body that is not valid JSON.
    print(f"Error connecting to Ollama: {str(e)}")
    print(f"Please ensure Ollama is running at {OLLAMA_BASE_URL} with the {LLM_MODEL} model loaded")

## 2. Initialize Embeddings Manager

First, we need to set up the embeddings manager that will handle our vector store operations.

In [None]:
from src.embeddings.embeddings_manager import EmbeddingsManager

# Build the embeddings manager on top of the configured vector store; model
# files are cached under the embeddings directory.
embeddings_manager = EmbeddingsManager(
    model_type="local",
    model_name="all-MiniLM-L6-v2",
    cache_folder=str(EMBEDDINGS_DIR / "models"),
    vector_store_dir=str(VECTOR_STORE_PATH),
    collection_name="bcg_sustainability_reports",
)

# Print every statistic the manager reports, one per line.
stats = embeddings_manager.get_statistics()
print("Embeddings Manager Statistics:")
for stat_name, stat_value in stats.items():
    print(f" - {stat_name}: {stat_value}")

## 3. Test Document Retrieval

Let's test the document retrieval capability to ensure we can fetch relevant chunks.

In [None]:
# Run a single MMR search and show the top three hits.
query = "What are BCG's climate commitments?"
search_results = embeddings_manager.search(query, k=3, use_mmr=True)

print(f"Query: '{query}'")
print(f"Retrieved {len(search_results)} documents\n")

for position, doc in enumerate(search_results, start=1):
    source_name = doc.metadata.get('filename', 'Unknown')
    # Long chunks are cut to their first 300 characters and marked with "...".
    if len(doc.page_content) > 300:
        content_line = f"Content: {doc.page_content[:300]}..."
    else:
        content_line = f"Content: {doc.page_content}"
    print(f"Document {position}:")
    print(f"Source: {source_name}")
    print(content_line)
    print()

## 4. Initialize Response Generator

Now, let's set up the response generator that will use the LLM to generate responses.

In [None]:
from src.rag.generator import ResponseGenerator

# Create the Ollama-backed response generator.
response_generator = ResponseGenerator(
    model_type="ollama",
    model_name=LLM_MODEL,
    ollama_base_url=OLLAMA_BASE_URL,
    temperature=0.2,
    streaming=True,  # True shows the generation in real time
)

# Smoke-test the generator with a short self-introduction prompt.
test_prompt = (
    "You are an AI assistant specialized in analyzing BCG Sustainability Reports.\n"
    "Please give a brief introduction of yourself in 3 sentences."
)

print("Testing response generator...\n")
response = response_generator.generate_response(test_prompt)
print("\nResponse:\n")
print(response)

## 5. Test Prompt Builder

Let's test the prompt builder to see how it structures prompts for the LLM.

In [None]:
from src.rag.prompt_builder import PromptBuilder

# Create the prompt builder.
prompt_builder = PromptBuilder(
    max_context_length=8000,
    include_source_documents=True,
)

# Retrieve three chunks for the question and turn them into a RAG prompt.
query = "What are BCG's commitments to achieve net zero?"
search_results = embeddings_manager.search(query, k=3, use_mmr=True)
rag_prompt = prompt_builder.build_rag_prompt(documents=search_results, query=query)

# Show only the first 1000 characters of the first message.
print("RAG Prompt Preview:")
first_message = rag_prompt.messages[0]
prompt_preview = first_message.content[:1000] + "..."
print(prompt_preview)

## 6. Set Up Enhanced Retriever

Now let's set up the enhanced retriever and test its retrieval capabilities.

In [None]:
from src.rag.retriever import EnhancedRetriever, MultiQueryRetriever

# Wrap the embeddings manager in an enhanced retriever with MMR enabled.
enhanced_retriever = EnhancedRetriever(
    embeddings_manager=embeddings_manager,
    use_mmr=True,
    fetch_k=10,
    lambda_mult=0.7,
    search_kwargs={"k": 3},
)

# Retrieve documents for a sample question and list them.
query = "What progress has BCG made in reducing its carbon emissions?"
docs = enhanced_retriever.get_relevant_documents(query)

print(f"Enhanced retriever found {len(docs)} documents for query: '{query}'\n")
for position, doc in enumerate(docs, start=1):
    source_name = doc.metadata.get('filename', 'Unknown')
    # Long chunks are cut to their first 300 characters and marked with "...".
    if len(doc.page_content) > 300:
        content_line = f"Content: {doc.page_content[:300]}..."
    else:
        content_line = f"Content: {doc.page_content}"
    print(f"Document {position}:")
    print(f"Source: {source_name}")
    print(content_line)
    print()

## 7. Test Multi-Query Retriever

Let's test the multi-query retriever, which generates variations of the original query to improve recall.

In [None]:
# Layer a multi-query retriever (three query variations) over the enhanced
# retriever, reusing the response generator's LLM.
multi_query_retriever = MultiQueryRetriever(
    llm=response_generator.get_llm(),
    base_retriever=enhanced_retriever,
    num_queries=3,
)

# Retrieve documents for a sample question and list them.
query = "How does BCG support diversity and inclusion?"
docs = multi_query_retriever.get_relevant_documents(query)

print(f"Multi-query retriever found {len(docs)} documents for query: '{query}'\n")
for position, doc in enumerate(docs, start=1):
    source_name = doc.metadata.get('filename', 'Unknown')
    # Long chunks are cut to their first 300 characters and marked with "...".
    if len(doc.page_content) > 300:
        content_line = f"Content: {doc.page_content[:300]}..."
    else:
        content_line = f"Content: {doc.page_content}"
    print(f"Document {position}:")
    print(f"Source: {source_name}")
    print(content_line)
    print()

## 8. Test Complete RAG Pipeline

Now let's test the complete RAG pipeline, which combines retrieval, prompt building, and response generation.

In [None]:
from src.rag.rag_pipeline import RAGPipeline

# Assemble the baseline pipeline: enhanced retriever, multi-query disabled.
rag_pipeline = RAGPipeline(
    embeddings_manager=embeddings_manager,
    response_generator=response_generator,
    retriever_type="enhanced",
    use_multi_query=False,  # plain retrieval first; multi-query comes later
    max_sources=4,
    include_sources=True,
)

# Ask one question end to end.
query = "What are BCG's key achievements in sustainability over the last three years?"
response = rag_pipeline.query(query)

print(f"Query: '{query}'\n")
print("Response:")
print(response.response)

# List the file names of at most the first three source documents.
print("\nSource Documents:")
for position, doc in enumerate(response.source_documents[:3], start=1):
    source_name = doc.metadata.get('filename', 'Unknown')
    print(f"Source {position}: {source_name}")

## 9. Compare RAG Pipeline with Different Retrievers

Let's compare the performance of the RAG pipeline with different retriever configurations.

In [None]:
# Second pipeline: identical settings except that multi-query retrieval is on.
multi_query_rag_pipeline = RAGPipeline(
    embeddings_manager=embeddings_manager,
    response_generator=response_generator,
    retriever_type="enhanced",
    use_multi_query=True,
    max_sources=4,
    include_sources=True,
)

# Send one question through both pipelines so the answers can be compared.
query = "What specific steps is BCG taking to reduce scope 3 emissions?"

print("Testing standard RAG pipeline...")
standard_response = rag_pipeline.query(query)

print("\nTesting multi-query RAG pipeline...")
multi_query_response = multi_query_rag_pipeline.query(query)

# Print the two answers one after the other.
print("\n--- Standard RAG Response ---")
print(standard_response.response)

print("\n--- Multi-Query RAG Response ---")
print(multi_query_response.response)

## 10. Generate Comparisons Across Years

Let's test the RAG pipeline's ability to compare information across different BCG sustainability reports.

In [None]:
# Ask the pipeline to compare one topic across the report years.
topic = "carbon emission reduction targets"
comparison_response = rag_pipeline.compare_across_years(topic)

# Heading, blank line, then the generated comparison.
print(f"Comparison of '{topic}' across years:\n", comparison_response.response, sep="\n")

## 11. Extract Key Metrics and Facts

Let's test the RAG pipeline's ability to extract key metrics and facts from the BCG sustainability reports.

In [None]:
# Pull facts of type "metrics" out of the reports.
metrics_response = rag_pipeline.extract_facts(fact_type="metrics")

# Heading, blank line, then the extracted metrics.
print("Extracted Metrics:\n", metrics_response.response, sep="\n")

In [None]:
# Pull facts of type "commitments" out of the reports.
commitments_response = rag_pipeline.extract_facts(fact_type="commitments")

# Heading, blank line, then the extracted commitments.
print("Extracted Commitments:\n", commitments_response.response, sep="\n")

## 12. Generate a Comprehensive Summary

Let's test the RAG pipeline's ability to generate a comprehensive summary of the BCG sustainability reports.

In [None]:
# Summarize the reports without restricting the topic.
summary_response = rag_pipeline.generate_summary()

# Heading, blank line, then the generated summary.
print("Summary of BCG Sustainability Reports:\n", summary_response.response, sep="\n")

In [None]:
# Summarize the reports with the summary restricted to one topic.
topic = "diversity, equity, and inclusion"
focused_summary_response = rag_pipeline.generate_summary(topic=topic)

# Heading, blank line, then the generated summary.
print(f"Summary of BCG's {topic} initiatives:\n", focused_summary_response.response, sep="\n")

## 13. Test RAG Pipeline with Specific Filters

Let's test the RAG pipeline with specific filters to focus on particular documents or years.

In [None]:
# Test filter by document (assuming we have multiple documents)
import re

# Run a broad search and collect the document IDs found in the results, so
# that one of them can be used as a metadata filter below.
query = "BCG's climate and sustainability initiatives"
all_docs = embeddings_manager.search(query, k=10)

# Keep the IDs in retrieval order. `dict.fromkeys` removes duplicates while
# preserving first-seen order, so the ID picked for the filter test follows
# the search ranking. Indexing into a plain set would pick an arbitrary
# element, which can differ between interpreter runs.
ranked_doc_ids = list(
    dict.fromkeys(
        doc.metadata["document_id"]
        for doc in all_docs
        if "document_id" in doc.metadata
    )
)
doc_ids = set(ranked_doc_ids)

print(f"Found documents with IDs: {doc_ids}\n")

# If we have at least one document ID, test filtering on the first one found.
if ranked_doc_ids:
    doc_id = ranked_doc_ids[0]
    print(f"Testing RAG pipeline with filter for document ID: {doc_id}")

    filter_criteria = {"document_id": doc_id}
    filtered_response = rag_pipeline.query(
        query="What are BCG's commitments to sustainability?",
        filter_criteria=filter_criteria,
    )

    print("\nFiltered Response:")
    print(filtered_response.response)
else:
    print("No document IDs found for filtering test.")

## 14. Test Complex Queries

Let's test the RAG pipeline with more complex, multi-part queries to evaluate its capabilities.

In [None]:
# A three-part question. The trailing spaces before each line break are part
# of the query text and are kept as they are.
complex_query = (
    "How has BCG's approach to climate action evolved over the last three years, \n"
    "and what specific achievements or initiatives have they implemented? \n"
    "Also, how does this compare to their diversity and inclusion efforts during the same period?"
)

complex_response = rag_pipeline.query(complex_query)

print(f"Complex Query: '{complex_query}'\n")
print("Response:", complex_response.response, sep="\n")

## 15. Evaluation of RAG Performance

Let's evaluate the performance of our RAG pipeline with some basic metrics.

In [None]:
# Questions used by the evaluation loop, one per line.
test_queries = [
    "What are BCG's carbon emission reduction targets?",
    "How does BCG approach diversity and inclusion?",
    "What community engagement initiatives does BCG participate in?",
    "How is BCG supporting climate action with its clients?",
    "What is BCG's strategy for responsible business practices?",
]

# Set up a simple evaluation function
def evaluate_response(query, response):
    """Score a generated answer with a few cheap, text-only heuristics.

    Args:
        query: The question that produced ``response``; echoed back unchanged.
        response: The generated answer text. ``None`` is treated as an empty
            answer instead of raising.

    Returns:
        A dict with the keys ``query``, ``has_citations`` (bool),
        ``potential_hallucination`` (bool) and ``word_count`` (int).
    """
    import re  # local import so the function works when cells run out of order

    text = response or ""

    # A citation is a complete bracketed marker such as "[Document 2]".
    # Testing for "[Document" and "]" separately would also accept a stray "]"
    # before the marker, or a marker that is never closed.
    has_citations = re.search(r"\[Document[^\[\]]*\]", text) is not None

    # Phrases flagged by this check. NOTE(review): they read as the model
    # saying it lacks information rather than as fabricated content; the
    # result key keeps its original name so existing consumers still work.
    hallucination_indicators = (
        "I don't have enough information",
        "not mentioned in the provided",
        "cannot provide specific",
        "don't have access to",
    )
    # Match regardless of letter case and of typographic apostrophes, both of
    # which vary in generated text.
    normalized = text.replace("\u2019", "'").casefold()
    potential_hallucination = any(
        indicator.casefold() in normalized for indicator in hallucination_indicators
    )

    # Whitespace-separated token count as a rough length measure.
    word_count = len(text.split())

    return {
        "query": query,
        "has_citations": has_citations,
        "potential_hallucination": potential_hallucination,
        "word_count": word_count,
    }

# Send every test query through the pipeline and score the answer.
evaluation_results = []
for query in test_queries:
    print(f"Testing query: '{query}'")
    response_obj = rag_pipeline.query(query)
    result = evaluate_response(query, response_obj.response)
    source_count = len(response_obj.source_documents)
    result["source_count"] = source_count
    evaluation_results.append(result)
    print(f"Result: {source_count} sources, {result['word_count']} words")
    print("-" * 50)

# Show the per-query results as a table.
import pandas as pd
eval_df = pd.DataFrame(evaluation_results)
display(eval_df)

# Aggregate figures: mean answer length, and the share of answers flagged by
# each boolean heuristic.
total_queries = len(eval_df)
print("\nEvaluation Summary:")
print(f"Average word count: {eval_df['word_count'].mean():.1f}")
citation_pct = eval_df['has_citations'].sum() / total_queries * 100
print(f"Percentage with citations: {citation_pct:.1f}%")
hallucination_pct = eval_df['potential_hallucination'].sum() / total_queries * 100
print(f"Percentage with potential hallucinations: {hallucination_pct:.1f}%")

## 16. Conclusion

In this notebook, we've successfully demonstrated the RAG components of the BCG Multi-Agent & Multimodal AI Platform. We've shown how the system can retrieve relevant information from BCG Sustainability Reports and generate accurate, contextually relevant responses to various queries.

Key accomplishments include:
1. Setting up enhanced retrievers for effective document retrieval
2. Building structured prompts for the LLM
3. Generating accurate responses using the complete RAG pipeline
4. Testing various RAG use cases including comparisons, summaries, and fact extraction
5. Evaluating the performance of the RAG system

These components form the foundation of our multi-agent system, which will be implemented in the next phase of the project.