In [1]:
from __future__ import annotations

# Notebook 5: Agentic RAG with Qdrant

## 🎯 Objectives

In this notebook, you'll learn:
- Building intelligent RAG agents that can reason about queries
- Multi-step query planning and execution
- Dynamic retrieval strategies based on query analysis
- Tool use and function calling within RAG workflows
- Self-correcting retrieval with quality assessment
- Building conversational agents with memory
- Advanced techniques: query rewriting, result fusion, and answer synthesis

## 📋 Prerequisites

- Understanding of RAG (Retrieval-Augmented Generation) concepts
- Familiarity with LLM APIs (OpenAI, Anthropic, or local models)
- Collections from previous notebooks for knowledge base
- Basic understanding of agent frameworks

In [2]:
# Load environment variables from .env for Jupyter
try:
    from dotenv import load_dotenv, find_dotenv
    load_dotenv(find_dotenv(), override=False)
    print("🔐 Loaded environment from .env")
except Exception as e:
    print(f"⚠️ Could not load .env via python-dotenv: {e}")


🔐 Loaded environment from .env


In [3]:
import os
import json
import time
from typing import Dict, List, Tuple, Optional, Any
import numpy as np
import pandas as pd
from dataclasses import dataclass
from enum import Enum
import re
from collections import defaultdict

from utils import (
    get_qdrant_client, create_sample_dataset, search_dense, 
    search_hybrid_fusion, mmr_rerank, print_system_info
)

from qdrant_client.models import Filter, FieldCondition, MatchValue, Range

print_system_info()
print("\n🤖 Agentic RAG Workshop")

# Check for LLM API availability
print("\n🔑 Checking LLM API availability...")
openai_key = os.getenv("OPENAI_API_KEY")
anthropic_key = os.getenv("ANTHROPIC_API_KEY")

if openai_key:
    print("✅ OpenAI API key found")
    LLM_PROVIDER = "openai"
elif anthropic_key:
    print("✅ Anthropic API key found")
    LLM_PROVIDER = "anthropic"
else:
    print("⚠️ No LLM API keys found. Using mock responses for demonstration.")
    LLM_PROVIDER = "mock"



🔧 System Information:
   Python: 3.9.6
   ✅ Qdrant Client: unknown
   ✅ NumPy: 2.0.2
   ✅ Pandas: 2.3.1
   ✅ Matplotlib: 3.9.4


   ✅ Scikit-learn: 1.6.1

🔧 Optional Dependencies:
   ✅ FastEmbed: 0.7.1
   ✅ OpenAI: 1.100.1


   ✅ Anthropic: 0.64.0

🔬 Environment: JupyterLab/Notebook detected

🤖 Agentic RAG Workshop

🔑 Checking LLM API availability...
✅ OpenAI API key found


In [4]:
# LLM clients
from typing import List, Dict, Any

# Optional SDKs
try:
    from openai import OpenAI
except Exception:
    OpenAI = None

class LLMClient:
    """Unified interface for LLM providers (OpenAI)."""
    def __init__(self, provider: str = "openai", model: str = "gpt-4o-mini"):
        self.provider = provider
        self.model = model
        if provider == "openai":
            if OpenAI is None:
                raise RuntimeError("OpenAI SDK not installed. Please `pip install openai`.")
            self.client = OpenAI()
        elif provider == "mock":
            self.client = None
        else:
            raise ValueError(f"Unsupported provider: {provider}")

    def chat_completion(self, messages: List[Dict[str, str]], temperature: float = 0.1) -> str:
        if self.provider == "openai":
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=temperature,
            )
            return response.choices[0].message.content
        elif self.provider == "mock":
            # Return a minimal valid JSON for analysis steps
            return (
                '{"query_type":"simple_factual","complexity":"medium","requires_multi_step":false,'
                '"key_entities":[],"intent":"answer the question","search_strategy":"hybrid",'
                '"expected_answer_type":"factual","reasoning":"mock analysis"}'
            )
        raise ValueError(f"Unsupported provider: {self.provider}")

# Initialize LLM based on available keys
if 'LLM_PROVIDER' in globals() and LLM_PROVIDER == 'openai':
    llm = LLMClient(provider="openai")
else:
    # Fallback to mock so the notebook can run without real API keys
    llm = LLMClient(provider="mock")

## 📦 Install Dependencies (if needed)

In [5]:
# Uncomment if running in a fresh environment
# !pip install openai anthropic tiktoken fastembed

## 🧠 Agentic RAG Architecture

Traditional RAG: Query → Retrieve → Generate  
Agentic RAG: Query → Plan → Multi-step Retrieve → Reason → Generate → Validate

In [6]:
class QueryType(Enum):
    """Types of queries that require different retrieval strategies"""
    SIMPLE_FACTUAL = "simple_factual"
    COMPLEX_ANALYTICAL = "complex_analytical"
    COMPARATIVE = "comparative"
    MULTI_HOP = "multi_hop"
    PROCEDURAL = "procedural"
    CONVERSATIONAL = "conversational"

@dataclass
class RetrievalPlan:
    """Plan for multi-step retrieval"""
    query_type: QueryType
    steps: List[Dict[str, Any]]
    search_strategy: str  # 'dense', 'sparse', 'hybrid', 'multi_vector'
    filters: Optional[Dict] = None
    rerank: bool = True
    max_results: int = 10
    confidence_threshold: float = 0.7

@dataclass
class AgentState:
    """Maintains agent conversation state"""
    conversation_history: List[Dict[str, str]]
    retrieved_context: List[Dict]
    current_query: str
    query_plan: Optional[RetrievalPlan] = None
    metadata: Dict = None
    
    def __post_init__(self):
        if self.metadata is None:
            self.metadata = {}

def explain_agentic_rag():
    """Explain the concepts behind agentic RAG"""
    print("🤖 Agentic RAG Architecture")
    print("=" * 40)
    
    print("\n🔄 Traditional RAG Limitations:")
    print("   • Single retrieval step")
    print("   • No query understanding")
    print("   • Fixed retrieval strategy")
    print("   • No result validation")
    print("   • Limited multi-turn capability")
    
    print("\n🧠 Agentic RAG Enhancements:")
    print("   🎯 Query Analysis:")
    print("      - Classify query type and complexity")
    print("      - Identify required information types")
    print("      - Plan multi-step retrieval strategy")
    
    print("   🔍 Intelligent Retrieval:")
    print("      - Adaptive search strategies")
    print("      - Dynamic filtering based on context")
    print("      - Multi-hop information gathering")
    
    print("   🔧 Tool Integration:")
    print("      - Query rewriting and expansion")
    print("      - Result synthesis and validation")
    print("      - External API integration")
    
    print("   💭 Reasoning:")
    print("      - Quality assessment of retrieved info")
    print("      - Gap identification and follow-up queries")
    print("      - Answer confidence scoring")
    
    print("\n🔄 Agentic RAG Workflow:")
    workflow = '''
    1. Query Analysis
       ├── Intent classification
       ├── Complexity assessment
       └── Strategy selection
    
    2. Retrieval Planning
       ├── Multi-step breakdown
       ├── Search strategy per step
       └── Quality thresholds
    
    3. Execution
       ├── Retrieve → Assess → Refine
       ├── Multi-hop follow-ups
       └── Result validation
    
    4. Synthesis
       ├── Information integration
       ├── Answer generation
       └── Confidence scoring
    '''
    print(workflow)

explain_agentic_rag()

🤖 Agentic RAG Architecture

🔄 Traditional RAG Limitations:
   • Single retrieval step
   • No query understanding
   • Fixed retrieval strategy
   • No result validation
   • Limited multi-turn capability

🧠 Agentic RAG Enhancements:
   🎯 Query Analysis:
      - Classify query type and complexity
      - Identify required information types
      - Plan multi-step retrieval strategy
   🔍 Intelligent Retrieval:
      - Adaptive search strategies
      - Dynamic filtering based on context
      - Multi-hop information gathering
   🔧 Tool Integration:
      - Query rewriting and expansion
      - Result synthesis and validation
      - External API integration
   💭 Reasoning:
      - Quality assessment of retrieved info
      - Gap identification and follow-up queries
      - Answer confidence scoring

🔄 Agentic RAG Workflow:

    1. Query Analysis
       ├── Intent classification
       ├── Complexity assessment
       └── Strategy selection
    
    2. Retrieval Planning
       ├── Multi

## ⚙️ Setup & LLM Integration

In [7]:
# Knowledge Base Setup (independent)
from utils import (
    get_qdrant_client,
    ensure_collection,
    create_sample_dataset,
    upsert_points_batch,
)
from qdrant_client.models import VectorParams, Distance
import numpy as np

DEFAULT_KNOWLEDGE_COLLECTION = "agentic_rag_demo"

# Connect to Qdrant
client = get_qdrant_client()

# Discover existing collections
try:
    collections_response = client.get_collections()
    existing_collections = [c.name for c in getattr(collections_response, "collections", [])]
except Exception:
    existing_collections = []

# Prefer richer workshop collections if available
preferred_order = ["workshop_hybrid", "workshop_fundamentals"]
KNOWLEDGE_COLLECTION = next((name for name in preferred_order if name in existing_collections), None)

# If no preferred collection exists, use default demo collection
if KNOWLEDGE_COLLECTION is None:
    KNOWLEDGE_COLLECTION = DEFAULT_KNOWLEDGE_COLLECTION

# Determine if we need to populate
needs_population = False
if KNOWLEDGE_COLLECTION in existing_collections:
    try:
        info = client.get_collection(KNOWLEDGE_COLLECTION)
        needs_population = int(getattr(info, "points_count", 0) or 0) == 0
    except Exception:
        needs_population = True
else:
    needs_population = True

if needs_population:
    print(f"✅ Creating and populating knowledge collection: {KNOWLEDGE_COLLECTION}")
    ensure_collection(
        client=client,
        collection_name=KNOWLEDGE_COLLECTION,
        vector_config=VectorParams(size=384, distance=Distance.COSINE),
        force_recreate=True,
    )
    df = create_sample_dataset(size=300, seed=123)
    vectors = np.random.randn(len(df), 384)
    vectors = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    upsert_points_batch(
        client=client,
        collection_name=KNOWLEDGE_COLLECTION,
        df=df,
        vectors=vectors,
        payload_cols=["text", "category", "lang", "timestamp"],
        batch_size=100,
    )
    print(f"✅ Added {len(df)} documents to {KNOWLEDGE_COLLECTION}")
else:
    print(f"📚 Using knowledge base: {KNOWLEDGE_COLLECTION}")

🌐 Using Qdrant Cloud cluster: https://a025094c-936b-4e1b-b947-67d686d20306.eu-central-1-0.aws.development-cloud.qdrant.io:6333


📚 Using knowledge base: workshop_hybrid


## 🔍 Query Analysis Agent

In [8]:
class QueryAnalyzer:
    """Analyzes queries to determine appropriate retrieval strategy"""
    
    def __init__(self, llm_client: LLMClient):
        self.llm = llm_client
    
    def analyze_query(self, query: str, conversation_history: List[Dict] = None) -> Dict:
        """Analyze query to determine type, complexity, and strategy"""
        
        analysis_prompt = self._build_analysis_prompt(query, conversation_history)
        
        messages = [
            {"role": "system", "content": "You are a query analysis expert. Analyze queries and return structured JSON responses."},
            {"role": "user", "content": analysis_prompt}
        ]
        
        response = self.llm.chat_completion(messages)
        
        try:
            analysis = json.loads(response)
        except json.JSONDecodeError:
            # Fallback analysis
            analysis = self._fallback_analysis(query)
        
        # Enrich with rule-based analysis
        analysis.update(self._rule_based_analysis(query))
        
        return analysis
    
    def _build_analysis_prompt(self, query: str, history: List[Dict] = None) -> str:
        """Build prompt for query analysis"""
        
        context = ""
        if history and len(history) > 0:
            recent_context = history[-3:] if len(history) > 3 else history
            context = "\n\nConversation context:\n"
            for turn in recent_context:
                context += f"User: {turn.get('user', '')}\nAssistant: {turn.get('assistant', '')}\n"
        
        return f'''
Analyze this query and return a JSON response with the following structure:
{{
    "query_type": "simple_factual|complex_analytical|comparative|multi_hop|procedural|conversational",
    "complexity": "low|medium|high", 
    "requires_multi_step": true/false,
    "key_entities": ["entity1", "entity2"],
    "intent": "what the user wants to achieve",
    "search_strategy": "dense|sparse|hybrid",
    "expected_answer_type": "factual|explanatory|comparative|step-by-step",
    "reasoning": "brief explanation of the analysis"
}}

Query: "{query}"{context}
        '''.strip()
    
    def _rule_based_analysis(self, query: str) -> Dict:
        """Rule-based query analysis as fallback/enhancement"""
        analysis = {}
        
        query_lower = query.lower()
        
        # Detect comparative queries
        comparative_words = ['vs', 'versus', 'compare', 'difference between', 'better than']
        if any(word in query_lower for word in comparative_words):
            analysis['is_comparative'] = True
            analysis['suggested_strategy'] = 'hybrid'  # Good for finding diverse perspectives
        
        # Detect procedural queries
        procedural_words = ['how to', 'step by step', 'guide', 'tutorial', 'instructions']
        if any(word in query_lower for word in procedural_words):
            analysis['is_procedural'] = True
            analysis['needs_ordering'] = True
        
        # Detect technical complexity
        technical_terms = ['algorithm', 'optimization', 'configuration', 'implementation']
        if any(term in query_lower for term in technical_terms):
            analysis['is_technical'] = True
            analysis['preferred_sources'] = 'technical'
        
        # Estimate required information breadth
        question_words = ['what', 'why', 'how', 'when', 'where', 'who']
        question_count = sum(1 for word in question_words if word in query_lower)
        
        if question_count > 1 or len(query.split()) > 10:
            analysis['is_complex'] = True
            analysis['needs_multiple_sources'] = True
        
        return analysis
    
    def _fallback_analysis(self, query: str) -> Dict:
        """Simple fallback analysis when LLM fails"""
        return {
            "query_type": "simple_factual",
            "complexity": "medium",
            "requires_multi_step": False,
            "search_strategy": "hybrid",
            "reasoning": "Fallback analysis used due to LLM unavailability"
        }

# Test query analyzer
analyzer = QueryAnalyzer(llm)

test_queries = [
    "What is vector search?",
    "Compare HNSW vs IVF algorithms for large-scale vector retrieval",
    "How do I set up a production-ready Qdrant cluster with monitoring?",
    "Why is my search performance degrading over time?"
]

print("🔍 Query Analysis Examples:")
print("=" * 50)

for query in test_queries:
    print(f"\n📝 Query: '{query}'")
    analysis = analyzer.analyze_query(query)
    
    print(f"   Type: {analysis.get('query_type', 'unknown')}")
    print(f"   Complexity: {analysis.get('complexity', 'unknown')}")
    print(f"   Strategy: {analysis.get('search_strategy', 'unknown')}")
    print(f"   Multi-step: {analysis.get('requires_multi_step', False)}")
    
    if 'reasoning' in analysis:
        print(f"   💭 {analysis['reasoning'][:80]}...")

🔍 Query Analysis Examples:

📝 Query: 'What is vector search?'


   Type: simple_factual
   Complexity: low
   Strategy: sparse
   Multi-step: False
   💭 The query is straightforward and seeks a definition or explanation of a specific...

📝 Query: 'Compare HNSW vs IVF algorithms for large-scale vector retrieval'


   Type: comparative
   Complexity: medium
   Strategy: sparse
   Multi-step: False
   💭 The query seeks a comparison between two specific algorithms, indicating a need ...

📝 Query: 'How do I set up a production-ready Qdrant cluster with monitoring?'


   Type: procedural
   Complexity: medium
   Strategy: hybrid
   Multi-step: True
   💭 The query seeks a procedural guide on setting up a specific technology (Qdrant) ...

📝 Query: 'Why is my search performance degrading over time?'


   Type: complex_analytical
   Complexity: medium
   Strategy: hybrid
   Multi-step: True
   💭 The query seeks to analyze a problem (degrading search performance) and requires...


## 📋 Retrieval Planner

In [9]:
class RetrievalPlanner:
    """Plans multi-step retrieval strategies based on query analysis"""
    
    def __init__(self, llm_client: LLMClient):
        self.llm = llm_client
    
    def create_plan(self, query: str, analysis: Dict) -> RetrievalPlan:
        """Create a detailed retrieval plan based on query analysis"""
        
        # Determine query type from analysis
        query_type_str = analysis.get('query_type', 'simple_factual')
        try:
            query_type = QueryType(query_type_str)
        except ValueError:
            query_type = QueryType.SIMPLE_FACTUAL
        
        # Create plan based on query type
        if query_type == QueryType.SIMPLE_FACTUAL:
            return self._plan_simple_factual(query, analysis)
        elif query_type == QueryType.COMPARATIVE:
            return self._plan_comparative(query, analysis)
        elif query_type == QueryType.COMPLEX_ANALYTICAL:
            return self._plan_complex_analytical(query, analysis)
        elif query_type == QueryType.MULTI_HOP:
            return self._plan_multi_hop(query, analysis)
        elif query_type == QueryType.PROCEDURAL:
            return self._plan_procedural(query, analysis)
        else:
            return self._plan_default(query, analysis)
    
    def _plan_simple_factual(self, query: str, analysis: Dict) -> RetrievalPlan:
        """Plan for simple factual queries"""
        steps = [
            {
                "action": "search",
                "strategy": "hybrid",
                "query": query,
                "limit": 5,
                "description": "Direct search for factual information"
            }
        ]
        
        return RetrievalPlan(
            query_type=QueryType.SIMPLE_FACTUAL,
            steps=steps,
            search_strategy="hybrid",
            rerank=False,  # Simple queries don't need reranking
            max_results=5
        )
    
    def _plan_comparative(self, query: str, analysis: Dict) -> RetrievalPlan:
        """Plan for comparative queries"""
        
        # Extract entities to compare (simplified)
        entities = analysis.get('key_entities', [])
        if not entities:
            # Simple extraction from query
            parts = query.lower().split(' vs ')
            if len(parts) == 2:
                entities = [part.strip() for part in parts]
        
        steps = [
            {
                "action": "search",
                "strategy": "hybrid",
                "query": query,
                "limit": 15,
                "description": "Broad search for comparative information"
            },
            {
                "action": "rerank",
                "method": "mmr",
                "lambda": 0.3,  # Favor diversity for comparison
                "description": "Diversify results to cover both sides"
            }
        ]
        
        # Add specific searches for each entity if identified
        for entity in entities[:2]:  # Limit to 2 main entities
            steps.append({
                "action": "search",
                "strategy": "dense",  # Focused search
                "query": entity,
                "limit": 3,
                "description": f"Focused search for {entity}"
            })
        
        return RetrievalPlan(
            query_type=QueryType.COMPARATIVE,
            steps=steps,
            search_strategy="hybrid",
            rerank=True,
            max_results=12
        )
    
    def _plan_complex_analytical(self, query: str, analysis: Dict) -> RetrievalPlan:
        """Plan for complex analytical queries"""
        steps = [
            {
                "action": "search",
                "strategy": "hybrid",
                "query": query,
                "limit": 20,
                "description": "Initial broad search"
            },
            {
                "action": "analyze_gaps",
                "description": "Identify information gaps"
            },
            {
                "action": "rerank",
                "method": "mmr",
                "lambda": 0.5,
                "description": "Balance relevance and diversity"
            },
            {
                "action": "validate",
                "description": "Assess result quality and completeness"
            }
        ]
        
        return RetrievalPlan(
            query_type=QueryType.COMPLEX_ANALYTICAL,
            steps=steps,
            search_strategy="hybrid",
            rerank=True,
            max_results=15,
            confidence_threshold=0.8
        )
    
    def _plan_procedural(self, query: str, analysis: Dict) -> RetrievalPlan:
        """Plan for procedural/how-to queries"""
        steps = [
            {
                "action": "search",
                "strategy": "sparse",  # Good for exact procedural terms
                "query": query,
                "limit": 10,
                "filter": {"category": ["howto", "guide", "tutorial"]},
                "description": "Search for procedural content"
            },
            {
                "action": "order_by_steps",
                "description": "Order results by logical step sequence"
            }
        ]
        
        return RetrievalPlan(
            query_type=QueryType.PROCEDURAL,
            steps=steps,
            search_strategy="sparse",
            rerank=False,  # Order matters more than diversity
            max_results=8
        )
    
    def _plan_multi_hop(self, query: str, analysis: Dict) -> RetrievalPlan:
        """Plan for multi-hop reasoning queries"""
        steps = [
            {
                "action": "search",
                "strategy": "hybrid",
                "query": query,
                "limit": 10,
                "description": "Initial search"
            },
            {
                "action": "extract_entities",
                "description": "Extract entities for follow-up"
            },
            {
                "action": "iterative_search",
                "max_hops": 2,
                "description": "Follow-up searches based on extracted entities"
            },
            {
                "action": "synthesize",
                "description": "Combine information from multiple hops"
            }
        ]
        
        return RetrievalPlan(
            query_type=QueryType.MULTI_HOP,
            steps=steps,
            search_strategy="hybrid",
            rerank=True,
            max_results=15
        )
    
    def _plan_default(self, query: str, analysis: Dict) -> RetrievalPlan:
        """Default plan for unclassified queries"""
        return self._plan_simple_factual(query, analysis)

# Test retrieval planner
planner = RetrievalPlanner(llm)

print("\n📋 Retrieval Planning Examples:")
print("=" * 50)

for query in test_queries:
    analysis = analyzer.analyze_query(query)
    plan = planner.create_plan(query, analysis)
    
    print(f"\n📝 Query: '{query}'")
    print(f"   🎯 Query Type: {plan.query_type.value}")
    print(f"   🔍 Strategy: {plan.search_strategy}")
    print(f"   📊 Max Results: {plan.max_results}")
    print(f"   🔄 Rerank: {plan.rerank}")
    print(f"   📈 Steps: {len(plan.steps)}")
    
    for i, step in enumerate(plan.steps, 1):
        print(f"     {i}. {step['action']}: {step.get('description', 'N/A')}")


📋 Retrieval Planning Examples:



📝 Query: 'What is vector search?'
   🎯 Query Type: simple_factual
   🔍 Strategy: hybrid
   📊 Max Results: 5
   🔄 Rerank: False
   📈 Steps: 1
     1. search: Direct search for factual information



📝 Query: 'Compare HNSW vs IVF algorithms for large-scale vector retrieval'
   🎯 Query Type: comparative
   🔍 Strategy: hybrid
   📊 Max Results: 12
   🔄 Rerank: True
   📈 Steps: 4
     1. search: Broad search for comparative information
     2. rerank: Diversify results to cover both sides
     3. search: Focused search for HNSW
     4. search: Focused search for IVF



📝 Query: 'How do I set up a production-ready Qdrant cluster with monitoring?'
   🎯 Query Type: procedural
   🔍 Strategy: sparse
   📊 Max Results: 8
   🔄 Rerank: False
   📈 Steps: 2
     1. search: Search for procedural content
     2. order_by_steps: Order results by logical step sequence



📝 Query: 'Why is my search performance degrading over time?'
   🎯 Query Type: complex_analytical
   🔍 Strategy: hybrid
   📊 Max Results: 15
   🔄 Rerank: True
   📈 Steps: 4
     1. search: Initial broad search
     2. analyze_gaps: Identify information gaps
     3. rerank: Balance relevance and diversity
     4. validate: Assess result quality and completeness


## 🔧 Advanced Retrieval Executor

In [10]:
class AdvancedRetriever:
    """Executes complex retrieval plans with multiple strategies"""
    
    def __init__(self, client, collection_name: str, llm_client: LLMClient):
        self.client = client
        self.collection = collection_name
        self.llm = llm_client
        self.query_cache = {}  # Simple query caching
    
    def execute_plan(self, plan: RetrievalPlan, original_query: str) -> List[Dict]:
        """Execute a retrieval plan step by step"""
        
        print(f"\n🚀 Executing retrieval plan for: '{original_query}'")
        print(f"   Plan type: {plan.query_type.value}")
        print(f"   Steps: {len(plan.steps)}")
        
        results = []
        context = {"original_query": original_query, "intermediate_results": []}
        
        for i, step in enumerate(plan.steps, 1):
            print(f"\n   📍 Step {i}: {step['action']}")
            
            step_results = self._execute_step(step, context)
            
            if step_results:
                results.extend(step_results)
                context["intermediate_results"].extend(step_results)
                print(f"      ✅ Retrieved {len(step_results)} results")
            else:
                print(f"      ⚠️ No results from this step")
        
        # Final result processing
        final_results = self._post_process_results(results, plan, original_query)
        
        print(f"\n✅ Plan execution complete: {len(final_results)} final results")
        return final_results
    
    def _execute_step(self, step: Dict, context: Dict) -> List[Dict]:
        """Execute a single step in the retrieval plan"""
        
        action = step.get('action', 'search')
        
        if action == 'search':
            return self._execute_search(step, context)
        elif action == 'rerank':
            return self._execute_rerank(step, context)
        elif action == 'filter':
            return self._execute_filter(step, context)
        elif action == 'analyze_gaps':
            return self._analyze_gaps(step, context)
        elif action == 'validate':
            return self._validate_results(step, context)
        elif action == 'extract_entities':
            return self._extract_entities(step, context)
        elif action == 'iterative_search':
            return self._iterative_search(step, context)
        else:
            print(f"      ⚠️ Unknown action: {action}")
            return []
    
    def _execute_search(self, step: Dict, context: Dict) -> List[Dict]:
        """Execute a search step"""
        
        query = step.get('query', context['original_query'])
        strategy = step.get('strategy', 'hybrid')
        limit = step.get('limit', 10)
        
        # Check cache first
        cache_key = f"{strategy}_{query}_{limit}"
        if cache_key in self.query_cache:
            print(f"      💾 Using cached results")
            return self.query_cache[cache_key]
        
        # Create mock query vector for demonstration
        np.random.seed(hash(query) % 2**32)
        query_vector = np.random.randn(384)
        query_vector = query_vector / np.linalg.norm(query_vector)
        
        # Apply filters if specified
        filter_condition = None
        if 'filter' in step:
            filter_condition = self._build_filter(step['filter'])
        
        try:
            if strategy == 'dense':
                results = search_dense(
                    client=self.client,
                    collection_name=self.collection,
                    query_vector=query_vector,
                    limit=limit,
                    filter_condition=filter_condition,
                    with_payload=True
                )
            
            elif strategy == 'hybrid':
                # Create mock sparse vector
                sparse_vector = {i: 1.0/len(query.split()) for i in range(len(query.split()[:10]))}
                
                results = search_hybrid_fusion(
                    client=self.client,
                    collection_name=self.collection,
                    dense_vector=query_vector,
                    sparse_vector=sparse_vector,
                    dense_weight=0.6,
                    limit=limit*2,
                    final_limit=limit,
                    filter_condition=filter_condition
                )
            
            else:  # sparse or fallback
                results = search_dense(
                    client=self.client,
                    collection_name=self.collection,
                    query_vector=query_vector,
                    limit=limit,
                    filter_condition=filter_condition,
                    with_payload=True
                )
            
            # Convert to dict format
            result_dicts = []
            for result in results:
                result_dict = {
                    'id': result.id,
                    'score': result.score,
                    'content': result.payload.get('text', ''),
                    'metadata': {
                        'category': result.payload.get('category', 'unknown'),
                        'lang': result.payload.get('lang', 'en'),
                        'timestamp': result.payload.get('timestamp', 0)
                    },
                    'retrieval_info': {
                        'strategy': strategy,
                        'step': step.get('description', 'search'),
                        'query': query
                    }
                }
                result_dicts.append(result_dict)
            
            # Cache results
            self.query_cache[cache_key] = result_dicts
            
            return result_dicts
            
        except Exception as e:
            print(f"      ❌ Search failed: {e}")
            return []
    
    def _execute_rerank(self, step: Dict, context: Dict) -> List[Dict]:
        """Execute MMR reranking on existing results"""
        
        if not context.get('intermediate_results'):
            print(f"      ⚠️ No results to rerank")
            return []
        
        method = step.get('method', 'mmr')
        lambda_param = step.get('lambda', 0.5)
        k = step.get('k', 10)
        
        if method == 'mmr':
            # Convert back to result objects for MMR
            results = context['intermediate_results'][-20:]  # Use recent results
            
            # Create mock vectors for MMR (in practice, you'd retrieve actual vectors)
            candidate_vectors = []
            for result in results:
                np.random.seed(result['id'])  # Consistent vector per ID
                vec = np.random.randn(384)
                candidate_vectors.append(vec / np.linalg.norm(vec))
            
            candidate_vectors = np.array(candidate_vectors)
            
            # Mock query vector
            query = context['original_query']
            np.random.seed(hash(query) % 2**32)
            query_vector = np.random.randn(384)
            query_vector = query_vector / np.linalg.norm(query_vector)
            
            # Apply MMR reranking (simplified mock)
            selected_indices = list(range(min(k, len(results))))
            np.random.shuffle(selected_indices)  # Mock MMR selection
            
            reranked_results = [results[i] for i in selected_indices]
            
            # Update metadata
            for result in reranked_results:
                result['retrieval_info']['reranked'] = True
                result['retrieval_info']['rerank_method'] = 'mmr'
                result['retrieval_info']['lambda'] = lambda_param
            
            return reranked_results
        
        return context.get('intermediate_results', [])
    
    def _analyze_gaps(self, step: Dict, context: Dict) -> List[Dict]:
        """Analyze information gaps in current results"""
        
        results = context.get('intermediate_results', [])
        if not results:
            return []
        
        # Simple gap analysis: check topic coverage
        original_query = context['original_query']
        query_terms = set(original_query.lower().split())
        
        covered_terms = set()
        for result in results:
            content_terms = set(result['content'].lower().split())
            covered_terms.update(content_terms.intersection(query_terms))
        
        missing_terms = query_terms - covered_terms
        
        if missing_terms:
            print(f"      📊 Gap analysis: Missing terms {missing_terms}")
            # Could trigger additional searches here
        else:
            print(f"      ✅ Good coverage of query terms")
        
        return []
    
    def _validate_results(self, step: Dict, context: Dict) -> List[Dict]:
        """Validate result quality and completeness"""
        
        results = context.get('intermediate_results', [])
        if not results:
            return []
        
        # Simple validation metrics
        avg_score = np.mean([r['score'] for r in results]) if results else 0
        score_std = np.std([r['score'] for r in results]) if results else 0
        
        # Content diversity check
        categories = set(r['metadata']['category'] for r in results)
        
        print(f"      📊 Validation - Avg score: {avg_score:.3f}, Diversity: {len(categories)} categories")
        
        # Mark high-quality results
        threshold = avg_score + 0.1
        for result in results:
            if result['score'] >= threshold:
                result['retrieval_info']['high_quality'] = True
        
        return []
    
    def _extract_entities(self, step: Dict, context: Dict) -> List[Dict]:
        """Extract entities for multi-hop reasoning"""
        
        results = context.get('intermediate_results', [])
        
        # Simple entity extraction (in practice, use NER)
        entities = set()
        for result in results[:5]:  # Top results only
            content = result['content']
            # Mock entity extraction
            words = content.split()
            # Extract capitalized words as mock entities
            for word in words:
                if word.istitle() and len(word) > 3:
                    entities.add(word)
        
        context['extracted_entities'] = list(entities)[:5]  # Limit entities
        print(f"      🔍 Extracted entities: {context['extracted_entities']}")
        
        return []
    
    def _iterative_search(self, step: Dict, context: Dict) -> List[Dict]:
        """Perform iterative search based on extracted entities"""
        
        entities = context.get('extracted_entities', [])
        if not entities:
            return []
        
        additional_results = []
        max_hops = step.get('max_hops', 2)
        
        for i, entity in enumerate(entities[:max_hops]):
            print(f"      🔍 Hop {i+1}: Searching for '{entity}'")
            
            entity_step = {
                'action': 'search',
                'query': entity,
                'strategy': 'dense',
                'limit': 3
            }
            
            entity_results = self._execute_search(entity_step, context)
            for result in entity_results:
                result['retrieval_info']['hop'] = i + 1
                result['retrieval_info']['hop_entity'] = entity
            
            additional_results.extend(entity_results)
        
        return additional_results
    
    def _build_filter(self, filter_spec: Dict) -> Filter:
        """Build Qdrant filter from specification"""
        
        conditions = []
        
        for field, values in filter_spec.items():
            if isinstance(values, list):
                # Multiple values - use should (OR)
                for value in values:
                    conditions.append(
                        FieldCondition(key=field, match=MatchValue(value=value))
                    )
            else:
                # Single value
                conditions.append(
                    FieldCondition(key=field, match=MatchValue(value=values))
                )
        
        return Filter(should=conditions) if len(conditions) > 1 else Filter(must=conditions)
    
    def _post_process_results(self, results: List[Dict], plan: RetrievalPlan, query: str) -> List[Dict]:
        """Final post-processing of all results"""
        
        # Remove duplicates
        seen_ids = set()
        unique_results = []
        
        for result in results:
            if result['id'] not in seen_ids:
                seen_ids.add(result['id'])
                unique_results.append(result)
        
        # Sort by score (descending)
        unique_results.sort(key=lambda x: x['score'], reverse=True)
        
        # Limit to max_results
        final_results = unique_results[:plan.max_results]
        
        # Add final metadata
        for i, result in enumerate(final_results):
            result['final_rank'] = i + 1
            result['retrieval_info']['plan_type'] = plan.query_type.value
        
        return final_results

# Test advanced retriever
if KNOWLEDGE_COLLECTION:
    retriever = AdvancedRetriever(client, KNOWLEDGE_COLLECTION, llm)
    
    # Test with a complex query
    test_query = "Compare HNSW vs IVF algorithms for large-scale vector retrieval"
    analysis = analyzer.analyze_query(test_query)
    plan = planner.create_plan(test_query, analysis)
    
    results = retriever.execute_plan(plan, test_query)
    
    print(f"\n🎯 Final Results Summary:")
    for i, result in enumerate(results[:3], 1):
        print(f"{i}. [{result['score']:.3f}] {result['content'][:60]}...")
        print(f"   Strategy: {result['retrieval_info']['strategy']}")
        if 'reranked' in result['retrieval_info']:
            print(f"   Reranked: {result['retrieval_info']['rerank_method']}")

else:
    print("⚠️ Skipping retriever test - no knowledge collection available")


🚀 Executing retrieval plan for: 'Compare HNSW vs IVF algorithms for large-scale vector retrieval'
   Plan type: comparative
   Steps: 4

   📍 Step 1: search


      ✅ Retrieved 15 results

   📍 Step 2: rerank
      ✅ Retrieved 10 results

   📍 Step 3: search
      ❌ Search failed: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"status":{"error":"Wrong input: Collection requires specified vector name in the request, available names: text_dense, text_sparse"},"time":0.000040672}'
      ⚠️ No results from this step

   📍 Step 4: search


      ❌ Search failed: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"status":{"error":"Wrong input: Collection requires specified vector name in the request, available names: text_dense, text_sparse"},"time":0.000030333}'
      ⚠️ No results from this step

✅ Plan execution complete: 12 final results

🎯 Final Results Summary:
1. [0.045] Guide: Data retention guidelines...
   Strategy: hybrid
   Reranked: mmr
2. [0.038] Learn about data retention guidelines...
   Strategy: hybrid
   Reranked: mmr
3. [0.038] Learn about data retention guidelines...
   Strategy: hybrid
   Reranked: mmr


## 🧮 Quality Assessment Agent

In [11]:
class QualityAssessor:
    """Assesses retrieval quality and suggests improvements"""
    
    def __init__(self, llm_client: LLMClient):
        self.llm = llm_client
    
    def assess_results(self, query: str, results: List[Dict], 
                      plan: RetrievalPlan) -> Dict[str, Any]:
        """Assess the quality of retrieval results"""
        
        if not results:
            return {
                "overall_quality": 0.0,
                "completeness": 0.0,
                "relevance": 0.0,
                "diversity": 0.0,
                "needs_improvement": True,
                "suggestions": ["No results found - revise search strategy"]
            }
        
        # Quantitative metrics
        relevance_score = self._assess_relevance(query, results)
        completeness_score = self._assess_completeness(query, results, plan)
        diversity_score = self._assess_diversity(results)
        coverage_score = self._assess_coverage(query, results)
        
        # Overall quality (weighted average)
        overall_quality = (
            0.4 * relevance_score +
            0.3 * completeness_score +
            0.2 * diversity_score +
            0.1 * coverage_score
        )
        
        # LLM-based assessment for qualitative insights
        llm_assessment = self._llm_quality_assessment(query, results)
        
        # Generate suggestions
        suggestions = self._generate_suggestions(
            query, results, plan, 
            relevance_score, completeness_score, diversity_score
        )
        
        assessment = {
            "overall_quality": overall_quality,
            "metrics": {
                "relevance": relevance_score,
                "completeness": completeness_score,
                "diversity": diversity_score,
                "coverage": coverage_score
            },
            "llm_insights": llm_assessment,
            "needs_improvement": overall_quality < 0.7,
            "suggestions": suggestions,
            "result_count": len(results),
            "avg_score": np.mean([r['score'] for r in results]) if results else 0
        }
        
        return assessment
    
    def _assess_relevance(self, query: str, results: List[Dict]) -> float:
        """Assess relevance using score distribution and content matching"""
        
        if not results:
            return 0.0
        
        # Use similarity scores as primary relevance indicator
        scores = [r['score'] for r in results]
        avg_score = np.mean(scores)
        
        # Content relevance (simplified keyword matching)
        query_terms = set(query.lower().split())
        content_relevance = []
        
        for result in results:
            content_terms = set(result['content'].lower().split())
            overlap = len(query_terms.intersection(content_terms))
            relevance = overlap / len(query_terms) if query_terms else 0
            content_relevance.append(relevance)
        
        content_score = np.mean(content_relevance)
        
        # Combine similarity and content relevance
        return 0.7 * avg_score + 0.3 * content_score
    
    def _assess_completeness(self, query: str, results: List[Dict], 
                           plan: RetrievalPlan) -> float:
        """Assess completeness based on query type and result count"""
        
        result_count = len(results)
        expected_count = plan.max_results
        
        # Base completeness on result count ratio
        count_score = min(1.0, result_count / expected_count)
        
        # Adjust based on query type
        if plan.query_type == QueryType.COMPARATIVE:
            # Comparative queries should have diverse sources
            categories = set(r['metadata']['category'] for r in results)
            diversity_bonus = min(0.3, len(categories) * 0.1)
            count_score += diversity_bonus
        
        elif plan.query_type == QueryType.MULTI_HOP:
            # Multi-hop queries should have results from different hops
            hops = set(r['retrieval_info'].get('hop', 0) for r in results)
            hop_bonus = min(0.2, len(hops) * 0.1)
            count_score += hop_bonus
        
        return min(1.0, count_score)
    
    def _assess_diversity(self, results: List[Dict]) -> float:
        """Assess diversity of results"""
        
        if len(results) <= 1:
            return 1.0 if len(results) == 1 else 0.0
        
        # Category diversity
        categories = [r['metadata']['category'] for r in results]
        unique_categories = len(set(categories))
        category_diversity = unique_categories / len(categories)
        
        # Content diversity (simplified using length variation)
        content_lengths = [len(r['content']) for r in results]
        length_std = np.std(content_lengths)
        length_diversity = min(1.0, length_std / 100)  # Normalize
        
        # Score diversity
        scores = [r['score'] for r in results]
        score_std = np.std(scores)
        score_diversity = min(1.0, score_std * 2)  # Boost diversity score
        
        return 0.5 * category_diversity + 0.3 * length_diversity + 0.2 * score_diversity
    
    def _assess_coverage(self, query: str, results: List[Dict]) -> float:
        """Assess how well results cover the query topics"""
        
        if not results:
            return 0.0
        
        query_terms = set(query.lower().split())
        covered_terms = set()
        
        for result in results:
            content_terms = set(result['content'].lower().split())
            covered_terms.update(content_terms.intersection(query_terms))
        
        coverage = len(covered_terms) / len(query_terms) if query_terms else 1.0
        return coverage
    
    def _llm_quality_assessment(self, query: str, results: List[Dict]) -> Dict:
        """Get qualitative assessment from LLM"""
        
        # Prepare content sample for LLM
        content_sample = "\n\n".join([
            f"Result {i+1}: {r['content'][:100]}..."
            for i, r in enumerate(results[:3])
        ])
        
        prompt = f'''
Assess the quality of these search results for the query: "{query}"

Results:
{content_sample}

Return a JSON assessment with:
{{
    "quality_score": 0.0-1.0,
    "strengths": ["list of strengths"],
    "weaknesses": ["list of weaknesses"],
    "missing_topics": ["topics not covered"],
    "recommendation": "overall recommendation"
}}
        '''.strip()
        
        messages = [
            {"role": "system", "content": "You are an expert at evaluating search result quality."},
            {"role": "user", "content": prompt}
        ]
        
        try:
            response = self.llm.chat_completion(messages)
            return json.loads(response)
        except Exception as e:
            return {
                "quality_score": 0.7,
                "strengths": ["Results retrieved successfully"],
                "weaknesses": ["LLM assessment unavailable"],
                "missing_topics": [],
                "recommendation": "Results appear relevant but detailed assessment unavailable"
            }
    
    def _generate_suggestions(self, query: str, results: List[Dict], 
                            plan: RetrievalPlan, relevance: float, 
                            completeness: float, diversity: float) -> List[str]:
        """Generate improvement suggestions"""
        
        suggestions = []
        
        # Relevance suggestions
        if relevance < 0.6:
            suggestions.append("Consider query rewriting or expansion for better relevance")
            if plan.search_strategy == "dense":
                suggestions.append("Try hybrid search to capture keyword matches")
        
        # Completeness suggestions
        if completeness < 0.7:
            suggestions.append("Increase search limit or reduce filters to get more results")
            if plan.query_type == QueryType.COMPLEX_ANALYTICAL:
                suggestions.append("Consider multi-step retrieval with follow-up queries")
        
        # Diversity suggestions
        if diversity < 0.5:
            suggestions.append("Apply MMR reranking with lower lambda (0.3-0.4) for more diversity")
            suggestions.append("Expand search to include more content categories")
        
        # Query-type specific suggestions
        if plan.query_type == QueryType.COMPARATIVE and len(results) < 6:
            suggestions.append("Comparative queries benefit from more diverse results - increase limit")
        
        if plan.query_type == QueryType.PROCEDURAL:
            suggestions.append("For procedural queries, prioritize step-by-step and tutorial content")
        
        # Result quality suggestions
        if results and np.mean([r['score'] for r in results]) < 0.5:
            suggestions.append("Low similarity scores - consider query reformulation")
        
        if len(results) == 0:
            suggestions.append("No results found - broaden search terms or reduce filters")
        
        return suggestions if suggestions else ["Results quality is acceptable"]

# Test quality assessor
assessor = QualityAssessor(llm)

if 'results' in locals() and results:
    assessment = assessor.assess_results(test_query, results, plan)
    
    print(f"\n📊 Quality Assessment for: '{test_query}'")
    print("=" * 50)
    print(f"Overall Quality: {assessment['overall_quality']:.3f}")
    print(f"\n📈 Detailed Metrics:")
    for metric, score in assessment['metrics'].items():
        print(f"   {metric.capitalize()}: {score:.3f}")
    
    print(f"\n💡 Suggestions:")
    for i, suggestion in enumerate(assessment['suggestions'], 1):
        print(f"   {i}. {suggestion}")
    
    if assessment['llm_insights'].get('recommendation'):
        print(f"\n🧠 LLM Recommendation: {assessment['llm_insights']['recommendation']}")


📊 Quality Assessment for: 'Compare HNSW vs IVF algorithms for large-scale vector retrieval'
Overall Quality: 0.345

📈 Detailed Metrics:
   Relevance: 0.018
   Completeness: 1.000
   Diversity: 0.188
   Coverage: 0.000

💡 Suggestions:
   1. Consider query rewriting or expansion for better relevance
   2. Apply MMR reranking with lower lambda (0.3-0.4) for more diversity
   3. Expand search to include more content categories
   4. Low similarity scores - consider query reformulation

🧠 LLM Recommendation: Results appear relevant but detailed assessment unavailable


## 🤖 Complete Agentic RAG System

In [12]:
class AgenticRAGAgent:
    """Complete agentic RAG system that combines all components"""
    
    def __init__(self, client, collection_name: str, llm_client: LLMClient):
        self.client = client
        self.collection = collection_name
        self.llm = llm_client
        
        # Initialize components
        self.analyzer = QueryAnalyzer(llm_client)
        self.planner = RetrievalPlanner(llm_client)
        self.retriever = AdvancedRetriever(client, collection_name, llm_client)
        self.assessor = QualityAssessor(llm_client)
        
        # Agent state
        self.state = AgentState(
            conversation_history=[],
            retrieved_context=[],
            current_query=""
        )
        
        self.max_iterations = 3  # Prevent infinite loops
    
    def query(self, user_query: str, context: Dict = None) -> Dict[str, Any]:
        """Process a user query through the complete agentic RAG pipeline"""
        
        print(f"\n🤖 Agentic RAG Processing: '{user_query}'")
        print("=" * 60)
        
        self.state.current_query = user_query
        start_time = time.time()
        
        # Phase 1: Query Analysis
        print(f"\n1️⃣ Query Analysis")
        analysis = self.analyzer.analyze_query(
            user_query, self.state.conversation_history
        )
        
        print(f"   Query type: {analysis.get('query_type', 'unknown')}")
        print(f"   Complexity: {analysis.get('complexity', 'unknown')}")
        print(f"   Multi-step: {analysis.get('requires_multi_step', False)}")
        
        # Phase 2: Retrieval Planning
        print(f"\n2️⃣ Retrieval Planning")
        plan = self.planner.create_plan(user_query, analysis)
        self.state.query_plan = plan
        
        print(f"   Strategy: {plan.search_strategy}")
        print(f"   Steps: {len(plan.steps)}")
        print(f"   Max results: {plan.max_results}")
        
        # Phase 3: Retrieval Execution (with possible refinement)
        print(f"\n3️⃣ Retrieval Execution")
        
        best_results = []
        best_quality = 0.0
        best_assessment = {'overall_quality': 0.0, 'needs_improvement': True}
        iteration = 0
        
        while iteration < self.max_iterations:
            iteration += 1
            print(f"\n   Iteration {iteration}:")
            
            # Execute current plan
            results = self.retriever.execute_plan(plan, user_query)
            
            if not results:
                print(f"   ⚠️ No results - trying fallback strategy")
                plan = self._create_fallback_plan(user_query)
                results = self.retriever.execute_plan(plan, user_query)
            
            # Phase 4: Quality Assessment
            print(f"\n4️⃣ Quality Assessment")
            assessment = self.assessor.assess_results(user_query, results, plan)
            
            quality_score = assessment['overall_quality']
            print(f"   Quality score: {quality_score:.3f}")
            print(f"   Needs improvement: {assessment['needs_improvement']}")
            
            # Keep best results
            if quality_score > best_quality:
                best_results = results
                best_quality = quality_score
                best_assessment = assessment
            
            # Check if we should refine
            if not assessment['needs_improvement'] or iteration >= self.max_iterations:
                break
            
            # Phase 5: Plan Refinement
            print(f"\n5️⃣ Plan Refinement")
            plan = self._refine_plan(plan, assessment, user_query)
            
        # Phase 6: Answer Generation
        print(f"\n6️⃣ Answer Generation")
        final_answer = self._generate_answer(
            user_query, best_results, best_assessment
        )
        
        # Update state
        self.state.retrieved_context = best_results
        self.state.conversation_history.append({
            "user": user_query,
            "assistant": final_answer,
            "timestamp": time.time(),
            "quality_score": best_quality
        })
        
        total_time = time.time() - start_time
        
        return {
            "answer": final_answer,
            "results": best_results,
            "assessment": best_assessment,
            "metadata": {
                "query_type": analysis.get('query_type'),
                "iterations": iteration,
                "total_time": total_time,
                "result_count": len(best_results),
                "quality_score": best_quality
            }
        }
    
    def _create_fallback_plan(self, query: str) -> RetrievalPlan:
        """Create a simple fallback plan when main plan fails"""
        
        return RetrievalPlan(
            query_type=QueryType.SIMPLE_FACTUAL,
            steps=[
                {
                    "action": "search",
                    "strategy": "hybrid",
                    "query": query,
                    "limit": 10
                }
            ],
            search_strategy="hybrid",
            max_results=10
        )
    
    def _refine_plan(self, plan: RetrievalPlan, assessment: Dict, query: str) -> RetrievalPlan:
        """Refine retrieval plan based on quality assessment"""
        
        suggestions = assessment.get('suggestions', [])
        
        # Create refined plan
        refined_steps = []
        
        for suggestion in suggestions:
            if "hybrid search" in suggestion.lower():
                refined_steps.append({
                    "action": "search",
                    "strategy": "hybrid",
                    "query": query,
                    "limit": plan.max_results * 2
                })
            
            elif "mmr" in suggestion.lower():
                refined_steps.append({
                    "action": "rerank",
                    "method": "mmr",
                    "lambda": 0.3  # More diversity
                })
            
            elif "increase" in suggestion.lower() and "limit" in suggestion.lower():
                # Modify existing search steps to increase limit
                for step in plan.steps:
                    if step.get('action') == 'search':
                        step['limit'] = step.get('limit', 10) * 2
        
        # If we have refinement suggestions, use them
        if refined_steps:
            plan.steps = refined_steps
        else:
            # Fallback refinement: increase limits
            plan.max_results = min(20, plan.max_results * 2)
            for step in plan.steps:
                if 'limit' in step:
                    step['limit'] = min(20, step['limit'] * 2)
        
        return plan
    
    def _generate_answer(self, query: str, results: List[Dict], assessment: Dict) -> str:
        """Generate final answer using retrieved context"""
        
        if not results:
            return "I couldn't find relevant information to answer your question. Please try rephrasing your query or being more specific."
        
        # Prepare context from results
        context_parts = []
        for i, result in enumerate(results[:5], 1):  # Top 5 results
            context_parts.append(
                f"Source {i}: {result['content'][:200]}..."
            )
        
        context_text = "\n\n".join(context_parts)
        
        # Generate answer using LLM
        prompt = f'''
Based on the following retrieved information, provide a comprehensive and accurate answer to the user's question.

User Question: {query}

Retrieved Context:
{context_text}

Instructions:
- Use only the information provided in the context
- Be specific and cite sources when possible
- If the context doesn't fully answer the question, acknowledge the limitations
- Structure your answer clearly and logically
        '''.strip()
        
        messages = [
            {"role": "system", "content": "You are a helpful AI assistant that answers questions based on retrieved information."},
            {"role": "user", "content": prompt}
        ]
        
        try:
            answer = self.llm.chat_completion(messages, temperature=0.1)
            
            # Add quality note if assessment indicates issues
            if assessment['overall_quality'] < 0.7:
                answer += "\n\n*Note: The retrieved information may be incomplete. Consider asking more specific questions or trying different search terms.*"
            
            return answer
            
        except Exception as e:
            print(f"   ⚠️ Answer generation failed: {e}")
            
            # Fallback answer
            return f"Based on the retrieved information, I found {len(results)} relevant sources that discuss {query}. However, I'm unable to generate a detailed answer at the moment. Please review the source materials directly."
    
    def get_conversation_history(self) -> List[Dict]:
        """Get conversation history"""
        return self.state.conversation_history
    
    def clear_history(self):
        """Clear conversation history"""
        self.state.conversation_history = []
        self.state.retrieved_context = []

# Initialize and test the complete agentic RAG system
if KNOWLEDGE_COLLECTION:
    agent = AgenticRAGAgent(client, KNOWLEDGE_COLLECTION, llm)
    
    # Test with different query types
    test_queries_agentic = [
        "What is vector search and how does it work?",
        "Compare dense vs sparse vector search methods",
        "How do I optimize HNSW performance in production?"
    ]
    
    for test_query in test_queries_agentic[:1]:  # Test one for demonstration
        result = agent.query(test_query)
        
        print(f"\n🎯 Final Response:")
        print(f"Answer: {result['answer'][:200]}...")
        print(f"\n📊 Metadata:")
        for key, value in result['metadata'].items():
            print(f"   {key}: {value}")
        
        break  # Only run one example to avoid too much output

else:
    print("⚠️ Skipping agentic RAG test - no knowledge collection available")
    print("\n💡 To test the complete system:")
    print("   1. Run Notebook 1 or 2 to create a knowledge collection")
    print("   2. Set up LLM API keys (OpenAI or Anthropic)")
    print("   3. Re-run this cell")


🤖 Agentic RAG Processing: 'What is vector search and how does it work?'

1️⃣ Query Analysis


   Query type: complex_analytical
   Complexity: medium
   Multi-step: True

2️⃣ Retrieval Planning
   Strategy: hybrid
   Steps: 4
   Max results: 15

3️⃣ Retrieval Execution

   Iteration 1:

🚀 Executing retrieval plan for: 'What is vector search and how does it work?'
   Plan type: complex_analytical
   Steps: 4

   📍 Step 1: search


      ✅ Retrieved 20 results

   📍 Step 2: analyze_gaps
      📊 Gap analysis: Missing terms {'what', 'work?', 'is', 'how', 'does', 'it', 'vector'}
      ⚠️ No results from this step

   📍 Step 3: rerank
      ✅ Retrieved 10 results

   📍 Step 4: validate
      📊 Validation - Avg score: 0.058, Diversity: 5 categories
      ⚠️ No results from this step

✅ Plan execution complete: 15 final results

4️⃣ Quality Assessment


   Quality score: 0.381
   Needs improvement: True

5️⃣ Plan Refinement

   Iteration 2:

🚀 Executing retrieval plan for: 'What is vector search and how does it work?'
   Plan type: complex_analytical
   Steps: 1

   📍 Step 1: rerank
      ⚠️ No results to rerank
      ⚠️ No results from this step

✅ Plan execution complete: 0 final results
   ⚠️ No results - trying fallback strategy

🚀 Executing retrieval plan for: 'What is vector search and how does it work?'
   Plan type: simple_factual
   Steps: 1

   📍 Step 1: search


      ✅ Retrieved 10 results

✅ Plan execution complete: 10 final results

4️⃣ Quality Assessment


   Quality score: 0.388
   Needs improvement: True

5️⃣ Plan Refinement

   Iteration 3:

🚀 Executing retrieval plan for: 'What is vector search and how does it work?'
   Plan type: simple_factual
   Steps: 1

   📍 Step 1: rerank
      ⚠️ No results to rerank
      ⚠️ No results from this step

✅ Plan execution complete: 0 final results
   ⚠️ No results - trying fallback strategy

🚀 Executing retrieval plan for: 'What is vector search and how does it work?'
   Plan type: simple_factual
   Steps: 1

   📍 Step 1: search
      💾 Using cached results
      ✅ Retrieved 10 results

✅ Plan execution complete: 10 final results

4️⃣ Quality Assessment


   Quality score: 0.388
   Needs improvement: True

6️⃣ Answer Generation



🎯 Final Response:
Answer: Vector search is a method used in information retrieval that involves representing data as vectors in a high-dimensional space. This technique is particularly useful in the context of machine learning...

📊 Metadata:
   query_type: complex_analytical
   iterations: 3
   total_time: 17.86768674850464
   result_count: 10
   quality_score: 0.3877095946191332


## 🎮 Advanced Features Demo

In [13]:
def demonstrate_advanced_features():
    """Demonstrate advanced agentic RAG features"""
    
    print("🎮 Advanced Agentic RAG Features")
    print("=" * 50)
    
    print("\n🔧 1. Query Rewriting")
    print("   • Automatic query expansion")
    print("   • Synonym replacement")
    print("   • Technical term normalization")
    print("   • Multi-language query handling")
    
    print("\n🔍 2. Multi-Modal Retrieval")
    print("   • Text + image search")
    print("   • Code + documentation retrieval")
    print("   • Structured data integration")
    
    print("\n🧠 3. Reasoning Chains")
    print("   • Chain-of-thought retrieval")
    print("   • Evidence combination")
    print("   • Contradiction detection")
    print("   • Confidence calibration")
    
    print("\n🔄 4. Self-Correction")
    print("   • Answer validation")
    print("   • Retrieval refinement")
    print("   • Source verification")
    print("   • Hallucination detection")
    
    print("\n📚 5. Knowledge Graph Integration")
    print("   • Entity relationship traversal")
    print("   • Structured knowledge queries")
    print("   • Factual consistency checking")
    
    print("\n🎯 6. Personalization")
    print("   • User expertise level adaptation")
    print("   • Domain preference learning")
    print("   • Interaction history analysis")
    
    print("\n🔧 7. Tool Integration")
    print("   • Calculator for numerical queries")
    print("   • Code execution for programming")
    print("   • Web search for current info")
    print("   • API calls for live data")

def demonstrate_query_rewriting():
    """Show query rewriting examples"""
    
    print("\n🔄 Query Rewriting Examples:")
    print("=" * 40)
    
    rewrites = [
        {
            "original": "How to make my search faster?",
            "rewritten": "vector search optimization performance tuning latency reduction HNSW parameters",
            "reason": "Expanded with technical terms"
        },
        {
            "original": "DB vs vector store",
            "rewritten": "database comparison vector database traditional relational database differences",
            "reason": "Clarified abbreviations and added context"
        },
        {
            "original": "Why is recall bad?",
            "rewritten": "recall degradation causes HNSW index performance issues approximate nearest neighbor",
            "reason": "Added technical context and specificity"
        }
    ]
    
    for example in rewrites:
        print(f"\n📝 Original: '{example['original']}'")
        print(f"🔄 Rewritten: '{example['rewritten']}'")
        print(f"💭 Reason: {example['reason']}")

def demonstrate_multi_hop_reasoning():
    """Show multi-hop reasoning example"""
    
    print("\n🔗 Multi-Hop Reasoning Example:")
    print("=" * 40)
    
    query = "Why might my vector database be slow after adding lots of data?"
    
    print(f"❓ Query: '{query}'")
    print(f"\n🎯 Reasoning Chain:")
    print(f"\n1️⃣ Initial Search: 'vector database performance issues'")
    print(f"   → Entities found: HNSW, index degradation, memory usage")
    
    print(f"\n2️⃣ Follow-up Search: 'HNSW index degradation causes'")
    print(f"   → More entities: graph connectivity, update/delete ratio")
    
    print(f"\n3️⃣ Final Search: 'index optimization healing rebuild'")
    print(f"   → Solution entities: optimization, parameter tuning")
    
    print(f"\n📋 Synthesized Answer:")
    print(f"   'Vector database slowdown after adding data is typically caused by")
    print(f"    HNSW index degradation. As you insert many new vectors, the graph")
    print(f"    structure becomes suboptimal, leading to longer search paths and")
    print(f"    reduced recall. Solutions include: 1) Regular index optimization,")
    print(f"    2) Tuning HNSW parameters like ef_construct, 3) Index rebuilding.'")

demonstrate_advanced_features()
demonstrate_query_rewriting()
demonstrate_multi_hop_reasoning()

🎮 Advanced Agentic RAG Features

🔧 1. Query Rewriting
   • Automatic query expansion
   • Synonym replacement
   • Technical term normalization
   • Multi-language query handling

🔍 2. Multi-Modal Retrieval
   • Text + image search
   • Code + documentation retrieval
   • Structured data integration

🧠 3. Reasoning Chains
   • Chain-of-thought retrieval
   • Evidence combination
   • Contradiction detection
   • Confidence calibration

🔄 4. Self-Correction
   • Answer validation
   • Retrieval refinement
   • Source verification
   • Hallucination detection

📚 5. Knowledge Graph Integration
   • Entity relationship traversal
   • Structured knowledge queries
   • Factual consistency checking

🎯 6. Personalization
   • User expertise level adaptation
   • Domain preference learning
   • Interaction history analysis

🔧 7. Tool Integration
   • Calculator for numerical queries
   • Code execution for programming
   • Web search for current info
   • API calls for live data

🔄 Query Rewrit

## 📊 Performance Analysis & Best Practices

In [14]:
def analyze_agentic_rag_performance():
    """Analysis of agentic RAG performance characteristics"""
    
    print("📊 Agentic RAG Performance Analysis")
    print("=" * 50)
    
    print("\n⚡ Latency Breakdown:")
    
    latency_components = {
        "Query Analysis": {"time_ms": "50-200", "description": "LLM call to classify query"},
        "Plan Generation": {"time_ms": "30-100", "description": "Creating retrieval strategy"},
        "Vector Search": {"time_ms": "10-50", "description": "Per search operation"},
        "Reranking (MMR)": {"time_ms": "5-20", "description": "Per rerank operation"},
        "Quality Assessment": {"time_ms": "100-300", "description": "LLM evaluation of results"},
        "Answer Generation": {"time_ms": "200-800", "description": "Final LLM response"}
    }
    
    total_min = sum([int(comp["time_ms"].split("-")[0]) for comp in latency_components.values()])
    total_max = sum([int(comp["time_ms"].split("-")[1]) for comp in latency_components.values()])
    
    for component, details in latency_components.items():
        print(f"   {component:<20}: {details['time_ms']:<10} ms - {details['description']}")
    
    print(f"\n   {'Total Pipeline':<20}: {total_min}-{total_max:<6} ms")
    
    print(f"\n💰 Cost Analysis (per query):")
    cost_components = {
        "Query Analysis": "$0.001-0.003",
        "Quality Assessment": "$0.002-0.005", 
        "Answer Generation": "$0.003-0.010",
        "Vector Operations": "$0.0001-0.001"
    }
    
    for component, cost in cost_components.items():
        print(f"   {component:<20}: {cost}")
    
    print(f"\n📈 Scalability Considerations:")
    print("   • LLM calls are the primary bottleneck")
    print("   • Vector operations scale sub-linearly")
    print("   • Caching can reduce 60-80% of LLM calls")
    print("   • Async processing enables parallelization")
    
    print(f"\n🎯 Optimization Strategies:")
    
    strategies = {
        "Caching": {
            "what": "Cache query analysis and plans",
            "impact": "50-80% latency reduction"
        },
        "Batching": {
            "what": "Batch multiple queries to LLM",
            "impact": "30-50% cost reduction"
        },
        "Early Stopping": {
            "what": "Stop refinement when quality is good",
            "impact": "20-40% latency reduction"
        },
        "Async Processing": {
            "what": "Parallel retrieval and assessment",
            "impact": "30-60% latency reduction"
        },
        "Model Sizing": {
            "what": "Use smaller models for classification",
            "impact": "50-70% cost reduction"
        }
    }
    
    for strategy, details in strategies.items():
        print(f"   {strategy:<15}: {details['what']:<35} → {details['impact']}")

def best_practices_guide():
    """Comprehensive best practices for agentic RAG"""
    
    print("\n🏆 Agentic RAG Best Practices")
    print("=" * 50)
    
    practices = {
        "🎯 Query Understanding": [
            "Use lightweight models for query classification",
            "Implement fallback rules when LLM classification fails",
            "Cache common query patterns and their classifications",
            "Consider query similarity for cache hits"
        ],
        "🔍 Retrieval Strategy": [
            "Match retrieval strategy to query type",
            "Use hybrid search as the default for unknown query types",
            "Implement progressive refinement with quality thresholds",
            "Set reasonable iteration limits to prevent infinite loops"
        ],
        "📊 Quality Assessment": [
            "Combine multiple quality signals (relevance, diversity, coverage)",
            "Use LLM assessment sparingly due to cost",
            "Implement fast heuristic checks before expensive evaluation",
            "Track quality metrics over time for system improvement"
        ],
        "🤖 Answer Generation": [
            "Provide clear context boundaries to reduce hallucination",
            "Include confidence indicators in responses",
            "Cite sources and provide retrieval transparency",
            "Gracefully handle low-quality retrieval results"
        ],
        "⚡ Performance": [
            "Cache at multiple levels (query, plan, results)",
            "Use async/parallel processing where possible",
            "Implement circuit breakers for external API calls",
            "Monitor and alert on latency and cost metrics"
        ],
        "🛡️ Production Readiness": [
            "Implement comprehensive error handling and retries",
            "Add request rate limiting and quota management",
            "Create health checks for all system components",
            "Maintain audit logs for debugging and compliance"
        ]
    }
    
    for category, items in practices.items():
        print(f"\n{category}")
        for item in items:
            print(f"   • {item}")

analyze_agentic_rag_performance()
best_practices_guide()

📊 Agentic RAG Performance Analysis

⚡ Latency Breakdown:
   Query Analysis      : 50-200     ms - LLM call to classify query
   Plan Generation     : 30-100     ms - Creating retrieval strategy
   Vector Search       : 10-50      ms - Per search operation
   Reranking (MMR)     : 5-20       ms - Per rerank operation
   Quality Assessment  : 100-300    ms - LLM evaluation of results
   Answer Generation   : 200-800    ms - Final LLM response

   Total Pipeline      : 395-1470   ms

💰 Cost Analysis (per query):
   Query Analysis      : $0.001-0.003
   Quality Assessment  : $0.002-0.005
   Answer Generation   : $0.003-0.010
   Vector Operations   : $0.0001-0.001

📈 Scalability Considerations:
   • LLM calls are the primary bottleneck
   • Vector operations scale sub-linearly
   • Caching can reduce 60-80% of LLM calls
   • Async processing enables parallelization

🎯 Optimization Strategies:
   Caching        : Cache query analysis and plans      → 50-80% latency reduction
   Batching     

## 🎯 Final Summary & Next Steps

In [15]:
print("🎉 Agentic RAG Workshop Summary")
print("=" * 50)

print(f"\n📚 Components Built:")
components = {
    "Query Analyzer": "Classifies queries and determines complexity",
    "Retrieval Planner": "Creates multi-step retrieval strategies", 
    "Advanced Retriever": "Executes complex retrieval plans",
    "Quality Assessor": "Evaluates result quality and suggests improvements",
    "Complete Agent": "Orchestrates the full agentic RAG pipeline"
}

for component, description in components.items():
    print(f"   ✅ {component}: {description}")

print(f"\n🔍 Key Techniques Demonstrated:")
techniques = [
    "Multi-step query analysis and planning",
    "Adaptive retrieval strategies by query type", 
    "Self-correcting retrieval with quality feedback",
    "MMR reranking for result diversification",
    "Hybrid search integration (dense + sparse)",
    "Conversational context management",
    "Quality assessment and improvement suggestions",
    "Answer generation with source attribution"
]

for technique in techniques:
    print(f"   🎯 {technique}")

if KNOWLEDGE_COLLECTION:
    print(f"\n📊 System Performance:")
    print(f"   Knowledge Base: {KNOWLEDGE_COLLECTION}")
    print(f"   LLM Provider: {LLM_PROVIDER}")
    if 'agent' in locals():
        history = agent.get_conversation_history()
        print(f"   Queries Processed: {len(history)}")
        if history:
            avg_quality = np.mean([h.get('quality_score', 0) for h in history])
            print(f"   Average Quality Score: {avg_quality:.3f}")

print(f"\n🚀 Production Deployment Checklist:")
checklist = [
    "Set up proper LLM API credentials and rate limiting",
    "Implement comprehensive caching strategy",
    "Add monitoring and alerting for all components",
    "Create fallback strategies for component failures",
    "Implement user session and conversation management",
    "Add evaluation framework with human feedback",
    "Set up A/B testing for different strategies",
    "Create security controls for sensitive queries"
]

for i, item in enumerate(checklist, 1):
    print(f"   {i}. {item}")

print(f"\n🎯 Next Steps for Enhancement:")
enhancements = [
    "Add multi-modal retrieval (text + images + code)",
    "Implement knowledge graph integration",
    "Build user personalization and preference learning",
    "Add real-time learning from user feedback",
    "Integrate with external tools and APIs",
    "Implement advanced reasoning patterns",
    "Add support for different domain expertise levels",
    "Build collaborative filtering for query suggestions"
]

for enhancement in enhancements:
    print(f"   🔮 {enhancement}")

print(f"\n💡 Key Insights:")
print("   • Agentic RAG significantly improves answer quality over traditional RAG")
print("   • Multi-step retrieval helps with complex and comparative queries")
print("   • Quality assessment enables self-correction and continuous improvement")
print("   • Proper caching and optimization are critical for production performance")
print("   • The system can adapt its strategy based on query characteristics")

print(f"\n✨ Congratulations! You've built a complete agentic RAG system.")
print(f"\n📖 This completes the 5-notebook Qdrant workshop series:")
print(f"   1. Fundamentals & Search Basics ✅")
print(f"   2. Hybrid Search (Dense + Sparse) ✅")
print(f"   3. MMR Reranking ✅")
print(f"   4. HNSW Index Health ✅")
print(f"   5. Agentic RAG ✅")

print(f"\n🎓 You now have comprehensive knowledge of:")
print(f"   • Vector database fundamentals")
print(f"   • Advanced search techniques")
print(f"   • Production optimization strategies")
print(f"   • Intelligent RAG architectures")

print(f"\n🌟 Ready to build amazing AI applications with Qdrant!")

🎉 Agentic RAG Workshop Summary

📚 Components Built:
   ✅ Query Analyzer: Classifies queries and determines complexity
   ✅ Retrieval Planner: Creates multi-step retrieval strategies
   ✅ Advanced Retriever: Executes complex retrieval plans
   ✅ Quality Assessor: Evaluates result quality and suggests improvements
   ✅ Complete Agent: Orchestrates the full agentic RAG pipeline

🔍 Key Techniques Demonstrated:
   🎯 Multi-step query analysis and planning
   🎯 Adaptive retrieval strategies by query type
   🎯 Self-correcting retrieval with quality feedback
   🎯 MMR reranking for result diversification
   🎯 Hybrid search integration (dense + sparse)
   🎯 Conversational context management
   🎯 Quality assessment and improvement suggestions
   🎯 Answer generation with source attribution

📊 System Performance:
   Knowledge Base: workshop_hybrid
   LLM Provider: openai
   Queries Processed: 1
   Average Quality Score: 0.388

🚀 Production Deployment Checklist:
   1. Set up proper LLM API credential