# Retrieval-Augmented Generation (RAG) AI Agent

This notebook demonstrates how to create a RAG-based AI agent that can retrieve information from external knowledge sources and generate informed responses.

## Features:
- Document ingestion and vector storage
- Semantic search and retrieval
- Context-aware response generation
- Support for multiple document sources (plain text and web pages fetched by URL)
- Relevance scoring and filtering

## Setup and Installation

In [None]:
# Install required packages
!pip install openai tiktoken numpy scikit-learn sentence-transformers python-dotenv PyPDF2 requests

## Import Libraries

In [None]:
import openai
import os
import numpy as np
import tiktoken
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv
import re
import json
import requests
from datetime import datetime

# Load environment variables (load_dotenv reads them from a .env file) and
# hand the OpenAI key to the client library.
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')
if not openai.api_key:
    # Surface the problem at setup time instead of as an opaque API error on the first query.
    print("Warning: OPENAI_API_KEY is not set; response generation will fail until it is configured.")

## Document and Chunk Classes

In [None]:
@dataclass
class Document:
    """Represents a document in the knowledge base.

    Attributes:
        id: Identifier of the document.
        title: Human-readable title.
        content: Full text of the document.
        metadata: Arbitrary key/value metadata attached to the document.
        source: Label or URL describing where the document came from.
        created_at: Creation timestamp. When omitted (or passed as None) it
            is set to the current local time at construction.
    """
    id: str
    title: str
    content: str
    metadata: Dict[str, Any]
    source: str
    created_at: Optional[datetime] = None

    def __post_init__(self):
        # None is the "not supplied" marker: stamp the instance at creation
        # time rather than sharing one timestamp evaluated at class definition.
        if self.created_at is None:
            self.created_at = datetime.now()

@dataclass
class DocumentChunk:
    """Represents a chunk of a document for retrieval.

    A chunk is one slice of a document's text together with the embedding
    vector used for similarity search.
    """
    id: str  # chunk identifier; TextProcessor.create_chunks builds it as "<document id>_chunk_<n>"
    document_id: str  # id of the Document this chunk was cut from
    content: str  # text of the chunk
    embedding: Optional[np.ndarray]  # None until VectorStore.add_document assigns the encoded vector
    metadata: Dict[str, Any]  # the document's metadata plus "chunk_index" and "token_count"
    start_index: int  # character offset assigned by TextProcessor.create_chunks
    end_index: int  # start_index + len(content)
    
@dataclass
class RetrievalResult:
    """Represents a retrieval result with relevance score.

    Produced by VectorStore.search: one matching chunk plus the title and
    source of the document it belongs to.
    """
    chunk: DocumentChunk  # the matched chunk
    score: float  # cosine similarity between the query and the chunk embedding
    document_title: str  # title of the chunk's parent document
    document_source: str  # source of the chunk's parent document

## Text Processing and Chunking

In [None]:
class TextProcessor:
    """Handles text processing and chunking for documents.

    Text is normalised, split into sentences, and the sentences are packed
    into chunks of at most ``chunk_size`` tokens. Consecutive chunks share a
    tail of words from the previous chunk, sized from ``chunk_overlap``.
    """

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        """
        Args:
            chunk_size: Token budget per chunk.
            chunk_overlap: Overlap budget between consecutive chunks (see
                ``_get_overlap_text`` for how it is converted to words).
        """
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.encoding = tiktoken.get_encoding("cl100k_base")  # GPT-3.5/4 encoding

    def clean_text(self, text: str) -> str:
        """Collapse whitespace runs and drop characters outside the allowed set.

        Word characters, whitespace and the punctuation ``.,!?;:()[]-`` are
        kept; everything else is removed. The result is stripped.
        """
        # Remove extra whitespace
        text = re.sub(r'\s+', ' ', text)
        # Remove special characters but keep punctuation
        text = re.sub(r'[^\w\s.,!?;:()\[\]-]', '', text)
        return text.strip()

    def count_tokens(self, text: str) -> int:
        """Return the number of tokens ``self.encoding`` produces for ``text``."""
        return len(self.encoding.encode(text))

    def split_text_by_tokens(self, text: str, max_tokens: Optional[int] = None) -> List[str]:
        """Split text into sentence-aligned chunks of at most ``max_tokens`` tokens.

        Args:
            text: Text to split.
            max_tokens: Token budget per chunk; defaults to ``self.chunk_size``.

        Returns:
            The chunk strings in document order. A single sentence longer than
            the budget is kept whole, so a chunk may exceed ``max_tokens``.
        """
        if max_tokens is None:
            max_tokens = self.chunk_size

        # Split on whitespace that follows a sentence terminator. The
        # lookbehind keeps the terminator attached to its sentence, and
        # requiring whitespace avoids breaking decimals such as "3.9".
        sentences = re.split(r'(?<=[.!?])\s+', text)
        chunks = []
        current_chunk = ""
        current_tokens = 0

        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue

            sentence_tokens = self.count_tokens(sentence)

            # Close the current chunk when this sentence would overflow it
            if current_chunk and current_tokens + sentence_tokens > max_tokens:
                chunks.append(current_chunk.strip())

                # Seed the next chunk with the tail of the one just closed
                overlap_text = self._get_overlap_text(current_chunk, self.chunk_overlap)
                current_chunk = overlap_text + sentence
                current_tokens = self.count_tokens(current_chunk)
            else:
                current_chunk = f"{current_chunk} {sentence}" if current_chunk else sentence
                current_tokens += sentence_tokens

        # Add the last chunk
        if current_chunk.strip():
            chunks.append(current_chunk.strip())

        return chunks

    def _get_overlap_text(self, text: str, overlap_tokens: int) -> str:
        """Return the trailing words of ``text`` to prepend to the next chunk.

        The word count is ``overlap_tokens // 4``, capped at the number of
        words available. Returns an empty string when that count is zero;
        otherwise the words joined by spaces with one trailing space.
        """
        words = text.split()
        # Rough heuristic kept from the original sizing: one word per four
        # tokens of overlap budget.
        overlap_words = min(overlap_tokens // 4, len(words))
        if overlap_words <= 0:
            # Guard: words[-0:] would be the WHOLE list, not an empty one.
            return ""

        return " ".join(words[-overlap_words:]) + " "

    def create_chunks(self, document: "Document") -> List["DocumentChunk"]:
        """Create DocumentChunk objects (without embeddings) from a document.

        ``start_index`` / ``end_index`` are character offsets into the
        *cleaned* text (the output of ``clean_text``), not the raw content.
        Because chunks overlap, the ranges of consecutive chunks may overlap.
        """
        cleaned_text = self.clean_text(document.content)
        text_chunks = self.split_text_by_tokens(cleaned_text)

        chunks = []
        search_from = 0
        previous_end = 0

        for i, chunk_text in enumerate(text_chunks):
            # Each chunk is a contiguous run of the cleaned text, so locate it
            # at or after the previous chunk's start; fall back to the old
            # cumulative position if it cannot be found.
            found_at = cleaned_text.find(chunk_text, search_from)
            start_index = found_at if found_at != -1 else previous_end
            end_index = start_index + len(chunk_text)

            chunk = DocumentChunk(
                id=f"{document.id}_chunk_{i}",
                document_id=document.id,
                content=chunk_text,
                embedding=None,  # Will be set later
                metadata={
                    **document.metadata,
                    "chunk_index": i,
                    "token_count": self.count_tokens(chunk_text)
                },
                start_index=start_index,
                end_index=end_index
            )

            chunks.append(chunk)
            search_from = start_index
            previous_end = end_index

        return chunks

## Vector Store for Embeddings

In [None]:
class VectorStore:
    """Simple in-memory vector store for document embeddings.

    Chunks are kept in a dict keyed by chunk id; their embeddings are also
    stacked into one matrix (``self.embeddings``) whose row order matches
    ``self.chunk_ids``.
    """

    def __init__(self, embedding_model: str = "all-MiniLM-L6-v2"):
        """
        Args:
            embedding_model: Name passed to ``SentenceTransformer``.
        """
        self.model_name = embedding_model  # kept so stats can report the name
        self.embedding_model = SentenceTransformer(embedding_model)
        self.chunks: Dict[str, DocumentChunk] = {}
        self.documents: Dict[str, Document] = {}
        self.embeddings: Optional[np.ndarray] = None
        self.chunk_ids: List[str] = []

    def add_document(self, document: "Document", chunks: List["DocumentChunk"]):
        """Add a document and its chunks, embedding each chunk's content.

        The document is registered even when ``chunks`` is empty; in that
        case no embeddings are computed.
        """
        self.documents[document.id] = document
        if not chunks:
            return

        # Generate embeddings for all chunks in one call
        chunk_texts = [chunk.content for chunk in chunks]
        chunk_embeddings = self.embedding_model.encode(chunk_texts)

        # Store chunks with embeddings
        for chunk, embedding in zip(chunks, chunk_embeddings):
            chunk.embedding = embedding
            self.chunks[chunk.id] = chunk

        # Update the embedding matrix
        self._rebuild_embedding_matrix()

    def _rebuild_embedding_matrix(self):
        """Rebuild ``self.embeddings`` / ``self.chunk_ids`` from ``self.chunks``."""
        if not self.chunks:
            self.embeddings = None
            self.chunk_ids = []
            return

        self.chunk_ids = list(self.chunks)
        self.embeddings = np.vstack(
            [self.chunks[chunk_id].embedding for chunk_id in self.chunk_ids]
        )

    def search(self, query: str, top_k: int = 5, min_score: float = 0.0) -> List["RetrievalResult"]:
        """Return up to ``top_k`` chunks most similar to ``query``.

        Args:
            query: Query text; embedded with the same model as the chunks.
            top_k: Maximum number of results. Values <= 0 yield no results.
            min_score: Results with cosine similarity below this are dropped.

        Returns:
            RetrievalResult objects ordered by descending similarity.
        """
        if top_k <= 0 or not self.chunks or self.embeddings is None:
            return []

        # Generate query embedding
        query_embedding = self.embedding_model.encode([query])

        # Calculate similarities
        similarities = cosine_similarity(query_embedding, self.embeddings)[0]

        # Indices of the highest-scoring rows, best first
        top_indices = np.argsort(similarities)[::-1][:top_k]

        results = []
        for idx in top_indices:
            # Plain float so the score matches the RetrievalResult annotation
            score = float(similarities[idx])
            if score < min_score:
                # Sorted descending: every remaining score is lower too
                break

            chunk = self.chunks[self.chunk_ids[idx]]
            document = self.documents[chunk.document_id]

            results.append(RetrievalResult(
                chunk=chunk,
                score=score,
                document_title=document.title,
                document_source=document.source
            ))

        return results

    def get_document_stats(self) -> Dict[str, Any]:
        """Return document/chunk counts, embedding dimension and model name."""
        return {
            "total_documents": len(self.documents),
            "total_chunks": len(self.chunks),
            "embedding_dimension": self.embeddings.shape[1] if self.embeddings is not None else 0,
            "model_name": self.model_name
        }

## RAG Agent Implementation

In [None]:
class RAGAgent:
    """Retrieval-Augmented Generation AI Agent.

    Combines a TextProcessor (chunking), a VectorStore (semantic retrieval)
    and an OpenAI chat model: documents are ingested into the store, and each
    query is answered with the most similar chunks supplied as context.
    """

    def __init__(self, model: str = "gpt-3.5-turbo", max_context_tokens: int = 3000):
        """
        Args:
            model: Chat model name passed to the OpenAI API.
            max_context_tokens: Token cap for the retrieved context block.
        """
        self.model = model
        self.max_context_tokens = max_context_tokens
        self.text_processor = TextProcessor()
        self.vector_store = VectorStore()
        self.conversation_history = []

        self.system_prompt = """
        You are a knowledgeable AI assistant that uses retrieved information to provide accurate, 
        well-informed responses. When answering questions:
        
        1. Use the provided context information to support your answers
        2. Clearly distinguish between information from the context and your general knowledge
        3. If the context doesn't contain enough information, say so honestly
        4. Cite sources when possible
        5. Provide comprehensive but concise answers
        
        Always prioritize accuracy and helpfulness in your responses.
        """

    def _make_document_id(self, title: str) -> str:
        """Build a document id from the store size and a checksum of the title."""
        import zlib  # stdlib; imported here because the notebook's import cell does not include it

        # crc32 is stable across runs, unlike hash(), which is salted per
        # process for str and made ids differ between sessions.
        digest = zlib.crc32(title.encode("utf-8")) % 10000
        return f"doc_{len(self.vector_store.documents)}_{digest}"

    def add_document_from_text(self, title: str, content: str, source: str = "manual", metadata: Optional[Dict[str, Any]] = None) -> str:
        """Chunk, embed and store a document given as text.

        Args:
            title: Document title.
            content: Document text.
            source: Label describing where the text came from.
            metadata: Optional metadata copied into every chunk's metadata.

        Returns:
            The id assigned to the new document.
        """
        doc_id = self._make_document_id(title)

        document = Document(
            id=doc_id,
            title=title,
            content=content,
            metadata=metadata or {},
            source=source
        )

        chunks = self.text_processor.create_chunks(document)
        self.vector_store.add_document(document, chunks)

        return doc_id

    def add_document_from_url(self, url: str, title: Optional[str] = None) -> str:
        """Fetch a URL, strip its HTML markup and add the text as a document.

        Args:
            url: Address to fetch (10 second timeout).
            title: Optional title; defaults to "Web content from <url>".

        Returns:
            The id assigned to the new document.

        Raises:
            RuntimeError: If the request fails or returns an error status.
                The underlying requests exception is chained as the cause.
        """
        import html  # stdlib; imported here because the notebook's import cell does not include it

        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            raise RuntimeError(f"Failed to fetch content from {url}: {e}") from e

        content = response.text
        # Basic HTML cleaning (you might want to use BeautifulSoup for better results).
        # Drop script/style bodies first: they are code, not page text.
        content = re.sub(
            r'<(script|style)\b[^>]*>.*?</\1\s*>', ' ', content,
            flags=re.IGNORECASE | re.DOTALL
        )
        # Replace tags with a space so words in adjacent elements stay separate
        content = re.sub(r'<[^>]+>', ' ', content)
        content = html.unescape(content)
        content = re.sub(r'\s+', ' ', content)

        if not title:
            title = f"Web content from {url}"

        return self.add_document_from_text(title, content, source=url)

    def _truncate_to_token_limit(self, text: str) -> str:
        """Return ``text`` cut to at most ``self.max_context_tokens`` tokens."""
        encoding = self.text_processor.encoding
        tokens = encoding.encode(text)
        if len(tokens) <= self.max_context_tokens:
            return text
        # Cut on token boundaries so the limit is actually honoured and the
        # surviving text keeps its original line structure.
        return encoding.decode(tokens[:self.max_context_tokens])

    def retrieve_context(self, query: str, top_k: int = 3, min_score: float = 0.1) -> Tuple[str, List["RetrievalResult"]]:
        """Retrieve relevant context for a query.

        Args:
            query: User question.
            top_k: Maximum number of chunks to retrieve.
            min_score: Minimum similarity a chunk needs to be included.

        Returns:
            ``(context, results)``: the formatted context string (truncated
            to ``max_context_tokens``) and the retrieval results it was built
            from. ``("", [])`` when nothing matches.
        """
        results = self.vector_store.search(query, top_k=top_k, min_score=min_score)

        if not results:
            return "", []

        # One labelled section per retrieved chunk
        context_parts = [
            f"[Source {i}: {result.document_title} (Score: {result.score:.3f})]\n"
            f"{result.chunk.content}\n"
            for i, result in enumerate(results, 1)
        ]
        context = self._truncate_to_token_limit("\n".join(context_parts))

        return context, results

    def _create_chat_completion(self, messages: List[Dict[str, str]]):
        """Call the chat completion endpoint of whichever openai SDK is installed."""
        params = {
            "model": self.model,
            "messages": messages,
            "max_tokens": 500,
            "temperature": 0.3,
        }
        # openai>=1.0 exposes the call as openai.chat.completions.create and
        # rejects the legacy openai.ChatCompletion entry point; older
        # releases only have the legacy one.
        if hasattr(openai, "chat"):
            return openai.chat.completions.create(**params)
        return openai.ChatCompletion.create(**params)

    def generate_response(self, query: str, use_conversation_history: bool = True) -> Dict[str, Any]:
        """Generate a response using RAG.

        Args:
            query: User question.
            use_conversation_history: When True, the last six history
                messages are sent along and the new exchange is recorded.

        Returns:
            On success: ``success`` (True), ``response``, ``context_used``,
            ``retrieval_results`` and ``sources_count``. On failure:
            ``success`` (False), ``error``, ``context_used`` and
            ``retrieval_results``. API errors are reported this way rather
            than raised.
        """
        # Retrieve relevant context
        context, retrieval_results = self.retrieve_context(query)

        # Build messages for the API call
        messages = [{"role": "system", "content": self.system_prompt}]

        # Add conversation history if requested
        if use_conversation_history and self.conversation_history:
            messages.extend(self.conversation_history[-6:])  # Last 3 exchanges

        # Add context and query
        if context:
            user_message = f"""
            Context Information:
            {context}
            
            Question: {query}
            
            Please answer the question using the provided context information. 
            If the context doesn't contain sufficient information, please indicate that.
            """
        else:
            user_message = f"""
            Question: {query}
            
            Note: No relevant context was found in the knowledge base. 
            Please answer based on your general knowledge but indicate that 
            this is not from the provided documents.
            """

        messages.append({"role": "user", "content": user_message})

        try:
            response = self._create_chat_completion(messages)
            assistant_response = response.choices[0].message.content
        except Exception as e:
            # Boundary of the demo API: report the failure to the caller
            # instead of raising, so example cells keep running.
            return {
                "success": False,
                "error": str(e),
                "context_used": context,
                "retrieval_results": retrieval_results
            }

        # Record the bare query (not the context-augmented prompt) in history
        if use_conversation_history:
            self.conversation_history.extend([
                {"role": "user", "content": query},
                {"role": "assistant", "content": assistant_response}
            ])

        return {
            "success": True,
            "response": assistant_response,
            "context_used": context,
            "retrieval_results": retrieval_results,
            "sources_count": len(retrieval_results)
        }

    def clear_conversation_history(self):
        """Clear the conversation history."""
        self.conversation_history = []

    def get_knowledge_base_stats(self) -> Dict[str, Any]:
        """Return the vector store statistics plus the chunking settings."""
        stats = self.vector_store.get_document_stats()
        stats["text_processor_settings"] = {
            "chunk_size": self.text_processor.chunk_size,
            "chunk_overlap": self.text_processor.chunk_overlap
        }
        return stats

    def list_documents(self) -> List[Dict[str, Any]]:
        """Return one summary dict per stored document.

        Each dict has ``id``, ``title``, ``source``, ``created_at`` (ISO
        string), ``content_length`` (characters) and ``metadata``.
        """
        return [
            {
                "id": doc.id,
                "title": doc.title,
                "source": doc.source,
                "created_at": doc.created_at.isoformat(),
                "content_length": len(doc.content),
                "metadata": doc.metadata
            }
            for doc in self.vector_store.documents.values()
        ]

## Example Usage and Demonstrations

In [None]:
# Build an agent with the default settings and echo its configuration
rag_agent = RAGAgent()

_init_summary = (
    "=== RAG Agent Initialized ===",
    f"Model: {rag_agent.model}",
    f"Max context tokens: {rag_agent.max_context_tokens}",
    f"Chunk size: {rag_agent.text_processor.chunk_size}",
)
for _summary_line in _init_summary:
    print(_summary_line)

## Adding Sample Documents

In [None]:
# Add sample documents to demonstrate RAG capabilities

# Sample document 1 - overview of AI, machine learning and deep learning
ai_content = """
Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines 
that can perform tasks that typically require human intelligence. These tasks include learning, 
reasoning, problem-solving, perception, and language understanding.

Machine Learning (ML) is a subset of AI that focuses on the development of algorithms and statistical 
models that enable computers to improve their performance on a specific task through experience, 
without being explicitly programmed.

Deep Learning is a subset of machine learning that uses artificial neural networks with multiple 
layers (hence "deep") to model and understand complex patterns in data. It has been particularly 
successful in areas such as image recognition, natural language processing, and speech recognition.

Common applications of AI include:
- Computer vision and image recognition
- Natural language processing and chatbots
- Recommendation systems
- Autonomous vehicles
- Predictive analytics
- Game playing (like chess and Go)

The field of AI has experienced rapid growth in recent years, driven by advances in computing power, 
the availability of large datasets, and improvements in algorithms.
"""

# Keyword arguments for the ingestion call, gathered in one place
_ai_doc_fields = {
    "title": "Introduction to Artificial Intelligence and Machine Learning",
    "content": ai_content,
    "source": "AI Tutorial",
    "metadata": {"category": "education", "topic": "AI/ML"},
}
doc1_id = rag_agent.add_document_from_text(**_ai_doc_fields)

# Sample document 2 - introduction to the Python language and its ecosystem
python_content = """
Python is a high-level, interpreted programming language known for its simplicity and readability. 
It was created by Guido van Rossum and first released in 1991.

Key features of Python:
- Easy to learn and use syntax
- Interpreted language (no compilation needed)
- Object-oriented and functional programming support
- Extensive standard library
- Large ecosystem of third-party packages
- Cross-platform compatibility

Python is widely used in various domains:
- Web development (Django, Flask)
- Data science and analytics (Pandas, NumPy, Matplotlib)
- Machine learning (Scikit-learn, TensorFlow, PyTorch)
- Automation and scripting
- Scientific computing
- Game development

Popular Python libraries:
- NumPy: Numerical computing
- Pandas: Data manipulation and analysis
- Matplotlib: Data visualization
- Requests: HTTP library
- Django: Web framework
- Flask: Lightweight web framework

Python's philosophy emphasizes code readability and simplicity, following the principle that 
"there should be one obvious way to do it."
"""

# Keyword arguments for the ingestion call, gathered in one place
_python_doc_fields = {
    "title": "Python Programming Language Guide",
    "content": python_content,
    "source": "Programming Tutorial",
    "metadata": {"category": "programming", "language": "python"},
}
doc2_id = rag_agent.add_document_from_text(**_python_doc_fields)

# Sample document 3 - the data science process, skills and tooling
data_science_content = """
Data Science is an interdisciplinary field that combines statistics, computer science, and domain 
expertise to extract insights and knowledge from data. It involves collecting, processing, analyzing, 
and interpreting large amounts of data to inform decision-making.

The data science process typically includes:
1. Problem definition and goal setting
2. Data collection and acquisition
3. Data cleaning and preprocessing
4. Exploratory data analysis (EDA)
5. Feature engineering and selection
6. Model building and training
7. Model evaluation and validation
8. Deployment and monitoring

Key skills for data scientists:
- Programming (Python, R, SQL)
- Statistics and probability
- Machine learning algorithms
- Data visualization
- Domain knowledge
- Communication skills

Common tools and technologies:
- Python libraries: Pandas, NumPy, Scikit-learn, Matplotlib, Seaborn
- R and RStudio
- SQL databases
- Jupyter Notebooks
- Tableau, Power BI for visualization
- Apache Spark for big data processing
- Cloud platforms: AWS, Google Cloud, Azure

Data science applications span many industries including healthcare, finance, retail, technology, 
and government, helping organizations make data-driven decisions.
"""

# Keyword arguments for the ingestion call, gathered in one place
_data_science_doc_fields = {
    "title": "Data Science: Process, Skills, and Tools",
    "content": data_science_content,
    "source": "Data Science Guide",
    "metadata": {"category": "data science", "topic": "overview"},
}
doc3_id = rag_agent.add_document_from_text(**_data_science_doc_fields)

print("\n=== Documents Added ===")
# Report the id returned for each sample document, numbered from 1
for _position, _assigned_id in enumerate((doc1_id, doc2_id, doc3_id), start=1):
    print(f"Document {_position} ID: {_assigned_id}")

# Summarise what the knowledge base now holds
stats = rag_agent.get_knowledge_base_stats()
print("\nKnowledge Base Stats:")
for key in stats:
    value = stats[key]
    print(f"- {key}: {value}")

## Example 1: Basic RAG Queries

In [None]:
print("\n=== Example 1: Basic RAG Queries ===")


def _report_query(question, outcome):
    """Print a question, its answer (or a placeholder on failure) and the source count."""
    print(f"Query: {question}")
    print(f"Response: {outcome.get('response', 'Error occurred')}")
    print(f"Sources used: {outcome.get('sources_count', 0)}")


# Query 1: answered from the AI/ML document
query1 = "What is machine learning and how does it relate to AI?"
result1 = rag_agent.generate_response(query1)
_report_query(query1, result1)
print("-" * 50)

# Query 2: answered from the Python document
query2 = "What are the key features of Python programming language?"
result2 = rag_agent.generate_response(query2)
_report_query(query2, result2)
print("-" * 50)

# Query 3: answered from the data science document
query3 = "What skills do I need to become a data scientist?"
result3 = rag_agent.generate_response(query3)
_report_query(query3, result3)

## Example 2: Cross-Document Queries

In [None]:
print("\n=== Example 2: Cross-Document Queries ===")

# A question whose answer is spread over more than one sample document
cross_query = "How is Python used in machine learning and data science?"
cross_result = rag_agent.generate_response(cross_query)

_cross_answer = cross_result.get('response', 'Error occurred')
_cross_source_total = cross_result.get('sources_count', 0)
print(f"Query: {cross_query}")
print(f"Response: {_cross_answer}")
print(f"Sources used: {_cross_source_total}")

# List the chunks that fed the answer, best match first
_cross_hits = cross_result.get('retrieval_results')
if _cross_hits:
    print("\nRetrieved sources:")
    for _rank, _hit in enumerate(_cross_hits, 1):
        print(f"{_rank}. {_hit.document_title} (Score: {_hit.score:.3f})")
        print(f"   Content preview: {_hit.chunk.content[:100]}...")

## Example 3: Conversation with Context

In [None]:
print("\n=== Example 3: Conversation with Context ===")


def _show_turn(user_text, outcome):
    """Print one user/agent exchange."""
    print(f"User: {user_text}")
    print(f"Agent: {outcome.get('response', 'Error occurred')}")


# Start from an empty history so earlier examples do not leak into this one
rag_agent.clear_conversation_history()

# Opening question
conv_query1 = "Tell me about deep learning"
conv_result1 = rag_agent.generate_response(conv_query1)
_show_turn(conv_query1, conv_result1)
print()

# Follow-up: "it" only makes sense with the previous exchange in history
conv_query2 = "What are some practical applications of it?"
conv_result2 = rag_agent.generate_response(conv_query2)
_show_turn(conv_query2, conv_result2)
print()

# Third turn, again relying on the running conversation
conv_query3 = "Which Python libraries would be useful for this?"
conv_result3 = rag_agent.generate_response(conv_query3)
_show_turn(conv_query3, conv_result3)

## Example 4: Query Without Relevant Context

In [None]:
print("\n=== Example 4: Query Without Relevant Context ===")

# Ask about a topic none of the sample documents cover
no_context_query = "What is the weather like today in New York?"
no_context_result = rag_agent.generate_response(no_context_query)

_no_context_answer = no_context_result.get('response', 'Error occurred')
_no_context_sources = no_context_result.get('sources_count', 0)
print(f"Query: {no_context_query}")
print(f"Response: {_no_context_answer}")
print(f"Sources used: {_no_context_sources}")

# Expected outcome: the agent states that the knowledge base does not contain this information

## Utility Functions for RAG Analysis

In [None]:
def analyze_retrieval_quality(rag_agent, query: str, expected_docs: Optional[List[str]] = None):
    """
    Print the chunks retrieved for a query and, optionally, precision/recall.

    Args:
        rag_agent: Agent exposing ``retrieve_context(query, top_k=...)``.
        query: Query to analyse; the top 5 chunks are requested.
        expected_docs: Titles of the documents that should be retrieved.
            When given (and non-empty), document-level precision and recall
            are printed.

    Returns:
        None. All output is printed.
    """
    print(f"\n=== Retrieval Analysis for: '{query}' ===")

    # Only the result list is needed here; the formatted context is ignored
    _, results = rag_agent.retrieve_context(query, top_k=5)

    print(f"Retrieved {len(results)} chunks:")
    for i, result in enumerate(results, 1):
        print(f"\n{i}. Document: {result.document_title}")
        print(f"   Source: {result.document_source}")
        print(f"   Similarity Score: {result.score:.4f}")
        print(f"   Content: {result.chunk.content[:200]}...")

    if expected_docs:
        # Several chunks can come from the same document. Compare distinct
        # titles so precision is not deflated by repeated hits; dict.fromkeys
        # de-duplicates while keeping rank order.
        retrieved_docs = list(dict.fromkeys(result.document_title for result in results))
        expected_titles = set(expected_docs)
        overlap = [title for title in retrieved_docs if title in expected_titles]

        precision = len(overlap) / len(retrieved_docs) if retrieved_docs else 0
        recall = len(overlap) / len(expected_titles)

        print(f"\nExpected documents: {expected_docs}")
        print(f"Retrieved documents: {retrieved_docs}")
        print(f"Overlap: {overlap}")
        print(f"Precision: {precision:.2f}")
        print(f"Recall: {recall:.2f}")

# Example retrieval analysis: each case pairs a query with the titles expected back
_analysis_cases = (
    (
        "machine learning algorithms",
        ["Introduction to Artificial Intelligence and Machine Learning"],
    ),
    (
        "Python libraries for data analysis",
        ["Python Programming Language Guide", "Data Science: Process, Skills, and Tools"],
    ),
)
for _case_query, _case_expected in _analysis_cases:
    analyze_retrieval_quality(rag_agent, _case_query, expected_docs=_case_expected)

## Interactive RAG Session

In [None]:
def interactive_rag_session(rag_agent, max_questions=3):
    """
    Start an interactive RAG session.

    Reads questions from standard input and prints the agent's answers until
    ``max_questions`` questions have been answered, the user types
    'quit'/'exit', or input ends. The commands 'docs' and 'stats' print
    information about the knowledge base and do not count as questions.

    Args:
        rag_agent: Agent exposing ``generate_response``, ``list_documents``
            and ``get_knowledge_base_stats``.
        max_questions: Number of questions to answer before stopping.
    """
    print("\n=== Interactive RAG Session ===")
    print("I can answer questions based on the documents in my knowledge base.")
    print(f"Knowledge base contains {len(rag_agent.list_documents())} documents.")
    print("Type 'quit' to end the session, 'docs' to list documents, 'stats' for statistics.")
    print("-" * 60)

    question_count = 0
    while question_count < max_questions:
        # Read input on its own: if stdin is closed or unavailable, input()
        # fails on every call, so retrying would loop forever.
        try:
            user_input = input(f"\nQuestion {question_count + 1}/{max_questions}: ")
        except KeyboardInterrupt:
            print("\nSession interrupted.")
            break
        except (EOFError, OSError):
            print("\nNo more input available. Session ended.")
            break

        command = user_input.strip().lower()

        try:
            if command in ('quit', 'exit'):
                print("Session ended. Thank you!")
                break
            elif command == 'docs':
                print("\nDocuments in knowledge base:")
                for doc in rag_agent.list_documents():
                    print(f"- {doc['title']} (Source: {doc['source']})")
                continue
            elif command == 'stats':
                stats = rag_agent.get_knowledge_base_stats()
                print("\nKnowledge base statistics:")
                for key, value in stats.items():
                    print(f"- {key}: {value}")
                continue

            if not command:
                print("Please enter a question.")
                continue

            # Generate response
            result = rag_agent.generate_response(user_input)

            if result.get('success'):
                print(f"\nAnswer: {result['response']}")
                if result.get('sources_count', 0) > 0:
                    print(f"\n[Based on {result['sources_count']} source(s) from the knowledge base]")
                else:
                    print("\n[No relevant sources found in knowledge base - using general knowledge]")
            else:
                print(f"\nError: {result.get('error', 'Unknown error occurred')}")

            question_count += 1

        except KeyboardInterrupt:
            print("\nSession interrupted.")
            break
        except Exception as e:
            # Keep the session alive after an unexpected failure; the next
            # iteration waits for fresh input, so this cannot spin.
            print(f"Error: {e}")

# Uncomment to run interactive session
# interactive_rag_session(rag_agent)

## Knowledge Base Management

In [None]:
def display_knowledge_base_info(rag_agent):
    """
    Print an overview of the agent's knowledge base.

    Output has three parts: overall counts, one entry per stored document,
    and the number of chunks held for each document.
    """
    print("\n=== Knowledge Base Information ===")

    # Overall counts
    kb_stats = rag_agent.get_knowledge_base_stats()
    print(f"Total documents: {kb_stats['total_documents']}")
    print(f"Total chunks: {kb_stats['total_chunks']}")
    print(f"Embedding dimension: {kb_stats['embedding_dimension']}")

    # Per-document listing
    print("\nDocument Details:")
    documents = rag_agent.list_documents()
    for entry in documents:
        print(f"\n- ID: {entry['id']}")
        print(f"  Title: {entry['title']}")
        print(f"  Source: {entry['source']}")
        print(f"  Content length: {entry['content_length']} characters")
        print(f"  Created: {entry['created_at']}")
        if entry['metadata']:
            print(f"  Metadata: {entry['metadata']}")

    # Tally chunks by owning document, in first-seen order
    tally = {}
    for stored_chunk in rag_agent.vector_store.chunks.values():
        owner = stored_chunk.document_id
        tally[owner] = tally.get(owner, 0) + 1

    # Title lookup; the first listed entry wins for a given id
    title_by_id = {}
    for entry in documents:
        title_by_id.setdefault(entry['id'], entry['title'])

    print("\nChunks per document:")
    for owner, chunk_total in tally.items():
        # Fall back to the raw id when the document is not in the listing
        label = title_by_id.get(owner, owner)
        print(f"- {label}: {chunk_total} chunks")

# Print the overview (counts, per-document details, chunks per document)
# for the agent's current knowledge base
display_knowledge_base_info(rag_agent)

## Advanced RAG Features

Here are some advanced features and extensions you can implement:

### 1. Multi-Modal RAG
Extend to handle images, audio, and video content alongside text.

### 2. Hierarchical Document Structure
Maintain document structure (headings, sections) for better context.

### 3. Dynamic Knowledge Updates
Automatically update the knowledge base from web sources or databases.

### 4. Fact Verification
Cross-reference information across multiple sources for accuracy.

### 5. Personalized Retrieval
Adapt retrieval based on user preferences and interaction history.

### 6. Explainable AI
Provide detailed explanations of why certain sources were selected.

In [None]:
# Example: Enhanced RAG with confidence scoring
class EnhancedRAGAgent(RAGAgent):
    """RAG agent that also reports a retrieval-based confidence estimate."""

    def generate_response_with_confidence(self, query: str) -> Dict[str, Any]:
        """Generate a response and attach a confidence score and level.

        The score is the midpoint between the mean and the maximum retrieval
        similarity, clamped to [0, 1]. A successful answer with no retrieved
        chunks gets a score of 0.0.

        Returns:
            The dict from ``generate_response``. On success it additionally
            contains ``confidence_score`` (float) and ``confidence_level``
            ("High", "Medium", "Low" or "Very Low"). Failed results are
            returned unchanged.
        """
        result = self.generate_response(query)
        if not result.get('success'):
            return result

        # Plain floats keep the score independent of NumPy scalar types
        scores = [float(r.score) for r in result.get('retrieval_results') or []]

        if scores:
            avg_score = sum(scores) / len(scores)
            confidence = max(0.0, min(1.0, (avg_score + max(scores)) / 2))
        else:
            # Nothing retrieved: the answer is not grounded in the knowledge base
            confidence = 0.0

        result['confidence_score'] = confidence
        result['confidence_level'] = self._confidence_level(confidence)
        return result

    @staticmethod
    def _confidence_level(confidence: float) -> str:
        """Map a confidence score to its label."""
        if confidence >= 0.8:
            return "High"
        if confidence >= 0.6:
            return "Medium"
        if confidence >= 0.4:
            return "Low"
        return "Very Low"

# Example usage of enhanced agent
enhanced_agent = EnhancedRAGAgent()

# Seed the new agent with the AI overview text (only this one document is added)
enhanced_agent.add_document_from_text(
    title="AI Overview",
    content=ai_content,
    source="tutorial"
)

# Test with confidence scoring
enhanced_result = enhanced_agent.generate_response_with_confidence(
    "What is artificial intelligence?"
)

print("\n=== Enhanced RAG with Confidence Scoring ===")
if enhanced_result.get('success'):
    # Format the score only when it is present: applying ':.3f' to the
    # 'N/A' placeholder would raise ValueError.
    confidence_score = enhanced_result.get('confidence_score')
    confidence_text = f"{confidence_score:.3f}" if confidence_score is not None else "N/A"

    print(f"Response: {enhanced_result['response']}")
    print(f"Confidence Score: {confidence_text}")
    print(f"Confidence Level: {enhanced_result.get('confidence_level', 'N/A')}")
    print(f"Sources: {enhanced_result.get('sources_count', 0)}")
else:
    print(f"Error: {enhanced_result.get('error')}")

## Summary

This RAG AI agent demonstrates:

1. **Document Ingestion**: Converting text documents into searchable chunks with embeddings
2. **Semantic Search**: Finding relevant information using vector similarity
3. **Context Integration**: Combining retrieved information with AI generation
4. **Conversation Memory**: Maintaining context across multiple exchanges
5. **Quality Assessment**: Evaluating retrieval quality and response confidence

### Key Benefits of RAG:
- **Factual Accuracy**: Responses grounded in provided documents
- **Up-to-date Information**: Knowledge base can be updated without retraining
- **Source Attribution**: Clear indication of information sources
- **Domain Specialization**: Focus on specific knowledge domains
- **Scalability**: Can handle large document collections efficiently

### Use Cases:
- Customer support systems
- Internal knowledge management
- Research assistants
- Educational tutoring systems
- Technical documentation helpers