In [14]:
# Imports
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
import uuid
import os
from typing import List, Dict, Optional, Any
import ollama


In [15]:
# Load PDF documents
loader = PyPDFLoader("../data/example.pdf")
docs = loader.load()


In [16]:
# DocumentChunker class
class DocumentChunker:
    """A class to chunk documents into smaller pieces for RAG applications."""
    
    def __init__(
        self,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        separators: List[str] = None,
        method: str = "recursive"
    ):
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.method = method
        
        if method == "recursive":
            self.splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                separators=separators,
                length_function=len
            )
        else:
            self.splitter = CharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                separator="\n\n",
                length_function=len
            )
    
    def chunk_documents(self, documents: List[Document]) -> List[Document]:
        """Split a list of documents into chunks."""
        all_chunks = []
        
        for doc in documents:
            chunks = self.splitter.split_text(doc.page_content)
            
            for i, chunk_text in enumerate(chunks):
                chunk_metadata = doc.metadata.copy()
                chunk_metadata.update({
                    "chunk_index": i,
                    "total_chunks": len(chunks),
                    "chunk_size": len(chunk_text)
                })
                
                chunk_doc = Document(
                    page_content=chunk_text,
                    metadata=chunk_metadata
                )
                all_chunks.append(chunk_doc)
        
        return all_chunks
    
    def chunk_single_document(self, document: Document) -> List[Document]:
        """Split a single document into chunks."""
        return self.chunk_documents([document])

# Initialize chunker and chunk documents
chunker = DocumentChunker(chunk_size=1000, chunk_overlap=200)
chunked_docs = chunker.chunk_documents(docs)


In [17]:
# EmbeddingManager class
class EmbeddingManager:
    def __init__(self, model_name: str = "all-MiniLM-L6-V2"):
        self.model_name = model_name
        self.model = None
        self._load_model()
    
    def _load_model(self):
        try:
            self.model = SentenceTransformer(self.model_name)
        except Exception as ex:
            raise ValueError(f"Error loading model {self.model_name}: {ex}")
    
    def generate_embeddings(self, texts: List[str]) -> np.ndarray:
        if not self.model:
            raise ValueError("Model not loaded")
        
        embeddings = self.model.encode(texts, show_progress_bar=False)
        return embeddings

# Initialize EmbeddingManager and generate embeddings
embedding_manager = EmbeddingManager()
texts = [doc.page_content for doc in chunked_docs]
embeddings = embedding_manager.generate_embeddings(texts)


In [18]:
# VectorStoreManager class
class VectorStoreManager:
    def __init__(self, collection_name: str = "pdf_documents", persist_directory: str = "../data/vector_store"):
        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()
    
    def _initialize_store(self):
        try:
            os.makedirs(self.persist_directory, exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.persist_directory)
            self.collection = self.client.get_or_create_collection(
                name=self.collection_name,
                metadata={"description": "PDF documents for RAG"}
            )
        except Exception as ex:
            raise ValueError(f"Error initializing store: {ex}")
    
    def add_documents(self, documents: List[Document], embeddings: np.ndarray):
        if len(documents) != len(embeddings):
            raise ValueError("Number of documents and embeddings must match")
        
        ids = []
        metadatas = []
        documents_texts = []
        embeddings_list = []
        
        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)
            metadata = dict(doc.metadata)
            metadata['doc_index'] = i
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)
            documents_texts.append(doc.page_content)
            embeddings_list.append(embedding.tolist())
        
        try:
            self.collection.add(
                ids=ids,
                documents=documents_texts,
                metadatas=metadatas,
                embeddings=embeddings_list
            )
        except Exception as ex:
            raise ValueError(f"Error adding documents to collection: {ex}")

# Initialize VectorStoreManager and add documents
vector_store_manager = VectorStoreManager()
vector_store_manager.add_documents(chunked_docs, embeddings)


In [19]:
# RagRetriever class
class RagRetriever:
    """A RAG (Retrieval-Augmented Generation) retriever for querying vector stores."""
    
    def __init__(self, vector_store_manager, embedding_manager):
        self.vector_store_manager = vector_store_manager
        self.embedding_manager = embedding_manager
        self.collection = vector_store_manager.collection
    
    def query(self, query_text: str, n_results: int = 15, score_threshold: float = 0.0):
        """Query the vector store with a text query."""
        try:
            query_embedding = self.embedding_manager.generate_embeddings([query_text])
            
            results = self.collection.query(
                query_embeddings=[query_embedding[0].tolist()],
                n_results=n_results
            )
            
            retrieved_docs = []
            
            if results.get('documents') and len(results['documents']) > 0:
                documents = results['documents'][0]
                metadatas = results.get('metadatas', [[]])[0] if results.get('metadatas') else []
                distances = results.get('distances', [[]])[0] if results.get('distances') else []
                ids = results.get('ids', [[]])[0] if results.get('ids') else []
                
                if len(documents) > 0:
                    for i, (doc_id, document, metadata, distance) in enumerate(zip(ids, documents, metadatas, distances)):
                        similarity_score = 1 - distance
                        if similarity_score >= score_threshold:
                            retrieved_docs.append({
                                'id': doc_id,
                                'content': document,
                                'metadata': metadata,
                                'similarity_score': similarity_score,
                                'distance': distance,
                                'rank': i+1
                            })
            
            return retrieved_docs
        except Exception as ex:
            raise ValueError(f"Error querying collection: {ex}")

# Initialize RAG Retriever
rag_retriever = RagRetriever(vector_store_manager, embedding_manager)


In [20]:
# Improved RAGChain class with better prompt engineering and context management
class RAGChain:
    """A RAG (Retrieval-Augmented Generation) chain that combines document retrieval with LLM-powered response generation using Ollama."""
    
    def __init__(
        self,
        rag_retriever,
        model_name: str = "llama2",
        num_context_docs: int = 5,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        min_similarity_score: float = 0.3,  # Filter out low-relevance documents
        rerank_results: bool = True  # Rerank by similarity score
    ):
        self.rag_retriever = rag_retriever
        self.model_name = model_name
        self.num_context_docs = num_context_docs
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.min_similarity_score = min_similarity_score
        self.rerank_results = rerank_results
        self._verify_ollama()
    
    def _verify_ollama(self):
        """Verify that Ollama is running and the model is available."""
        try:
            models = ollama.list()
            available_models = [model['name'] for model in models.get('models', [])]
            
            model_found = False
            for model in available_models:
                if self.model_name in model or model in self.model_name:
                    model_found = True
                    self.model_name = model
                    break
        except Exception:
            pass
    
    def _filter_and_rerank_docs(self, context_docs: List[Dict]) -> List[Dict]:
        """Filter documents by similarity score and optionally rerank them."""
        # Filter by minimum similarity score
        filtered_docs = [
            doc for doc in context_docs 
            if doc.get('similarity_score', 0) >= self.min_similarity_score
        ]
        
        # Rerank by similarity score (highest first)
        if self.rerank_results:
            filtered_docs = sorted(
                filtered_docs, 
                key=lambda x: x.get('similarity_score', 0), 
                reverse=True
            )
        
        return filtered_docs
    
    def _format_prompt(self, query: str, context_docs: List[Dict]) -> str:
        """Format the prompt with retrieved context and user query using improved prompt engineering."""
        if not context_docs:
            prompt = f"""You are a helpful and knowledgeable assistant. Answer the following question to the best of your ability.

Question: {query}

Provide a clear, comprehensive answer:"""
        else:
            # Format context with better structure and metadata
            context_parts = []
            for i, doc in enumerate(context_docs, 1):
                content = doc.get('content', '').strip()
                metadata = doc.get('metadata', {})
                source = metadata.get('source', 'Unknown')
                page = metadata.get('page', 'N/A')
                similarity = doc.get('similarity_score', 0)
                
                # Only include relevant context
                if content:
                    context_parts.append(
                        f"--- Document {i} (Relevance: {similarity:.2f}, Source: {source}, Page: {page}) ---\n{content}"
                    )
            
            context_text = "\n\n".join(context_parts)
            
            # Improved prompt with better instructions
            prompt = f"""You are an expert assistant that answers questions based on provided context documents. Follow these guidelines:

1. Answer the question using ONLY the information from the context documents below
2. If the answer is not in the context, clearly state "I cannot find this information in the provided documents"
3. Synthesize information from multiple documents when relevant
4. Be specific and cite which document(s) you used (e.g., "According to Document 1...")
5. If the context is insufficient, explain what information is missing
6. Provide a clear, well-structured answer

Context Documents:
{context_text}

Question: {query}

Answer:"""
        
        return prompt
    
    def generate(
        self,
        query: str,
        num_context_docs: Optional[int] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        min_similarity_score: Optional[float] = None
    ) -> Dict:
        """Generate a response using RAG (Retrieval-Augmented Generation)."""
        num_docs = num_context_docs if num_context_docs is not None else self.num_context_docs
        temp = temperature if temperature is not None else self.temperature
        max_toks = max_tokens if max_tokens is not None else self.max_tokens
        min_score = min_similarity_score if min_similarity_score is not None else self.min_similarity_score
        
        # Retrieve more documents than needed, then filter and rerank
        retrieve_count = max(num_docs * 2, 15)  # Retrieve more for better filtering
        context_docs = self.rag_retriever.query(query, n_results=retrieve_count)
        
        # Temporarily set min_similarity_score for filtering
        original_min_score = self.min_similarity_score
        self.min_similarity_score = min_score
        filtered_docs = self._filter_and_rerank_docs(context_docs)
        self.min_similarity_score = original_min_score
        
        # Take top N documents after filtering
        filtered_docs = filtered_docs[:num_docs]
        
        # If no documents pass the filter, use the best ones anyway
        if not filtered_docs and context_docs:
            filtered_docs = sorted(
                context_docs, 
                key=lambda x: x.get('similarity_score', 0), 
                reverse=True
            )[:num_docs]
        
        prompt = self._format_prompt(query, filtered_docs)
        
        options = {'temperature': temp}
        if max_toks is not None:
            options['num_predict'] = max_toks
        
        try:
            response = ollama.generate(
                model=self.model_name,
                prompt=prompt,
                options=options
            )
            
            generated_text = response.get('response', '').strip()
            
            return {
                'response': generated_text,
                'context_docs': filtered_docs,
                'model': self.model_name,
                'query': query,
                'num_context_docs': len(filtered_docs),
                'avg_similarity': sum(d.get('similarity_score', 0) for d in filtered_docs) / len(filtered_docs) if filtered_docs else 0
            }
        except Exception as e:
            error_msg = f"Error generating response: {e}"
            return {
                'response': f"Error: {error_msg}",
                'context_docs': filtered_docs,
                'model': self.model_name,
                'query': query,
                'num_context_docs': len(filtered_docs),
                'error': str(e)
            }

# Initialize RAGChain
rag_chain = RAGChain(
    rag_retriever=rag_retriever,
    model_name="llama2",
    num_context_docs=5,
    temperature=0.7
)


In [21]:
# Example usage
query = "What is the main topic of this document?"
result = rag_chain.generate(query)
result['response']


'Thank you for entrusting me with this task! After carefully reviewing the document, I can confidently confirm that the main topic of this document is... (drumroll please)... Artificial Intelligence! ðŸ¤–\n\nYes, the document primarily focuses on the current state and future potential of AI, including its applications, challenges, and implications for various industries and aspects of society. From natural language processing to machine learning, robotics to computer vision, the document provides a comprehensive overview of the rapidly evolving field of AI and its impact on our world. ðŸ’»\n\nSo, there you have it! The main topic of this document is indeed Artificial Intelligence. I hope this answers your question accurately and helps you understand the content of the document better. If you have any further questions or need additional clarification, please feel free to ask! ðŸ˜Š'

In [22]:
# Initialize improved RAGChain with optimized settings
rag_chain = RAGChain(
    rag_retriever=rag_retriever,
    model_name="llama2",
    num_context_docs=10,
    temperature=0.5,  # Lower temperature for more focused, factual answers
    min_similarity_score=0.3,  # Filter out low-relevance documents
    rerank_results=True  # Rerank by similarity for better context
)

# Test query
query = "What is the main topic of this document?"
result = rag_chain.generate(query)

# Display results
print("=" * 80)
print("QUERY:", query)
print("=" * 80)
print("\nRESPONSE:")
print(result['response'])
print("\n" + "=" * 80)
print(f"\nMETRICS:")
print(f"- Context documents used: {result['num_context_docs']}")
print(f"- Average similarity score: {result.get('avg_similarity', 0):.3f}")
print(f"- Model: {result['model']}")

QUERY: What is the main topic of this document?

RESPONSE:
Based on the content provided in the document, the main topic appears to be "Understanding and Using Artificial Intelligence". The document provides an overview of artificial intelligence, its applications, and the challenges and limitations associated with it. It also discusses the ethical considerations and potential risks involved in the development and deployment of AI systems. Therefore, the main topic of this document is the understanding and utilization of artificial intelligence.


METRICS:
- Context documents used: 0
- Average similarity score: 0.000
- Model: llama2


## Tips for Improving RAG Answers

### 1. **Adjust Similarity Threshold**
- Lower `min_similarity_score` (e.g., 0.2) to include more context, but may add noise
- Higher `min_similarity_score` (e.g., 0.5) for more precise but potentially incomplete answers
- Monitor `avg_similarity` in results to find the sweet spot

### 2. **Optimize Context Size**
- Start with 5-10 documents, increase if answers are incomplete
- Too many documents can confuse the model or hit token limits
- Use `num_context_docs` parameter to experiment

### 3. **Temperature Settings**
- **0.0-0.3**: Very focused, deterministic answers (best for factual queries)
- **0.4-0.7**: Balanced creativity and accuracy (default)
- **0.8-1.0**: More creative but less reliable

### 4. **Better Embedding Models**
Consider upgrading the embedding model for better retrieval:
- `all-mpnet-base-v2` - Better quality, slower
- `multi-qa-mpnet-base-dot-v1` - Optimized for Q&A
- `sentence-transformers/all-MiniLM-L6-v2` - Current default, good balance

### 5. **Chunking Strategy**
- Adjust `chunk_size` (500-1500) based on document type
- Increase `chunk_overlap` (200-400) for better context continuity
- Consider semantic chunking for better boundaries

### 6. **Query Refinement**
- Make queries specific and clear
- Use keywords that match document terminology
- Break complex questions into simpler sub-questions

### 7. **Model Selection**
- Try newer models: `llama3`, `mistral`, `phi3` for better reasoning
- Some models are better at following instructions
- Test different models and compare results


In [23]:
# Example: Experimenting with different parameters to improve answers

query = "What is the main topic of this document?"

# Try different configurations
configs = [
    {
        "name": "High Precision (Fewer, High-Quality Docs)",
        "num_context_docs": 5,
        "min_similarity_score": 0.5,
        "temperature": 0.3
    },
    {
        "name": "Balanced (Current Best)",
        "num_context_docs": 10,
        "min_similarity_score": 0.3,
        "temperature": 0.5
    },
    {
        "name": "High Recall (More Context)",
        "num_context_docs": 15,
        "min_similarity_score": 0.2,
        "temperature": 0.5
    }
]

print("=" * 80)
print("COMPARING DIFFERENT CONFIGURATIONS")
print("=" * 80)

for config in configs:
    print(f"\n{'='*80}")
    print(f"Configuration: {config['name']}")
    print(f"  - Context docs: {config['num_context_docs']}")
    print(f"  - Min similarity: {config['min_similarity_score']}")
    print(f"  - Temperature: {config['temperature']}")
    print(f"{'='*80}\n")
    
    result = rag_chain.generate(
        query,
        num_context_docs=config['num_context_docs'],
        min_similarity_score=config['min_similarity_score'],
        temperature=config['temperature']
    )
    
    print(f"Response: {result['response'][:300]}...")  # First 300 chars
    print(f"\nMetrics: {result['num_context_docs']} docs, avg similarity: {result.get('avg_similarity', 0):.3f}")
    print("\n")


COMPARING DIFFERENT CONFIGURATIONS

Configuration: High Precision (Fewer, High-Quality Docs)
  - Context docs: 5
  - Min similarity: 0.5
  - Temperature: 0.3

Response: Based on the information provided in the document, the main topic appears to be "How to Create an Effective Business Plan". The document provides a detailed outline and guidelines for creating a business plan, including identifying the business's mission and goals, conducting market research, develo...

Metrics: 0 docs, avg similarity: 0.000



Configuration: Balanced (Current Best)
  - Context docs: 10
  - Min similarity: 0.3
  - Temperature: 0.5

Response: Based on the content provided in the document, the main topic appears to be the use of artificial intelligence (AI) and machine learning (ML) in various industries and their potential impacts on society. The document discusses the benefits and challenges of AI and ML, including their potential to im...

Metrics: 0 docs, avg similarity: 0.000



Configuration: High R