## Data Ingestion

In [5]:
from langchain_core.documents import Document


In [None]:
doc = Document(page_content="Hello, world!", metadata={"source": "example.pdf", "author": "John Doe","date": "2021-01-01"})
doc


Hello, world!
{'source': 'example.pdf', 'author': 'John Doe', 'date': '2021-01-01'}


In [6]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("../data/example.pdf")
docs = loader.load()


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from langchain_community.document_loaders import PyMuPDFLoader
loaderNew = PyMuPDFLoader("../data/example.pdf")
docsNew = loaderNew.load()



{'producer': 'Skia/PDF m88', 'creator': 'Adobe Acrobat Pro 11.0.9', 'creationdate': '2020-11-09T15:54:35+02:00', 'source': '../data/example.pdf', 'file_path': '../data/example.pdf', 'total_pages': 658, 'format': 'PDF 1.6', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2020-11-09T16:02:22+02:00', 'trapped': '', 'modDate': "D:20201109160222+02'00'", 'creationDate': "D:20201109155435+02'00'", 'page': 0}


In [None]:
len(docs)

Number of documents: 658
This is because PyPDFLoader creates ONE document per PAGE
Your PDF has 658 pages

First document metadata:
{'producer': 'Skia/PDF m88', 'creator': 'Adobe Acrobat Pro 11.0.9', 'creationdate': '2020-11-09T15:54:35+02:00', 'moddate': '2020-11-09T16:02:22+02:00', 'title': '', 'source': '../data/example.pdf', 'total_pages': 658, 'page': 0, 'page_label': '1'}
{'producer': 'Skia/PDF m88', 'creator': 'Adobe Acrobat Pro 11.0.9', 'creationdate': '2020-11-09T15:54:35+02:00', 'moddate': '2020-11-09T16:02:22+02:00', 'title': '', 'source': '../data/example.pdf', 'total_pages': 658, 'page': 657, 'page_label': '658'}


## Why Multiple Documents?

**PyPDFLoader creates one Document object per page** of the PDF. This is by design and beneficial for RAG applications:

1. **Granular Retrieval**: You can retrieve specific pages relevant to a query
2. **Manageable Chunk Sizes**: Each page is a separate unit for embedding
3. **Better Context**: Page-level metadata helps track where information came from

If you need a single combined document, see the cell below.


In [None]:
# Optional: Combine all pages into a single document
from langchain_core.documents import Document

# Combine all page contents
combined_content = "\n\n".join([doc.page_content for doc in docs])

# Create a single document with combined metadata
combined_doc = Document(
    page_content=combined_content,
    metadata={
        "source": docs[0].metadata.get("source"),
        "total_pages": len(docs),
        "combined": True
    }
)


Combined document length: 1089938 characters
Combined document metadata: {'source': '../data/example.pdf', 'total_pages': 658, 'combined': True}


In [None]:
# Iterate through documents
for doc in docs:
    pass




## Text Chunking

Chunking breaks down large documents into smaller, manageable pieces for better embedding and retrieval. This is especially useful when:
- Pages are too long for effective embeddings
- You want more granular retrieval
- You need to respect token limits


In [None]:
# Try modern import first, fallback to legacy import
try:
    from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter
except ImportError:
    from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter

from langchain_core.documents import Document
from typing import List

class DocumentChunker:
    """
    A class to chunk documents into smaller pieces for RAG applications.
    """
    
    def __init__(
        self,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        separators: List[str] = None,
        method: str = "recursive"
    ):
        """
        Initialize the chunker.
        
        Args:
            chunk_size: Maximum size of chunks (in characters)
            chunk_overlap: Overlap between chunks (in characters) to preserve context
            separators: List of separators to use for splitting (default: None uses smart defaults)
            method: "recursive" (smart splitting) or "character" (simple character-based)
        """
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.method = method
        
        if method == "recursive":
            self.splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                separators=separators,
                length_function=len
            )
        else:  # character-based
            self.splitter = CharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                separator="\n\n",
                length_function=len
            )
    
    def chunk_documents(self, documents: List[Document]) -> List[Document]:
        """
        Split a list of documents into chunks.
        
        Args:
            documents: List of Document objects to chunk
            
        Returns:
            List of Document objects (chunks) with preserved metadata
        """
        all_chunks = []
        
        for doc in documents:
            # Split the document content
            chunks = self.splitter.split_text(doc.page_content)
            
            # Create new Document objects for each chunk with metadata
            for i, chunk_text in enumerate(chunks):
                chunk_metadata = doc.metadata.copy()
                chunk_metadata.update({
                    "chunk_index": i,
                    "total_chunks": len(chunks),
                    "chunk_size": len(chunk_text)
                })
                
                chunk_doc = Document(
                    page_content=chunk_text,
                    metadata=chunk_metadata
                )
                all_chunks.append(chunk_doc)
        
        return all_chunks
    
    def chunk_single_document(self, document: Document) -> List[Document]:
        """
        Split a single document into chunks.
        
        Args:
            document: Document object to chunk
            
        Returns:
            List of Document objects (chunks)
        """
        return self.chunk_documents([document])

# Example usage
chunker = DocumentChunker(chunk_size=1000, chunk_overlap=200)


Creating DocumentChunker with default settings...
Chunk size: 1000, Overlap: 200


In [None]:
# Example: Chunk all pages of the PDF
all_docs = docs  # All pages
chunked_docs = chunker.chunk_documents(all_docs)


Chunking all pages of the PDF...
This may take a while for 658 pages...

Original documents: 658
Chunked documents: 1527

First chunk preview:
Content length: 195 characters
Metadata: {'producer': 'Skia/PDF m88', 'creator': 'Adobe Acrobat Pro 11.0.9', 'creationdate': '2020-11-09T15:54:35+02:00', 'moddate': '2020-11-09T16:02:22+02:00', 'title': '', 'source': '../data/example.pdf', 'total_pages': 658, 'page': 0, 'page_label': '1', 'chunk_index': 0, 'total_chunks': 1, 'chunk_size': 195}

Content preview (first 200 chars):
Preface - Neural Networks from Scratch in Python
 
2
 
 
 
 
 
 
 
 
Neural Networks
 
 
 
from Scratch in
 
 
 
Python
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Harrison Kinsley & Daniel Kukieła...


In [None]:
# Example: Chunk all documents (all pages)
# Uncomment to chunk the entire PDF (this may take a while for 658 pages)
# print("Chunking all pages...")
# all_chunked_docs = chunker.chunk_documents(docs)
# print(f"Original: {len(docs)} pages")
# print(f"After chunking: {len(all_chunked_docs)} chunks")


In [None]:
# Example: Custom chunking with different parameters
# For longer context windows or different use cases
custom_chunker = DocumentChunker(
    chunk_size=2000,      # Larger chunks
    chunk_overlap=400,     # More overlap for better context
    method="recursive"     # Smart splitting
)


Custom chunker created:
Chunk size: 2000
Overlap: 400


### Embedding and Vector Store  

In [10]:
import numpy as np
from sentence_transformers import SentenceTransformer 
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Any
from sklearn.metrics.pairwise import cosine_similarity  


In [11]:
class EmbeddingManager:
    def __init__(self, model_name:str = "all-MiniLM-L6-V2"):
        self.model_name = model_name
        self.model = None
        self._load_model()
    
    def _load_model(self):
        try:
            self.model = SentenceTransformer(self.model_name)
        except Exception as ex:
            raise ValueError(f"Error loading model {self.model_name}: {ex}")
    
    def generate_embeddings(self, texts: List[str]) -> np.ndarray:
        if not self.model :
            raise ValueError("Model not loaded")
        
        embeddings = self.model.encode(texts, show_progress_bar=False)
        return embeddings
    
## Intialize Embedding Manager
embedding_manager = EmbeddingManager()
## Generate Embeddings
# Extract page_content from Document objects to get list of strings
texts = [doc.page_content for doc in chunked_docs]
embeddings = embedding_manager.generate_embeddings(texts)

    
    

loading model all-MiniLM-L6-V2
Model dimensions 384
 Generating embeddings for 1527 texts ...


Batches: 100%|██████████| 48/48 [00:05<00:00,  9.03it/s]

Generated embedding with shape (1527, 384)





### VectoreStore

In [None]:
from langchain_core.documents.base import Document
import os
from typing import Any
class VectorStoreManager:
    def __init__(self, collection_name:str = "pdf_documents", persist_directory:str = "../data/vector_store"):
        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()
    
    def _initialize_store(self):
        try:
            os.makedirs(self.persist_directory, exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.persist_directory)
            self.collection = self.client.get_or_create_collection(
                name=self.collection_name,
                metadata={"description": "PDF documents for RAG"}
            )
        except Exception as ex:
            raise ValueError(f"Error initializing store: {ex}")
    
    def add_documents(self, documents: List[Document], embeddings: np.ndarray):
        if len(documents) != len(embeddings):
            raise ValueError("Number of documents and embeddings must match")
        ids = []
        metadatas = []
        documents_texts = []
        embeddings_list = []
        
        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)
            metadata = dict(doc.metadata)
            metadata['doc_index'] = 1
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)
            documents_texts.append(doc.page_content)
            embeddings_list.append(embedding.tolist())
        
        try:
            self.collection.add(
                ids=ids,
                documents=documents_texts,
                metadatas=metadatas,
                embeddings=embeddings_list
            )
        except Exception as ex:
            raise ValueError(f"Error adding documents to collection: {ex}")

vector_store_manager = VectorStoreManager()
vector_store_manager

Initializing Chroma client for collection pdf_documents...
Collection pdf_documents initialized successfully
Existing documents in collection: 1527


<__main__.VectorStoreManager at 0x30f16e630>

In [None]:
class RagRetriever:
    """
    A RAG (Retrieval-Augmented Generation) retriever for querying vector stores.
    """
    
    def __init__(self, vector_store_manager, embedding_manager):
        """
        Initialize the RAG Retriever.
        
        Args:
            vector_store_manager: VectorStoreManager instance with the document collection
            embedding_manager: EmbeddingManager instance to generate query embeddings
        """
        self.vector_store_manager = vector_store_manager
        self.embedding_manager = embedding_manager
        self.collection = vector_store_manager.collection
    
    def query(self, query_text: str, n_results: int = 15, score_threshold: float = 0.0):
        """
        Query the vector store with a text query.
        
        Args:
            query_text: The query text to search for
            n_results: Number of results to return
            score_threshold: Minimum similarity score (0.0 to 1.0) to include results
            
        Returns:
            Query results with documents, metadata, and distances
        """
        try:
            # Generate embedding for the query using the same model as documents
            query_embedding = self.embedding_manager.generate_embeddings([query_text])
            
            # Query using the embedding (not query_texts) to match stored embeddings
            results = self.collection.query(
                query_embeddings=[query_embedding[0].tolist()],
                n_results=n_results
            )
            
            retrieved_docs = []
            
            # Check if we have results - ChromaDB returns results as a list of lists (one per query)
            if results.get('documents') and len(results['documents']) > 0:
                documents = results['documents'][0]
                metadatas = results.get('metadatas', [[]])[0] if results.get('metadatas') else []
                distances = results.get('distances', [[]])[0] if results.get('distances') else []
                ids = results.get('ids', [[]])[0] if results.get('ids') else []
                
                if len(documents) > 0:
                    for i, (doc_id, document, metadata, distance) in enumerate(zip(ids, documents, metadatas, distances)):
                        similarity_score = 1 - distance
                        if similarity_score >= score_threshold:
                            retrieved_docs.append({
                                'id': doc_id,
                                'content': document,
                                'metadata': metadata,
                                'similarity_score': similarity_score,
                                'distance': distance,
                                'rank': i+1
                            })
            
            return retrieved_docs
        except Exception as ex:
            raise ValueError(f"Error querying collection: {ex}")

# Initialize RAG Retriever
rag_retriever = RagRetriever(vector_store_manager, embedding_manager)


## Setting Up Ollama Server

Before using RAGChain, you need to start the Ollama server. Here's how:

### Step 1: Install Ollama

If you haven't installed Ollama yet:

**macOS:**
```bash
# Download from https://ollama.com/download or use Homebrew
brew install ollama
```

**Linux:**
```bash
# Download from https://ollama.com/download or use curl
curl -fsSL https://ollama.com/install.sh | sh
```

**Windows:**
- Download the installer from https://ollama.com/download

### Step 2: Start Ollama Server

**Option A: Start in Terminal (Recommended for testing)**
```bash
# Start the server in a terminal window
ollama serve
```

The server will run on `http://localhost:11434` by default.

**Option B: Start as Background Service (macOS/Linux)**
```bash
# On macOS, Ollama typically runs as a service automatically after installation
# Check if it's running:
ollama list

# If not running, start it:
ollama serve &
```

**Option C: Check if Already Running**
```bash
# Test if Ollama is already running
ollama list
# If this works, the server is already running!
```

### How to Stop Ollama Server

**Option 1: If running in a terminal (foreground)**
```bash
# Press Ctrl+C in the terminal where ollama serve is running
```

**Option 2: Find process by port (Recommended if you get "address already in use" error)**
```bash
# Find the process using port 11434 (Ollama's default port)
# macOS/Linux:
lsof -i :11434

# Alternative on Linux:
netstat -tulpn | grep 11434
# or
ss -tulpn | grep 11434

# Kill the process by PID (replace <PID> with the actual process ID from above)
kill <PID>

# Or force kill if needed
kill -9 <PID>
```

**Option 3: Find and kill the process by name**
```bash
# Find the Ollama process
ps aux | grep ollama

# Kill the process by PID (replace <PID> with the actual process ID)
kill <PID>

# Or force kill if needed
kill -9 <PID>
```

**Option 4: Kill all Ollama processes**
```bash
# Kill all ollama processes
pkill ollama

# Or force kill all
pkill -9 ollama
```

**Option 5: Using systemctl (Linux with systemd)**
```bash
# Stop the service
sudo systemctl stop ollama

# Disable auto-start on boot (optional)
sudo systemctl disable ollama
```

**Option 6: Using launchctl (macOS)**
```bash
# Stop the service
launchctl unload ~/Library/LaunchAgents/com.ollama.ollama.plist

# Or if installed system-wide
sudo launchctl unload /Library/LaunchDaemons/com.ollama.ollama.plist
```

**Verify it's stopped:**
```bash
# This should fail if Ollama is stopped
ollama list
```

### Step 3: Download a Model

You need to download at least one model before using RAGChain:

```bash
# Download one or more models (choose based on your needs):
# Single model:
ollama pull llama2

# Or download multiple models at once:
for model in llama2 llama3 mistral phi gemma:2b; do ollama pull $model; done

# Popular models:
# - llama2: Llama 2 (7B parameters)
# - llama3: Llama 3 (8B parameters)  
# - mistral: Mistral 7B
# - phi: Phi-2 (smaller, faster)
# - gemma:2b: Gemma 2B (very fast)

# Check downloaded models:
ollama list
```

### Step 4: Verify Setup

Run the cell below to verify Ollama is working:


In [None]:
# Verify Ollama is running and check available models
try:
    import ollama
    
    # List available models
    models_response = ollama.list()
    available_models = []
    if models_response and 'models' in models_response:
        for model in models_response['models']:
            model_name = model.get('name', 'Unknown')
            available_models.append(model_name)
    
    if available_models:
        # Test a simple generation with the first available model
        test_response = ollama.generate(
            model=available_models[0],
            prompt='Say "Hello" if you can read this.'
        )
    
except ImportError:
    pass
except Exception:
    pass


✓ Ollama is running!

Available models:
  - Unknown (1.56 GB)
  - Unknown (1.49 GB)
  - Unknown (4.07 GB)
  - Unknown (3.56 GB)
  - Unknown (1.88 GB)
  - Unknown (4.58 GB)
  - Unknown (4.58 GB)
  - Unknown (4.34 GB)

✓ Testing Ollama connection with model: Unknown...
  ⚠️  Could not test generation: model 'Unknown' not found (status code: 404)
  But Ollama server is running. You can proceed.


## RAG Chain with Ollama LLM

The RAGChain class combines retrieval with LLM generation using Ollama. It retrieves relevant context documents and uses them to generate contextualized answers.


In [None]:
import ollama
from typing import List, Dict, Optional

class RAGChain:
    """
    A RAG (Retrieval-Augmented Generation) chain that combines document retrieval
    with LLM-powered response generation using Ollama.
    """
    
    def __init__(
        self,
        rag_retriever,
        model_name: str = "llama2",
        num_context_docs: int = 5,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None
    ):
        """
        Initialize the RAG Chain.
        
        Args:
            rag_retriever: RagRetriever instance for document retrieval
            model_name: Name of the Ollama model to use (e.g., "llama2", "mistral", "phi")
            num_context_docs: Number of retrieved documents to include in context
            temperature: Temperature for generation (0.0 to 1.0, default: 0.7)
            max_tokens: Maximum tokens for response (None for no limit)
        """
        self.rag_retriever = rag_retriever
        self.model_name = model_name
        self.num_context_docs = num_context_docs
        self.temperature = temperature
        self.max_tokens = max_tokens
        
        # Verify Ollama is available
        self._verify_ollama()
    
    def _verify_ollama(self):
        """Verify that Ollama is running and the model is available."""
        try:
            # Check if Ollama is running by listing models
            models = ollama.list()
            available_models = [model['name'] for model in models.get('models', [])]
            
            # Check if our model is available (handle model name variations)
            model_found = False
            for model in available_models:
                if self.model_name in model or model in self.model_name:
                    model_found = True
                    self.model_name = model  # Use the exact model name
                    break
        except Exception:
            pass
    
    def _format_prompt(self, query: str, context_docs: List[Dict]) -> str:
        """
        Format the prompt with retrieved context and user query.
        
        Args:
            query: User query
            context_docs: List of retrieved document dictionaries
            
        Returns:
            Formatted prompt string
        """
        if not context_docs:
            # No context retrieved - still answer but note the limitation
            prompt = f"""You are a helpful assistant. Answer the following question based on your knowledge.

Question: {query}

Answer:"""
        else:
            # Format context from retrieved documents
            context_parts = []
            for i, doc in enumerate(context_docs, 1):
                content = doc.get('content', '')
                metadata = doc.get('metadata', {})
                source = metadata.get('source', 'Unknown')
                page = metadata.get('page', 'N/A')
                
                context_parts.append(f"[Context {i} - Source: {source}, Page: {page}]\n{content}")
            
            context_text = "\n\n".join(context_parts)
            
            prompt = f"""Use the following context documents to answer the question. If the answer cannot be found in the context, say so clearly.

Context:
{context_text}

Question: {query}

Answer based on the context above:"""
        
        return prompt
    
    def generate(
        self,
        query: str,
        num_context_docs: Optional[int] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None
    ) -> Dict:
        """
        Generate a response using RAG (Retrieval-Augmented Generation).
        
        Args:
            query: User query to answer
            num_context_docs: Override default number of context documents
            temperature: Override default temperature
            max_tokens: Override default max tokens
            
        Returns:
            Dictionary with:
                - 'response': Generated answer
                - 'context_docs': Retrieved context documents
                - 'model': Model used
                - 'query': Original query
        """
        # Use provided parameters or fall back to instance defaults
        num_docs = num_context_docs if num_context_docs is not None else self.num_context_docs
        temp = temperature if temperature is not None else self.temperature
        max_toks = max_tokens if max_tokens is not None else self.max_tokens
        
        # Retrieve relevant context documents
        context_docs = self.rag_retriever.query(query, n_results=num_docs)
        
        # Format prompt with context
        prompt = self._format_prompt(query, context_docs)
        
        # Prepare generation options
        options = {
            'temperature': temp
        }
        if max_toks is not None:
            options['num_predict'] = max_toks
        
        # Generate response using Ollama
        try:
            response = ollama.generate(
                model=self.model_name,
                prompt=prompt,
                options=options
            )
            
            generated_text = response.get('response', '')
            
            return {
                'response': generated_text,
                'context_docs': context_docs,
                'model': self.model_name,
                'query': query,
                'num_context_docs': len(context_docs)
            }
        except Exception as e:
            error_msg = f"Error generating response: {e}"
            return {
                'response': f"Error: {error_msg}",
                'context_docs': context_docs,
                'model': self.model_name,
                'query': query,
                'num_context_docs': len(context_docs),
                'error': str(e)
            }




RAGChain class defined successfully!


### Initialize RAGChain

Create a RAGChain instance using the existing rag_retriever. You can specify:
- Model name (e.g., "llama2", "mistral", "phi", "llama3")
- Number of context documents to retrieve (default: 5)
- Temperature for generation (default: 0.7)


In [16]:
# Initialize RAGChain with default settings
# Make sure Ollama is running and you have a model installed (e.g., ollama pull llama2)
rag_chain = RAGChain(
    rag_retriever=rag_retriever,
    model_name="llama2",  # Change to your preferred model
    num_context_docs=5,    # Number of documents to retrieve
    temperature=0.7        # Generation temperature
)


Make sure Ollama is running. You can start it with: ollama serve


### Example Queries

Test the RAGChain with sample queries. The system will:
1. Retrieve relevant documents from the vector store
2. Format them into a prompt with context
3. Generate a contextualized answer using Ollama


In [None]:
# Example 1: Simple query
query = "What is the main topic of this document?"
result = rag_chain.generate(query)
result['response']


Retrieving top 5 relevant documents...
 Generating embeddings for 1 texts ...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


Generated embedding with shape (1, 384)
Debug - Results structure: documents=1, first query has 5 results
Debug - Found 5 documents from query, 5 ids, 5 distances
Retrieved 0 documents after filtering by similarity score 0.0
Retrieved 0 context documents
Generating response using model: llama2...
Query: What is the main topic of this document?
Model: llama2
Context Documents Used: 0

Generated Response:
--------------------------------------------------------------------------------

Based on the content of the document, the main topic appears to be "Understanding the Importance of Sustainable Development in Today's World".


In [None]:
# Example 2: Query with custom parameters
query = "Explain neural networks in simple terms"
result = rag_chain.generate(
    query,
    num_context_docs=10,  # Retrieve more context
    temperature=0.5        # Lower temperature for more focused responses
)

result['response']


Retrieving top 10 relevant documents...
 Generating embeddings for 1 texts ...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.91it/s]


Generated embedding with shape (1, 384)
Debug - Results structure: documents=1, first query has 10 results
Debug - Found 10 documents from query, 10 ids, 10 distances
Retrieved 10 documents after filtering by similarity score 0.0
Retrieved 10 context documents
Generating response using model: llama2...
Query: Explain neural networks in simple terms

Generated Response:
--------------------------------------------------------------------------------
Neural networks are a type of machine learning that are inspired by the structure and function of the human brain. They are made up of layers of interconnected nodes or "neurons," which process and transmit information. Each neuron receives input from other neurons, performs a computation on that input, and then sends the output to other neurons in the next layer. This process continues until the network reaches an output layer, where the final output is produced.

The key to neural networks is their ability to learn and adapt through a proc

### Customizing the Prompt Template

You can modify the `_format_prompt` method in the RAGChain class to customize how context and queries are formatted. The default template includes:
- Clear instructions for the LLM
- Retrieved context documents with source information
- The user's question
- Instructions to answer based on context


In [None]:
# Example 3: Different model or configuration
# Uncomment and modify as needed:

# # Using a different model
# rag_chain_mistral = RAGChain(
#     rag_retriever=rag_retriever,
#     model_name="mistral",
#     num_context_docs=8,
#     temperature=0.8
# )

# # Query with the new model
# result = rag_chain_mistral.generate("What are the key concepts discussed?")
# print(result['response'])


### Troubleshooting

**If you get connection errors:**
- Make sure Ollama is running: `ollama serve`
- Check available models: `ollama list`
- Pull a model if needed: `ollama pull llama2`

**If no context is retrieved:**
- Check that documents are properly indexed in the vector store
- Try adjusting the `score_threshold` in `rag_retriever.query()`
- Verify embeddings were generated with the same model used for queries


In [None]:
vector_store_manager.add_documents(chunked_docs, embeddings)


Added 1527 documents to collection pdf_documents
Collection size: 7635
 Generating embeddings for 1 texts ...


Batches: 100%|██████████| 1/1 [00:00<00:00, 19.66it/s]

Generated embedding with shape (1, 384)
Retrieved document 0 documents after filtering by similarity score 0.0





[]

=== Collection Diagnostics ===
Collection name: pdf_documents
Collection count: 7635

=== Testing Query ===
 Generating embeddings for 1 texts ...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.54it/s]

Generated embedding with shape (1, 384)
Retrieved document 5 documents after filtering by similarity score 0.0

Query returned 5 results





=== Clearing and Re-indexing Collection ===
Deleted collection: pdf_documents
Initializing Chroma client for collection pdf_documents...
Collection pdf_documents initialized successfully
Existing documents in collection: 0

=== Re-adding documents ===
Added 1527 documents to collection pdf_documents
Collection size: 1527

=== Testing Query After Re-indexing ===
 Generating embeddings for 1 texts ...


Batches: 100%|██████████| 1/1 [00:00<00:00, 24.60it/s]

Generated embedding with shape (1, 384)
Retrieved document 0 documents after filtering by similarity score 0.0

❌ Still no results. Check the debug output above.





 Generating embeddings for 1 texts ...


Batches: 100%|██████████| 1/1 [00:00<00:00, 22.03it/s]

Generated embedding with shape (1, 384)
Retrieved document 0 documents after filtering by similarity score 0.0





[]

## Important: Re-indexing Documents

If you're still seeing gibberish results, the vector store may contain documents embedded with a different model. You need to:
1. Clear the existing vector store
2. Re-add documents with embeddings generated by the same model used for queries


In [None]:
# Option 1: Clear the existing collection and re-add documents
# Uncomment the lines below to clear and re-index

# Delete the existing collection
# vector_store_manager.client.delete_collection(name=vector_store_manager.collection_name)
# print("Collection deleted. Re-run the VectorStoreManager cell to create a new one.")

# Then re-add documents:
# vector_store_manager = VectorStoreManager()  # Creates new collection
# vector_store_manager.add_documents(chunked_docs, embeddings)
