# PGVector RAG Demo for OpenShift AI

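This notebook demonstrates how to use the PGVector RAG system from OpenShift AI. It walks through connecting to the database, creating a project, adding document chunks with dense embeddings, running plain and filtered semantic searches, inspecting project statistics, and batch ingestion.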

## 1. Setup and Connection

In [None]:
# Install required packages if not already installed.
# %pip (rather than !pip) is used so the install targets the running kernel's environment.
# NOTE(review): versions are unpinned — consider pinning them for reproducible runs.
%pip install psycopg2-binary pgvector numpy sentence-transformers

In [None]:
import logging
import os
import sys
import uuid

import numpy as np

try:
    from sentence_transformers import SentenceTransformer  # type: ignore
except ImportError:
    print("sentence-transformers not installed. Run: pip install sentence-transformers")
    SentenceTransformer = None

sys.path.append('/path/to/vector-utils')  # Adjust path as needed

from pgvector_rag import PGVectorRAG

# Setup logging: configure the root logger at INFO level so the logger.info()
# progress messages emitted by later cells are displayed, then create the
# module-level logger those cells use.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [None]:
# Connection parameters.
# Every value can be overridden through a PGVECTOR_* environment variable so
# real credentials never have to be written into the notebook; the fallbacks
# are the values this demo originally hard-coded.
conn_params = {
    "host": os.environ.get("PGVECTOR_HOST", "postgres-pgvector.pgvector.svc.cluster.local"),
    "port": int(os.environ.get("PGVECTOR_PORT", "5432")),
    "database": os.environ.get("PGVECTOR_DATABASE", "vectordb"),
    "user": os.environ.get("PGVECTOR_USER", "vectoruser"),
    "password": os.environ.get("PGVECTOR_PASSWORD", "vectorpass"),
}

# Fail fast with an actionable message: the import cell leaves
# SentenceTransformer set to None when the package is missing, and calling
# None would otherwise raise an opaque "'NoneType' object is not callable".
if SentenceTransformer is None:
    raise ImportError(
        "sentence-transformers is not installed. Run: %pip install sentence-transformers"
    )

# Initialize embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')
logger.info(f"Model dimension: {model.get_sentence_embedding_dimension()}")

## 2. Create Project and Add Documents

In [None]:
# Build the RAG client from the connection parameters defined above
rag = PGVectorRAG(conn_params)

# Register the project; project_id is reused by every later cell
project_id = "notebook_demo"
rag.create_project(
    project_id=project_id, name="Notebook Demo Project", description="Demo from Jupyter notebook"
)

In [None]:
# Sample documents: each entry carries a title and its pre-split text chunks
documents = [
    dict(
        title="Introduction to RAG",
        chunks=[
            "Retrieval-Augmented Generation (RAG) combines the benefits of retrieval-based and generative AI models.",
            "RAG systems first retrieve relevant documents, then use them to generate contextually appropriate responses.",
            "This approach reduces hallucinations and provides more accurate, grounded responses.",
        ],
    ),
    dict(
        title="PGVector Overview",
        chunks=[
            "PGVector is an open-source extension for PostgreSQL that enables vector similarity search.",
            "It supports multiple distance metrics including L2, inner product, and cosine distance.",
            "With version 0.8.0, PGVector now supports sparse vectors for hybrid search capabilities.",
        ],
    ),
]

In [None]:
# Add documents to the system
for doc in documents:
    # One fresh id per document; all of its chunks are stored under it
    doc_id = str(uuid.uuid4())

    # Encode all chunks of this document in a single model call instead of
    # one call per chunk; the result holds one embedding per input chunk,
    # in the same order.
    embeddings = model.encode(doc["chunks"])

    for idx, (chunk, embedding) in enumerate(zip(doc["chunks"], embeddings)):
        # Add to database (the returned chunk id is not needed in this demo)
        rag.add_document_chunk(
            project_id=project_id,
            document_id=doc_id,
            document_name=doc["title"],
            chunk_text=chunk,
            chunk_index=idx,
            dense_embedding=embedding,
            metadata={"source": "notebook_demo"},
            topic="ai_ml"
        )

        logger.info(f"Added chunk from '{doc['title']}': {chunk[:50]}...")

## 3. Search Examples

In [None]:
# Function to display search results nicely
def display_results(results, query):
    """Print a numbered, human-readable listing of search hits.

    Args:
        results: Sized iterable of mapping-like hits. Each hit must provide
            'document_name' and 'chunk_text'; 'distance' is optional and is
            shown as 'N/A' when absent.
        query: The query text, echoed in the header line.
    """
    # Header: blank line, the quoted query, then the hit count
    print(f"\nQuery: '{query}'")
    print(f"Found {len(results)} results:\n")

    # Ranks are 1-based for display
    for rank, hit in enumerate(results, start=1):
        print(f"{rank}. Document: {hit['document_name']}")
        print(f"   Text: {hit['chunk_text']}")
        print(f"   Distance: {hit.get('distance', 'N/A')}")
        print()

In [None]:
# Search example 1: Basic semantic search (no filters, limit of 3)
query = "How does RAG reduce errors in AI responses?"

# Embed the query with the same model that embedded the stored chunks
query_embedding = model.encode(query)
results = rag.dense_search(project_id=project_id, query_embedding=query_embedding, limit=3)

display_results(results, query)

In [None]:
# Search example 2: Filtered search
# Same dense search as before, additionally restricted by topic and metadata
query = "vector database features"
query_embedding = model.encode(query)

results = rag.dense_search(
    project_id=project_id, query_embedding=query_embedding,
    topic="ai_ml",                               # Filter by topic
    metadata_filter={"source": "notebook_demo"}, # Filter by metadata
    limit=3,
)

display_results(results, query)

## 4. Project Statistics

In [None]:
# Fetch the aggregate statistics for the demo project
stats = rag.get_project_stats(project_id)

print("Project Statistics:")
if not stats:
    # A falsy result means there is nothing to report
    print("No project stats available")
else:
    print(f"Total chunks: {stats['total_chunks']}")
    print(f"Total documents: {stats['total_documents']}")
    print(f"Topics: {stats['topics']}")
    # Average length is shown with one decimal place
    print(f"Average chunk length: {stats['avg_chunk_length']:.1f} characters")
    print(f"Estimated storage size: {stats['storage_size_estimate']}")

## 5. Advanced: Batch Processing

In [None]:
# Example of batch processing for better performance

# Simulate processing a larger document
large_doc_id = str(uuid.uuid4())
chunk_texts = [
    f"This is chunk {i} of a larger document about machine learning and AI."
    for i in range(10)
]

# Encode every chunk in one model call rather than one call per chunk;
# the result holds one embedding per input text, in the same order.
chunk_embeddings = model.encode(chunk_texts)

# One record per chunk, all sharing the same document id
batch_chunks = [
    {
        "document_id": large_doc_id,
        "document_name": "Large ML Document",
        "chunk_text": chunk_text,
        "chunk_index": i,
        "dense_embedding": chunk_embedding,
        "metadata": {"batch": True, "chunk_num": i},
    }
    for i, (chunk_text, chunk_embedding) in enumerate(zip(chunk_texts, chunk_embeddings))
]

# Add all chunks in one transaction
chunk_ids = rag.add_document_chunks_batch(project_id, batch_chunks)
print(f"Added {len(chunk_ids)} chunks in batch")

## 6. Cleanup

In [None]:
# Close connection when done.
# Run this cell last: the earlier cells all call methods on `rag`.
rag.close()
print("Connection closed")

## Next Steps

1. **Implement Sparse Embeddings**: Add SPLADE or BM25 for hybrid search
2. **Document Processing**: Add document parsing (PDF, DOCX, etc.)
3. **Advanced Chunking**: Implement sliding window or semantic chunking
4. **Production Pipeline**: Create automated ingestion pipelines
5. **Monitoring**: Add performance metrics and monitoring