# Ask RAG Questions

This notebook demonstrates how to ask the RAG system questions and get answers grounded in your ingested documents.

## Setup

In [None]:
import os
import sys
import json
import requests
import numpy as np
from dotenv import load_dotenv
from typing import List, Dict
import pandas as pd

# Add parent directory to path
sys.path.append('..')
from pgvector_rag import PGVectorRAG

# Read settings from the .env file located one directory above this notebook
load_dotenv(dotenv_path='../.env')

# Database connection settings; each value comes from the environment and
# falls back to the default shown here when the variable is not set.
conn_params = dict(
    host=os.environ.get('DB_HOST', 'postgres-pgvector.pgvector.svc.cluster.local'),
    port=int(os.environ.get('DB_PORT', '5432')),
    database=os.environ.get('DB_NAME', 'vectordb'),
    user=os.environ.get('DB_USER', 'vectoruser'),
    password=os.environ.get('DB_PASSWORD', 'vectorpass'),
)

# API configurations
NOMIC_URL = os.getenv('NOMIC_EMBED_URL')
if NOMIC_URL and not NOMIC_URL.endswith('/v1'):
    NOMIC_URL = f"{NOMIC_URL}/v1"
NOMIC_API_KEY = os.getenv('NOMIC_EMBED_API_KEY')
NOMIC_MODEL = os.getenv('NOMIC_EMBED_MODEL_NAME')

LLAMA_URL = os.getenv('LLAMA_3-2_URL')
if LLAMA_URL and not LLAMA_URL.endswith('/v1'):
    LLAMA_URL = f"{LLAMA_URL}/v1"
LLAMA_API_KEY = os.getenv('LLAMA_3-2_API_KEY')
LLAMA_MODEL = os.getenv('LLAMA_3-2_MODEL_NAME')

print("Configuration loaded successfully")

In [None]:
# Create the PGVector-backed RAG client from the connection settings
rag = PGVectorRAG(conn_params)
print('Connected to PGVector database')

# Project to query; taken from the environment, defaulting to the demo project
PROJECT_ID = os.environ.get('PROJECT_ID', 'demo_project')

In [None]:
# Helper functions
def get_embedding(text: str, timeout: float = 30.0) -> np.ndarray:
    """Get the embedding vector for a single text.

    Posts ``text`` to ``{NOMIC_URL}/embeddings`` using model ``NOMIC_MODEL``
    and reads ``data[0]['embedding']`` from the JSON reply.

    Args:
        text: The text to embed.
        timeout: Seconds to wait for the embedding service. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        The embedding as a NumPy array.

    Raises:
        RuntimeError: If ``NOMIC_URL`` is not configured or the service
            answers with a status other than 200.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    # Fail early with a clear message rather than posting to "None/embeddings".
    if not NOMIC_URL:
        raise RuntimeError("NOMIC_EMBED_URL is not set; cannot request embeddings")

    response = requests.post(
        f"{NOMIC_URL}/embeddings",
        headers={
            'Authorization': f"Bearer {NOMIC_API_KEY}",
            'Content-Type': 'application/json'
        },
        json={
            'model': NOMIC_MODEL,
            'input': text
        },
        timeout=timeout
    )

    if response.status_code != 200:
        # Include a short slice of the body so the failure can be diagnosed.
        raise RuntimeError(
            f"Error getting embedding: {response.status_code} {response.text[:200]}"
        )

    data = response.json()
    return np.array(data['data'][0]['embedding'])

def generate_answer(prompt: str, max_tokens: int = 200, timeout: float = 60.0) -> str:
    """Generate a completion for *prompt* using the Llama endpoint.

    Posts to ``{LLAMA_URL}/completions`` with model ``LLAMA_MODEL`` and a
    temperature of 0.3, then returns ``choices[0]['text']`` stripped of
    surrounding whitespace.

    Args:
        prompt: The full prompt text to complete.
        max_tokens: Upper bound on generated tokens sent to the service.
        timeout: Seconds to wait for the service. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        The generated text.

    Raises:
        RuntimeError: If ``LLAMA_URL`` is not configured or the service
            answers with a status other than 200.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    # Fail early with a clear message rather than posting to "None/completions".
    if not LLAMA_URL:
        raise RuntimeError("LLAMA_3-2_URL is not set; cannot generate answers")

    response = requests.post(
        f"{LLAMA_URL}/completions",
        headers={
            'Authorization': f"Bearer {LLAMA_API_KEY}",
            'Content-Type': 'application/json'
        },
        json={
            'model': LLAMA_MODEL,
            'prompt': prompt,
            'max_tokens': max_tokens,
            'temperature': 0.3
        },
        timeout=timeout
    )

    if response.status_code != 200:
        # Include a short slice of the body so the failure can be diagnosed.
        raise RuntimeError(
            f"Error generating answer: {response.status_code} {response.text[:200]}"
        )

    return response.json()['choices'][0]['text'].strip()

## Check Available Data

In [None]:
# Look up what has been ingested for the current project
stats = rag.get_project_stats(PROJECT_ID)

print(f"Project: {PROJECT_ID}")
if not stats:
    # Nothing came back for this project
    print("No project stats available")
else:
    print(f"Total documents: {stats['total_documents']}")
    print(f"Total chunks: {stats['total_chunks']}")
    print(f"Topics: {stats['topics']}")
    print(f"Average chunk length: {stats['avg_chunk_length']:.0f} characters")
    print(f"Storage size: {stats['storage_size_estimate']}")

## RAG Question-Answering Function

In [None]:
def ask_rag(question: str, num_sources: int = 5, show_sources: bool = True) -> Dict:
    """
    Ask a question to the RAG system and print the answer.

    The question is embedded with ``get_embedding``, the closest chunks are
    fetched with ``rag.dense_search`` for ``PROJECT_ID``, and the numbered
    chunks are passed as context to ``generate_answer``.

    Args:
        question: The question to ask
        num_sources: Number of source documents to retrieve
        show_sources: Whether to display source documents

    Returns:
        Dict with two keys: ``'answer'`` (the generated text, or a fallback
        message when the search returns nothing) and ``'sources'`` (a list of
        dicts with ``'document'``, ``'page'``, ``'text'`` and ``'distance'``;
        empty when nothing was found).
    """
    banner = '=' * 60
    divider = "-" * 60

    print(f"\n{banner}")
    print(f"Question: {question}")
    print(f"{banner}\n")

    # Get question embedding
    print("1. Generating question embedding...")
    question_embedding = get_embedding(question)

    # Search for relevant documents
    print(f"2. Searching for top {num_sources} relevant documents...")
    results = rag.dense_search(
        project_id=PROJECT_ID,
        query_embedding=question_embedding,
        limit=num_sources
    )

    if not results:
        answer = "I couldn't find any relevant information to answer your question."
        # Show the fallback too; otherwise the progress output just stops
        # and the caller sees no answer at all.
        print("\n" + divider)
        print("ANSWER:")
        print(divider)
        print(answer)
        return {
            'answer': answer,
            'sources': []
        }

    # Prepare context from search results
    context_parts = []
    sources = []

    for number, result in enumerate(results, start=1):
        chunk_text = result['chunk_text']
        context_parts.append(f"[{number}] {chunk_text}")
        # Keep only a short preview of each chunk for display
        preview = chunk_text[:200] + '...' if len(chunk_text) > 200 else chunk_text
        sources.append({
            'document': result['document_name'],
            'page': result.get('page_number', 'N/A'),
            'text': preview,
            'distance': result['distance']
        })

    context = "\n\n".join(context_parts)

    # Generate answer
    print("3. Generating answer...")
    prompt = f"""Based on the following context, answer the question. If the answer is not in the context, say so.

Context:
{context}

Question: {question}

Answer:"""

    answer = generate_answer(prompt, max_tokens=300)

    # Display results
    print("\n" + divider)
    print("ANSWER:")
    print(divider)
    print(answer)

    if show_sources:
        print("\n" + divider)
        print("SOURCES:")
        print(divider)
        for number, source in enumerate(sources, start=1):
            print(f"\n[{number}] Document: {source['document']}")
            print(f"    Page: {source['page']}")
            print(f"    Distance: {source['distance']:.4f}")
            print(f"    Preview: {source['text']}")

    return {
        'answer': answer,
        'sources': sources
    }

## Ask Questions

In [None]:
# Sample questions; replace them with ones that match the documents you ingested
questions = [
    'What is PGVector and what are its main features?',
    'How does vector similarity search work?',
    'What are the benefits of using RAG systems?',
    'How do I create an index in PGVector?',
    'What distance metrics does PGVector support?',
]

In [None]:
# Run the pipeline on the first sample question
result = ask_rag(question=questions[0])

In [None]:
# Interactive question asking
# Uncomment to use in Jupyter
# while True:
#     question = input("\nEnter your question (or 'quit' to exit): ")
#     if question.lower() == 'quit':
#         break
#     result = ask_rag(question)

## Advanced RAG Features

In [None]:
def ask_rag_with_filters(
    question: str, 
    topic: str | None = None, 
    metadata_filter: Dict | None = None,
    num_sources: int = 5
) -> Dict:
    """
    Ask RAG with topic and metadata filters.

    Args:
        question: The question to ask
        topic: Optional topic passed to ``rag.dense_search`` as ``topic``
        metadata_filter: Optional dict passed to ``rag.dense_search`` as
            ``metadata_filter``
        num_sources: Maximum number of chunks to retrieve

    Returns:
        Dict with the same three keys on every path: ``'answer'`` (generated
        text or a fallback message), ``'sources'`` (list of dicts with
        ``'document'``, ``'page'``, ``'text'`` and ``'distance'``; empty when
        nothing matched) and ``'num_sources'`` (number of chunks retrieved).
    """
    print(f"\nQuestion: {question}")
    if topic:
        print(f"Topic filter: {topic}")
    if metadata_filter:
        print(f"Metadata filter: {metadata_filter}")

    # Get embedding
    question_embedding = get_embedding(question)

    # Search with filters
    results = rag.dense_search(
        project_id=PROJECT_ID,
        query_embedding=question_embedding,
        topic=topic,
        metadata_filter=metadata_filter,
        limit=num_sources
    )

    if not results:
        answer = 'No results found with the specified filters.'
        # Print the fallback so the empty case is visible too
        print(f"\nAnswer: {answer}")
        return {'answer': answer, 'sources': [], 'num_sources': 0}

    # Build the numbered context and the per-chunk source summaries
    context_parts = []
    sources = []
    for number, r in enumerate(results, start=1):
        chunk_text = r['chunk_text']
        context_parts.append(f"[{number}] {chunk_text}")
        sources.append({
            # .get() so that a result lacking these fields does not raise here
            'document': r.get('document_name'),
            'page': r.get('page_number', 'N/A'),
            'text': chunk_text[:200] + '...' if len(chunk_text) > 200 else chunk_text,
            'distance': r.get('distance')
        })
    context = "\n\n".join(context_parts)

    prompt = f"""Context:\n{context}\n\nQuestion: {question}\n\nAnswer:"""
    answer = generate_answer(prompt)

    print(f"\nAnswer: {answer}")
    print(f"\nFound {len(results)} relevant documents")

    return {'answer': answer, 'sources': sources, 'num_sources': len(results)}

In [None]:
# Example with filters (adjust based on your data)
# ask_rag_with_filters(
#     "What are the key features?",
#     topic="databases",
#     metadata_filter={"file_type": "pdf"}
# )

## Multi-turn Conversation

In [None]:
class RAGConversation:
    """Multi-turn question answering that feeds recent exchanges back into the prompt.

    Each call to :meth:`ask` retrieves chunks for the new question, prepends a
    summary of the most recent exchanges, generates an answer, and records the
    exchange in ``history``.
    """

    def __init__(
        self,
        rag_client,
        project_id,
        num_sources: int = 3,
        history_window: int = 3,
        answer_preview_chars: int = 100,
    ):
        """
        Args:
            rag_client: Object exposing ``dense_search(project_id=..., query_embedding=..., limit=...)``
            project_id: Project identifier passed to every search
            num_sources: Number of chunks retrieved per question
            history_window: Number of most recent exchanges included in the prompt
            answer_preview_chars: Maximum characters of each past answer kept in the prompt
        """
        self.rag = rag_client
        self.project_id = project_id
        self.num_sources = num_sources
        self.history_window = history_window
        self.answer_preview_chars = answer_preview_chars
        # List of {'question': str, 'answer': str}, oldest first
        self.history = []

    def _history_prompt(self) -> str:
        """Return the "Previous conversation" prompt section, or "" when there is none."""
        # Guard the window: history[-0:] would select the whole list.
        if not self.history or self.history_window <= 0:
            return ""

        lines = ["Previous conversation:\n"]
        for exchange in self.history[-self.history_window:]:
            answer = exchange['answer']
            # Add the ellipsis only when the answer was actually cut short
            if len(answer) > self.answer_preview_chars:
                answer = answer[:self.answer_preview_chars] + '...'
            lines.append(f"Q: {exchange['question']}\nA: {answer}\n\n")
        return "".join(lines)

    def ask(self, question: str) -> str:
        """Answer *question* using retrieved chunks plus recent history.

        Args:
            question: The question to ask

        Returns:
            The generated answer, which is also appended to ``history``.
        """
        # Get embedding and search
        embedding = get_embedding(question)
        results = self.rag.dense_search(
            project_id=self.project_id,
            query_embedding=embedding,
            limit=self.num_sources
        )

        # Build context including history
        context = "\n".join([r['chunk_text'] for r in results])
        history_text = self._history_prompt()

        prompt = f"""{history_text}Context:\n{context}\n\nQuestion: {question}\n\nAnswer:"""
        answer = generate_answer(prompt)

        # Save to history
        self.history.append({'question': question, 'answer': answer})

        return answer

    def clear_history(self):
        """Forget all recorded exchanges."""
        self.history = []

In [None]:
# Two-turn demo: the second question relies on the first for its subject
conversation = RAGConversation(rag, PROJECT_ID)

print("Starting a conversation (memory-enabled):\n")

# Opening turn
answer1 = conversation.ask("What is PGVector?")
print("Q: What is PGVector?", f"A: {answer1}\n", sep="\n")

# Follow-up turn
answer2 = conversation.ask("What are its main advantages?")
print("Q: What are its main advantages?", f"A: {answer2}", sep="\n")

## Cleanup

In [None]:
# Release the RAG client's database connection now that the notebook is done
rag.close()
print('Closed database connection')