## 1. Setup Environment

Load environment variables and verify API keys.

In [None]:
import os
from pathlib import Path
from dotenv import load_dotenv

# Load environment variables (the hint below points at a .env file).
load_dotenv()

# Read the settings this notebook depends on. The key itself is never printed,
# only whether it is present.
cohere_key = os.getenv("COHERE_API_KEY")
aws_region = os.getenv("AWS_DEFAULT_REGION", "us-east-1")

# Pick the status icon from the actual state so a missing key is not shown
# with a success check mark in front of it.
cohere_status = "✅ Set" if cohere_key else "❌ NOT SET"
print(f"Cohere API Key: {cohere_status}")
# The region always has a value because of the "us-east-1" fallback above.
print(f"✅ AWS Region: {aws_region}")

if not cohere_key:
    print("\n⚠️  Please set COHERE_API_KEY in your .env file")

## 2. Run RAG Setup Script

Execute the RAG setup to populate ChromaDB.

In [None]:
# Make the current working directory importable so the `agentic` package
# resolves, then run the RAG setup entry point.
import sys

sys.path.insert(0, ".")

from agentic.rag_setup import main as setup_rag

# Per the setup script's description: loads KB articles, generates embeddings,
# and stores them in ChromaDB.
setup_rag()

## 3. Test Semantic Search

Test the RAG search tool with various queries.

In [None]:
from agentic.tools.rag_tools import search_knowledge_base

# Test query 1: Cancellation
# Invoke the search tool with a natural-language question and ask for the
# top 3 matches.
print("🔍 Query 1: Cancellation Policy\n")
print("=" * 80)

results = search_knowledge_base.invoke({
    "query": "How do I cancel my reservation?",
    "top_k": 3,
})

# Print each hit: title, category, relevance (formatted as a percentage) and
# the first 200 characters of the article content.
for idx, article in enumerate(results, 1):
    print(f"\n{idx}. {article['title']} (Category: {article['category']})")
    print(f"   Relevance: {article['relevance_score']:.1%}")
    print(f"   Preview: {article['content'][:200]}...")
    print("-" * 80)

In [None]:
# Test query 2: Refund
# Uses the search_knowledge_base tool imported in the previous search cell.
print("🔍 Query 2: Refund Information\n")
print("=" * 80)

results = search_knowledge_base.invoke({
    "query": "When will I get my refund?",
    "top_k": 3,
})

# Print each hit with its relevance, category and the first 300 characters
# of the article content.
for idx, article in enumerate(results, 1):
    print(f"\n{idx}. {article['title']}")
    print(f"   Relevance: {article['relevance_score']:.1%}")
    print(f"   Category: {article['category']}")
    print(f"   Content:\n   {article['content'][:300]}...")
    print("-" * 80)

In [None]:
# Test query 3: Premium membership
# Uses the search_knowledge_base tool imported in the first search cell.
print("🔍 Query 3: Premium Subscription\n")
print("=" * 80)

results = search_knowledge_base.invoke({
    "query": "What are the benefits of premium membership?",
    "top_k": 3,
})

# Print each hit with its relevance and the first 250 characters of the
# article content.
for idx, article in enumerate(results, 1):
    print(f"\n{idx}. {article['title']}")
    print(f"   Relevance: {article['relevance_score']:.1%}")
    print(f"   Content:\n   {article['content'][:250]}...")
    print("-" * 80)

## 4. Test Database Tools

Verify database query tools work correctly.

In [None]:
from agentic.tools.db_tools import (
    get_user_info,
    search_events,
    get_reservation_info,
)

# Test user lookup
print("👤 Test: Get User Info\n")
user = get_user_info.invoke({"user_id": "u_00001"})

# Missing fields fall back to a placeholder instead of raising KeyError.
print(f"User: {user.get('full_name', 'N/A')}")
print(f"Email: {user.get('email', 'N/A')}")

# The original line ended with a stray `")`, which made the whole cell a
# SyntaxError. The lookup is hoisted so the f-string holds no nested braces.
subscription = user.get('subscription', {})
print(f"Subscription: {subscription}")

In [None]:
# Test event search
# Uses the search_events tool imported in the user-lookup cell; asks for up
# to 3 events in the "Music" category.
print("🎭 Test: Search Events\n")
events = search_events.invoke({
    "category": "Music",
    "limit": 3,
})

# Print a short summary per event. The formatted date falls back to 'TBD'
# when the key is absent; the other fields are read directly.
for event in events:
    print(f"\n• {event['title']}")
    print(f"  Venue: {event['venue_name']}")
    print(f"  Date: {event.get('event_date_formatted', 'TBD')}")
    print(f"  Available: {event['available_tickets']} tickets")

## 5. Combined RAG + DB Query Test

Simulate an agent using both RAG search and database queries.

In [None]:
# Scenario: User asks about cancellation policy
# Uses the search_knowledge_base tool imported in the semantic-search section.
print("🤖 Agent Simulation: Cancellation Question\n")
print("=" * 80)

user_query = "I want to cancel my booking. What's the policy?"
print(f"User Query: {user_query}\n")

# Step 1: Search knowledge base
print("Step 1: Searching knowledge base...")
kb_results = search_knowledge_base.invoke({
    "query": user_query,
    "top_k": 2,
})

print(f"Found {len(kb_results)} relevant articles:\n")
for article in kb_results:
    print(f"  • {article['title']} (Relevance: {article['relevance_score']:.1%})")

# Step 2: Get user's reservations (if we had user_id)
print("\nStep 2: Would check user's active reservations...")
print("(In real scenario, agent would use get_user_reservations tool)")

# Step 3: Agent would combine KB info + user data to respond
print("\nStep 3: Agent Response:")
if kb_results:
    # Quote the first 200 characters of the first returned article.
    top_article = kb_results[0]
    print(f"Based on our cancellation policy, {top_article['content'][:200]}...")
else:
    # Indexing [0] on an empty result would raise IndexError; say so instead.
    print("No relevant knowledge base articles were found for this query.")

## 6. ChromaDB Statistics

Inspect the vector database.

In [None]:
from agentic.tools.rag_tools import get_or_create_collection

# Obtain the collection handle and report its name, size and metadata.
collection = get_or_create_collection()

print("📊 ChromaDB Collection Statistics\n")
print("=" * 80)
print(f"Collection name: {collection.name}")
print(f"Total documents: {collection.count()}")
print(f"Metadata: {collection.metadata}")

# Sample a few documents
sample = collection.peek(limit=3)
print("\nSample documents:")
# Pair each sampled id with its metadata entry; numbering starts at 1.
for i, (doc_id, metadata) in enumerate(zip(sample['ids'], sample['metadatas']), 1):
    print(f"\n  {i}. {metadata['title']}")
    print(f"     ID: {doc_id}")
    print(f"     Category: {metadata['category']}")

## 7. Summary

RAG system is ready for integration with agents!

### ✅ Verified Components:
- Cohere embeddings (embed-english-v3.0)
- ChromaDB storage with HNSW indexing
- Semantic search with relevance scoring
- Database query tools

### 🚀 Next Steps:
1. Create classifier agent
2. Create resolver agent (uses RAG + DB tools)
3. Create escalation agent
4. Build LangGraph workflow

In [None]:
# Final status banner: lists the files this walkthrough relies on and where
# the vector database lives.
print("✅ RAG System Test Complete!")
print("\n📁 Files created:")
print("  • eventhub/agentic/rag_setup.py")
print("  • eventhub/agentic/tools/rag_tools.py")
print("  • eventhub/agentic/tools/db_tools.py")
print("\n🗄️ Vector database: data/vectordb/")
print("🚀 Ready for agent development!")