# Vector Context Integration Test
This notebook demonstrates retrieving vector context (cue cards, adaptive prompts, and similar conversations) using the existing `EnhancedConversationDB` class.

In [None]:
# Cell 1: Setup and Imports
import json
import sys
import time
from collections import Counter
from pathlib import Path

# Put the current working directory on the import path so `program_files` can be imported
sys.path.insert(0, str(Path.cwd()))

# Import the database class
from program_files.database.enhanced_conversation_db import EnhancedConversationDB

# Notebook banner: title line followed by a 50-character rule.
print("🧪 Vector Context Integration Test", "=" * 50, sep="\n")

In [None]:
# Cell 2: Initialize Database


# Initialize the database
try:
    db = EnhancedConversationDB()
except Exception as e:
    # Only a constructor failure disables the later cells (they are all
    # gated on `db`).
    print(f"❌ Error initializing database: {e}")
    db = None
else:
    print("✅ Database initialized successfully")

    # The stats are informational. A failure here (for example a missing key
    # in the returned mapping) must not be reported as an initialisation
    # error, and must not throw away a database handle that works.
    try:
        stats = db.get_conversation_stats()
        print("📊 Database stats:")
        print(f"   - Total conversations: {stats['total_conversations']}")
        print(f"   - Total audio features: {stats['total_audio_features']}")
        print(f"   - Conversations with audio: {stats['conversations_with_audio']}")
    except Exception as e:
        print(f"⚠️ Could not read database stats: {e}")

In [None]:
# Cell 3: Test Individual Search Methods

# Smoke-test each search method on its own. At most two hits are previewed per
# method, and long text fields are cut to 60 characters.
if not db:
    print("❌ Database not available")
else:
    print("🔍 Testing individual search methods:")

    # --- 1. cue cards ---
    print("\n1. Searching cue cards:")
    cue_cards = db.search_cue_cards("diabetes", top_k=3)
    print(f"   Found {len(cue_cards)} cue cards")
    if cue_cards:
        for position, card in enumerate(cue_cards[:2], start=1):
            print(f"   {position}. Q: {card['question'][:60]}...")
            print(f"      A: {card['answer'][:60]}...")

    # --- 2. adaptive prompts ---
    print("\n2. Searching adaptive prompts:")
    prompts = db.search_adaptive_prompts("hypertension", top_k=3)
    print(f"   Found {len(prompts)} adaptive prompts")
    if prompts:
        for position, prompt in enumerate(prompts[:2], start=1):
            print(f"   {position}. Issue: {prompt['issue']}")
            print(f"      Prompt: {prompt['prompt'][:60]}...")

    # --- 3. stored conversations ---
    print("\n3. Searching conversations:")
    conversations = db.search_conversations("medical", top_k=3)
    print(f"   Found {len(conversations)} similar conversations")
    if conversations:
        for position, conv in enumerate(conversations[:2], start=1):
            print(f"   {position}. Speaker: {conv['speaker']}")
            print(f"      Text: {conv['text'][:60]}...")

In [None]:
# Cell 4: Test Combined Vector Context

# Run the combined lookup for a handful of sample queries and report how many
# items of each kind came back, plus a preview of the first cue card.
if not db:
    print("❌ Database not available")
else:
    print("🎯 Testing combined vector context retrieval:")

    test_queries = [
        "diabetes management",
        "blood pressure control",
        "medication adherence",
        "heart disease",
    ]

    for query in test_queries:
        print(f"\n🔍 Query: '{query}'")
        context = db.get_vector_context(query, top_k=2)

        if not context:
            print("   ❌ No context found")
            continue

        # A missing section counts as zero results.
        cue_count, prompt_count, conv_count = (
            len(context.get(section, []))
            for section in ('relevant_cue_cards', 'relevant_prompts', 'similar_conversations')
        )
        print(f"   ✅ Found: {cue_count} cue cards, {prompt_count} prompts, {conv_count} conversations")

        # Preview the top cue card when there is one.
        matched_cards = context.get('relevant_cue_cards')
        if matched_cards:
            sample = matched_cards[0]
            print(f"   📝 Sample cue card: {sample['question'][:50]}...")

In [None]:
# Cell 5: Test Pipeline Helper Function

# Define the pipeline helper function (same as in pipeline_helpers.py)
def get_vector_context(query: str, conversation_context: str = "", top_k: int = 3, vector_db=None):
    """Fetch vector context for ``query`` and reshape it for the Gemma client.

    Args:
        query: Text passed to ``vector_db.get_vector_context``.
        conversation_context: Accepted for interface compatibility; not used
            by this function.
        top_k: Forwarded to ``vector_db.get_vector_context`` as ``top_k``.
        vector_db: Object exposing ``get_vector_context(query, top_k=...)``.

    Returns:
        A dict with the keys ``relevant_cue_cards`` (items ``{"q", "a"}``),
        ``relevant_prompts`` (items ``{"issue", "prompt"}``) and
        ``similar_conversations`` (items ``{"text", "speaker"}``). Missing
        fields become ``""`` and missing sections become ``[]``.
        ``None`` is returned when ``vector_db`` is ``None``, when the lookup
        yields a falsy result, or when any exception is raised (the error is
        printed rather than propagated).
    """
    # Output section -> ordered (output field, source field) pairs.
    layout = (
        ("relevant_cue_cards", (("q", "question"), ("a", "answer"))),
        ("relevant_prompts", (("issue", "issue"), ("prompt", "prompt"))),
        ("similar_conversations", (("text", "text"), ("speaker", "speaker"))),
    )

    try:
        if vector_db is None:
            return None

        raw = vector_db.get_vector_context(query, top_k=top_k)
        if not raw:
            return None

        # Format for Gemma client
        formatted = {}
        for section, field_pairs in layout:
            formatted[section] = [
                {out_key: item.get(src_key, "") for out_key, src_key in field_pairs}
                for item in raw.get(section, [])
            ]
        return formatted
    except Exception as err:
        print(f"Error getting vector context: {err}")
        return None

# Confirmation printed when this cell runs, i.e. once the `def` above has executed.
print("✅ Pipeline helper function defined")

In [None]:
# Cell 6: Test Pipeline Integration

# Exercise the notebook-level helper twice: once against the live database and
# once with no database, where it is expected to return None.
if not db:
    print("❌ Database not available")
else:
    print("🔄 Testing pipeline integration:")

    # Test with database
    print("\n1. Testing with database:")
    query = "How should I manage my diabetes?"
    context = get_vector_context(query, vector_db=db)

    if not context:
        print("   ❌ No vector context found")
    else:
        print("   ✅ Vector context retrieved successfully")
        print("   📊 Results:")
        for label, section in (
            ("Cue cards", 'relevant_cue_cards'),
            ("Adaptive prompts", 'relevant_prompts'),
            ("Similar conversations", 'similar_conversations'),
        ):
            print(f"      - {label}: {len(context.get(section, []))}")

        # Show formatted output (as would be sent to Gemma): the first 300
        # characters of the JSON dump, followed by an ellipsis.
        print("\n   📤 Formatted for Gemma:")
        preview = json.dumps(context, indent=2)[:300]
        print(preview + "...")

    # Test without database
    print("\n2. Testing without database:")
    context_no_db = get_vector_context(query, vector_db=None)
    print(
        "   ✅ Correctly returns None when no database provided"
        if context_no_db is None
        else "   ❌ Should return None when no database provided"
    )

In [None]:
# Cell 7: Performance and Statistics

# Time one combined lookup, then break the stored records down by the
# `content_type` field of their metadata.
if db:
    print("📈 Performance and Statistics:")

    # Test performance. perf_counter() is a monotonic, high-resolution clock
    # meant for measuring intervals; time.time() is wall-clock time and can
    # jump if the system clock is adjusted.
    query = "diabetes management"
    start_time = time.perf_counter()
    context = db.get_vector_context(query, top_k=5)
    end_time = time.perf_counter()

    print("\n⏱️  Query performance:")
    print(f"   Query: '{query}'")
    print(f"   Time: {(end_time - start_time)*1000:.2f}ms")

    if context:
        # A section that is missing or None counts as zero results.
        total_results = sum(
            len(context.get(section) or [])
            for section in ('relevant_cue_cards', 'relevant_prompts', 'similar_conversations')
        )
        print(f"   Results: {total_results} items found")

    # Get detailed database statistics
    print("\n📊 Detailed statistics:")
    try:
        # Get all data to analyze content types.
        # NOTE(review): `db.conversations` is presumably the underlying
        # collection object and `.get()` loads every record — confirm this is
        # acceptable for large databases.
        all_data = db.conversations.get()

        # Records with no metadata (a None entry) or no `content_type` field
        # are counted as 'conversation' instead of aborting the breakdown.
        content_types = Counter(
            (metadata or {}).get('content_type', 'conversation')
            for metadata in (all_data.get('metadatas') or [])
        )

        print("   Content type breakdown:")
        for content_type, count in content_types.items():
            print(f"      - {content_type}: {count}")

    except Exception as e:
        print(f"   Error getting detailed stats: {e}")

else:
    print("❌ Database not available")

In [None]:
# Cell 8: Summary and Conclusion

# Closing summary. The success branch is chosen only by whether `db` is
# truthy; it does not inspect the outcome of the earlier cells.
print("🎉 Vector Context Integration Test Summary", "=" * 50, sep="\n")

if not db:
    print(
        "❌ Database initialization failed",
        "   Please check your database setup and try again",
        sep="\n",
    )
else:
    print(
        "✅ All tests completed successfully!",
        "\n📋 What was tested:",
        "   ✅ Database initialization",
        "   ✅ Individual search methods (cue cards, prompts, conversations)",
        "   ✅ Combined vector context retrieval",
        "   ✅ Pipeline helper function integration",
        "   ✅ Performance measurement",
        "   ✅ Detailed statistics",
        sep="\n",
    )

    print(
        "\n🚀 The vector context integration is ready to use!",
        "   - Set use_vector_context = True in config.py",
        "   - The system will automatically enhance Gemma responses",
        "   - No additional code changes needed",
        sep="\n",
    )

print("\n" + "=" * 50)