# BookSouls Indexer Test

Test the dual vector indexer and character profiles functionality.

In [1]:
import os
import sys
import json

# Add parent directory to path for imports
# NOTE(review): os.path.abspath('.') is the current working directory, and
# dirname() is applied twice, so this resolves to the directory two levels
# above the working directory (its grandparent), not its direct parent.
# Confirm that is the intended location of the project modules.
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath('.')))
# Position 0 gives this directory priority over every other sys.path entry.
sys.path.insert(0, parent_dir)

from test_indexers import setup_indexer, load_real_data, index_data
from config import load_test_config

In [2]:
# Load configuration and echo the settings this run depends on
test_config = load_test_config()
config_summary = (
    f"Using OpenAI: {test_config.use_openai}",
    f"Base persist dir: {test_config.base_persist_dir}",
    f"Skip indexing if exists: {test_config.skip_indexing_if_exists}",
)
# One print call, one summary entry per output line.
print(*config_summary, sep="\n")

Using OpenAI: True
Base persist dir: ../vector_stores
Skip indexing if exists: True


In [3]:
# Setup indexer
# Builds the indexer from the test configuration loaded earlier; the
# resulting `indexer` object is what the remaining cells query and mutate.
indexer = setup_indexer(test_config)
print("✅ Indexer setup complete")

🔍 Environment API key found: Yes
🔍 Config API key found: Yes
🔍 API key parameter: No
🔧 Using OpenAI embeddings
✅ Indexer setup complete


In [4]:
# Check current stats
# Reads the store statistics as they are before any reset or re-index and
# prints the overall count plus the per-store document counts.
stats = indexer.get_stats()
# Plain string: the header has no placeholders, so it needs no f-prefix.
print("Current stats:")
print(f"  Total documents: {stats['total_documents']}")
print(f"  Narrative docs: {stats['narrative_store']['document_count']}")
print(f"  Dialogue docs: {stats['dialogue_store']['document_count']}")

Current stats:
  Total documents: 27
  Narrative docs: 27
  Dialogue docs: 0


In [5]:
# Reset the stores, then check stats
# reset_stores() runs before get_stats(), so the numbers printed below
# describe the freshly reset stores, not the result of an indexing run.
indexer.reset_stores()
stats = indexer.get_stats()
# Label matches what actually happened in this cell (a reset, not an index).
print("After reset:")
print(f"  Total documents: {stats['total_documents']}")
print(f"  Narrative docs: {stats['narrative_store']['document_count']}")
print(f"  Dialogue docs: {stats['dialogue_store']['document_count']}")



Resetting both vector stores...
Vector stores reset complete
After indexing:
  Total documents: 0
  Narrative docs: 0
  Dialogue docs: 0


In [6]:
# Load and index data (force re-index by temporarily changing config)
original_skip = test_config.skip_indexing_if_exists
test_config.skip_indexing_if_exists = False  # Force re-indexing

try:
    indexer = index_data(indexer, test_config)
finally:
    # Put the saved flag back even when indexing raises, so the override is
    # genuinely temporary and later cells see the configured value again.
    test_config.skip_indexing_if_exists = original_skip

📖 Loading: section_index_20250724_101634.json
💬 Loading: dialogue_index_1.json
✅ Loaded 27 sections and 87 dialogues

📊 Indexing data...
Indexing narrative chunks for Author Agent...
Narrative indexing complete: 27 chunks in 2.89s
Indexing dialogue chunks for Character Agents...
Dialogue indexing complete: 115 chunks in 3.72s
✅ Indexed 27 narrative chunks
✅ Indexed 115 dialogue chunks


In [7]:
# Test character profiles query
# Runs a single-result profile lookup for the text "bilbo" and prints the
# top-level keys of the response to show its layout.
print("Testing character profiles query...")
results = indexer.query_character_profiles("bilbo", n_results=1)
# `results` stays in the notebook namespace; the next cell reads results['results'].
print(f"Results structure: {list(results.keys())}")


Testing character profiles query...
Results structure: ['query', 'character_filter', 'results', 'store_type']


In [8]:
# Pull the raw query payload out of the character-profile response.
query_results = results['results']

# 'documents' is a list of lists (presumably one inner list per query text —
# confirm against the indexer). Fall back to an empty inner list, mirroring
# the `.get('ids', [[]])[0]` pattern used by the later inspection cells, so
# an empty result set is reported instead of raising IndexError/KeyError.
documents = query_results.get('documents') or [[]]
top_hits = documents[0]

if top_hits:
    print(top_hits[0])
else:
    print("No character profile document returned")

{
  "name": "Bilbo",
  "chapter_number": 1,
  "personality_traits": [
    {
      "trait": "curious",
      "manifestation": "yearns for adventure and new experiences",
      "contradiction": "but feels a strong attachment to home and comfort"
    },
    {
      "trait": "humorous",
      "manifestation": "uses wit to engage guests and lighten the mood",
      "contradiction": "but struggles with deeper feelings of anxiety and uncertainty"
    },
    {
      "trait": "defensive",
      "manifestation": "stands firm about his possessions and choices",
      "contradiction": "but reveals vulnerability when discussing his age and desires"
    }
  ],
  "motivations": [],
  "speech_style": {
    "vocabulary": "mixed",
    "sentence_style": "elaborate with whimsical flourishes, interspersed with short, punchy statements",
    "verbal_tics": [
      "you know",
      "I mean",
      "of course"
    ],
    "unique_phrases": [
      "eleventy-one",
      "my dear Bagginses and Bof\ufb01ns"
    

In [None]:
# Check what's actually in the dialogue store
# NOTE(review): this goes through the query API with n_results=10, so it
# presumably inspects at most 10 returned documents rather than the whole
# store, and "*" is presumably handled as ordinary query text — confirm
# against query_dialogue.
dialogue_results = indexer.query_dialogue("*", n_results=10)
if 'results' in dialogue_results:
    query_results = dialogue_results['results']
    ids = query_results.get('ids', [[]])[0]
    # len(ids) counts what this one query returned, so it is labelled as such
    # rather than as a store-wide total.
    print(f"Dialogue docs returned: {len(ids)}")

    # Check for character profile types
    # Pair each id with its metadata. zip() stops at the shorter sequence, so
    # a missing or short 'metadatas' entry simply yields fewer pairs — the
    # same effect as an explicit per-index bounds check, evaluated once.
    metadatas = (query_results.get('metadatas') or [[]])[0]
    profile_docs = [
        doc_id
        for doc_id, metadata in zip(ids, metadatas)
        # An entry with no metadata at all counts as "not a profile".
        if (metadata or {}).get('type') == 'character_profile'
    ]

    print(f"Character profile docs found: {len(profile_docs)}")
    for doc_id in profile_docs[:5]:
        print(f"  - {doc_id}")

In [None]:
# Debug: Check raw dialogue data for character profiles
section_index, dialogue_index = load_real_data(test_config)

# Resolve a missing/empty mapping once instead of re-testing it on every line.
character_profiles = dialogue_index.character_profiles or {}

print(f"Dialogue index character_profiles keys: {list(character_profiles.keys()) if character_profiles else 'None'}")
print(f"Total character profiles: {len(character_profiles)}")

# Show first few character profiles (nothing to iterate when the mapping is empty)
for char, profiles in list(character_profiles.items())[:3]:
    print(f"\nCharacter: {char}")
    print(f"  Profiles: {len(profiles)}")
    if profiles:
        profile = profiles[0]
        # Convert to text before slicing so the preview is capped at 100
        # characters; slicing the attribute directly would cap a list at 100
        # elements instead, which is not what the trailing "..." implies.
        traits_preview = str(profile.personality_traits)[:100]
        print(f"  First profile: {traits_preview}...")

In [None]:
# Test other query methods
def _report_hits(response, label):
    """Print how many ids `response` holds for the `label` store.

    Args:
        response: Mapping returned by an indexer query method; expected to
            carry a 'results' entry whose 'ids' value is a list of lists.
        label: Store name used in the printed message.
    """
    if 'results' not in response:
        # Report the gap explicitly; printing nothing would look the same as
        # a cell that never ran.
        print(f"No 'results' in {label} response")
        return
    ids = response['results'].get('ids', [[]])[0]
    print(f"Found {len(ids)} {label} results")


print("Testing narrative query:")
narrative_results = indexer.query_narrative("adventure", n_results=3)
_report_hits(narrative_results, "narrative")

print("\nTesting dialogue query:")
dialogue_results = indexer.query_dialogue("hello", n_results=3)
_report_hits(dialogue_results, "dialogue")