From 243f7ed271626197659135b8cebb6d9c47f97cbd Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 28 Apr 2026 12:58:02 -0700 Subject: [PATCH 1/2] Update AGENTS.md: worktree/branch rules, meta-instruction --- AGENTS.md | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 6e391b01..8fb7e8d9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -3,16 +3,27 @@ **NEVER use TEST_MODEL_NAME or "test" embedding model outside of test files** Never run git commands that make any changes. (`git status` and `git diff` are fine) +Exceptions: `git push`, `git worktree`, `git branch` (for tracking setup), as instructed below. -**NEVER COMMIT CODE. Do not run `git commit` or any other git commands -that make changes to the repository. Not even `git add`** +**NEVER COMMIT CODE.** Do not run `git commit` or any other git commands +that make changes to the repository. Exception: Worktrees/Branches below. +`git add` is fine. When moving, copying or deleting files, use the git commands: `git mv`, `git cp`, `git rm` -When I ask to update AGENTS.md (even if maybe) extract a general rule from what I said -before and update AGENTS.md (unless it's already in there -- maybe reformulate since -it apparently didn't work). Also, when it looks like I state a general rule, add it to -AGENTS.md. In all cases show what you added to AGENTS.md. 
+## Worktrees and Branches + +- Each session uses its own worktree with a feature branch +- Create worktrees with: `git worktree add ../<repo>-<branch> -b <branch>` +- Push the branch to the `me` remote: `git push me <branch>` +- Set upstream to `me/<branch>`: `git branch --set-upstream-to me/<branch>` +- **Never** upstream to `me/main` — that must stay identical to `origin/main` +- The worktree directory name should be `<repo>-<branch>` (sibling of the main checkout) + +**Whenever the user tells you how to do something, states a preference, or corrects you, +extract a general rule and add it to AGENTS.md** (unless it's already covered -- maybe +reformulate since it apparently didn't work). This applies even without being asked. +In all cases show what you added to AGENTS.md. - Don't use '!' on the command line, it's some bash magic (even inside single quotes) - When running 'make' commands, do not use the venv (the Makefile uses 'uv run') From 21be83815c65a22cf3f53f8f6cd81b348065ab73 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 28 Apr 2026 16:34:38 -0700 Subject: [PATCH 2/2] Turn async accessor methods to sync properties for provider attributes --- AGENTS.md | 9 +++ src/typeagent/knowpro/conversation_base.py | 8 +-- src/typeagent/knowpro/factory.py | 8 +-- src/typeagent/knowpro/interfaces_storage.py | 26 ++++--- src/typeagent/knowpro/secindex.py | 36 +++------- src/typeagent/podcasts/podcast.py | 4 +- src/typeagent/podcasts/podcast_ingest.py | 2 +- src/typeagent/storage/memory/messageindex.py | 2 +- src/typeagent/storage/memory/provider.py | 26 ++++--- src/typeagent/storage/sqlite/provider.py | 51 +++----------- src/typeagent/transcripts/transcript.py | 4 +- tests/conftest.py | 2 +- tests/test_message_text_index_population.py | 6 +- tests/test_property_index_population.py | 4 +- tests/test_related_terms_index_population.py | 12 ++-- tests/test_reltermsindex.py | 4 +- tests/test_secindex.py | 10 +-- tests/test_secindex_storage_integration.py | 14 ++-- tests/test_semrefindex.py | 14 ++-- 
tests/test_sqlitestore.py | 12 ++-- tests/test_storage_providers_unified.py | 72 ++++++++++---------- tools/ingest_email.py | 2 +- tools/ingest_vtt.py | 4 +- tools/load_json.py | 2 +- tools/query.py | 2 +- 25 files changed, 153 insertions(+), 183 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 8fb7e8d9..981fc974 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,6 +19,15 @@ When moving, copying or deleting files, use the git commands: `git mv`, `git cp` - Set upstream to `me/<branch>`: `git branch --set-upstream-to me/<branch>` - **Never** upstream to `me/main` — that must stay identical to `origin/main` - The worktree directory name should be `<repo>-<branch>` (sibling of the main checkout) +- **Work in the worktree directory**, not the main checkout — edit files there, run tests there +- VS Code may show buffers from the main checkout; ignore those when working in a worktree. + When in doubt, verify edits landed on disk with `cat` or `grep` in the terminal. + +## Debugging discipline + +- When a bug seems impossible, suspect stale files or wrong working directory — not exotic causes. +- If you're tempted to blame installed package versions, `__pycache__`, or similar, + **stop and ask the user** before investigating further. You're probably on the wrong track. 
**Whenever the user tells you how to do something, states a preference, or corrects you, extract a general rule and add it to AGENTS.md** (unless it's already covered -- maybe diff --git a/src/typeagent/knowpro/conversation_base.py b/src/typeagent/knowpro/conversation_base.py index 131b0ceb..f69ab7c7 100644 --- a/src/typeagent/knowpro/conversation_base.py +++ b/src/typeagent/knowpro/conversation_base.py @@ -96,10 +96,10 @@ async def create( tags if tags is not None else [], ) instance.storage_provider = storage_provider - instance.messages = await storage_provider.get_message_collection() - instance.semantic_refs = await storage_provider.get_semantic_ref_collection() - instance.semantic_ref_index = await storage_provider.get_semantic_ref_index() - instance.secondary_indexes = await secindex.ConversationSecondaryIndexes.create( + instance.messages = storage_provider.messages + instance.semantic_refs = storage_provider.semantic_refs + instance.semantic_ref_index = storage_provider.semantic_ref_index + instance.secondary_indexes = secindex.ConversationSecondaryIndexes( storage_provider, settings.related_term_index_settings ) return instance diff --git a/src/typeagent/knowpro/factory.py b/src/typeagent/knowpro/factory.py index bdebf8f2..5c94bce1 100644 --- a/src/typeagent/knowpro/factory.py +++ b/src/typeagent/knowpro/factory.py @@ -60,10 +60,10 @@ async def create_conversation[TMessage: IMessage]( tags=tags if tags is not None else [], ) conversation.storage_provider = storage_provider - conversation.messages = await storage_provider.get_message_collection() - conversation.semantic_refs = await storage_provider.get_semantic_ref_collection() - conversation.semantic_ref_index = await storage_provider.get_semantic_ref_index() - conversation.secondary_indexes = await secindex.ConversationSecondaryIndexes.create( + conversation.messages = storage_provider.messages + conversation.semantic_refs = storage_provider.semantic_refs + conversation.semantic_ref_index = 
storage_provider.semantic_ref_index + conversation.secondary_indexes = secindex.ConversationSecondaryIndexes( storage_provider, settings.related_term_index_settings ) return conversation diff --git a/src/typeagent/knowpro/interfaces_storage.py b/src/typeagent/knowpro/interfaces_storage.py index c0450f29..b8df832b 100644 --- a/src/typeagent/knowpro/interfaces_storage.py +++ b/src/typeagent/knowpro/interfaces_storage.py @@ -126,23 +126,31 @@ async def get_metadata_multiple( class IStorageProvider[TMessage: IMessage](Protocol): """API spec for storage providers -- maybe in-memory or persistent.""" - async def get_message_collection(self) -> IMessageCollection[TMessage]: ... + @property + def messages(self) -> IMessageCollection[TMessage]: ... - async def get_semantic_ref_collection(self) -> ISemanticRefCollection: ... + @property + def semantic_refs(self) -> ISemanticRefCollection: ... - # Index getters - ALL 6 index types for this conversation + # Index properties - ALL 6 index types for this conversation - async def get_semantic_ref_index(self) -> ITermToSemanticRefIndex: ... + @property + def semantic_ref_index(self) -> ITermToSemanticRefIndex: ... - async def get_property_index(self) -> IPropertyToSemanticRefIndex: ... + @property + def property_index(self) -> IPropertyToSemanticRefIndex: ... - async def get_timestamp_index(self) -> ITimestampToTextRangeIndex: ... + @property + def timestamp_index(self) -> ITimestampToTextRangeIndex: ... - async def get_message_text_index(self) -> IMessageTextIndex[TMessage]: ... + @property + def message_text_index(self) -> IMessageTextIndex[TMessage]: ... - async def get_related_terms_index(self) -> ITermToRelatedTermsIndex: ... + @property + def related_terms_index(self) -> ITermToRelatedTermsIndex: ... - async def get_conversation_threads(self) -> IConversationThreads: ... + @property + def conversation_threads(self) -> IConversationThreads: ... 
# Metadata management diff --git a/src/typeagent/knowpro/secindex.py b/src/typeagent/knowpro/secindex.py index baee18b9..f101f9cb 100644 --- a/src/typeagent/knowpro/secindex.py +++ b/src/typeagent/knowpro/secindex.py @@ -22,32 +22,12 @@ def __init__( settings: RelatedTermIndexSettings, ): self._storage_provider = storage_provider - # Initialize all indexes through storage provider immediately - self.property_to_semantic_ref_index = None - self.timestamp_index = None - self.term_to_related_terms_index = None - self.threads = None - self.message_index = None - - @classmethod - async def create( - cls, - storage_provider: IStorageProvider, - settings: RelatedTermIndexSettings, - ) -> "ConversationSecondaryIndexes": - """Create and initialize a ConversationSecondaryIndexes with all indexes.""" - self = cls(storage_provider, settings) # Initialize all indexes from storage provider - self.property_to_semantic_ref_index = ( - await storage_provider.get_property_index() - ) - self.timestamp_index = await storage_provider.get_timestamp_index() - self.term_to_related_terms_index = ( - await storage_provider.get_related_terms_index() - ) - self.threads = await storage_provider.get_conversation_threads() - self.message_index = await storage_provider.get_message_text_index() - return self + self.property_to_semantic_ref_index = storage_provider.property_index + self.timestamp_index = storage_provider.timestamp_index + self.term_to_related_terms_index = storage_provider.related_terms_index + self.threads = storage_provider.conversation_threads + self.message_index = storage_provider.message_text_index async def build_secondary_indexes[ @@ -59,7 +39,7 @@ async def build_secondary_indexes[ ) -> None: if conversation.secondary_indexes is None: storage_provider = await conversation_settings.get_storage_provider() - conversation.secondary_indexes = await ConversationSecondaryIndexes.create( + conversation.secondary_indexes = ConversationSecondaryIndexes( storage_provider, 
conversation_settings.related_term_index_settings ) else: @@ -82,9 +62,9 @@ async def build_transient_secondary_indexes[ settings: ConversationSettings, ) -> None: if conversation.secondary_indexes is None: - conversation.secondary_indexes = await ConversationSecondaryIndexes.create( + conversation.secondary_indexes = ConversationSecondaryIndexes( await settings.get_storage_provider(), - (settings.related_term_index_settings), + settings.related_term_index_settings, ) await build_property_index(conversation) await build_timestamp_index(conversation) diff --git a/src/typeagent/podcasts/podcast.py b/src/typeagent/podcasts/podcast.py index 5376d20e..8038ea60 100644 --- a/src/typeagent/podcasts/podcast.py +++ b/src/typeagent/podcasts/podcast.py @@ -187,8 +187,8 @@ async def read_from_file( data = Podcast._read_conversation_data_from_file(filename_prefix) provider = await settings.get_storage_provider() - msgs = await provider.get_message_collection() - semrefs = await provider.get_semantic_ref_collection() + msgs = provider.messages + semrefs = provider.semantic_refs if await msgs.size() or await semrefs.size(): raise RuntimeError( f"Database {dbname!r} already has messages or semantic refs." diff --git a/src/typeagent/podcasts/podcast_ingest.py b/src/typeagent/podcasts/podcast_ingest.py index d2de7c82..2ff5c764 100644 --- a/src/typeagent/podcasts/podcast_ingest.py +++ b/src/typeagent/podcasts/podcast_ingest.py @@ -114,7 +114,7 @@ async def ingest_podcast( PodcastMessage, ) settings.storage_provider = provider - msg_coll = await provider.get_message_collection() + msg_coll = provider.messages if (msg_size := await msg_coll.size()) > start_message: raise RuntimeError( f"{dbname!r} has {msg_size} messages; start_message ({start_message}) should be at least that." 
diff --git a/src/typeagent/storage/memory/messageindex.py b/src/typeagent/storage/memory/messageindex.py index 8d742794..efcc4ddf 100644 --- a/src/typeagent/storage/memory/messageindex.py +++ b/src/typeagent/storage/memory/messageindex.py @@ -30,7 +30,7 @@ async def build_message_index[ if csi is None: return if csi.message_index is None: - csi.message_index = await storage_provider.get_message_text_index() + csi.message_index = storage_provider.message_text_index messages = conversation.messages # Convert collection to list for add_messages messages_list = await messages.get_slice(0, await messages.size()) diff --git a/src/typeagent/storage/memory/provider.py b/src/typeagent/storage/memory/provider.py index 603fbd24..42c9c3fc 100644 --- a/src/typeagent/storage/memory/provider.py +++ b/src/typeagent/storage/memory/provider.py @@ -77,30 +77,36 @@ async def __aexit__( """Exit transaction context. No-op for in-memory storage.""" pass - async def get_semantic_ref_index(self) -> ITermToSemanticRefIndex: + @property + def semantic_ref_index(self) -> ITermToSemanticRefIndex: return self._conversation_index - async def get_property_index(self) -> IPropertyToSemanticRefIndex: + @property + def property_index(self) -> IPropertyToSemanticRefIndex: return self._property_index - async def get_timestamp_index(self) -> ITimestampToTextRangeIndex: + @property + def timestamp_index(self) -> ITimestampToTextRangeIndex: return self._timestamp_index - async def get_message_text_index(self) -> IMessageTextIndex[TMessage]: + @property + def message_text_index(self) -> IMessageTextIndex[TMessage]: return self._message_text_index - async def get_related_terms_index(self) -> ITermToRelatedTermsIndex: + @property + def related_terms_index(self) -> ITermToRelatedTermsIndex: return self._related_terms_index - async def get_conversation_threads(self) -> IConversationThreads: + @property + def conversation_threads(self) -> IConversationThreads: return self._conversation_threads - async def 
get_message_collection( - self, message_type: type[TMessage] | None = None - ) -> MemoryMessageCollection[TMessage]: + @property + def messages(self) -> MemoryMessageCollection[TMessage]: return self._message_collection - async def get_semantic_ref_collection(self) -> MemorySemanticRefCollection: + @property + def semantic_refs(self) -> MemorySemanticRefCollection: return self._semantic_ref_collection async def close(self) -> None: diff --git a/src/typeagent/storage/sqlite/provider.py b/src/typeagent/storage/sqlite/provider.py index 2978c8ed..a69024b8 100644 --- a/src/typeagent/storage/sqlite/provider.py +++ b/src/typeagent/storage/sqlite/provider.py @@ -12,6 +12,7 @@ from ...knowpro.convsettings import MessageTextIndexSettings, RelatedTermIndexSettings from ...knowpro.interfaces import ConversationMetadata, STATUS_INGESTED from ...knowpro.interfaces_storage import ChunkFailure +from ..memory.convthreads import ConversationThreads from .collections import SqliteMessageCollection, SqliteSemanticRefCollection from .messageindex import SqliteMessageTextIndex from .propindex import SqlitePropertyIndex @@ -100,6 +101,11 @@ def __init__( self.db, self.related_term_index_settings.embedding_index_settings ) + # Initialize conversation threads + self._conversation_threads = ConversationThreads( + self.message_text_index_settings.embedding_index_settings + ) + # Connect message collection to message text index for automatic indexing self._message_collection.set_message_text_index(self._message_text_index) @@ -325,7 +331,7 @@ def semantic_refs(self) -> SqliteSemanticRefCollection: return self._semantic_ref_collection @property - def term_to_semantic_ref_index(self) -> SqliteTermToSemanticRefIndex: + def semantic_ref_index(self) -> SqliteTermToSemanticRefIndex: return self._term_to_semantic_ref_index @property @@ -344,46 +350,9 @@ def message_text_index(self) -> SqliteMessageTextIndex: def related_terms_index(self) -> SqliteRelatedTermsIndex: return self._related_terms_index - 
# Async getters required by base class - async def get_message_collection( - self, message_type: type[TMessage] | None = None - ) -> interfaces.IMessageCollection[TMessage]: - """Get the message collection.""" - return self._message_collection - - async def get_semantic_ref_collection(self) -> interfaces.ISemanticRefCollection: - """Get the semantic reference collection.""" - return self._semantic_ref_collection - - async def get_semantic_ref_index(self) -> interfaces.ITermToSemanticRefIndex: - """Get the semantic reference index.""" - return self._term_to_semantic_ref_index - - async def get_property_index(self) -> interfaces.IPropertyToSemanticRefIndex: - """Get the property index.""" - return self._property_index - - async def get_timestamp_index(self) -> interfaces.ITimestampToTextRangeIndex: - """Get the timestamp index.""" - return self._timestamp_index - - async def get_message_text_index(self) -> interfaces.IMessageTextIndex[TMessage]: - """Get the message text index.""" - return self._message_text_index - - async def get_related_terms_index(self) -> interfaces.ITermToRelatedTermsIndex: - """Get the related terms index.""" - return self._related_terms_index - - async def get_conversation_threads(self) -> interfaces.IConversationThreads: - """Get the conversation threads.""" - # For now, return a simple implementation - # In a full implementation, this would be stored/retrieved from SQLite - from ...storage.memory.convthreads import ConversationThreads - - return ConversationThreads( - self.message_text_index_settings.embedding_index_settings - ) + @property + def conversation_threads(self) -> ConversationThreads: + return self._conversation_threads async def clear(self) -> None: """Clear all data from the storage provider.""" diff --git a/src/typeagent/transcripts/transcript.py b/src/typeagent/transcripts/transcript.py index 5033e293..08c4fdae 100644 --- a/src/typeagent/transcripts/transcript.py +++ b/src/typeagent/transcripts/transcript.py @@ -187,8 +187,8 
@@ async def read_from_file( data = Transcript._read_conversation_data_from_file(filename_prefix) provider = await settings.get_storage_provider() - msgs = await provider.get_message_collection() - semrefs = await provider.get_semantic_ref_collection() + msgs = provider.messages + semrefs = provider.semantic_refs if await msgs.size() or await semrefs.size(): raise RuntimeError( f"Database {dbname!r} already has messages or semantic refs." diff --git a/tests/conftest.py b/tests/conftest.py index 7f0f11f5..bdb5def4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -328,7 +328,7 @@ async def ensure_initialized(self): storage_provider = await self.settings.get_storage_provider() self._storage_provider = storage_provider if self.semantic_ref_index is None: - self.semantic_ref_index = await storage_provider.get_semantic_ref_index() # type: ignore + self.semantic_ref_index = storage_provider.semantic_ref_index # type: ignore if self._has_secondary_indexes: # Set up secondary indexes diff --git a/tests/test_message_text_index_population.py b/tests/test_message_text_index_population.py index 13d53c00..a069d979 100644 --- a/tests/test_message_text_index_population.py +++ b/tests/test_message_text_index_population.py @@ -59,7 +59,7 @@ async def test_message_text_index_population_from_database(): ), ] - msg_collection = await storage1.get_message_collection() + msg_collection = storage1.messages await msg_collection.extend(test_messages) assert await msg_collection.size() == len(test_messages) @@ -74,7 +74,7 @@ async def test_message_text_index_population_from_database(): ) # Check message collection size - msg_collection2 = await storage2.get_message_collection() + msg_collection2 = storage2.messages msg_count = await msg_collection2.size() print(f"Message collection size: {msg_count}") assert msg_count == len( @@ -82,7 +82,7 @@ async def test_message_text_index_population_from_database(): ), f"Expected {len(test_messages)} messages, got {msg_count}" # Check message 
text index - msg_text_index = await storage2.get_message_text_index() + msg_text_index = storage2.message_text_index # Check that it implements the interface correctly from typeagent.knowpro.interfaces import IMessageTextIndex diff --git a/tests/test_property_index_population.py b/tests/test_property_index_population.py index 5a158353..f1f24c7c 100644 --- a/tests/test_property_index_population.py +++ b/tests/test_property_index_population.py @@ -79,7 +79,7 @@ async def test_property_index_population_from_database(really_needs_auth): ), ] - sem_ref_collection = await storage1.get_semantic_ref_collection() + sem_ref_collection = storage1.semantic_refs for sem_ref in test_data: await sem_ref_collection.append(sem_ref) @@ -111,7 +111,7 @@ async def test_property_index_population_from_database(really_needs_auth): # Build property index from the semantic refs await build_property_index(conversation) - prop_index = await storage2.get_property_index() + prop_index = storage2.property_index from typeagent.knowpro.interfaces import IPropertyToSemanticRefIndex assert isinstance(prop_index, IPropertyToSemanticRefIndex) diff --git a/tests/test_related_terms_index_population.py b/tests/test_related_terms_index_population.py index bf40722e..a4e615c4 100644 --- a/tests/test_related_terms_index_population.py +++ b/tests/test_related_terms_index_population.py @@ -61,12 +61,12 @@ async def test_related_terms_index_population_from_database(really_needs_auth): ), ] - msg_collection = await storage1.get_message_collection() + msg_collection = storage1.messages for message in test_messages: await msg_collection.append(message) # Add some semantic refs to create terms for the related terms index - sem_ref_collection = await storage1.get_semantic_ref_collection() + sem_ref_collection = storage1.semantic_refs # Add some entities entity_refs = [ @@ -97,7 +97,7 @@ async def test_related_terms_index_population_from_database(really_needs_auth): await sem_ref_collection.append(sem_ref) # 
Manually populate the semantic ref index since the user guarantees it's complete externally - semantic_ref_index = await storage1.get_semantic_ref_index() + semantic_ref_index = storage1.semantic_ref_index for sem_ref in entity_refs: knowledge = sem_ref.knowledge @@ -119,7 +119,7 @@ async def test_related_terms_index_population_from_database(really_needs_auth): ) # Check message collection size - msg_collection2 = await storage2.get_message_collection() + msg_collection2 = storage2.messages msg_count = await msg_collection2.size() print(f"Message collection size: {msg_count}") assert msg_count == len( @@ -127,7 +127,7 @@ async def test_related_terms_index_population_from_database(really_needs_auth): ), f"Expected {len(test_messages)} messages, got {msg_count}" # Check semantic ref collection size - sem_ref_collection2 = await storage2.get_semantic_ref_collection() + sem_ref_collection2 = storage2.semantic_refs sem_ref_count = await sem_ref_collection2.size() print(f"Semantic ref collection size: {sem_ref_count}") assert sem_ref_count == len( @@ -148,7 +148,7 @@ async def test_related_terms_index_population_from_database(really_needs_auth): await build_related_terms_index(conversation, related_terms_settings) # Check related terms index - related_terms_index = await storage2.get_related_terms_index() + related_terms_index = storage2.related_terms_index assert isinstance(related_terms_index, SqliteRelatedTermsIndex) # Check if fuzzy index has entries diff --git a/tests/test_reltermsindex.py b/tests/test_reltermsindex.py index 20752084..20c0f0b7 100644 --- a/tests/test_reltermsindex.py +++ b/tests/test_reltermsindex.py @@ -50,7 +50,7 @@ def get_knowledge(self): message_text_settings=message_text_settings, related_terms_settings=related_terms_settings, ) - index = await storage_provider.get_related_terms_index() + index = storage_provider.related_terms_index yield index else: provider = SqliteStorageProvider( @@ -59,7 +59,7 @@ def get_knowledge(self): 
message_text_index_settings=message_text_settings, related_term_index_settings=related_terms_settings, ) - index = await provider.get_related_terms_index() + index = provider.related_terms_index yield index await provider.close() diff --git a/tests/test_secindex.py b/tests/test_secindex.py index 39665b05..730cb831 100644 --- a/tests/test_secindex.py +++ b/tests/test_secindex.py @@ -45,15 +45,15 @@ def test_conversation_secondary_indexes_initialization( embedding_settings = TextEmbeddingIndexSettings(test_model) settings = RelatedTermIndexSettings(embedding_settings) indexes = ConversationSecondaryIndexes(storage_provider, settings) - # Note: indexes are None until initialize() is called - assert indexes.property_to_semantic_ref_index is None - assert indexes.timestamp_index is None - assert indexes.term_to_related_terms_index is None + # Indexes are initialized from storage provider in __init__ + assert indexes.property_to_semantic_ref_index is not None + assert indexes.timestamp_index is not None + assert indexes.term_to_related_terms_index is not None # Test with custom settings settings2 = RelatedTermIndexSettings(embedding_settings) indexes_with_settings = ConversationSecondaryIndexes(storage_provider, settings2) - assert indexes_with_settings.property_to_semantic_ref_index is None + assert indexes_with_settings.property_to_semantic_ref_index is not None @pytest.mark.asyncio diff --git a/tests/test_secindex_storage_integration.py b/tests/test_secindex_storage_integration.py index 15738bb6..0c3751fe 100644 --- a/tests/test_secindex_storage_integration.py +++ b/tests/test_secindex_storage_integration.py @@ -23,9 +23,7 @@ async def test_secondary_indexes_use_storage_provider( embedding_settings = TextEmbeddingIndexSettings(test_model) related_terms_settings = RelatedTermIndexSettings(embedding_settings) - indexes = await ConversationSecondaryIndexes.create( - storage_provider, related_terms_settings - ) + indexes = ConversationSecondaryIndexes(storage_provider, 
related_terms_settings) assert indexes.property_to_semantic_ref_index is not None assert indexes.timestamp_index is not None @@ -34,11 +32,11 @@ async def test_secondary_indexes_use_storage_provider( assert indexes.message_index is not None # Verify they are the same instances as those from storage provider - storage_prop_index = await storage_provider.get_property_index() - storage_timestamp_index = await storage_provider.get_timestamp_index() - storage_related_terms = await storage_provider.get_related_terms_index() - storage_threads = await storage_provider.get_conversation_threads() - storage_message_index = await storage_provider.get_message_text_index() + storage_prop_index = storage_provider.property_index + storage_timestamp_index = storage_provider.timestamp_index + storage_related_terms = storage_provider.related_terms_index + storage_threads = storage_provider.conversation_threads + storage_message_index = storage_provider.message_text_index assert indexes.property_to_semantic_ref_index is storage_prop_index assert indexes.timestamp_index is storage_timestamp_index diff --git a/tests/test_semrefindex.py b/tests/test_semrefindex.py index 5f580992..f12de683 100644 --- a/tests/test_semrefindex.py +++ b/tests/test_semrefindex.py @@ -65,7 +65,7 @@ def get_knowledge(self): message_text_settings=message_text_settings, related_terms_settings=related_terms_settings, ) - index = await provider.get_semantic_ref_index() + index = provider.semantic_ref_index yield index else: provider = SqliteStorageProvider( @@ -83,7 +83,7 @@ def get_knowledge(self): Topic, ) - collection = await provider.get_semantic_ref_collection() + collection = provider.semantic_refs # Create semantic refs with ordinals 1, 2, 3 that the tests expect for i in range(1, 4): @@ -94,7 +94,7 @@ def get_knowledge(self): ) await collection.append(ref) - index = await provider.get_semantic_ref_index() + index = provider.semantic_ref_index yield index await provider.close() @@ -125,8 +125,8 @@ def 
get_knowledge(self): message_text_settings=message_text_settings, related_terms_settings=related_terms_settings, ) - index = await provider.get_semantic_ref_index() - collection = await provider.get_semantic_ref_collection() + index = provider.semantic_ref_index + collection = provider.semantic_refs yield {"index": index, "collection": collection} else: provider = SqliteStorageProvider( @@ -135,8 +135,8 @@ def get_knowledge(self): message_text_index_settings=message_text_settings, related_term_index_settings=related_terms_settings, ) - index = await provider.get_semantic_ref_index() - collection = await provider.get_semantic_ref_collection() + index = provider.semantic_ref_index + collection = provider.semantic_refs yield {"index": index, "collection": collection} await provider.close() diff --git a/tests/test_sqlitestore.py b/tests/test_sqlitestore.py index 27a522d0..3d973215 100644 --- a/tests/test_sqlitestore.py +++ b/tests/test_sqlitestore.py @@ -74,7 +74,7 @@ def make_dummy_semantic_ref(ordinal: int = 0) -> SemanticRef: async def test_sqlite_message_collection_append_and_get( dummy_sqlite_storage_provider: SqliteStorageProvider[DummyMessage], ): - store = await dummy_sqlite_storage_provider.get_message_collection() + store = dummy_sqlite_storage_provider.messages msg = DummyMessage(["foo"]) await store.append(msg) assert await store.size() == 1 @@ -90,7 +90,7 @@ async def test_sqlite_message_collection_append_and_get( async def test_sqlite_message_collection_iter( dummy_sqlite_storage_provider: SqliteStorageProvider[DummyMessage], ): - collection = await dummy_sqlite_storage_provider.get_message_collection() + collection = dummy_sqlite_storage_provider.messages msgs = [DummyMessage([f"msg{i}"]) for i in range(3)] for m in msgs: await collection.append(m) @@ -101,7 +101,7 @@ async def test_sqlite_message_collection_iter( async def test_sqlite_semantic_ref_collection_append_and_get( dummy_sqlite_storage_provider: SqliteStorageProvider[DummyMessage], ): - 
collection = await dummy_sqlite_storage_provider.get_semantic_ref_collection() + collection = dummy_sqlite_storage_provider.semantic_refs ref = make_dummy_semantic_ref(123) await collection.append(ref) assert await collection.size() == 1 @@ -117,7 +117,7 @@ async def test_sqlite_semantic_ref_collection_append_and_get( async def test_sqlite_semantic_ref_collection_iter( dummy_sqlite_storage_provider: SqliteStorageProvider[DummyMessage], ): - collection = await dummy_sqlite_storage_provider.get_semantic_ref_collection() + collection = dummy_sqlite_storage_provider.semantic_refs refs = [make_dummy_semantic_ref(i) for i in range(2)] for r in refs: await collection.append(r) @@ -132,7 +132,7 @@ async def test_sqlite_timestamp_index( from typeagent.knowpro.interfaces import DateRange # Set up database with some messages - message_collection = await dummy_sqlite_storage_provider.get_message_collection() + message_collection = dummy_sqlite_storage_provider.messages # Add test messages messages = [ @@ -145,7 +145,7 @@ async def test_sqlite_timestamp_index( await message_collection.append(msg) # Create timestamp index - timestamp_index = await dummy_sqlite_storage_provider.get_timestamp_index() + timestamp_index = dummy_sqlite_storage_provider.timestamp_index # Test add_timestamp - use actual message ordinals from the database test_timestamps = [ diff --git a/tests/test_storage_providers_unified.py b/tests/test_storage_providers_unified.py index 179b1a7b..f12f6fde 100644 --- a/tests/test_storage_providers_unified.py +++ b/tests/test_storage_providers_unified.py @@ -102,27 +102,27 @@ async def test_all_index_creation( storage_provider, _ = storage_provider_type # Test all index types are created and return proper interface objects - conv_index = await storage_provider.get_semantic_ref_index() + conv_index = storage_provider.semantic_ref_index assert conv_index is not None assert hasattr(conv_index, "lookup_term") # Basic interface check - prop_index = await 
storage_provider.get_property_index() + prop_index = storage_provider.property_index assert prop_index is not None assert hasattr(prop_index, "lookup_property") # Basic interface check - time_index = await storage_provider.get_timestamp_index() + time_index = storage_provider.timestamp_index assert time_index is not None assert hasattr(time_index, "lookup_range") # Basic interface check - msg_index = await storage_provider.get_message_text_index() + msg_index = storage_provider.message_text_index assert msg_index is not None assert hasattr(msg_index, "lookup_messages") # Basic interface check - rel_index = await storage_provider.get_related_terms_index() + rel_index = storage_provider.related_terms_index assert rel_index is not None assert hasattr(rel_index, "aliases") # Basic interface check - threads = await storage_provider.get_conversation_threads() + threads = storage_provider.conversation_threads assert threads is not None assert hasattr(threads, "threads") # Basic interface check @@ -135,16 +135,16 @@ async def test_index_persistence( storage_provider, _ = storage_provider_type # All index types should return same instance across calls - conv1 = await storage_provider.get_semantic_ref_index() - conv2 = await storage_provider.get_semantic_ref_index() + conv1 = storage_provider.semantic_ref_index + conv2 = storage_provider.semantic_ref_index assert conv1 is conv2 - prop1 = await storage_provider.get_property_index() - prop2 = await storage_provider.get_property_index() + prop1 = storage_provider.property_index + prop2 = storage_provider.property_index assert prop1 is prop2 - time1 = await storage_provider.get_timestamp_index() - time2 = await storage_provider.get_timestamp_index() + time1 = storage_provider.timestamp_index + time2 = storage_provider.timestamp_index assert time1 is time2 @@ -156,7 +156,7 @@ async def test_message_collection_basic_operations( storage_provider, _ = storage_provider_type # Create message collection - collection = await 
storage_provider.get_message_collection() + collection = storage_provider.messages # Test initial state assert await collection.size() == 0 @@ -196,7 +196,7 @@ async def test_semantic_ref_collection_basic_operations( storage_provider, _ = storage_provider_type # Create semantic ref collection - collection = await storage_provider.get_semantic_ref_collection() + collection = storage_provider.semantic_refs # Test initial state assert await collection.size() == 0 @@ -251,7 +251,7 @@ async def test_semantic_ref_index_behavior_parity( """Test that semantic ref index behaves identically in both providers.""" storage_provider, _ = storage_provider_type - conv_index = await storage_provider.get_semantic_ref_index() + conv_index = storage_provider.semantic_ref_index # Test empty state empty_results = await conv_index.lookup_term("nonexistent") @@ -269,7 +269,7 @@ async def test_timestamp_index_behavior_parity( """Test that timestamp index behaves identically in both providers.""" storage_provider, _provider_type = storage_provider_type - time_index = await storage_provider.get_timestamp_index() + time_index = storage_provider.timestamp_index # Test empty lookup_range interface start_time = Datetime.fromisoformat("2024-01-01T00:00:00Z") @@ -288,7 +288,7 @@ async def test_message_text_index_interface_parity( """Test that message text index interface works identically in both providers.""" storage_provider, _ = storage_provider_type - msg_index = await storage_provider.get_message_text_index() + msg_index = storage_provider.message_text_index # Test empty lookup_messages empty_results = await msg_index.lookup_messages("nonexistent query", 10) @@ -303,7 +303,7 @@ async def test_related_terms_index_interface_parity( """Test that related terms index interface works identically in both providers.""" storage_provider, _ = storage_provider_type - rel_index = await storage_provider.get_related_terms_index() + rel_index = storage_provider.related_terms_index # Test interface 
properties aliases = rel_index.aliases @@ -321,7 +321,7 @@ async def test_conversation_threads_interface_parity( """Test that conversation threads interface works identically in both providers.""" storage_provider, _ = storage_provider_type - threads = await storage_provider.get_conversation_threads() + threads = storage_provider.conversation_threads # Test initial empty state assert len(threads.threads) == 0 @@ -352,8 +352,8 @@ async def test_cross_provider_message_collection_equivalence( try: # Create collections in both - memory_collection = await memory_provider.get_message_collection() - sqlite_collection = await sqlite_provider.get_message_collection() + memory_collection = memory_provider.messages + sqlite_collection = sqlite_provider.messages # Add identical data to both test_messages = [ @@ -394,8 +394,8 @@ async def test_property_index_population_from_semantic_refs( storage_provider, provider_type = storage_provider_type # Get collections - sem_ref_collection = await storage_provider.get_semantic_ref_collection() - prop_index = await storage_provider.get_property_index() + sem_ref_collection = storage_provider.semantic_refs + prop_index = storage_provider.property_index # Check initial state initial_sem_ref_count = await sem_ref_collection.size() @@ -476,7 +476,7 @@ async def test_property_index_basic_operations( """Test basic property index operations work identically in both providers.""" storage_provider, _ = storage_provider_type - prop_index = await storage_provider.get_property_index() + prop_index = storage_provider.property_index # Test initial state - should be able to handle lookups even when empty empty_results = await prop_index.lookup_property("name", "nonexistent") @@ -495,7 +495,7 @@ async def test_timestamp_index_range_queries( """Test timestamp index range query functionality in both providers.""" storage_provider, _ = storage_provider_type - timestamp_index = await storage_provider.get_timestamp_index() + timestamp_index = 
storage_provider.timestamp_index # Test basic interface - empty range query start_time = Datetime.fromisoformat("2024-01-01T00:00:00Z") @@ -526,8 +526,8 @@ async def test_timestamp_index_with_data( storage_provider, provider_type = storage_provider_type # First add some messages to work with - message_collection = await storage_provider.get_message_collection() - timestamp_index = await storage_provider.get_timestamp_index() + message_collection = storage_provider.messages + timestamp_index = storage_provider.timestamp_index # Add test messages test_messages = [ @@ -631,12 +631,12 @@ async def test_storage_provider_independence( ) # Test memory provider independence - memory_index1 = await memory_provider1.get_semantic_ref_index() - memory_index2 = await memory_provider2.get_semantic_ref_index() + memory_index1 = memory_provider1.semantic_ref_index + memory_index2 = memory_provider2.semantic_ref_index assert memory_index1 is not memory_index2 - memory_collection1 = await memory_provider1.get_message_collection() - memory_collection2 = await memory_provider2.get_message_collection() + memory_collection1 = memory_provider1.messages + memory_collection2 = memory_provider2.messages # Add data to first memory provider await memory_collection1.append(DummyTestMessage(["memory test 1"])) @@ -644,12 +644,12 @@ async def test_storage_provider_independence( assert await memory_collection2.size() == 0 # Second provider unaffected # Test sqlite provider independence - sqlite_index1 = await sqlite_provider1.get_semantic_ref_index() - sqlite_index2 = await sqlite_provider2.get_semantic_ref_index() + sqlite_index1 = sqlite_provider1.semantic_ref_index + sqlite_index2 = sqlite_provider2.semantic_ref_index assert sqlite_index1 is not sqlite_index2 - sqlite_collection1 = await sqlite_provider1.get_message_collection() - sqlite_collection2 = await sqlite_provider2.get_message_collection() + sqlite_collection1 = sqlite_provider1.messages + sqlite_collection2 = 
sqlite_provider2.messages # Add data to first sqlite provider await sqlite_collection1.append(DummyTestMessage(["sqlite test 1"])) @@ -681,7 +681,7 @@ async def test_collection_operations_comprehensive( storage_provider, _ = storage_provider_type # Test message collection operations - message_collection = await storage_provider.get_message_collection() + message_collection = storage_provider.messages # Test initial state assert await message_collection.size() == 0 diff --git a/tools/ingest_email.py b/tools/ingest_email.py index eccac4cb..74a259ea 100644 --- a/tools/ingest_email.py +++ b/tools/ingest_email.py @@ -311,7 +311,7 @@ async def ingest_emails( skipped_count = 0 start_time = time.time() - semref_coll = await settings.storage_provider.get_semantic_ref_collection() + semref_coll = settings.storage_provider.semantic_refs storage_provider = settings.storage_provider for source_id, email_file, label in _iter_emails(eml_paths, verbose, offset, limit): diff --git a/tools/ingest_vtt.py b/tools/ingest_vtt.py index ffaccfc1..0beb7f66 100644 --- a/tools/ingest_vtt.py +++ b/tools/ingest_vtt.py @@ -242,8 +242,8 @@ async def ingest_vtt_files( print(f"\nParsing VTT files and creating messages...") try: # Get collections from our storage provider - msg_coll = await storage_provider.get_message_collection() - semref_coll = await storage_provider.get_semantic_ref_collection() + msg_coll = storage_provider.messages + semref_coll = storage_provider.semantic_refs # Database should be empty (we checked it doesn't exist earlier) # But verify collections are empty just in case diff --git a/tools/load_json.py b/tools/load_json.py index 8a885047..a1f01bc2 100644 --- a/tools/load_json.py +++ b/tools/load_json.py @@ -54,7 +54,7 @@ async def load_json_to_database( # Get the storage provider to check if database is empty provider = await settings.get_storage_provider() - msgs = await provider.get_message_collection() + msgs = provider.messages # Check if database already has data 
msg_count = await msgs.size() diff --git a/tools/query.py b/tools/query.py index c7ec7908..117d7fbf 100644 --- a/tools/query.py +++ b/tools/query.py @@ -550,7 +550,7 @@ async def main(): # Load existing database provider = await settings.get_storage_provider() - msgs = await provider.get_message_collection() + msgs = provider.messages if await msgs.size() == 0: raise SystemExit(f"Error: Database '{args.database}' is empty.")