From 722aae4fd1fb2a23647d71e6c43b84ec36d77683 Mon Sep 17 00:00:00 2001 From: morgana Date: Mon, 12 Feb 2024 19:50:20 -0800 Subject: [PATCH] community: add delete method to rocksetdb vectorstore to support recordmanager (#17030) - **Description:** This adds a delete method so that rocksetdb can be used with `RecordManager`. - **Issue:** N/A - **Dependencies:** N/A - **Twitter handle:** `@_morgan_adams_` --------- Co-authored-by: Rockset API Bot --- .../docs/modules/data_connection/indexing.ipynb | 2 +- .../vectorstores/rocksetdb.py | 17 +++++++++++++++++ .../vectorstores/test_rocksetdb.py | 16 ++++++++++++++++ .../vectorstores/test_indexing_docs.py | 1 + 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/docs/docs/modules/data_connection/indexing.ipynb b/docs/docs/modules/data_connection/indexing.ipynb index b888a77958b874..45a5d92a42bc95 100644 --- a/docs/docs/modules/data_connection/indexing.ipynb +++ b/docs/docs/modules/data_connection/indexing.ipynb @@ -60,7 +60,7 @@ " * document addition by id (`add_documents` method with `ids` argument)\n", " * delete by id (`delete` method with `ids` argument)\n", "\n", - "Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n", + "Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n", " \n", "## Caution\n", "\n", diff --git a/libs/community/langchain_community/vectorstores/rocksetdb.py b/libs/community/langchain_community/vectorstores/rocksetdb.py index 992f53db081ddf..0e94089a0cddfd 100644 --- a/libs/community/langchain_community/vectorstores/rocksetdb.py +++ b/libs/community/langchain_community/vectorstores/rocksetdb.py @@ -6,6 +6,7 @@ from langchain_core.documents import Document from langchain_core.embeddings import Embeddings +from langchain_core.runnables import run_in_executor from langchain_core.vectorstores import VectorStore logger = logging.getLogger(__name__) @@ -332,3 +333,19 @@ def delete_texts(self, ids: List[str]) -> None: data=[DeleteDocumentsRequestData(id=i) for i in ids], workspace=self._workspace, ) + + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + try: + if ids is None: + ids = [] + self.delete_texts(ids) + except Exception as e: + logger.error("Exception when deleting docs from Rockset: %s\n", e) + return False + + return True + + async def adelete( + self, ids: Optional[List[str]] = None, **kwargs: Any + ) -> Optional[bool]: + return await run_in_executor(None, self.delete, ids, **kwargs) diff --git a/libs/community/tests/integration_tests/vectorstores/test_rocksetdb.py b/libs/community/tests/integration_tests/vectorstores/test_rocksetdb.py index 25f56d522464b4..b4c79b610718f9 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_rocksetdb.py +++ b/libs/community/tests/integration_tests/vectorstores/test_rocksetdb.py @@ -155,3 +155,19 @@ def test_build_query_sql_with_where(self) -> None: LIMIT 4 """ assert q_str == expected + + def test_add_documents_and_delete(self) -> None: + """ "add_documents" and "delete" are requirements to support use + with RecordManager""" + + texts = ["foo", "bar", "baz"] + metadatas = [{"metadata_index": i} for i in range(len(texts))] + + _docs = zip(texts, metadatas) + docs = [Document(page_content=pc, metadata=i) for pc, i in _docs] + + ids = self.rockset_vectorstore.add_documents(docs) + assert len(ids) == len(texts) + + deleted = self.rockset_vectorstore.delete(ids) + assert deleted diff --git a/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py b/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py index 1232d6bb9a681a..85c5312d1f9245 100644 --- a/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py +++ b/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py @@ -68,6 +68,7 @@ def check_compatibility(vector_store: VectorStore) -> bool: "Pinecone", "Qdrant", "Redis", + "Rockset", "ScaNN", "SemaDB", "SupabaseVectorStore",