From c9c799795ebbd991b0be76149f62a3d7fc31dee7 Mon Sep 17 00:00:00 2001 From: Ujjwal Ayyangar Date: Sun, 21 Apr 2024 02:30:01 -0400 Subject: [PATCH 1/3] feat: Adds the index deletion functionality to the WeviateVectoreStore --- .../vector_stores/WeaviateIndexDemo.ipynb | 160 +++++++++--------- .../vector_stores/weaviate/base.py | 25 ++- .../pyproject.toml | 2 +- 3 files changed, 101 insertions(+), 86 deletions(-) diff --git a/docs/docs/examples/vector_stores/WeaviateIndexDemo.ipynb b/docs/docs/examples/vector_stores/WeaviateIndexDemo.ipynb index 4dac1daa62cc6..a7c9b89dc65cb 100644 --- a/docs/docs/examples/vector_stores/WeaviateIndexDemo.ipynb +++ b/docs/docs/examples/vector_stores/WeaviateIndexDemo.ipynb @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "08ad68ce", "metadata": {}, "outputs": [], @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "eccceb71", "metadata": {}, "outputs": [], @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "72a4b618-668d-4713-84c5-6362030e9f19", "metadata": {}, "outputs": [], @@ -97,14 +97,19 @@ { "cell_type": "code", "execution_count": null, - "id": "df8b27e5-5ad5-4dfe-90c7-0cf1f1d1b37f", - "metadata": {}, + "id": "ad860554", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "# cloud\n", "resource_owner_config = weaviate.AuthClientPassword(\n", - " username=\"\",\n", - " password=\"\",\n", + " username=\"\",\n", + " password=\"\",\n", ")\n", "client = weaviate.Client(\n", " \"https://llama-test-ezjahb4m.weaviate.network\",\n", @@ -126,21 +131,10 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "0a2bcc07", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO:numexpr.utils:Note: NumExpr detected 12 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", - "Note: NumExpr detected 12 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", - "INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n", - "NumExpr defaulting to 8 threads.\n" - ] - } - ], + "outputs": [], "source": [ "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n", "from llama_index.vector_stores.weaviate import WeaviateVectorStore\n", @@ -169,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "68cbd239-880e-41a3-98d8-dbb3fab55431", "metadata": {}, "outputs": [], @@ -227,21 +221,7 @@ "execution_count": null, "id": "bedbb693-725f-478f-be26-fa7180ea38b2", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "\n", - "Growing up, the author wrote short stories, experimented with programming on an IBM 1401, nagged his father to buy a TRS-80 computer, wrote simple games, a program to predict how high his model rockets would fly, and a word processor. He also studied philosophy in college, switched to AI, and worked on building the infrastructure of the web. He wrote essays and published them online, had dinners for a group of friends every Thursday night, painted, and bought a building in Cambridge." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(Markdown(f\"{response}\"))" ] @@ -296,21 +276,7 @@ "execution_count": null, "id": "bc9a2ad0", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "\n", - "At Interleaf, a group of people worked on projects for customers. One of the employees told the narrator about a new thing called HTML, which was a derivative of SGML. The narrator left Interleaf to go back to RISD and did freelance work for the group that did projects for customers. Later, the narrator and a college friend started a new company called Viaweb, which was a web app that allowed users to build stores through the browser. They got seed funding and recruited two programmers to help them build the software. They opened for business in January 1996 with 6 stores." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# set Logging to DEBUG for more detailed outputs\n", "query_engine = loaded_index.as_query_engine()\n", @@ -334,20 +300,7 @@ "execution_count": null, "id": "df6b6d46", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'filename': 'README.md', 'category': 'codebase'}\n", - "-----\n", - "\n", - "Context\n", - "LLMs are a phenomenonal piece of technology for knowledge generation and reasoning. \n", - "They a\n" - ] - } - ], + "outputs": [], "source": [ "from llama_index.core import Document\n", "\n", @@ -372,21 +325,7 @@ "execution_count": null, "id": "c1bd18f8", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "\n", - "The name of the file is README.md." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters\n", "\n", @@ -397,6 +336,64 @@ "response = query_engine.query(\"What is the name of the file?\")\n", "display(Markdown(f\"{response}\"))" ] + }, + { + "cell_type": "markdown", + "id": "29a92918", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "# Deleting the index completely\n", + "\n", + "You can delete the index created by the vector store using the `delete_index` function" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a0a5b319", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:llama_index.vector_stores.weaviate.base:Successfully deleted index 'LlamaIndex'.\n", + "Successfully deleted index 'LlamaIndex'.\n" + ] + } + ], + "source": [ + "vector_store.delete_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "71932f10-3783-4f8d-a112-b90538d66971", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:llama_index.vector_stores.weaviate.base:Index 'LlamaIndex' does not exist. No action taken.\n", + "Index 'LlamaIndex' does not exist. No action taken.\n" + ] + } + ], + "source": [ + "vector_store.delete_index() # calling the function again does nothing" + ] } ], "metadata": { @@ -414,7 +411,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.12.2" } }, "nbformat": 4, diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/llama_index/vector_stores/weaviate/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/llama_index/vector_stores/weaviate/base.py index 49b1f205d3534..29647816b49c0 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/llama_index/vector_stores/weaviate/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/llama_index/vector_stores/weaviate/base.py @@ -31,7 +31,7 @@ import weaviate # noqa from weaviate import AuthApiKey, Client -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) def _transform_weaviate_filter_condition(condition: str) -> str: @@ -164,7 +164,7 @@ def __init__( # validate class prefix starts with a capital letter if class_prefix is not None: - logger.warning("class_prefix is deprecated, please use index_name") + _logger.warning("class_prefix is deprecated, please use index_name") # legacy, kept for backward compatibility index_name = f"{class_prefix}_Node" @@ -276,6 +276,23 @@ def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: for entry in entries: self._client.data_object.delete(entry["_additional"]["id"], self.index_name) + def delete_index(self) -> None: + """ + Delete the index associated with the client + + Raises: + - Exception: If the deletion fails, for some reason. + """ + if not class_schema_exists(self._client, self.index_name): + _logger.warning(f"Index '{self.index_name}' does not exist. No action taken.") + return + try: + self._client.schema.delete_class(self.index_name) + _logger.info(f"Successfully deleted index '{self.index_name}'.") + except Exception as e: + _logger.error(f"Failed to delete index '{self.index_name}': {e}") + raise Exception(f"Failed to delete index '{self.index_name}': {e}") + def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: """Query index for top k most similar nodes.""" all_properties = get_all_properties(self._client, self.index_name) @@ -311,7 +328,7 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul vector = query.query_embedding similarity_key = "distance" if query.mode == VectorStoreQueryMode.DEFAULT: - logger.debug("Using vector search") + _logger.debug("Using vector search") if vector is not None: query_builder = query_builder.with_near_vector( { @@ -335,7 +352,7 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul query_builder = query_builder.with_where(kwargs["filter"]) query_builder = query_builder.with_limit(query.similarity_top_k) - logger.debug(f"Using limit of {query.similarity_top_k}") + _logger.debug(f"Using limit of {query.similarity_top_k}") # execute query query_result = query_builder.do() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/pyproject.toml index 92354b6c18f5d..c4b4f01b7d751 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-weaviate" readme = "README.md" -version = "0.1.4" +version = "0.1.5" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" From 83d5a0d034e781905b1e0084f1436685976d37a5 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Sun, 21 Apr 2024 20:15:53 -0600 Subject: [PATCH 2/3] change leftover logger name --- .../llama_index/vector_stores/weaviate/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/llama_index/vector_stores/weaviate/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/llama_index/vector_stores/weaviate/base.py index 29647816b49c0..c0a1ef4da67d2 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/llama_index/vector_stores/weaviate/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-weaviate/llama_index/vector_stores/weaviate/base.py @@ -277,14 +277,15 @@ def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: self._client.data_object.delete(entry["_additional"]["id"], self.index_name) def delete_index(self) -> None: - """ - Delete the index associated with the client + """Delete the index associated with the client. Raises: - Exception: If the deletion fails, for some reason. """ if not class_schema_exists(self._client, self.index_name): - _logger.warning(f"Index '{self.index_name}' does not exist. No action taken.") + _logger.warning( + f"Index '{self.index_name}' does not exist. No action taken." + ) return try: self._client.schema.delete_class(self.index_name) @@ -336,7 +337,7 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul } ) elif query.mode == VectorStoreQueryMode.HYBRID: - logger.debug(f"Using hybrid search with alpha {query.alpha}") + _logger.debug(f"Using hybrid search with alpha {query.alpha}") similarity_key = "score" if vector is not None and query.query_str: query_builder = query_builder.with_hybrid( From e18dd316f8ea0dee15116e0537116ddb71563062 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Sun, 21 Apr 2024 20:30:51 -0600 Subject: [PATCH 3/3] linting --- .../vector_stores/WeaviateIndexDemo.ipynb | 40 ++++++------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/docs/docs/examples/vector_stores/WeaviateIndexDemo.ipynb b/docs/docs/examples/vector_stores/WeaviateIndexDemo.ipynb index a7c9b89dc65cb..6db9b0e8ea932 100644 --- a/docs/docs/examples/vector_stores/WeaviateIndexDemo.ipynb +++ b/docs/docs/examples/vector_stores/WeaviateIndexDemo.ipynb @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "08ad68ce", "metadata": {}, "outputs": [], @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "eccceb71", "metadata": {}, "outputs": [], @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "72a4b618-668d-4713-84c5-6362030e9f19", "metadata": {}, "outputs": [], @@ -98,12 +98,7 @@ "cell_type": "code", "execution_count": null, "id": "ad860554", - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, + "metadata": {}, "outputs": [], "source": [ "# cloud\n", @@ -131,7 +126,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "0a2bcc07", "metadata": {}, "outputs": [], @@ -163,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "68cbd239-880e-41a3-98d8-dbb3fab55431", "metadata": {}, "outputs": [], @@ -340,12 +335,7 @@ { "cell_type": "markdown", "id": "29a92918", - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, + "metadata": {}, "source": [ "# Deleting the index completely\n", "\n", @@ -354,14 +344,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "a0a5b319", - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -378,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "71932f10-3783-4f8d-a112-b90538d66971", "metadata": {}, "outputs": [ @@ -392,7 +377,7 @@ } ], "source": [ - "vector_store.delete_index() # calling the function again does nothing" + "vector_store.delete_index() # calling the function again does nothing" ] } ], @@ -411,8 +396,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" + "pygments_lexer": "ipython3" } }, "nbformat": 4,