Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Adds the index deletion functionality to the WeaviateVectorStore #12993

Merged
merged 3 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 56 additions & 74 deletions docs/docs/examples/vector_stores/WeaviateIndexDemo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,14 @@
{
"cell_type": "code",
"execution_count": null,
"id": "df8b27e5-5ad5-4dfe-90c7-0cf1f1d1b37f",
"id": "ad860554",
"metadata": {},
"outputs": [],
"source": [
"# cloud\n",
"resource_owner_config = weaviate.AuthClientPassword(\n",
" username=\"<username>\",\n",
" password=\"<password>\",\n",
" username=\"\",\n",
" password=\"\",\n",
")\n",
"client = weaviate.Client(\n",
" \"https://llama-test-ezjahb4m.weaviate.network\",\n",
Expand All @@ -129,18 +129,7 @@
"execution_count": null,
"id": "0a2bcc07",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:numexpr.utils:Note: NumExpr detected 12 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
"Note: NumExpr detected 12 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
"INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n",
"NumExpr defaulting to 8 threads.\n"
]
}
],
"outputs": [],
"source": [
"from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
"from llama_index.vector_stores.weaviate import WeaviateVectorStore\n",
Expand Down Expand Up @@ -227,21 +216,7 @@
"execution_count": null,
"id": "bedbb693-725f-478f-be26-fa7180ea38b2",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"<b>\n",
"Growing up, the author wrote short stories, experimented with programming on an IBM 1401, nagged his father to buy a TRS-80 computer, wrote simple games, a program to predict how high his model rockets would fly, and a word processor. He also studied philosophy in college, switched to AI, and worked on building the infrastructure of the web. He wrote essays and published them online, had dinners for a group of friends every Thursday night, painted, and bought a building in Cambridge.</b>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"display(Markdown(f\"<b>{response}</b>\"))"
]
Expand Down Expand Up @@ -296,21 +271,7 @@
"execution_count": null,
"id": "bc9a2ad0",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"<b>\n",
"At Interleaf, a group of people worked on projects for customers. One of the employees told the narrator about a new thing called HTML, which was a derivative of SGML. The narrator left Interleaf to go back to RISD and did freelance work for the group that did projects for customers. Later, the narrator and a college friend started a new company called Viaweb, which was a web app that allowed users to build stores through the browser. They got seed funding and recruited two programmers to help them build the software. They opened for business in January 1996 with 6 stores.</b>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"# set Logging to DEBUG for more detailed outputs\n",
"query_engine = loaded_index.as_query_engine()\n",
Expand All @@ -334,20 +295,7 @@
"execution_count": null,
"id": "df6b6d46",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'filename': 'README.md', 'category': 'codebase'}\n",
"-----\n",
"\n",
"Context\n",
"LLMs are a phenomenonal piece of technology for knowledge generation and reasoning. \n",
"They a\n"
]
}
],
"outputs": [],
"source": [
"from llama_index.core import Document\n",
"\n",
Expand All @@ -372,21 +320,7 @@
"execution_count": null,
"id": "c1bd18f8",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"<b>\n",
"The name of the file is README.md.</b>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters\n",
"\n",
Expand All @@ -397,6 +331,54 @@
"response = query_engine.query(\"What is the name of the file?\")\n",
"display(Markdown(f\"<b>{response}</b>\"))"
]
},
{
"cell_type": "markdown",
"id": "29a92918",
"metadata": {},
"source": [
"# Deleting the index completely\n",
"\n",
"You can delete the index created by the vector store using the `delete_index` method."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0a5b319",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:llama_index.vector_stores.weaviate.base:Successfully deleted index 'LlamaIndex'.\n",
"Successfully deleted index 'LlamaIndex'.\n"
]
}
],
"source": [
"vector_store.delete_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71932f10-3783-4f8d-a112-b90538d66971",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:llama_index.vector_stores.weaviate.base:Index 'LlamaIndex' does not exist. No action taken.\n",
"Index 'LlamaIndex' does not exist. No action taken.\n"
]
}
],
"source": [
"vector_store.delete_index() # calling the function again does nothing"
]
}
],
"metadata": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import weaviate # noqa
from weaviate import AuthApiKey, Client

logger = logging.getLogger(__name__)
_logger = logging.getLogger(__name__)


def _transform_weaviate_filter_condition(condition: str) -> str:
Expand Down Expand Up @@ -164,7 +164,7 @@ def __init__(

# validate class prefix starts with a capital letter
if class_prefix is not None:
logger.warning("class_prefix is deprecated, please use index_name")
_logger.warning("class_prefix is deprecated, please use index_name")
# legacy, kept for backward compatibility
index_name = f"{class_prefix}_Node"

Expand Down Expand Up @@ -276,6 +276,24 @@ def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
for entry in entries:
self._client.data_object.delete(entry["_additional"]["id"], self.index_name)

def delete_index(self) -> None:
    """Delete the index (Weaviate class) named ``self.index_name``.

    If no class schema with that name exists on the client, a warning is
    logged and the method returns without calling ``delete_class``.

    Raises:
        Exception: If the underlying ``schema.delete_class`` call fails.
            The original error is chained as ``__cause__`` so its
            traceback is preserved for callers.
    """
    if not class_schema_exists(self._client, self.index_name):
        _logger.warning(
            f"Index '{self.index_name}' does not exist. No action taken."
        )
        return
    try:
        self._client.schema.delete_class(self.index_name)
    except Exception as e:
        _logger.error(f"Failed to delete index '{self.index_name}': {e}")
        # Chain explicitly so the root cause is not reported as an
        # incidental "during handling of the above exception" context.
        raise Exception(f"Failed to delete index '{self.index_name}': {e}") from e
    else:
        # Only reached when delete_class returned without raising.
        _logger.info(f"Successfully deleted index '{self.index_name}'.")

def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
"""Query index for top k most similar nodes."""
all_properties = get_all_properties(self._client, self.index_name)
Expand Down Expand Up @@ -311,15 +329,15 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul
vector = query.query_embedding
similarity_key = "distance"
if query.mode == VectorStoreQueryMode.DEFAULT:
logger.debug("Using vector search")
_logger.debug("Using vector search")
if vector is not None:
query_builder = query_builder.with_near_vector(
{
"vector": vector,
}
)
elif query.mode == VectorStoreQueryMode.HYBRID:
logger.debug(f"Using hybrid search with alpha {query.alpha}")
_logger.debug(f"Using hybrid search with alpha {query.alpha}")
similarity_key = "score"
if vector is not None and query.query_str:
query_builder = query_builder.with_hybrid(
Expand All @@ -335,7 +353,7 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul
query_builder = query_builder.with_where(kwargs["filter"])

query_builder = query_builder.with_limit(query.similarity_top_k)
logger.debug(f"Using limit of {query.similarity_top_k}")
_logger.debug(f"Using limit of {query.similarity_top_k}")

# execute query
query_result = query_builder.do()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-vector-stores-weaviate"
readme = "README.md"
version = "0.1.4"
version = "0.1.5"

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
Expand Down
Loading