fix qdrant bug with checking existing collection (run-llama#13009)
logan-markewich authored and mattf committed Apr 25, 2024
1 parent 527092d commit 77459a7
Showing 4 changed files with 186 additions and 31 deletions.
docs/docs/examples/vector_stores/QdrantIndexDemo.ipynb (5 changes: 4 additions & 1 deletion)
@@ -372,7 +372,10 @@
"outputs": [],
"source": [
"vector_store = QdrantVectorStore(\n",
" aclient=aclient, collection_name=\"paul_graham\", prefer_grpc=True\n",
" collection_name=\"paul_graham\",\n",
" client=client,\n",
" aclient=aclient,\n",
" prefer_grpc=True,\n",
")\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"index = VectorStoreIndex.from_documents(\n",
docs/docs/examples/vector_stores/qdrant_hybrid.ipynb (203 changes: 179 additions & 24 deletions)
@@ -19,7 +19,7 @@
"\n",
"`sparse` vectors are slightly different. They use a specialized approach or model (TF-IDF, BM25, SPLADE, etc.) for generating vectors. These vectors are typically mostly zeros, making them `sparse` vectors. These `sparse` vectors are great at capturing specific keywords and similar small details.\n",
"\n",
"This notebook walks through setting up and customizing hybrid search with Qdrant and `naver/efficient-splade-VI-BT-large` variants from Huggingface."
"This notebook walks through setting up and customizing hybrid search with Qdrant and `\"prithvida/Splade_PP_en_v1\"` variants from Huggingface."
]
},
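(To make the sparse representation described above concrete: a sparse vector is stored as two parallel lists, the non-zero vocabulary indices and their weights. The token ids and weights below are hypothetical, purely to show the shape Qdrant consumes.)

# Hypothetical sparse vector: almost every vocabulary dimension is zero,
# so only the non-zero (index, weight) pairs are stored.
sparse_indices = [2023, 7592, 14212]  # vocabulary positions with signal
sparse_values = [0.41, 1.27, 0.83]  # learned keyword weights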
{
@@ -37,16 +37,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-vector-stores-qdrant"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install llama-index qdrant-client pypdf \"transformers[torch]\""
"%pip install -U llama-index llama-index-vector-stores-qdrant fastembed"
]
},
{
@@ -91,27 +82,194 @@
"\n",
"Hybrid search with Qdrant must be enabled from the beginning -- we can simply set `enable_hybrid=True`.\n",
"\n",
"This will run sparse vector generation locally using the `\"naver/efficient-splade-VI-BT-large-doc\"` model from Huggingface, in addition to generating dense vectors with OpenAI."
"This will run sparse vector generation locally using the `\"prithvida/Splade_PP_en_v1\"` using fastembed, in addition to generating dense vectors with OpenAI."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c6b8fd0680504f10b5b83c8bc94a5f8c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Fetching 9 files: 0%| | 0/9 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "06b8964d910b4adc8864b38b5f14a4cc",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
".gitattributes: 0%| | 0.00/1.52k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "38a8a07edf454ef8866bb7151503e007",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"generation_config.json: 0%| | 0.00/90.0 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "94053bb2d9004ab89e1c0e9533106fd0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0%| | 0.00/712k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ef485a7bc74c41cf816db14d69aa5dbe",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config.json: 0%| | 0.00/755 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "623b7b6461ac49d8ae55755f98bd9c3d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/1.38k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b539d2c1490b43de858923f9b3e80bd6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"README.md: 0%| | 0.00/133 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6fdf5c72a8954a39b774c91554eb11e2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model.onnx: 0%| | 0.00/532M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e5b675b999ef4c84bc859aea78f97b01",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "33081a9b77464c8c899361ce05297372",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/695 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6b88789a16b443c38ad8e5376ef9510c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Fetching 9 files: 0%| | 0/9 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from llama_index.core import VectorStoreIndex, StorageContext\n",
"from llama_index.core import Settings\n",
"from llama_index.vector_stores.qdrant import QdrantVectorStore\n",
"from qdrant_client import QdrantClient\n",
"from qdrant_client import QdrantClient, AsyncQdrantClient\n",
"\n",
"# creates a persistant index to disk\n",
"client = QdrantClient(path=\"./qdrant_data\")\n",
"client = QdrantClient(host=\"localhost\", port=6333)\n",
"aclient = AsyncQdrantClient(host=\"localhost\", port=6333)\n",
"\n",
"# create our vector store with hybrid indexing enabled\n",
"# batch_size controls how many nodes are encoded with sparse vectors at once\n",
"vector_store = QdrantVectorStore(\n",
" \"llama2_paper\", client=client, enable_hybrid=True, batch_size=20\n",
" \"llama2_paper\",\n",
" client=client,\n",
" aclient=aclient,\n",
" enable_hybrid=True,\n",
" batch_size=20,\n",
")\n",
"\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
@@ -155,7 +313,7 @@
{
"data": {
"text/markdown": [
"Llama2 was specifically trained differently from Llama1 by making several changes to improve performance. These changes included performing more robust data cleaning, updating the data mixes, training on 40% more total tokens, doubling the context length, and using grouped-query attention (GQA) to improve inference scalability for larger models."
"Llama 2 was specifically trained differently from Llama 1 by making changes such as performing more robust data cleaning, updating data mixes, training on 40% more total tokens, doubling the context length, and using grouped-query attention (GQA) to improve inference scalability for larger models. Additionally, Llama 2 adopted most of the pretraining setting and model architecture from Llama 1 but included architectural enhancements like increased context length and grouped-query attention."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
@@ -207,7 +365,7 @@
{
"data": {
"text/markdown": [
"Llama 2 was specifically trained differently from Llama 1 by making several changes to improve performance. These changes included performing more robust data cleaning, updating the data mixes, training on 40% more total tokens, doubling the context length, and using grouped-query attention (GQA) to improve inference scalability for larger models. These modifications were made to enhance the training process and optimize the performance of Llama 2 compared to Llama 1."
"Llama 2 was specifically trained differently from Llama 1 by making changes to improve performance, such as performing more robust data cleaning, updating data mixes, training on 40% more total tokens, doubling the context length, and using grouped-query attention (GQA) to improve inference scalability for larger models."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
@@ -261,15 +419,12 @@
"from llama_index.core import VectorStoreIndex, StorageContext\n",
"from llama_index.core import Settings\n",
"from llama_index.vector_stores.qdrant import QdrantVectorStore\n",
"from qdrant_client import AsyncQdrantClient\n",
"\n",
"\n",
"# creates a persistant index to disk\n",
"aclient = AsyncQdrantClient(path=\"./qdrant_data_async\")\n",
"\n",
"# create our vector store with hybrid indexing enabled\n",
"vector_store = QdrantVectorStore(\n",
" collection_name=\"llama2_paper\",\n",
" client=client,\n",
" aclient=aclient,\n",
" enable_hybrid=True,\n",
" batch_size=20,\n",
Expand Down Expand Up @@ -300,9 +455,9 @@
"\n",
"### Customizing Sparse Vector Generation\n",
"\n",
"By default, sparse vector generation is done using seperate models for queries and documents -- `\"naver/efficient-splade-VI-BT-large-doc\"` and `\"naver/efficient-splade-VI-BT-large-query\"`\n",
"Sparse vector generation can be done using a single model, or sometimes distinct seperate models for queries and documents. Here we use two -- `\"naver/efficient-splade-VI-BT-large-doc\"` and `\"naver/efficient-splade-VI-BT-large-query\"`\n",
"\n",
"Below is the default code for generating the sparse vectors and how you can set the functionality in the constructor. You can use this and customize as needed."
"Below is the sample code for generating the sparse vectors and how you can set the functionality in the constructor. You can use this and customize as needed."
]
},
{
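(The code cell elided here defines those sparse encoding functions. As an illustration of the general shape, and not necessarily the notebook's exact cell, the sketch below computes SPLADE-style document vectors with Huggingface transformers using the standard log(1 + relu(logits)) max-pooling recipe, and wires them in through the store's sparse_doc_fn hook; a matching sparse_query_fn would use the -query model.)

import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

doc_tokenizer = AutoTokenizer.from_pretrained(
    "naver/efficient-splade-VI-BT-large-doc"
)
doc_model = AutoModelForMaskedLM.from_pretrained(
    "naver/efficient-splade-VI-BT-large-doc"
)

def sparse_doc_vectors(texts):
    # SPLADE pooling: max over the sequence of log(1 + relu(logits)),
    # masked so padding tokens contribute nothing.
    tokens = doc_tokenizer(
        texts, truncation=True, padding=True, return_tensors="pt"
    )
    logits = doc_model(**tokens).logits
    relu_log = torch.log(1 + torch.relu(logits))
    weighted = relu_log * tokens["attention_mask"].unsqueeze(-1)
    tvecs, _ = torch.max(weighted, dim=1)

    # Qdrant expects parallel index/value lists per input text
    indices, values = [], []
    for vec in tvecs:
        nz = vec.nonzero().squeeze(-1)
        indices.append(nz.tolist())
        values.append(vec[nz].tolist())
    return indices, values

vector_store = QdrantVectorStore(
    "llama2_paper",
    client=client,
    enable_hybrid=True,
    sparse_doc_fn=sparse_doc_vectors,
)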
llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py
@@ -748,8 +748,6 @@ def parse_to_query_result(self, response: List[Any]) -> VectorStoreQueryResult:
        try:
            node = metadata_dict_to_node(payload)
        except Exception:
-           # NOTE: deprecated legacy logic for backward compatibility
-           logger.debug("Failed to parse Node metadata, fallback to legacy logic.")
            metadata, node_info, relationships = legacy_metadata_dict_to_node(
                payload
            )
@@ -871,11 +869,10 @@ def _build_query_filter(self, query: VectorStoreQuery) -> Optional[Any]:
        return Filter(must=must_conditions)

    def use_old_sparse_encoder(self, collection_name: str) -> bool:
-       collection_info = self.client.get_collection(collection_name)
        return (
            self._collection_exists(collection_name)
-           and collection_info.config.params.sparse_vectors is not None
-           and SPARSE_VECTOR_NAME_OLD in collection_info.config.params.sparse_vectors
+           and SPARSE_VECTOR_NAME_OLD
+           in self.client.get_collection(collection_name).config.params.vectors
        )

@property
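(This hunk is the bug named in the commit title: the old code called `get_collection()` unconditionally, and Qdrant raises for a collection that does not exist yet, e.g. on first ingest, so the existence check that followed never ran. The rewritten boolean short-circuits on `_collection_exists()` first. A minimal standalone sketch of the failure mode, with a hypothetical collection name and qdrant-client's public `collection_exists()` standing in for the integration's private `_collection_exists()` helper:)

from qdrant_client import QdrantClient

client = QdrantClient(host="localhost", port=6333)

# Old order of operations: this call raises (404) when
# "brand_new_collection" has not been created yet, so no
# later existence check could save it.
# info = client.get_collection("brand_new_collection")

# Fixed order: `and` short-circuits, so get_collection() only runs
# once the collection is known to exist.
if client.collection_exists("brand_new_collection"):
    info = client.get_collection("brand_new_collection")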
llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-vector-stores-qdrant"
readme = "README.md"
version = "0.2.1"
version = "0.2.2"

[tool.poetry.dependencies]
python = ">=3.9,<3.13"
