fix qdrant bug with checking existing collection (run-llama#13009)
logan-markewich authored and mattf committed Apr 25, 2024
1 parent 527092d commit 77459a7
Showing 4 changed files with 186 additions and 31 deletions.
docs/docs/examples/vector_stores/QdrantIndexDemo.ipynb (5 changes: 4 additions & 1 deletion)
@@ -372,7 +372,10 @@
"outputs": [],
"source": [
"vector_store = QdrantVectorStore(\n",
" aclient=aclient, collection_name=\"paul_graham\", prefer_grpc=True\n",
" collection_name=\"paul_graham\",\n",
" client=client,\n",
" aclient=aclient,\n",
" prefer_grpc=True,\n",
")\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"index = VectorStoreIndex.from_documents(\n",
docs/docs/examples/vector_stores/qdrant_hybrid.ipynb (203 changes: 179 additions & 24 deletions)
@@ -19,7 +19,7 @@
"\n",
"`sparse` vectors are slightly different. They use a specialized approach or model (TF-IDF, BM25, SPLADE, etc.) for generating vectors. These vectors are typically mostly zeros, making them `sparse` vectors. These `sparse` vectors are great at capturing specific keywords and similar small details.\n",
"\n",
"This notebook walks through setting up and customizing hybrid search with Qdrant and `naver/efficient-splade-VI-BT-large` variants from Huggingface."
"This notebook walks through setting up and customizing hybrid search with Qdrant and `\"prithvida/Splade_PP_en_v1\"` variants from Huggingface."
]
},
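(To make the sparse representation described above concrete: a sparse vector is stored as two parallel lists, the non-zero vocabulary indices and their weights. The token ids and weights below are hypothetical, purely to show the shape Qdrant consumes.)

# Hypothetical sparse vector: almost every vocabulary dimension is zero,
# so only the non-zero (index, weight) pairs are stored.
sparse_indices = [2023, 7592, 14212]  # vocabulary positions with signal
sparse_values = [0.41, 1.27, 0.83]  # learned keyword weights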
{
@@ -37,16 +37,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-vector-stores-qdrant"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install llama-index qdrant-client pypdf \"transformers[torch]\""
"%pip install -U llama-index llama-index-vector-stores-qdrant fastembed"
]
},
{
@@ -91,27 +82,194 @@
"\n",
"Hybrid search with Qdrant must be enabled from the beginning -- we can simply set `enable_hybrid=True`.\n",
"\n",
"This will run sparse vector generation locally using the `\"naver/efficient-splade-VI-BT-large-doc\"` model from Huggingface, in addition to generating dense vectors with OpenAI."
"This will run sparse vector generation locally using the `\"prithvida/Splade_PP_en_v1\"` using fastembed, in addition to generating dense vectors with OpenAI."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c6b8fd0680504f10b5b83c8bc94a5f8c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Fetching 9 files: 0%| | 0/9 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "06b8964d910b4adc8864b38b5f14a4cc",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
".gitattributes: 0%| | 0.00/1.52k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "38a8a07edf454ef8866bb7151503e007",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"generation_config.json: 0%| | 0.00/90.0 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "94053bb2d9004ab89e1c0e9533106fd0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0%| | 0.00/712k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ef485a7bc74c41cf816db14d69aa5dbe",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config.json: 0%| | 0.00/755 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "623b7b6461ac49d8ae55755f98bd9c3d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/1.38k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b539d2c1490b43de858923f9b3e80bd6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"README.md: 0%| | 0.00/133 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6fdf5c72a8954a39b774c91554eb11e2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model.onnx: 0%| | 0.00/532M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e5b675b999ef4c84bc859aea78f97b01",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "33081a9b77464c8c899361ce05297372",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/695 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6b88789a16b443c38ad8e5376ef9510c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Fetching 9 files: 0%| | 0/9 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from llama_index.core import VectorStoreIndex, StorageContext\n",
"from llama_index.core import Settings\n",
"from llama_index.vector_stores.qdrant import QdrantVectorStore\n",
"from qdrant_client import QdrantClient\n",
"from qdrant_client import QdrantClient, AsyncQdrantClient\n",
"\n",
"# creates a persistant index to disk\n",
"client = QdrantClient(path=\"./qdrant_data\")\n",
"client = QdrantClient(host=\"localhost\", port=6333)\n",
"aclient = AsyncQdrantClient(host=\"localhost\", port=6333)\n",
"\n",
"# create our vector store with hybrid indexing enabled\n",
"# batch_size controls how many nodes are encoded with sparse vectors at once\n",
"vector_store = QdrantVectorStore(\n",
" \"llama2_paper\", client=client, enable_hybrid=True, batch_size=20\n",
" \"llama2_paper\",\n",
" client=client,\n",
" aclient=aclient,\n",
" enable_hybrid=True,\n",
" batch_size=20,\n",
")\n",
"\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
@@ -155,7 +313,7 @@
{
"data": {
"text/markdown": [
"Llama2 was specifically trained differently from Llama1 by making several changes to improve performance. These changes included performing more robust data cleaning, updating the data mixes, training on 40% more total tokens, doubling the context length, and using grouped-query attention (GQA) to improve inference scalability for larger models."
"Llama 2 was specifically trained differently from Llama 1 by making changes such as performing more robust data cleaning, updating data mixes, training on 40% more total tokens, doubling the context length, and using grouped-query attention (GQA) to improve inference scalability for larger models. Additionally, Llama 2 adopted most of the pretraining setting and model architecture from Llama 1 but included architectural enhancements like increased context length and grouped-query attention."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
@@ -207,7 +365,7 @@
{
"data": {
"text/markdown": [
"Llama 2 was specifically trained differently from Llama 1 by making several changes to improve performance. These changes included performing more robust data cleaning, updating the data mixes, training on 40% more total tokens, doubling the context length, and using grouped-query attention (GQA) to improve inference scalability for larger models. These modifications were made to enhance the training process and optimize the performance of Llama 2 compared to Llama 1."
"Llama 2 was specifically trained differently from Llama 1 by making changes to improve performance, such as performing more robust data cleaning, updating data mixes, training on 40% more total tokens, doubling the context length, and using grouped-query attention (GQA) to improve inference scalability for larger models."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
@@ -261,15 +419,12 @@
"from llama_index.core import VectorStoreIndex, StorageContext\n",
"from llama_index.core import Settings\n",
"from llama_index.vector_stores.qdrant import QdrantVectorStore\n",
"from qdrant_client import AsyncQdrantClient\n",
"\n",
"\n",
"# creates a persistant index to disk\n",
"aclient = AsyncQdrantClient(path=\"./qdrant_data_async\")\n",
"\n",
"# create our vector store with hybrid indexing enabled\n",
"vector_store = QdrantVectorStore(\n",
" collection_name=\"llama2_paper\",\n",
" client=client,\n",
" aclient=aclient,\n",
" enable_hybrid=True,\n",
" batch_size=20,\n",
Expand Down Expand Up @@ -300,9 +455,9 @@
"\n",
"### Customizing Sparse Vector Generation\n",
"\n",
"By default, sparse vector generation is done using seperate models for queries and documents -- `\"naver/efficient-splade-VI-BT-large-doc\"` and `\"naver/efficient-splade-VI-BT-large-query\"`\n",
"Sparse vector generation can be done using a single model, or sometimes distinct seperate models for queries and documents. Here we use two -- `\"naver/efficient-splade-VI-BT-large-doc\"` and `\"naver/efficient-splade-VI-BT-large-query\"`\n",
"\n",
"Below is the default code for generating the sparse vectors and how you can set the functionality in the constructor. You can use this and customize as needed."
"Below is the sample code for generating the sparse vectors and how you can set the functionality in the constructor. You can use this and customize as needed."
]
},
{
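(The code cell elided here defines those sparse encoding functions. As an illustration of the general shape, and not necessarily the notebook's exact cell, the sketch below computes SPLADE-style document vectors with Huggingface transformers using the standard log(1 + relu(logits)) max-pooling recipe, and wires them in through the store's sparse_doc_fn hook; a matching sparse_query_fn would use the -query model.)

import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

doc_tokenizer = AutoTokenizer.from_pretrained(
    "naver/efficient-splade-VI-BT-large-doc"
)
doc_model = AutoModelForMaskedLM.from_pretrained(
    "naver/efficient-splade-VI-BT-large-doc"
)

def sparse_doc_vectors(texts):
    # SPLADE pooling: max over the sequence of log(1 + relu(logits)),
    # masked so padding tokens contribute nothing.
    tokens = doc_tokenizer(
        texts, truncation=True, padding=True, return_tensors="pt"
    )
    logits = doc_model(**tokens).logits
    relu_log = torch.log(1 + torch.relu(logits))
    weighted = relu_log * tokens["attention_mask"].unsqueeze(-1)
    tvecs, _ = torch.max(weighted, dim=1)

    # Qdrant expects parallel index/value lists per input text
    indices, values = [], []
    for vec in tvecs:
        nz = vec.nonzero().squeeze(-1)
        indices.append(nz.tolist())
        values.append(vec[nz].tolist())
    return indices, values

vector_store = QdrantVectorStore(
    "llama2_paper",
    client=client,
    enable_hybrid=True,
    sparse_doc_fn=sparse_doc_vectors,
)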
llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py
@@ -748,8 +748,6 @@ def parse_to_query_result(self, response: List[Any]) -> VectorStoreQueryResult:
        try:
            node = metadata_dict_to_node(payload)
        except Exception:
-           # NOTE: deprecated legacy logic for backward compatibility
-           logger.debug("Failed to parse Node metadata, fallback to legacy logic.")
            metadata, node_info, relationships = legacy_metadata_dict_to_node(
                payload
            )
@@ -871,11 +869,10 @@ def _build_query_filter(self, query: VectorStoreQuery) -> Optional[Any]:
        return Filter(must=must_conditions)

    def use_old_sparse_encoder(self, collection_name: str) -> bool:
-       collection_info = self.client.get_collection(collection_name)
        return (
            self._collection_exists(collection_name)
-           and collection_info.config.params.sparse_vectors is not None
-           and SPARSE_VECTOR_NAME_OLD in collection_info.config.params.sparse_vectors
+           and SPARSE_VECTOR_NAME_OLD
+           in self.client.get_collection(collection_name).config.params.vectors
        )

@property
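(This hunk is the bug named in the commit title: the old code called `get_collection()` unconditionally, and Qdrant raises for a collection that does not exist yet, e.g. on first ingest, so the existence check that followed never ran. The rewritten boolean short-circuits on `_collection_exists()` first. A minimal standalone sketch of the failure mode, with a hypothetical collection name and qdrant-client's public `collection_exists()` standing in for the integration's private `_collection_exists()` helper:)

from qdrant_client import QdrantClient

client = QdrantClient(host="localhost", port=6333)

# Old order of operations: this call raises (404) when
# "brand_new_collection" has not been created yet, so no
# later existence check could save it.
# info = client.get_collection("brand_new_collection")

# Fixed order: `and` short-circuits, so get_collection() only runs
# once the collection is known to exist.
if client.collection_exists("brand_new_collection"):
    info = client.get_collection("brand_new_collection")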
llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-vector-stores-qdrant"
readme = "README.md"
version = "0.2.1"
version = "0.2.2"

[tool.poetry.dependencies]
python = ">=3.9,<3.13"
