Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update redis vector store #12386

Merged
2 changes: 1 addition & 1 deletion docs/docs/community/integrations/vector_stores.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ as the storage backend for `VectorStoreIndex`.
- Pinecone (`PineconeVectorStore`). [Installation/Quickstart](https://docs.pinecone.io/docs/quickstart).
- Qdrant (`QdrantVectorStore`). [Installation](https://qdrant.tech/documentation/install/). [Python Client](https://qdrant.tech/documentation/install/#python-client).
- LanceDB (`LanceDBVectorStore`). [Installation/Quickstart](https://lancedb.github.io/lancedb/basic/).
- Redis (`RedisVectorStore`). [Installation](https://redis.io/docs/getting-started/installation/).
- Redis (`RedisVectorStore`). [Installation](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/).
- Supabase (`SupabaseVectorStore`). [Quickstart](https://supabase.github.io/vecs/api/).
- TiDB (`TiDBVectorStore`). [Quickstart](../../examples/vector_stores/TiDBVector.ipynb). [Installation](https://tidb.cloud/ai). [Python Client](https://github.com/pingcap/tidb-vector-python).
- Timescale (`TimescaleVectorStore`). [Installation](https://github.com/timescale/python-vector).
Expand Down
174 changes: 142 additions & 32 deletions docs/docs/examples/ingestion/ingestion_gdrive.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,107 @@
" IngestionPipeline,\n",
" IngestionCache,\n",
")\n",
"from llama_index.core.ingestion.cache import RedisCache\n",
"from llama_index.storage.kvstore.redis import RedisKVStore as RedisCache\n",
"from llama_index.storage.docstore.redis import RedisDocumentStore\n",
"from llama_index.core.node_parser import SentenceSplitter\n",
"from llama_index.vector_stores.redis import RedisVectorStore"
"from llama_index.vector_stores.redis import RedisVectorStore\n",
"\n",
"from redisvl.schema import IndexSchema"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "baf744be",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ac74203675564f14b73882a6ae270d18",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model.safetensors: 0%| | 0.00/133M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c93811def32744ce870253a77767777e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/366 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8c237673c9ec4e22a4eba34c934cc322",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f66602de35274bb299d100783e73a01b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0%| | 0.00/711k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7cc44d9f4fd84913b403a05124e71d9a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/125 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f3a4992e06c44f2aac3f1a4d21e49065",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"1_Pooling/config.json: 0%| | 0.00/190 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")"
]
},
{
Expand All @@ -113,15 +210,32 @@
"metadata": {},
"outputs": [],
"source": [
"vector_store = RedisVectorStore(\n",
" index_name=\"redis_vector_store\",\n",
" index_prefix=\"vectore_store\",\n",
" redis_url=\"redis://localhost:6379\",\n",
"custom_schema = IndexSchema.from_dict(\n",
" {\n",
" \"index\": {\"name\": \"gdrive\", \"prefix\": \"doc\"},\n",
" # customize fields that are indexed\n",
" \"fields\": [\n",
" # required fields for llamaindex\n",
" {\"type\": \"tag\", \"name\": \"id\"},\n",
" {\"type\": \"tag\", \"name\": \"doc_id\"},\n",
" {\"type\": \"text\", \"name\": \"text\"},\n",
" # custom vector field for bge-small-en-v1.5 embeddings\n",
" {\n",
" \"type\": \"vector\",\n",
" \"name\": \"vector\",\n",
" \"attrs\": {\n",
" \"dims\": 384,\n",
" \"algorithm\": \"hnsw\",\n",
" \"distance_metric\": \"cosine\",\n",
" },\n",
" },\n",
" ],\n",
" }\n",
")\n",
"\n",
"cache = IngestionCache(\n",
" cache=RedisCache.from_host_and_port(\"localhost\", 6379),\n",
" collection=\"redis_cache\",\n",
"vector_store = RedisVectorStore(\n",
" schema=custom_schema,\n",
" redis_url=\"redis://localhost:6379\",\n",
")"
]
},
Expand All @@ -133,19 +247,31 @@
"outputs": [],
"source": [
"# Optional: clear vector store if exists\n",
"if vector_store._index_exists():\n",
"if vector_store.index_exists():\n",
" vector_store.delete_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6d98845",
"metadata": {},
"outputs": [],
"source": [
"# Set up the ingestion cache layer\n",
"cache = IngestionCache(\n",
" cache=RedisCache.from_host_and_port(\"localhost\", 6379),\n",
" collection=\"redis_cache\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3be817bd-81a1-436f-8f92-3eb48531c915",
"metadata": {},
"outputs": [],
"source": [
"embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n",
"\n",
"pipeline = IngestionPipeline(\n",
" transformations=[\n",
" SentenceSplitter(),\n",
Expand Down Expand Up @@ -239,15 +365,7 @@
"execution_count": null,
"id": "c77f74b2-9bbe-46d6-b35f-23ea757b315b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ingested 6 Nodes\n"
]
}
],
"outputs": [],
"source": [
"nodes = pipeline.run(documents=docs)\n",
"print(f\"Ingested {len(nodes)} Nodes\")"
Expand Down Expand Up @@ -326,15 +444,7 @@
"execution_count": null,
"id": "d490fbb8-82ec-4284-a19d-1a8ca69da2a4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ingested 1 Nodes\n"
]
}
],
"outputs": [],
"source": [
"docs = load_data(folder_id=\"1RFhr3-KmOZCR5rtp4dlOMNl3LKe1kOA5\")\n",
"nodes = pipeline.run(documents=docs)\n",
Expand Down Expand Up @@ -398,9 +508,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "llama_index_v2",
"display_name": "llama-index-vector-stores-redis-MBNLFpFJ-py3.9",
"language": "python",
"name": "llama_index_v2"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand Down
78 changes: 41 additions & 37 deletions docs/docs/examples/ingestion/redis_ingestion_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,6 @@
"%pip install llama-index-embeddings-huggingface"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: redis in /home/loganm/.cache/pypoetry/virtualenvs/llama-index-4a-wkI5X-py3.11/lib/python3.11/site-packages (5.0.1)\n",
"Requirement already satisfied: async-timeout>=4.0.2 in /home/loganm/.cache/pypoetry/virtualenvs/llama-index-4a-wkI5X-py3.11/lib/python3.11/site-packages (from redis) (4.0.3)\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"!pip install redis"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -75,7 +54,8 @@
"source": [
"import os\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n",
"os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\""
]
},
{
Expand All @@ -102,16 +82,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/loganm/.cache/pypoetry/virtualenvs/llama-index-4a-wkI5X-py3.11/lib/python3.11/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.8.9) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
" warnings.warn(\n"
]
}
],
"outputs": [],
"source": [
"from llama_index.core import SimpleDirectoryReader\n",
"\n",
Expand Down Expand Up @@ -144,13 +115,46 @@
" IngestionPipeline,\n",
" IngestionCache,\n",
")\n",
"from llama_index.core.ingestion.cache import RedisCache\n",
"from llama_index.storage.kvstore.redis import RedisKVStore as RedisCache\n",
"from llama_index.storage.docstore.redis import RedisDocumentStore\n",
"from llama_index.core.node_parser import SentenceSplitter\n",
"from llama_index.vector_stores.redis import RedisVectorStore\n",
"\n",
"from redisvl.schema import IndexSchema\n",
"\n",
"\n",
"embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n",
"\n",
"custom_schema = IndexSchema.from_dict(\n",
" {\n",
" \"index\": {\"name\": \"redis_vector_store\", \"prefix\": \"doc\"},\n",
" # customize fields that are indexed\n",
" \"fields\": [\n",
" # required fields for llamaindex\n",
" {\"type\": \"tag\", \"name\": \"id\"},\n",
" {\"type\": \"tag\", \"name\": \"doc_id\"},\n",
" {\"type\": \"text\", \"name\": \"text\"},\n",
" # custom vector field for bge-small-en-v1.5 embeddings\n",
" {\n",
" \"type\": \"vector\",\n",
" \"name\": \"vector\",\n",
" \"attrs\": {\n",
" \"dims\": 384,\n",
" \"algorithm\": \"hnsw\",\n",
" \"distance_metric\": \"cosine\",\n",
" },\n",
" },\n",
" ],\n",
" }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline = IngestionPipeline(\n",
" transformations=[\n",
" SentenceSplitter(),\n",
Expand All @@ -160,8 +164,7 @@
" \"localhost\", 6379, namespace=\"document_store\"\n",
" ),\n",
" vector_store=RedisVectorStore(\n",
" index_name=\"redis_vector_store\",\n",
" index_prefix=\"vectore_store\",\n",
" schema=custom_schema,\n",
" redis_url=\"redis://localhost:6379\",\n",
" ),\n",
" cache=IngestionCache(\n",
Expand Down Expand Up @@ -221,7 +224,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"I see two documents: \"test2.txt\" and \"test1.txt\".\n"
"I see two documents.\n"
]
}
],
Expand Down Expand Up @@ -261,6 +264,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"13:32:07 redisvl.index.index INFO Index already exists, not overwriting.\n",
"Ingested 2 Nodes\n"
]
}
Expand All @@ -284,7 +288,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"I see three documents: test3.txt, test1.txt, and test2.txt.\n",
"You see three documents: test3.txt, test1.txt, and test2.txt.\n",
"This is a test file: three!\n",
"This is a NEW test file: one!\n",
"This is a test file: two!\n"
Expand Down