Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/_templates/layout.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{% extends "!layout.html" %}
{# Sphinx template override: the "!" prefix extends the theme's own layout.html
   rather than this file, so only the blocks redefined below are changed. #}

{% block footer %}
{# Keep the theme's original footer, then append the analytics snippet after it. #}
{{ super() }}
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-2EW85FTY8C"></script>
<script>
// Reuse an existing dataLayer queue if one is already defined on the page;
// otherwise start with an empty array.
window.dataLayer = window.dataLayer || [];
// gtag() pushes its raw `arguments` object onto the dataLayer queue.
function gtag(){dataLayer.push(arguments);}
// Queue the 'js' command with the current timestamp.
gtag('js', new Date());

// Queue the 'config' command for the same tag ID used in the script URL above.
gtag('config', 'G-2EW85FTY8C');
</script>
{% endblock %}
176 changes: 66 additions & 110 deletions docs/user_guide/getting_started_01.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
"outputs": [],
"source": [
"import numpy as np\n",
"from pprint import pprint\n",
"from jupyterutils import table_print\n",
"\n",
"data = [\n",
" {'user': 'john', 'age': 1, 'job': 'engineer', 'credit_score': 'high'},\n",
Expand All @@ -53,7 +53,9 @@
"source": [
"This will create 3 entries in Redis (hashes), each with 4 fields (user, age, job, credit_score).\n",
"\n",
"Now, we want to add vectors to represent each user. These are just dummy vectors to illustrate the point, but more complex vectors can be created and used as well. For more information on creating embeddings, see this [article](https://mlops.community/vector-similarity-search-from-basics-to-production/).\n"
"Now, we want to add vectors to represent each user. These are just dummy vectors to illustrate the point, but more complex vectors can be created and used as well. For more information on creating embeddings, see this [article](https://mlops.community/vector-similarity-search-from-basics-to-production/).\n",
"\n",
"As seen below, the sample vectors need to be turned into bytes before they can be loaded into Redis. Using ``NumPy``, this is fairly trivial."
]
},
{
Expand All @@ -62,25 +64,16 @@
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'age': 1,\n",
" 'credit_score': 'high',\n",
" 'job': 'engineer',\n",
" 'user': 'john',\n",
" 'user_embedding': b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?'},\n",
" {'age': 2,\n",
" 'credit_score': 'low',\n",
" 'job': 'doctor',\n",
" 'user': 'mary',\n",
" 'user_embedding': b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?'},\n",
" {'age': 3,\n",
" 'credit_score': 'medium',\n",
" 'job': 'dentist',\n",
" 'user': 'joe',\n",
" 'user_embedding': b'fff?fff?\\xcd\\xcc\\xcc='}]\n"
]
"data": {
"text/html": [
"<table><tr><th>user</th><th>age</th><th>job</th><th>credit_score</th><th>user_embedding</th></tr><tr><td>john</td><td>1</td><td>engineer</td><td>high</td><td>b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?'</td></tr><tr><td>mary</td><td>2</td><td>doctor</td><td>low</td><td>b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?'</td></tr><tr><td>joe</td><td>3</td><td>dentist</td><td>medium</td><td>b'fff?fff?\\xcd\\xcc\\xcc='</td></tr></table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
Expand All @@ -94,17 +87,16 @@
"for record, vector in zip(data, vectors):\n",
" record[\"user_embedding\"] = vector\n",
"\n",
"pprint(data)"
"table_print(data)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"As seen above, the vectors themselves need to be turned into bytes before they can be loaded into Redis. Using ``NumPy``, this is fairly trivial. \n",
"\n",
"Our dataset is now ready to be used with ``redisvl``"
"Our dataset is now ready to be used with ``redisvl``. The next step is to define the schema for the data."
]
},
{
Expand Down Expand Up @@ -146,9 +138,8 @@
" # define vector fields\n",
" vector:\n",
" - name: user_embedding\n",
" algorithm: hnsw\n",
" distance_metric: cosine\n",
"\n",
" algorithm: hnsw\n",
" distance_metric: cosine\n",
"\n",
"```\n",
"\n",
Expand Down Expand Up @@ -213,15 +204,15 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m16:42:28\u001b[0m \u001b[35msam.partee-NW9MQX5Y74\u001b[0m \u001b[34mredisvl.cli.index[4009]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n",
"\u001b[32m16:42:28\u001b[0m \u001b[35msam.partee-NW9MQX5Y74\u001b[0m \u001b[34mredisvl.cli.index[4009]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. user_index\n"
"\u001b[32m13:15:04\u001b[0m \u001b[35msam.partee-NW9MQX5Y74\u001b[0m \u001b[34mredisvl.cli.index[13683]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n",
"\u001b[32m13:15:04\u001b[0m \u001b[35msam.partee-NW9MQX5Y74\u001b[0m \u001b[34mredisvl.cli.index[13683]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. user_index\n"
]
}
],
Expand All @@ -241,7 +232,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -261,11 +252,25 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr><tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr><tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr><tr><td>0.653301358223</td><td>joe</td><td>3</td><td>dentist</td><td>medium</td></tr></table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from redisvl.query import VectorQuery\n",
"from jupyterutils import result_print\n",
"\n",
"# create a vector query returning a number of results\n",
"# with specific fields to return.\n",
Expand All @@ -277,31 +282,8 @@
")\n",
"\n",
"# use the SearchIndex instance (or Redis client) to execute the query\n",
"results = index.query(query)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Score: 0\n",
"Document {'id': 'v1:john', 'payload': None, 'vector_distance': '0', 'user': 'john', 'age': '1', 'job': 'engineer', 'credit_score': 'high'}\n",
"Score: 0\n",
"Document {'id': 'v1:mary', 'payload': None, 'vector_distance': '0', 'user': 'mary', 'age': '2', 'job': 'doctor', 'credit_score': 'low'}\n",
"Score: 0.653301358223\n",
"Document {'id': 'v1:joe', 'payload': None, 'vector_distance': '0.653301358223', 'user': 'joe', 'age': '3', 'job': 'dentist', 'credit_score': 'medium'}\n"
]
}
],
"source": [
"for doc in results.docs:\n",
" print(\"Score:\", doc.vector_distance)\n",
" print(doc)\n"
"results = index.query(query)\n",
"result_print(results)"
]
},
{
Expand All @@ -315,44 +297,29 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"ename": "ResponseError",
"evalue": "Unknown Index name",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mResponseError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[11], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m redis_client \u001b[39m=\u001b[39m Redis(\u001b[39m\"\u001b[39m\u001b[39mlocalhost\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m6379\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[39m# create a new SearchIndex instance from an existing index\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m existing_index \u001b[39m=\u001b[39m SearchIndex\u001b[39m.\u001b[39;49mfrom_existing(redis_client, \u001b[39m\"\u001b[39;49m\u001b[39muser_index\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m 9\u001b[0m \u001b[39m# run the same query\u001b[39;00m\n\u001b[1;32m 10\u001b[0m existing_index\u001b[39m.\u001b[39mquery(query)\n",
"File \u001b[0;32m~/Dropbox/Redis/data-loader/redisvl/index.py:107\u001b[0m, in \u001b[0;36mfrom_existing\u001b[0;34m(cls, client, index_name)\u001b[0m\n\u001b[1;32m 103\u001b[0m @classmethod\n\u001b[1;32m 104\u001b[0m def from_existing(cls, client: redis.Redis, index_name: str):\n\u001b[1;32m 105\u001b[0m \"\"\"Create a SearchIndex from an existing index in Redis\"\"\"\n\u001b[1;32m 106\u001b[0m # TODO assert client connected\n\u001b[0;32m--> 107\u001b[0m # TODO try/except\n\u001b[1;32m 108\u001b[0m info = convert_bytes(client.ft(index_name).info()) # TODO catch response error\n\u001b[1;32m 109\u001b[0m index_definition = make_dict(info[\"index_definition\"])\n",
"File \u001b[0;32m~/.virtualenvs/rvl/lib/python3.8/site-packages/redis/commands/search/commands.py:370\u001b[0m, in \u001b[0;36mSearchCommands.info\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minfo\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m 363\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 364\u001b[0m \u001b[39m Get info an stats about the the current index, including the number of\u001b[39;00m\n\u001b[1;32m 365\u001b[0m \u001b[39m documents, memory consumption, etc\u001b[39;00m\n\u001b[1;32m 366\u001b[0m \n\u001b[1;32m 367\u001b[0m \u001b[39m For more information see `FT.INFO <https://redis.io/commands/ft.info>`_.\u001b[39;00m\n\u001b[1;32m 368\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 370\u001b[0m res \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mexecute_command(INFO_CMD, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mindex_name)\n\u001b[1;32m 371\u001b[0m it \u001b[39m=\u001b[39m \u001b[39mmap\u001b[39m(to_string, res)\n\u001b[1;32m 372\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mdict\u001b[39m(\u001b[39mzip\u001b[39m(it, it))\n",
"File \u001b[0;32m~/.virtualenvs/rvl/lib/python3.8/site-packages/redis/client.py:1269\u001b[0m, in \u001b[0;36mRedis.execute_command\u001b[0;34m(self, *args, **options)\u001b[0m\n\u001b[1;32m 1266\u001b[0m conn \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mconnection \u001b[39mor\u001b[39;00m pool\u001b[39m.\u001b[39mget_connection(command_name, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39moptions)\n\u001b[1;32m 1268\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1269\u001b[0m \u001b[39mreturn\u001b[39;00m conn\u001b[39m.\u001b[39;49mretry\u001b[39m.\u001b[39;49mcall_with_retry(\n\u001b[1;32m 1270\u001b[0m \u001b[39mlambda\u001b[39;49;00m: \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_command_parse_response(\n\u001b[1;32m 1271\u001b[0m conn, command_name, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49moptions\n\u001b[1;32m 1272\u001b[0m ),\n\u001b[1;32m 1273\u001b[0m \u001b[39mlambda\u001b[39;49;00m error: \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_disconnect_raise(conn, error),\n\u001b[1;32m 1274\u001b[0m )\n\u001b[1;32m 1275\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mconnection:\n",
"File \u001b[0;32m~/.virtualenvs/rvl/lib/python3.8/site-packages/redis/retry.py:46\u001b[0m, in \u001b[0;36mRetry.call_with_retry\u001b[0;34m(self, do, fail)\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 45\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 46\u001b[0m \u001b[39mreturn\u001b[39;00m do()\n\u001b[1;32m 47\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_supported_errors \u001b[39mas\u001b[39;00m error:\n\u001b[1;32m 48\u001b[0m failures \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n",
"File \u001b[0;32m~/.virtualenvs/rvl/lib/python3.8/site-packages/redis/client.py:1270\u001b[0m, in \u001b[0;36mRedis.execute_command.<locals>.<lambda>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1266\u001b[0m conn \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mconnection \u001b[39mor\u001b[39;00m pool\u001b[39m.\u001b[39mget_connection(command_name, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39moptions)\n\u001b[1;32m 1268\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1269\u001b[0m \u001b[39mreturn\u001b[39;00m conn\u001b[39m.\u001b[39mretry\u001b[39m.\u001b[39mcall_with_retry(\n\u001b[0;32m-> 1270\u001b[0m \u001b[39mlambda\u001b[39;00m: \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_command_parse_response(\n\u001b[1;32m 1271\u001b[0m conn, command_name, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49moptions\n\u001b[1;32m 1272\u001b[0m ),\n\u001b[1;32m 1273\u001b[0m \u001b[39mlambda\u001b[39;00m error: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_disconnect_raise(conn, error),\n\u001b[1;32m 1274\u001b[0m )\n\u001b[1;32m 1275\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mconnection:\n",
"File \u001b[0;32m~/.virtualenvs/rvl/lib/python3.8/site-packages/redis/client.py:1246\u001b[0m, in \u001b[0;36mRedis._send_command_parse_response\u001b[0;34m(self, conn, command_name, *args, **options)\u001b[0m\n\u001b[1;32m 1242\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 1243\u001b[0m \u001b[39mSend a command and parse the response\u001b[39;00m\n\u001b[1;32m 1244\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 1245\u001b[0m conn\u001b[39m.\u001b[39msend_command(\u001b[39m*\u001b[39margs)\n\u001b[0;32m-> 1246\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mparse_response(conn, command_name, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49moptions)\n",
"File \u001b[0;32m~/.virtualenvs/rvl/lib/python3.8/site-packages/redis/client.py:1286\u001b[0m, in \u001b[0;36mRedis.parse_response\u001b[0;34m(self, connection, command_name, **options)\u001b[0m\n\u001b[1;32m 1284\u001b[0m options\u001b[39m.\u001b[39mpop(NEVER_DECODE)\n\u001b[1;32m 1285\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 1286\u001b[0m response \u001b[39m=\u001b[39m connection\u001b[39m.\u001b[39;49mread_response()\n\u001b[1;32m 1287\u001b[0m \u001b[39mexcept\u001b[39;00m ResponseError:\n\u001b[1;32m 1288\u001b[0m \u001b[39mif\u001b[39;00m EMPTY_RESPONSE \u001b[39min\u001b[39;00m options:\n",
"File \u001b[0;32m~/.virtualenvs/rvl/lib/python3.8/site-packages/redis/connection.py:905\u001b[0m, in \u001b[0;36mAbstractConnection.read_response\u001b[0;34m(self, disable_decoding, disconnect_on_error)\u001b[0m\n\u001b[1;32m 902\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnext_health_check \u001b[39m=\u001b[39m time() \u001b[39m+\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhealth_check_interval\n\u001b[1;32m 904\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(response, ResponseError):\n\u001b[0;32m--> 905\u001b[0m \u001b[39mraise\u001b[39;00m response\n\u001b[1;32m 906\u001b[0m \u001b[39mreturn\u001b[39;00m response\n",
"\u001b[0;31mResponseError\u001b[0m: Unknown Index name"
]
"data": {
"text/html": [
"<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr><tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr><tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr><tr><td>0.653301358223</td><td>joe</td><td>3</td><td>dentist</td><td>medium</td></tr></table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from redis import Redis\n",
"\n",
"# initialize a redis client to connect to existing index\n",
"redis_client = Redis(\"localhost\", 6379)\n",
"\n",
"# create a new SearchIndex instance from an existing index\n",
"existing_index = SearchIndex.from_existing(redis_client, \"user_index\")\n",
"existing_index = SearchIndex.from_existing(\"user_index\", \"redis://localhost:6379\")\n",
"\n",
"# run the same query\n",
"existing_index.query(query)\n",
"\n",
"for doc in results.docs:\n",
" print(\"Score:\", doc.vector_distance)\n",
" print(doc)"
"results = existing_index.query(query)\n",
"result_print(results)\n"
]
},
{
Expand All @@ -366,25 +333,24 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Score: 0\n",
"Document {'id': 'v1:john', 'payload': None, 'vector_distance': '0', 'user': 'john', 'age': '1', 'job': 'engineer', 'credit_score': 'high'}\n",
"Score: 0\n",
"Document {'id': 'v1:mary', 'payload': None, 'vector_distance': '0', 'user': 'mary', 'age': '2', 'job': 'doctor', 'credit_score': 'low'}\n",
"Score: 0.653301358223\n",
"Document {'id': 'v1:joe', 'payload': None, 'vector_distance': '0.653301358223', 'user': 'joe', 'age': '3', 'job': 'dentist', 'credit_score': 'medium'}\n"
]
"data": {
"text/html": [
"<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr><tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr><tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr><tr><td>0.653301358223</td><td>joe</td><td>3</td><td>dentist</td><td>medium</td></tr></table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"## Asynchronous Search\n",
"\n",
"from redisvl.index import AsyncSearchIndex\n",
"\n",
"# construct a search index from the schema\n",
Expand All @@ -401,18 +367,8 @@
"\n",
"# run the same vector query but asynchronously\n",
"results = await index.query(query)\n",
"\n",
"for doc in results.docs:\n",
" print(\"Score:\", doc.vector_distance)\n",
" print(doc)"
"result_print(results)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down