diff --git a/docs/user_guide/05_hash_vs_json.ipynb b/docs/user_guide/05_hash_vs_json.ipynb index 550bec55..b6e9d71a 100644 --- a/docs/user_guide/05_hash_vs_json.ipynb +++ b/docs/user_guide/05_hash_vs_json.ipynb @@ -705,19 +705,13 @@ "results" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Cleanup" - ] - }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ + "# Cleanup\n", "bike_index.delete()" ] } diff --git a/docs/user_guide/11_advanced_queries.ipynb b/docs/user_guide/11_advanced_queries.ipynb new file mode 100644 index 00000000..1c737f21 --- /dev/null +++ b/docs/user_guide/11_advanced_queries.ipynb @@ -0,0 +1,1151 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Advanced Query Types\n", + "\n", + "In this notebook, we will explore advanced query types available in RedisVL:\n", + "\n", + "1. **`TextQuery`**: Full text search with advanced scoring\n", + "2. **`HybridQuery`**: Combines text and vector search for hybrid retrieval\n", + "3. **`MultiVectorQuery`**: Search over multiple vector fields simultaneously\n", + "\n", + "These query types are powerful tools for building sophisticated search applications that go beyond simple vector similarity search.\n", + "\n", + "Prerequisites:\n", + "- Ensure `redisvl` is installed in your Python environment.\n", + "- Have a running instance of [Redis Stack](https://redis.io/docs/install/install-stack/) or [Redis Cloud](https://redis.io/cloud).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup and Data Preparation\n", + "\n", + "First, let's create a schema and prepare sample data that includes text fields, numeric fields, and vector fields." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from jupyterutils import result_print\n", + "\n", + "# Sample data with text descriptions, categories, and vectors\n", + "data = [\n", + " {\n", + " 'product_id': 'prod_1',\n", + " 'brief_description': 'comfortable running shoes for athletes',\n", + " 'full_description': 'Engineered with a dual-layer EVA foam midsole and FlexWeave breathable mesh upper, these running shoes deliver responsive cushioning for long-distance runs. The anatomical footbed adapts to your stride while the carbon rubber outsole provides superior traction on varied terrain.',\n", + " 'category': 'footwear',\n", + " 'price': 89.99,\n", + " 'rating': 4.5,\n", + " 'text_embedding': np.array([0.1, 0.2, 0.1], dtype=np.float32).tobytes(),\n", + " 'image_embedding': np.array([0.8, 0.1], dtype=np.float32).tobytes(),\n", + " },\n", + " {\n", + " 'product_id': 'prod_2',\n", + " 'brief_description': 'lightweight running jacket with water resistance',\n", + " 'full_description': 'Stay protected with this ultralight 2.5-layer DWR-coated shell featuring laser-cut ventilation zones and reflective piping for low-light visibility. Packs into its own chest pocket and weighs just 4.2 oz, making it ideal for unpredictable weather conditions.',\n", + " 'category': 'outerwear',\n", + " 'price': 129.99,\n", + " 'rating': 4.8,\n", + " 'text_embedding': np.array([0.2, 0.3, 0.2], dtype=np.float32).tobytes(),\n", + " 'image_embedding': np.array([0.7, 0.2], dtype=np.float32).tobytes(),\n", + " },\n", + " {\n", + " 'product_id': 'prod_3',\n", + " 'brief_description': 'professional tennis racket for competitive players',\n", + " 'full_description': 'Competition-grade racket featuring a 98 sq in head size, 16x19 string pattern, and aerospace-grade graphite frame that delivers explosive power with pinpoint control. 
Tournament-approved specs include 315g weight and 68 RA stiffness rating for advanced baseline play.',\n", + " 'category': 'equipment',\n", + " 'price': 199.99,\n", + " 'rating': 4.9,\n", + " 'text_embedding': np.array([0.9, 0.1, 0.05], dtype=np.float32).tobytes(),\n", + " 'image_embedding': np.array([0.1, 0.9], dtype=np.float32).tobytes(),\n", + " },\n", + " {\n", + " 'product_id': 'prod_4',\n", + " 'brief_description': 'yoga mat with extra cushioning for comfort',\n", + " 'full_description': 'Premium 8mm thick TPE yoga mat with dual-texture surface - smooth side for hot yoga flow and textured side for maximum grip during balancing poses. Closed-cell technology prevents moisture absorption while alignment markers guide proper positioning in asanas.',\n", + " 'category': 'accessories',\n", + " 'price': 39.99,\n", + " 'rating': 4.3,\n", + " 'text_embedding': np.array([0.15, 0.25, 0.15], dtype=np.float32).tobytes(),\n", + " 'image_embedding': np.array([0.5, 0.5], dtype=np.float32).tobytes(),\n", + " },\n", + " {\n", + " 'product_id': 'prod_5',\n", + " 'brief_description': 'basketball shoes with excellent ankle support',\n", + " 'full_description': 'High-top basketball sneakers with Zoom Air units in forefoot and heel, reinforced lateral sidewalls for explosive cuts, and herringbone traction pattern optimized for hardwood courts. The internal bootie construction and extended ankle collar provide lockdown support during aggressive drives.',\n", + " 'category': 'footwear',\n", + " 'price': 139.99,\n", + " 'rating': 4.7,\n", + " 'text_embedding': np.array([0.12, 0.18, 0.12], dtype=np.float32).tobytes(),\n", + " 'image_embedding': np.array([0.75, 0.15], dtype=np.float32).tobytes(),\n", + " },\n", + " {\n", + " 'product_id': 'prod_6',\n", + " 'brief_description': 'swimming goggles with anti-fog coating',\n", + " 'full_description': 'Low-profile competition goggles with curved polycarbonate lenses offering 180-degree peripheral vision and UV protection. 
Hydrophobic anti-fog coating lasts 10x longer than standard treatments, while the split silicone strap and interchangeable nose bridges ensure a watertight, custom fit.',\n", + " 'category': 'accessories',\n", + " 'price': 24.99,\n", + " 'rating': 4.4,\n", + " 'text_embedding': np.array([0.3, 0.1, 0.2], dtype=np.float32).tobytes(),\n", + " 'image_embedding': np.array([0.2, 0.8], dtype=np.float32).tobytes(),\n", + " },\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define the Schema\n", + "\n", + "Our schema includes:\n", + "- **Tag fields**: `product_id`, `category`\n", + "- **Text fields**: `brief_description` and `full_description` for full-text search\n", + "- **Numeric fields**: `price`, `rating`\n", + "- **Vector fields**: `text_embedding` (3 dimensions) and `image_embedding` (2 dimensions) for semantic search" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "schema = {\n", + " \"index\": {\n", + " \"name\": \"advanced_queries\",\n", + " \"prefix\": \"products\",\n", + " \"storage_type\": \"hash\",\n", + " },\n", + " \"fields\": [\n", + " {\"name\": \"product_id\", \"type\": \"tag\"},\n", + " {\"name\": \"category\", \"type\": \"tag\"},\n", + " {\"name\": \"brief_description\", \"type\": \"text\"},\n", + " {\"name\": \"full_description\", \"type\": \"text\"},\n", + " {\"name\": \"price\", \"type\": \"numeric\"},\n", + " {\"name\": \"rating\", \"type\": \"numeric\"},\n", + " {\n", + " \"name\": \"text_embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"dims\": 3,\n", + " \"distance_metric\": \"cosine\",\n", + " \"algorithm\": \"flat\",\n", + " \"datatype\": \"float32\"\n", + " }\n", + " },\n", + " {\n", + " \"name\": \"image_embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"dims\": 2,\n", + " \"distance_metric\": \"cosine\",\n", + " \"algorithm\": \"flat\",\n", + " \"datatype\": \"float32\"\n", + " }\n", + " 
}\n", + " ],\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Index and Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded 6 products into the index\n" + ] + } + ], + "source": [ + "from redisvl.index import SearchIndex\n", + "\n", + "# Create the search index\n", + "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\")\n", + "\n", + "# Create the index and load data\n", + "index.create(overwrite=True)\n", + "keys = index.load(data)\n", + "\n", + "print(f\"Loaded {len(keys)} products into the index\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. TextQuery: Full Text Search\n", + "\n", + "The `TextQuery` class enables full text search with advanced scoring algorithms. It's ideal for keyword-based search with relevance ranking.\n", + "\n", + "### Basic Text Search\n", + "\n", + "Let's search for products related to \"running shoes\":" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_descriptioncategoryprice
5.953989333038773prod_1comfortable running shoes for athletesfootwear89.99
2.085315593627535prod_5basketball shoes with excellent ankle supportfootwear139.99
2.0410082774474088prod_2lightweight running jacket with water resistanceouterwear129.99
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from redisvl.query import TextQuery\n", + "\n", + "# Create a text query\n", + "text_query = TextQuery(\n", + " text=\"running shoes\",\n", + " text_field_name=\"brief_description\",\n", + " return_fields=[\"product_id\", \"brief_description\", \"category\", \"price\"],\n", + " num_results=5\n", + ")\n", + "\n", + "results = index.query(text_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Text Search with Different Scoring Algorithms\n", + "\n", + "RedisVL supports multiple text scoring algorithms. Let's compare `BM25STD` and `TFIDF`:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results with BM25 scoring:\n" + ] + }, + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_descriptionprice
6.031534703977659prod_1comfortable running shoes for athletes89.99
2.085315593627535prod_5basketball shoes with excellent ankle support139.99
1.5268074873573214prod_4yoga mat with extra cushioning for comfort39.99
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# BM25 standard scoring (default)\n", + "bm25_query = TextQuery(\n", + " text=\"comfortable shoes\",\n", + " text_field_name=\"brief_description\",\n", + " text_scorer=\"BM25STD\",\n", + " return_fields=[\"product_id\", \"brief_description\", \"price\"],\n", + " num_results=3\n", + ")\n", + "\n", + "print(\"Results with BM25 scoring:\")\n", + "results = index.query(bm25_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results with TFIDF scoring:\n" + ] + }, + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_descriptionprice
2.3333333333333335prod_1comfortable running shoes for athletes89.99
2.0prod_5basketball shoes with excellent ankle support139.99
1.0prod_4yoga mat with extra cushioning for comfort39.99
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# TFIDF scoring\n", + "tfidf_query = TextQuery(\n", + " text=\"comfortable shoes\",\n", + " text_field_name=\"brief_description\",\n", + " text_scorer=\"TFIDF\",\n", + " return_fields=[\"product_id\", \"brief_description\", \"price\"],\n", + " num_results=3\n", + ")\n", + "\n", + "print(\"Results with TFIDF scoring:\")\n", + "results = index.query(tfidf_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Text Search with Filters\n", + "\n", + "Combine text search with filters to narrow results:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_descriptioncategoryprice
3.9314935770863046prod_1comfortable running shoes for athletesfootwear89.99
3.1279733904413027prod_5basketball shoes with excellent ankle supportfootwear139.99
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from redisvl.query.filter import Tag, Num\n", + "\n", + "# Search for \"shoes\" only in the footwear category\n", + "filtered_text_query = TextQuery(\n", + " text=\"shoes\",\n", + " text_field_name=\"brief_description\",\n", + " filter_expression=Tag(\"category\") == \"footwear\",\n", + " return_fields=[\"product_id\", \"brief_description\", \"category\", \"price\"],\n", + " num_results=5\n", + ")\n", + "\n", + "results = index.query(filtered_text_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_descriptionprice
3.1541404034996914prod_1comfortable running shoes for athletes89.99
1.5268074873573214prod_4yoga mat with extra cushioning for comfort39.99
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Search for products under $100\n", + "price_filtered_query = TextQuery(\n", + " text=\"comfortable\",\n", + " text_field_name=\"brief_description\",\n", + " filter_expression=Num(\"price\") < 100,\n", + " return_fields=[\"product_id\", \"brief_description\", \"price\"],\n", + " num_results=5\n", + ")\n", + "\n", + "results = index.query(price_filtered_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Text Search with Multiple Fields and Weights\n", + "\n", + "You can search across multiple text fields with different weights to prioritize certain fields.\n", + "Here we'll prioritize the `brief_description` field and make text similarity in that field twice as important as text similarity in `full_description`:" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_description
5.035440025836444prod_1comfortable running shoes for athletes
2.085315593627535prod_5basketball shoes with excellent ankle support
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "weighted_query = TextQuery(\n", + " text=\"shoes\",\n", + " text_field_name={\"brief_description\": 1.0, \"full_description\": 0.5},\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3\n", + ")\n", + "\n", + "results = index.query(weighted_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Text Search with Custom Stopwords\n", + "\n", + "Stopwords are common words that are filtered out before processing the query. You can specify which language's default stopwords should be filtered out, like `english`, `french`, or `german`. You can also define your own list of stopwords:" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_description
5.953989333038773prod_1comfortable running shoes for athletes
2.085315593627535prod_5basketball shoes with excellent ankle support
2.0410082774474088prod_2lightweight running jacket with water resistance
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Use English stopwords (default)\n", + "query_with_stopwords = TextQuery(\n", + " text=\"the best shoes for running\",\n", + " text_field_name=\"brief_description\",\n", + " stopwords=\"english\", # Common words like \"the\", \"for\" will be removed\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3\n", + ")\n", + "\n", + "results = index.query(query_with_stopwords)\n", + "result_print(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_description
3.1541404034996914prod_1comfortable running shoes for athletes
3.0864038416103prod_3professional tennis racket for competitive players
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Use custom stopwords\n", + "custom_stopwords_query = TextQuery(\n", + " text=\"professional equipment for athletes\",\n", + " text_field_name=\"brief_description\",\n", + " stopwords=[\"for\", \"with\"], # Only these words will be filtered\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3\n", + ")\n", + "\n", + "results = index.query(custom_stopwords_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_description
5.953989333038773prod_1comfortable running shoes for athletes
2.085315593627535prod_5basketball shoes with excellent ankle support
2.0410082774474088prod_2lightweight running jacket with water resistance
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# No stopwords\n", + "no_stopwords_query = TextQuery(\n", + " text=\"the best shoes for running\",\n", + " text_field_name=\"brief_description\",\n", + " stopwords=None, # All words will be included\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3\n", + ")\n", + "\n", + "results = index.query(no_stopwords_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. HybridQuery: Combining Text and Vector Search\n", + "\n", + "The `HybridQuery` combines text search and vector similarity to provide the best of both worlds:\n", + "- **Text search**: Finds exact keyword matches\n", + "- **Vector search**: Captures semantic similarity\n", + "\n", + "Results are scored using a weighted combination:\n", + "\n", + "```\n", + "hybrid_score = (alpha) * vector_score + (1 - alpha) * text_score\n", + "```\n", + "\n", + "Where `alpha` controls the balance between vector and text search (default: 0.7)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Basic Hybrid Query\n", + "\n", + "Let's search for \"running shoes\" with both text and semantic search:" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
vector_distanceproduct_idbrief_descriptioncategorypricevector_similaritytext_scorehybrid_score
5.96046447754e-08prod_1comfortable running shoes for athletesfootwear89.990.9999999701985.953989333042.48619677905
0.00985252857208prod_5basketball shoes with excellent ankle supportfootwear139.990.9950737357142.085315593631.32214629309
0.00985252857208prod_2lightweight running jacket with water resistanceouterwear129.990.9950737357142.041008277451.30885409823
0.0038834810257prod_4yoga mat with extra cushioning for comfortaccessories39.990.99805825948700.698640781641
0.236237406731prod_6swimming goggles with anti-fog coatingaccessories24.990.88188129663500.617316907644
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from redisvl.query import HybridQuery\n", + "\n", + "# Create a hybrid query\n", + "hybrid_query = HybridQuery(\n", + " text=\"running shoes\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.1, 0.2, 0.1], # Query vector\n", + " vector_field_name=\"text_embedding\",\n", + " return_fields=[\"product_id\", \"brief_description\", \"category\", \"price\"],\n", + " num_results=5\n", + ")\n", + "\n", + "results = index.query(hybrid_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Adjusting the Alpha Parameter\n", + "\n", + "The `alpha` parameter controls the weight between vector and text search:\n", + "- `alpha=1.0`: Pure vector search\n", + "- `alpha=0.0`: Pure text search\n", + "- `alpha=0.7` (default): 70% vector, 30% text" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results with alpha=0.9 (vector-heavy):\n" + ] + }, + { + "data": { + "text/html": [ + "
vector_distanceproduct_idbrief_descriptionvector_similaritytext_scorehybrid_score
-1.19209289551e-07prod_4yoga mat with extra cushioning for comfort1.00000005961.526807487361.05268080238
0.00136888027191prod_5basketball shoes with excellent ankle support0.99931555986400.899384003878
0.00136888027191prod_2lightweight running jacket with water resistance0.99931555986400.899384003878
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# More emphasis on vector search (alpha=0.9)\n", + "vector_heavy_query = HybridQuery(\n", + " text=\"comfortable\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.15, 0.25, 0.15],\n", + " vector_field_name=\"text_embedding\",\n", + " alpha=0.9, # 90% vector, 10% text\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3\n", + ")\n", + "\n", + "print(\"Results with alpha=0.9 (vector-heavy):\")\n", + "results = index.query(vector_heavy_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hybrid Query with Filters\n", + "\n", + "You can also combine hybrid search with filters:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
vector_distanceproduct_idbrief_descriptioncategorypricevector_similaritytext_scorehybrid_score
-1.19209289551e-07prod_3professional tennis racket for competitive playersequipment199.991.00000005963.086403841611.62592119421
0.411657452583prod_5basketball shoes with excellent ankle supportfootwear139.990.79417127370800.555919891596
0.411657452583prod_2lightweight running jacket with water resistanceouterwear129.990.79417127370800.555919891596
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Hybrid search with a price filter\n", + "filtered_hybrid_query = HybridQuery(\n", + " text=\"professional equipment\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.9, 0.1, 0.05],\n", + " vector_field_name=\"text_embedding\",\n", + " filter_expression=Num(\"price\") > 100,\n", + " return_fields=[\"product_id\", \"brief_description\", \"category\", \"price\"],\n", + " num_results=5\n", + ")\n", + "\n", + "results = index.query(filtered_hybrid_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Different Text Scorers\n", + "\n", + "HybridQuery supports the same text scoring algorithms as TextQuery:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
vector_distanceproduct_idbrief_descriptionvector_similaritytext_scorehybrid_score
0prod_5basketball shoes with excellent ankle support152.2
0prod_2lightweight running jacket with water resistance100.7
0.00136888027191prod_4yoga mat with extra cushioning for comfort0.99931555986400.699520891905
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Hybrid query with TFIDF scorer\n", + "hybrid_tfidf = HybridQuery(\n", + " text=\"shoes support\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.12, 0.18, 0.12],\n", + " vector_field_name=\"text_embedding\",\n", + " text_scorer=\"TFIDF\",\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3\n", + ")\n", + "\n", + "results = index.query(hybrid_tfidf)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. MultiVectorQuery: Multi-Vector Search\n", + "\n", + "The `MultiVectorQuery` allows you to search over multiple vector fields simultaneously. This is useful when you have different types of embeddings (e.g., text and image embeddings) and want to find results that match across multiple modalities.\n", + "\n", + "The final score is calculated as a weighted combination:\n", + "\n", + "```\n", + "combined_score = w_1 * score_1 + w_2 * score_2 + w_3 * score_3 + ...\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Basic Multi-Vector Query\n", + "\n", + "First, we need to import the `Vector` class to define our query vectors:" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
distance_0distance_1product_idbrief_descriptioncategoryscore_0score_1combined_score
5.96046447754e-085.96046447754e-08prod_1comfortable running shoes for athletesfootwear0.9999999701980.9999999701980.999999970198
0.009852528572080.00266629457474prod_5basketball shoes with excellent ankle supportfootwear0.9950737357140.9986668527130.996151670814
0.009852528572080.0118260979652prod_2lightweight running jacket with water resistanceouterwear0.9950737357140.9940869510170.994777700305
0.00388348102570.210647821426prod_4yoga mat with extra cushioning for comfortaccessories0.9980582594870.8946760892870.967043608427
0.2362374067310.639005899429prod_6swimming goggles with anti-fog coatingaccessories0.8818812966350.6804970502850.82146602273
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from redisvl.query import MultiVectorQuery, Vector\n", + "\n", + "# Define multiple vectors for the query\n", + "text_vector = Vector(\n", + " vector=[0.1, 0.2, 0.1],\n", + " field_name=\"text_embedding\",\n", + " dtype=\"float32\",\n", + " weight=0.7 # 70% weight for text embedding\n", + ")\n", + "\n", + "image_vector = Vector(\n", + " vector=[0.8, 0.1],\n", + " field_name=\"image_embedding\",\n", + " dtype=\"float32\",\n", + " weight=0.3 # 30% weight for image embedding\n", + ")\n", + "\n", + "# Create a multi-vector query\n", + "multi_vector_query = MultiVectorQuery(\n", + " vectors=[text_vector, image_vector],\n", + " return_fields=[\"product_id\", \"brief_description\", \"category\"],\n", + " num_results=5\n", + ")\n", + "\n", + "results = index.query(multi_vector_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Adjusting Vector Weights\n", + "\n", + "You can adjust the weights to prioritize different vector fields:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results with emphasis on image similarity:\n" + ] + }, + { + "data": { + "text/html": [ + "
distance_0distance_1product_idbrief_descriptioncategoryscore_0score_1combined_score
-1.19209289551e-070prod_3professional tennis racket for competitive playersequipment1.000000059611.00000001192
0.145393729210.00900757312775prod_6swimming goggles with anti-fog coatingaccessories0.9273031353950.9954962134360.981857597828
0.4366961717610.219131231308prod_4yoga mat with extra cushioning for comfortaccessories0.781651914120.8904343843460.868677890301
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# More emphasis on image similarity\n", + "text_vec = Vector(\n", + " vector=[0.9, 0.1, 0.05],\n", + " field_name=\"text_embedding\",\n", + " dtype=\"float32\",\n", + " weight=0.2 # 20% weight\n", + ")\n", + "\n", + "image_vec = Vector(\n", + " vector=[0.1, 0.9],\n", + " field_name=\"image_embedding\",\n", + " dtype=\"float32\",\n", + " weight=0.8 # 80% weight\n", + ")\n", + "\n", + "image_heavy_query = MultiVectorQuery(\n", + " vectors=[text_vec, image_vec],\n", + " return_fields=[\"product_id\", \"brief_description\", \"category\"],\n", + " num_results=3\n", + ")\n", + "\n", + "print(\"Results with emphasis on image similarity:\")\n", + "results = index.query(image_heavy_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multi-Vector Query with Filters\n", + "\n", + "Combine multi-vector search with filters to narrow results:" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
distance_0distance_1product_idbrief_descriptioncategorypricescore_0score_1combined_score
5.96046447754e-085.96046447754e-08prod_1comfortable running shoes for athletesfootwear89.990.9999999701980.9999999701980.999999970198
0.009852528572080.00266629457474prod_5basketball shoes with excellent ankle supportfootwear139.990.9950737357140.9986668527130.996510982513
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Multi-vector search with category filter\n", + "text_vec = Vector(\n", + " vector=[0.1, 0.2, 0.1],\n", + " field_name=\"text_embedding\",\n", + " dtype=\"float32\",\n", + " weight=0.6\n", + ")\n", + "\n", + "image_vec = Vector(\n", + " vector=[0.8, 0.1],\n", + " field_name=\"image_embedding\",\n", + " dtype=\"float32\",\n", + " weight=0.4\n", + ")\n", + "\n", + "filtered_multi_query = MultiVectorQuery(\n", + " vectors=[text_vec, image_vec],\n", + " filter_expression=Tag(\"category\") == \"footwear\",\n", + " return_fields=[\"product_id\", \"brief_description\", \"category\", \"price\"],\n", + " num_results=5\n", + ")\n", + "\n", + "results = index.query(filtered_multi_query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparing Query Types\n", + "\n", + "Let's compare the three query types side by side:" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TextQuery Results (keyword-based):\n" + ] + }, + { + "data": { + "text/html": [ + "
scoreproduct_idbrief_description
2.8773943004779676prod_1comfortable running shoes for athletes
2.085315593627535prod_5basketball shoes with excellent ankle support
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "# TextQuery - keyword-based search\n", + "text_q = TextQuery(\n", + " text=\"shoes\",\n", + " text_field_name=\"brief_description\",\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3\n", + ")\n", + "\n", + "print(\"TextQuery Results (keyword-based):\")\n", + "result_print(index.query(text_q))\n", + "print()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HybridQuery Results (text + vector):\n" + ] + }, + { + "data": { + "text/html": [ + "
vector_distanceproduct_idbrief_descriptionvector_similaritytext_scorehybrid_score
5.96046447754e-08prod_1comfortable running shoes for athletes0.9999999701982.877394300481.56321826928
0.0038834810257prod_4yoga mat with extra cushioning for comfort0.99805825948700.698640781641
0.00985252857208prod_2lightweight running jacket with water resistance0.99507373571400.696551615
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "# HybridQuery - combines text and vector search\n", + "hybrid_q = HybridQuery(\n", + " text=\"shoes\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.1, 0.2, 0.1],\n", + " vector_field_name=\"text_embedding\",\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3\n", + ")\n", + "\n", + "print(\"HybridQuery Results (text + vector):\")\n", + "result_print(index.query(hybrid_q))\n", + "print()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MultiVectorQuery Results (multiple vectors):\n" + ] + }, + { + "data": { + "text/html": [ + "
distance_0distance_1product_idbrief_descriptionscore_0score_1combined_score
5.96046447754e-085.96046447754e-08prod_1comfortable running shoes for athletes0.9999999701980.9999999701980.999999970198
0.009852528572080.00266629457474prod_5basketball shoes with excellent ankle support0.9950737357140.9986668527130.996870294213
0.009852528572080.0118260979652prod_2lightweight running jacket with water resistance0.9950737357140.9940869510170.994580343366
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# MultiVectorQuery - searches multiple vector fields\n", + "mv_text = Vector(\n", + " vector=[0.1, 0.2, 0.1],\n", + " field_name=\"text_embedding\",\n", + " dtype=\"float32\",\n", + " weight=0.5\n", + ")\n", + "\n", + "mv_image = Vector(\n", + " vector=[0.8, 0.1],\n", + " field_name=\"image_embedding\",\n", + " dtype=\"float32\",\n", + " weight=0.5\n", + ")\n", + "\n", + "multi_q = MultiVectorQuery(\n", + " vectors=[mv_text, mv_image],\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3\n", + ")\n", + "\n", + "print(\"MultiVectorQuery Results (multiple vectors):\")\n", + "result_print(index.query(multi_q))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Best Practices\n", + "\n", + "### When to Use Each Query Type:\n", + "\n", + "1. **`TextQuery`**:\n", + " - When you need precise keyword matching\n", + " - For traditional search engine functionality\n", + " - When text relevance scoring is important\n", + " - Example: Product search, document retrieval\n", + "\n", + "2. **`HybridQuery`**:\n", + " - When you want to combine keyword and semantic search\n", + " - For improved search quality over pure text or vector search\n", + " - When you have both text and vector representations of your data\n", + " - Example: E-commerce search, content recommendation\n", + "\n", + "3. 
**`MultiVectorQuery`**:\n", + " - When you have multiple types of embeddings (text, image, audio, etc.)\n", + " - For multi-modal search applications\n", + " - When you want to balance multiple semantic signals\n", + " - Example: Image-text search, cross-modal retrieval" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "# Cleanup\n", + "index.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "redisvl-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 2b3ff292..ca900d63 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -21,4 +21,5 @@ User guides provide helpful resources for using RedisVL and its different compon 06_rerankers 07_message_history 08_semantic_router +11_advanced_queries ```