From e03ae4d3768a820f37847eb7b8dde4607ee0de34 Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Thu, 6 Nov 2025 16:30:17 -0800 Subject: [PATCH 1/3] renames HybridQuery to AggregateHybridQuery --- redisvl/query/__init__.py | 2 + redisvl/query/aggregate.py | 31 +++++++-- tests/integration/test_aggregation.py | 71 ++++++++++++++++----- tests/unit/test_aggregation_types.py | 92 +++++++++++++++++++++------ 4 files changed, 156 insertions(+), 40 deletions(-) diff --git a/redisvl/query/__init__.py b/redisvl/query/__init__.py index 8cae93b2..b561fe60 100644 --- a/redisvl/query/__init__.py +++ b/redisvl/query/__init__.py @@ -1,4 +1,5 @@ from redisvl.query.aggregate import ( + AggregateHybridQuery, AggregationQuery, HybridQuery, MultiVectorQuery, @@ -25,6 +26,7 @@ "CountQuery", "TextQuery", "AggregationQuery", + "AggregateHybridQuery", "HybridQuery", "MultiVectorQuery", "Vector", diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py index 2aecbbd7..0085e7bf 100644 --- a/redisvl/query/aggregate.py +++ b/redisvl/query/aggregate.py @@ -1,3 +1,4 @@ +import warnings from typing import Any, Dict, List, Optional, Set, Tuple, Union from pydantic import BaseModel, field_validator, model_validator @@ -53,20 +54,20 @@ def __init__(self, query_string): super().__init__(query_string) -class HybridQuery(AggregationQuery): +class AggregateHybridQuery(AggregationQuery): """ - HybridQuery combines text and vector search in Redis. + AggregateHybridQuery combines text and vector search in Redis. It allows you to perform a hybrid search using both text and vector similarity. It scores documents based on a weighted combination of text and vector similarity. .. 
code-block:: python - from redisvl.query import HybridQuery + from redisvl.query import AggregateHybridQuery from redisvl.index import SearchIndex index = SearchIndex.from_yaml("path/to/index.yaml") - query = HybridQuery( + query = AggregateHybridQuery( text="example text", text_field_name="text_field", vector=[0.1, 0.2, 0.3], @@ -105,7 +106,7 @@ def __init__( text_weights: Optional[Dict[str, float]] = None, ): """ - Instantiates a HybridQuery object. + Instantiates an AggregateHybridQuery object. Args: text (str): The text to search for. @@ -313,6 +314,26 @@ def __str__(self) -> str: return " ".join([str(x) for x in self.build_args()]) +class HybridQuery(AggregateHybridQuery): + """Backward compatibility wrapper for AggregateHybridQuery. + + .. deprecated:: + HybridQuery is a backward compatibility wrapper around AggregateHybridQuery + and will eventually be replaced with a new hybrid query implementation. + to maintain current functionality please use AggregateHybridQuery directly.", + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "HybridQuery is a backward compatibility wrapper around AggregateHybridQuery " + "and will eventually be replaced with a new hybrid query implementation. " + "to maintain current functionality please use AggregateHybridQuery directly.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__(*args, **kwargs) + + class MultiVectorQuery(AggregationQuery): """ MultiVectorQuery allows for search over multiple vector fields in a document simultaneously. 
diff --git a/tests/integration/test_aggregation.py b/tests/integration/test_aggregation.py index 924c3157..d7dec532 100644 --- a/tests/integration/test_aggregation.py +++ b/tests/integration/test_aggregation.py @@ -1,7 +1,7 @@ import pytest from redisvl.index import SearchIndex -from redisvl.query import HybridQuery, MultiVectorQuery, Vector +from redisvl.query import AggregateHybridQuery, HybridQuery, MultiVectorQuery, Vector from redisvl.query.filter import FilterExpression, Geo, GeoRadius, Num, Tag, Text from redisvl.redis.utils import array_to_buffer from tests.conftest import skip_if_redis_version_below @@ -89,7 +89,7 @@ def test_hybrid_query(index): vector_field = "user_embedding" return_fields = ["user", "credit_score", "age", "job", "location", "description"] - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -115,7 +115,7 @@ def test_hybrid_query(index): assert doc["job"] in ["engineer", "doctor", "dermatologist", "CEO", "dentist"] assert doc["credit_score"] in ["high", "low", "medium"] - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -141,7 +141,7 @@ def test_empty_query_string(): # test if text is empty with pytest.raises(ValueError): - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -151,7 +151,7 @@ def test_empty_query_string(): # test if text becomes empty after stopwords are removed text = "with a for but and" # will all be removed as default stopwords with pytest.raises(ValueError): - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -169,7 +169,7 @@ def test_hybrid_query_with_filter(index): return_fields = ["user", "credit_score", "age", "job", "location", "description"] filter_expression = (Tag("credit_score") == ("high")) & (Num("age") > 30) - 
hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -195,7 +195,7 @@ def test_hybrid_query_with_geo_filter(index): return_fields = ["user", "credit_score", "age", "job", "location", "description"] filter_expression = Geo("location") == GeoRadius(-122.4194, 37.7749, 1000, "m") - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -219,7 +219,7 @@ def test_hybrid_query_alpha(index, alpha): vector = [0.1, 0.1, 0.5] vector_field = "user_embedding" - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -247,7 +247,7 @@ def test_hybrid_query_stopwords(index): vector_field = "user_embedding" alpha = 0.5 - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -282,7 +282,7 @@ def test_hybrid_query_with_text_filter(index): filter_expression = Text(text_field) == ("medical") # make sure we can still apply filters to the same text field we are querying - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -300,7 +300,7 @@ def test_hybrid_query_with_text_filter(index): filter_expression = (Text(text_field) == ("medical")) & ( (Text(text_field) != ("research")) ) - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -330,7 +330,7 @@ def test_hybrid_query_word_weights(index, scorer): weights = {"medical": 3.4, "cancers": 5} # test we can run a query with text weights - weighted_query = HybridQuery( + weighted_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -344,7 +344,7 @@ def test_hybrid_query_word_weights(index, scorer): assert len(weighted_results) == 7 # test that weights do change the scores on results - 
unweighted_query = HybridQuery( + unweighted_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -363,7 +363,7 @@ def test_hybrid_query_word_weights(index, scorer): # test that weights do change the document score and order of results weights = {"medical": 5, "cancers": 3.4} # switch the weights - weighted_query = HybridQuery( + weighted_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -377,7 +377,7 @@ def test_hybrid_query_word_weights(index, scorer): assert weighted_results != unweighted_results # test assigning weights on construction is equivalent to setting them on the query object - new_query = HybridQuery( + new_query = AggregateHybridQuery( text=text, text_field_name=text_field, vector=vector, @@ -743,3 +743,44 @@ def test_multivector_query_mixed_index(index): assert ( float(r["combined_score"]) - score <= 0.0001 ) # allow for small floating point error + + +def test_hybrid_query_backward_compatibility(index): + skip_if_redis_version_below(index.client, "7.2.0") + + text = "a medical professional with expertise in lung cancer" + text_field = "description" + vector = [0.1, 0.1, 0.5] + vector_field = "user_embedding" + return_fields = ["user", "credit_score", "age", "job", "location", "description"] + + hybrid_query = AggregateHybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + return_fields=return_fields, + ) + + results = index.query(hybrid_query) + assert len(results) == 7 + for result in results: + assert result["user"] in [ + "john", + "derrick", + "nancy", + "tyler", + "tim", + "taimur", + "joe", + "mary", + ] + + with pytest.warns(DeprecationWarning): + _ = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + return_fields=return_fields, + ) diff --git a/tests/unit/test_aggregation_types.py b/tests/unit/test_aggregation_types.py index a4462116..49eb1529 100644 --- 
a/tests/unit/test_aggregation_types.py +++ b/tests/unit/test_aggregation_types.py @@ -1,13 +1,33 @@ +import warnings +from contextlib import contextmanager + import pytest from redis.commands.search.aggregation import AggregateRequest from redis.commands.search.query import Query from redis.commands.search.result import Result from redisvl.index.index import process_results -from redisvl.query.aggregate import HybridQuery, MultiVectorQuery, Vector +from redisvl.query.aggregate import ( + AggregateHybridQuery, + HybridQuery, + MultiVectorQuery, + Vector, +) from redisvl.query.filter import Tag from redisvl.redis.utils import array_to_buffer + +@contextmanager +def assert_no_warnings(): + """Context manager that asserts no warnings are emitted.""" + with warnings.catch_warnings(record=True) as caught_warnings: + warnings.simplefilter("always") + yield + if caught_warnings: + warning_messages = [str(w.message) for w in caught_warnings] + pytest.fail(f"Expected no warnings, but got: {warning_messages}") + + # Sample data for testing sample_vector = [0.1, 0.2, 0.3, 0.4] sample_text = "the toon squad play basketball against a gang of aliens" @@ -22,7 +42,7 @@ def test_aggregate_hybrid_query(): text_field_name = "description" vector_field_name = "embedding" - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=sample_text, text_field_name=text_field_name, vector=sample_vector, @@ -52,7 +72,7 @@ def test_aggregate_hybrid_query(): stopwords = [] dialect = 2 - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=sample_text, text_field_name=text_field_name, vector=sample_vector, @@ -79,12 +99,12 @@ def test_aggregate_hybrid_query(): assert hybrid_query.stopwords == set() # Test stopwords are configurable - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( sample_text, text_field_name, sample_vector, vector_field_name, stopwords=None ) assert hybrid_query.stopwords == set([]) - hybrid_query = HybridQuery( + hybrid_query 
= AggregateHybridQuery( sample_text, text_field_name, sample_vector, @@ -93,7 +113,7 @@ def test_aggregate_hybrid_query(): ) assert hybrid_query.stopwords == set(["the", "a", "of"]) - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( sample_text, text_field_name, sample_vector, @@ -103,7 +123,7 @@ def test_aggregate_hybrid_query(): assert hybrid_query.stopwords != set([]) with pytest.raises(ValueError): - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( sample_text, text_field_name, sample_vector, @@ -112,7 +132,7 @@ def test_aggregate_hybrid_query(): ) with pytest.raises(TypeError): - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( sample_text, text_field_name, sample_vector, @@ -122,9 +142,9 @@ def test_aggregate_hybrid_query(): def test_hybrid_query_with_string_filter(): - """Test that HybridQuery correctly includes string filter expressions in query string. + """Test that AggregateHybridQuery correctly includes string filter expressions in query string. - This test ensures that when a string filter expression is passed to HybridQuery, + This test ensures that when a string filter expression is passed to AggregateHybridQuery, it's properly included in the generated query string and not set to empty. Regression test for bug where string filters were being ignored. 
""" @@ -134,7 +154,7 @@ def test_hybrid_query_with_string_filter(): # Test with string filter expression - should include filter in query string string_filter = "@category:{tech|science|engineering}" - hybrid_query = HybridQuery( + hybrid_query = AggregateHybridQuery( text=text, text_field_name=text_field_name, vector=sample_vector, @@ -152,7 +172,7 @@ def test_hybrid_query_with_string_filter(): # Test with FilterExpression - should also work (existing functionality) filter_expression = Tag("category") == "tech" - hybrid_query_with_filter_expr = HybridQuery( + hybrid_query_with_filter_expr = AggregateHybridQuery( text=text, text_field_name=text_field_name, vector=sample_vector, @@ -172,7 +192,7 @@ def test_hybrid_query_with_string_filter(): assert "AND @category:{tech}" in query_string_with_filter_expr # Test with no filter - should only have text search - hybrid_query_no_filter = HybridQuery( + hybrid_query_no_filter = AggregateHybridQuery( text=text, text_field_name=text_field_name, vector=sample_vector, @@ -184,7 +204,7 @@ def test_hybrid_query_with_string_filter(): assert "AND" not in query_string_no_filter # Test with wildcard filter - should only have text search (no AND clause) - hybrid_query_wildcard = HybridQuery( + hybrid_query_wildcard = AggregateHybridQuery( text=text, text_field_name=text_field_name, vector=sample_vector, @@ -202,7 +222,7 @@ def test_hybrid_query_text_weights(): vector = [0.1, 0.1, 0.5] vector_field = "user_embedding" - query = HybridQuery( + query = AggregateHybridQuery( text="query string alpha bravo delta tango alpha", text_field_name="description", vector=vector, @@ -217,7 +237,7 @@ def test_hybrid_query_text_weights(): # raise an error if weights are not positive floats with pytest.raises(ValueError): - _ = HybridQuery( + _ = AggregateHybridQuery( text="sample text query", text_field_name="description", vector=vector, @@ -226,7 +246,7 @@ def test_hybrid_query_text_weights(): ) with pytest.raises(ValueError): - _ = HybridQuery( + _ 
= AggregateHybridQuery( text="sample text query", text_field_name="description", vector=vector, @@ -235,7 +255,7 @@ def test_hybrid_query_text_weights(): ) # no error if weights dictionary is empty or None - query = HybridQuery( + query = AggregateHybridQuery( text="sample text query", text_field_name="description", vector=vector, @@ -244,7 +264,7 @@ def test_hybrid_query_text_weights(): ) assert query - query = HybridQuery( + query = AggregateHybridQuery( text="sample text query", text_field_name="description", vector=vector, @@ -254,7 +274,7 @@ def test_hybrid_query_text_weights(): assert query # no error if the words in weights dictionary don't appear in query - query = HybridQuery( + query = AggregateHybridQuery( text="sample text query", text_field_name="description", vector=vector, @@ -409,3 +429,35 @@ def test_vector_object_handles_byte_conversion(): byte_string = array_to_buffer(sample_vector, datatype) vec = Vector(vector=byte_string, field_name="field 1") assert vec.vector == byte_string + + +def test_hybrid_query_backward_compatibility(): + # test that HybridQuery is a backward compatibility wrapper for AggregateHybridQuery + with pytest.warns(DeprecationWarning): + hybrid_query = HybridQuery( + text="sample text query", + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + ) + + # Verify HybridQuery is actually an instance of AggregateHybridQuery + assert isinstance(hybrid_query, AggregateHybridQuery) + + # Verify AggregateHybridQuery does not emit warnings + with assert_no_warnings(): + aggregate_query = AggregateHybridQuery( + text="sample text query", + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + ) + + # Verify that creating another HybridQuery also warns + with pytest.warns(DeprecationWarning): + another_hybrid_query = HybridQuery( + text="sample text query", + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + ) From 
eb9f13da3cedde6e2c8b20d98f9c7d8b13440777 Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Thu, 6 Nov 2025 16:42:45 -0800 Subject: [PATCH 2/3] renames HybridQuery in docs --- docs/user_guide/11_advanced_queries.ipynb | 34 +++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/user_guide/11_advanced_queries.ipynb b/docs/user_guide/11_advanced_queries.ipynb index 1c737f21..a8d56fdb 100644 --- a/docs/user_guide/11_advanced_queries.ipynb +++ b/docs/user_guide/11_advanced_queries.ipynb @@ -9,7 +9,7 @@ "In this notebook, we will explore advanced query types available in RedisVL:\n", "\n", "1. **`TextQuery`**: Full text search with advanced scoring\n", - "2. **`HybridQuery`**: Combines text and vector search for hybrid retrieval\n", + "2. **`AggregateHybridQuery`**: Combines text and vector search for hybrid retrieval\n", "3. **`MultiVectorQuery`**: Search over multiple vector fields simultaneously\n", "\n", "These query types are powerful tools for building sophisticated search applications that go beyond simple vector similarity search.\n", @@ -550,9 +550,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 2. HybridQuery: Combining Text and Vector Search\n", + "## 2. 
AggregateHybridQuery: Combining Text and Vector Search\n", "\n", - "The `HybridQuery` combines text search and vector similarity to provide the best of both worlds:\n", + "The `AggregateHybridQuery` combines text search and vector similarity to provide the best of both worlds:\n", "- **Text search**: Finds exact keyword matches\n", "- **Vector search**: Captures semantic similarity\n", "\n", @@ -569,7 +569,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Basic Hybrid Query\n", + "### Basic Aggregate Hybrid Query\n", "\n", "Let's search for \"running\" with both text and semantic search:" ] @@ -593,10 +593,10 @@ } ], "source": [ - "from redisvl.query import HybridQuery\n", + "from redisvl.query import AggregateHybridQuery\n", "\n", "# Create a hybrid query\n", - "hybrid_query = HybridQuery(\n", + "hybrid_query = AggregateHybridQuery(\n", " text=\"running shoes\",\n", " text_field_name=\"brief_description\",\n", " vector=[0.1, 0.2, 0.1], # Query vector\n", @@ -648,7 +648,7 @@ ], "source": [ "# More emphasis on vector search (alpha=0.9)\n", - "vector_heavy_query = HybridQuery(\n", + "vector_heavy_query = AggregateHybridQuery(\n", " text=\"comfortable\",\n", " text_field_name=\"brief_description\",\n", " vector=[0.15, 0.25, 0.15],\n", @@ -667,7 +667,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Hybrid Query with Filters\n", + "### Aggregate Hybrid Query with Filters\n", "\n", "You can also combine hybrid search with filters:" ] @@ -692,7 +692,7 @@ ], "source": [ "# Hybrid search with a price filter\n", - "filtered_hybrid_query = HybridQuery(\n", + "filtered_hybrid_query = AggregateHybridQuery(\n", " text=\"professional equipment\",\n", " text_field_name=\"brief_description\",\n", " vector=[0.9, 0.1, 0.05],\n", @@ -712,7 +712,7 @@ "source": [ "### Using Different Text Scorers\n", "\n", - "HybridQuery supports the same text scoring algorithms as TextQuery:" + "AggregateHybridQuery supports the same text scoring algorithms as TextQuery:" 
] }, { @@ -734,8 +734,8 @@ } ], "source": [ - "# Hybrid query with TFIDF scorer\n", - "hybrid_tfidf = HybridQuery(\n", + "# Aggregate Hybrid query with TFIDF scorer\n", + "hybrid_tfidf = AggregateHybridQuery(\n", " text=\"shoes support\",\n", " text_field_name=\"brief_description\",\n", " vector=[0.12, 0.18, 0.12],\n", @@ -999,7 +999,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "HybridQuery Results (text + vector):\n" + "AggregateHybridQuery Results (text + vector):\n" ] }, { @@ -1023,8 +1023,8 @@ } ], "source": [ - "# HybridQuery - combines text and vector search\n", - "hybrid_q = HybridQuery(\n", + "# AggregateHybridQuery - combines text and vector search\n", + "hybrid_q = AggregateHybridQuery(\n", " text=\"shoes\",\n", " text_field_name=\"brief_description\",\n", " vector=[0.1, 0.2, 0.1],\n", @@ -1033,7 +1033,7 @@ " num_results=3\n", ")\n", "\n", - "print(\"HybridQuery Results (text + vector):\")\n", + "print(\"AggregateHybridQuery Results (text + vector):\")\n", "result_print(index.query(hybrid_q))\n", "print()" ] @@ -1103,7 +1103,7 @@ " - When text relevance scoring is important\n", " - Example: Product search, document retrieval\n", "\n", - "2. **`HybridQuery`**:\n", + "2. 
**`AggregateHybridQuery`**:\n", " - When you want to combine keyword and semantic search\n", " - For improved search quality over pure text or vector search\n", " - When you have both text and vector representations of your data\n", From c68f010868a8414fc45f83238a8934f92c3bb526 Mon Sep 17 00:00:00 2001 From: Justin Cechmanek <165097110+justin-cechmanek@users.noreply.github.com> Date: Thu, 6 Nov 2025 16:58:31 -0800 Subject: [PATCH 3/3] minor typo in doc string Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- redisvl/query/aggregate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py index 0085e7bf..89371849 100644 --- a/redisvl/query/aggregate.py +++ b/redisvl/query/aggregate.py @@ -320,14 +320,14 @@ class HybridQuery(AggregateHybridQuery): .. deprecated:: HybridQuery is a backward compatibility wrapper around AggregateHybridQuery and will eventually be replaced with a new hybrid query implementation. - to maintain current functionality please use AggregateHybridQuery directly.", + To maintain current functionality please use AggregateHybridQuery directly. """ def __init__(self, *args, **kwargs): warnings.warn( "HybridQuery is a backward compatibility wrapper around AggregateHybridQuery " "and will eventually be replaced with a new hybrid query implementation. " - "to maintain current functionality please use AggregateHybridQuery directly.", + "To maintain current functionality please use AggregateHybridQuery directly.", DeprecationWarning, stacklevel=2, )