diff --git a/docs/api/query.rst b/docs/api/query.rst
index c2ba04f9..9d65dc9b 100644
--- a/docs/api/query.rst
+++ b/docs/api/query.rst
@@ -47,6 +47,11 @@ HybridQuery
:show-inheritance:
:exclude-members: add_filter,get_args,highlight,return_field,summarize
+.. note::
+ The ``stopwords`` parameter in :class:`HybridQuery` (and :class:`AggregateHybridQuery`) controls query-time stopword filtering (client-side).
+ For index-level stopwords configuration (server-side), see :attr:`redisvl.schema.IndexInfo.stopwords`.
+ Using query-time stopwords with index-level ``STOPWORDS 0`` is counterproductive.
+
TextQuery
================
@@ -61,6 +66,11 @@ TextQuery
:show-inheritance:
:exclude-members: add_filter,get_args,highlight,return_field,summarize
+.. note::
+ The ``stopwords`` parameter in :class:`TextQuery` controls query-time stopword filtering (client-side).
+ For index-level stopwords configuration (server-side), see :attr:`redisvl.schema.IndexInfo.stopwords`.
+ Using query-time stopwords with index-level ``STOPWORDS 0`` is counterproductive.
+
FilterQuery
===========
diff --git a/docs/api/schema.rst b/docs/api/schema.rst
index 7f38d63a..c5b8ab68 100644
--- a/docs/api/schema.rst
+++ b/docs/api/schema.rst
@@ -31,6 +31,47 @@ IndexSchema
:exclude-members: generate_fields,validate_and_create_fields,redis_fields
+Index-Level Stopwords Configuration
+====================================
+
+The :class:`IndexInfo` class supports index-level stopwords configuration through
+the ``stopwords`` field. This controls which words are filtered during indexing
+(server-side), as opposed to query-time filtering (client-side).
+
+**Configuration Options:**
+
+- ``None`` (default): Use the Redis default stopwords (a built-in list of about 30 common English words such as "a", "of", "the")
+- ``[]`` (empty list): Disable stopwords completely (``STOPWORDS 0``)
+- Custom list: Specify your own stopwords (e.g., ``["the", "a", "an"]``)
+
+**Example:**
+
+.. code-block:: python
+
+ from redisvl.schema import IndexSchema
+
+ # Disable stopwords to search for phrases like "Bank of Glasberliner"
+ schema = IndexSchema.from_dict({
+ "index": {
+ "name": "company-idx",
+ "prefix": "company",
+ "stopwords": [] # STOPWORDS 0
+ },
+ "fields": [
+ {"name": "name", "type": "text"}
+ ]
+ })
+
+**Important Notes:**
+
+- Index-level stopwords affect what gets indexed (server-side)
+- Query-time stopwords (in :class:`TextQuery` and :class:`AggregateHybridQuery`) affect what gets searched (client-side)
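+- Using query-time stopwords with index-level ``STOPWORDS 0`` is counterproductive: the index keeps every word, but the query drops its stopwords before searching, so those indexed words are never matched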
+
+For detailed information about stopwords configuration and best practices, see the
+Advanced Queries user guide (``docs/user_guide/11_advanced_queries.ipynb``).
+
+
Defining Fields
===============
diff --git a/docs/user_guide/11_advanced_queries.ipynb b/docs/user_guide/11_advanced_queries.ipynb
index a8d56fdb..831857d7 100644
--- a/docs/user_guide/11_advanced_queries.ipynb
+++ b/docs/user_guide/11_advanced_queries.ipynb
@@ -30,8 +30,15 @@
},
{
"cell_type": "code",
- "execution_count": 25,
- "metadata": {},
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:12.222169Z",
+ "iopub.status.busy": "2025-11-21T00:42:12.222058Z",
+ "iopub.status.idle": "2025-11-21T00:42:12.301776Z",
+ "shell.execute_reply": "2025-11-21T00:42:12.301163Z"
+ }
+ },
"outputs": [],
"source": [
"import numpy as np\n",
@@ -117,8 +124,15 @@
},
{
"cell_type": "code",
- "execution_count": 26,
- "metadata": {},
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:12.303593Z",
+ "iopub.status.busy": "2025-11-21T00:42:12.303450Z",
+ "iopub.status.idle": "2025-11-21T00:42:12.305709Z",
+ "shell.execute_reply": "2025-11-21T00:42:12.305407Z"
+ }
+ },
"outputs": [],
"source": [
"schema = {\n",
@@ -167,17 +181,16 @@
},
{
"cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Loaded 6 products into the index\n"
- ]
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:12.306952Z",
+ "iopub.status.busy": "2025-11-21T00:42:12.306869Z",
+ "iopub.status.idle": "2025-11-21T00:42:12.416481Z",
+ "shell.execute_reply": "2025-11-21T00:42:12.415926Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"from redisvl.index import SearchIndex\n",
"\n",
@@ -206,22 +219,16 @@
},
{
"cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
| score | product_id | brief_description | category | price |
|---|
| 5.953989333038773 | prod_1 | comfortable running shoes for athletes | footwear | 89.99 |
| 2.085315593627535 | prod_5 | basketball shoes with excellent ankle support | footwear | 139.99 |
| 2.0410082774474088 | prod_2 | lightweight running jacket with water resistance | outerwear | 129.99 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:12.433591Z",
+ "iopub.status.busy": "2025-11-21T00:42:12.433464Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.709475Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.708647Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"from redisvl.query import TextQuery\n",
"\n",
@@ -248,29 +255,16 @@
},
{
"cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Results with BM25 scoring:\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "| score | product_id | brief_description | price |
|---|
| 6.031534703977659 | prod_1 | comfortable running shoes for athletes | 89.99 |
| 2.085315593627535 | prod_5 | basketball shoes with excellent ankle support | 139.99 |
| 1.5268074873573214 | prod_4 | yoga mat with extra cushioning for comfort | 39.99 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.711396Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.711221Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.749216Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.748398Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"# BM25 standard scoring (default)\n",
"bm25_query = TextQuery(\n",
@@ -288,29 +282,16 @@
},
{
"cell_type": "code",
- "execution_count": 30,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Results with TFIDF scoring:\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "| score | product_id | brief_description | price |
|---|
| 2.3333333333333335 | prod_1 | comfortable running shoes for athletes | 89.99 |
| 2.0 | prod_5 | basketball shoes with excellent ankle support | 139.99 |
| 1.0 | prod_4 | yoga mat with extra cushioning for comfort | 39.99 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.750799Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.750686Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.754896Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.754345Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"# TFIDF scoring\n",
"tfidf_query = TextQuery(\n",
@@ -337,22 +318,16 @@
},
{
"cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| score | product_id | brief_description | category | price |
|---|
| 3.9314935770863046 | prod_1 | comfortable running shoes for athletes | footwear | 89.99 |
| 3.1279733904413027 | prod_5 | basketball shoes with excellent ankle support | footwear | 139.99 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.756368Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.756224Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.760388Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.759844Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"from redisvl.query.filter import Tag, Num\n",
"\n",
@@ -371,22 +346,16 @@
},
{
"cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| score | product_id | brief_description | price |
|---|
| 3.1541404034996914 | prod_1 | comfortable running shoes for athletes | 89.99 |
| 1.5268074873573214 | prod_4 | yoga mat with extra cushioning for comfort | 39.99 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.761654Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.761566Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.765694Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.765316Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"# Search for products under $100\n",
"price_filtered_query = TextQuery(\n",
@@ -413,22 +382,16 @@
},
{
"cell_type": "code",
- "execution_count": 33,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| score | product_id | brief_description |
|---|
| 5.035440025836444 | prod_1 | comfortable running shoes for athletes |
| 2.085315593627535 | prod_5 | basketball shoes with excellent ankle support |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.767228Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.767102Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.771059Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.770555Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"weighted_query = TextQuery(\n",
" text=\"shoes\",\n",
@@ -452,22 +415,16 @@
},
{
"cell_type": "code",
- "execution_count": 34,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| score | product_id | brief_description |
|---|
| 5.953989333038773 | prod_1 | comfortable running shoes for athletes |
| 2.085315593627535 | prod_5 | basketball shoes with excellent ankle support |
| 2.0410082774474088 | prod_2 | lightweight running jacket with water resistance |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.772513Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.772419Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.776286Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.775861Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"# Use English stopwords (default)\n",
"query_with_stopwords = TextQuery(\n",
@@ -484,22 +441,16 @@
},
{
"cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| score | product_id | brief_description |
|---|
| 3.1541404034996914 | prod_1 | comfortable running shoes for athletes |
| 3.0864038416103 | prod_3 | professional tennis racket for competitive players |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.777294Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.777220Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.781329Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.780713Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"# Use custom stopwords\n",
"custom_stopwords_query = TextQuery(\n",
@@ -516,22 +467,16 @@
},
{
"cell_type": "code",
- "execution_count": 36,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| score | product_id | brief_description |
|---|
| 5.953989333038773 | prod_1 | comfortable running shoes for athletes |
| 2.085315593627535 | prod_5 | basketball shoes with excellent ankle support |
| 2.0410082774474088 | prod_2 | lightweight running jacket with water resistance |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.782401Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.782323Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.787197Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.786617Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"# No stopwords\n",
"no_stopwords_query = TextQuery(\n",
@@ -569,29 +514,222 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Basic Aggregate Hybrid Query\n",
+ "### Index-Level Stopwords Configuration\n",
"\n",
- "Let's search for \"running\" with both text and semantic search:"
+ "The previous example showed **query-time stopwords** using `TextQuery.stopwords`, which filters words from the query before searching. RedisVL also supports **index-level stopwords** configuration, which determines which words are indexed in the first place.\n",
+ "\n",
+ "**Key Difference:**\n",
+ "- **Query-time stopwords** (`TextQuery.stopwords`): Filters words from your search query (client-side)\n",
+ "- **Index-level stopwords** (`IndexInfo.stopwords`): Controls which words get indexed in Redis (server-side)\n",
+ "\n",
+ "**Three Configuration Modes:**\n",
+ "\n",
+ "1. **`None` (default)**: Use Redis's default stopwords list\n",
+ "2. **`[]` (empty list)**: Disable stopwords completely (`STOPWORDS 0` in FT.CREATE)\n",
+ "3. **`[\"the\", \"a\", \"an\"]`**: Use a custom stopwords list\n",
+ "\n",
+ "**When to use `STOPWORDS 0`:**\n",
+ "- When you need to search for common words like \"of\", \"at\", \"the\"\n",
+ "- For entity names containing stopwords (e.g., \"Bank of Glasberliner\", \"University of Glasberliner\")\n",
+ "- When working with structured data where every word matters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.788835Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.788717Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.795247Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.794662Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Create a schema with index-level stopwords disabled\n",
+ "from redisvl.index import SearchIndex\n",
+ "\n",
+ "stopwords_schema = {\n",
+ " \"index\": {\n",
+ " \"name\": \"company_index\",\n",
+ " \"prefix\": \"company:\",\n",
+ " \"storage_type\": \"hash\",\n",
+ " \"stopwords\": [] # STOPWORDS 0 - disable stopwords completely\n",
+ " },\n",
+ " \"fields\": [\n",
+ " {\"name\": \"company_name\", \"type\": \"text\"},\n",
+ " {\"name\": \"description\", \"type\": \"text\"}\n",
+ " ]\n",
+ "}\n",
+ "\n",
+ "# Create index using from_dict (handles schema creation internally)\n",
+ "company_index = SearchIndex.from_dict(stopwords_schema, redis_url=\"redis://localhost:6379\")\n",
+ "company_index.create(overwrite=True, drop=True)\n",
+ "\n",
+ "print(f\"Index created with STOPWORDS 0: {company_index}\")"
]
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.796880Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.796745Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.802750Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.802098Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Load sample data with company names containing common stopwords\n",
+ "companies = [\n",
+ " {\"company_name\": \"Bank of Glasberliner\", \"description\": \"Major financial institution\"},\n",
+ " {\"company_name\": \"University of Glasberliner\", \"description\": \"Public university system\"},\n",
+ " {\"company_name\": \"Department of Glasberliner Affairs\", \"description\": \"A government agency\"},\n",
+ " {\"company_name\": \"Glasberliner FC\", \"description\": \"Football Club\"},\n",
+ " {\"company_name\": \"The Home Market\", \"description\": \"Home improvement retailer\"},\n",
+ "]\n",
+ "\n",
+ "for i, company in enumerate(companies):\n",
+ " company_index.load([company], keys=[f\"company:{i}\"])\n",
+ "\n",
+ "print(f\"✓ Loaded {len(companies)} companies\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.804059Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.803942Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.807026Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.806491Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Search for \"Bank of Glasberliner\" - with STOPWORDS 0, \"of\" is indexed and searchable\n",
+ "from redisvl.query import FilterQuery\n",
+ "\n",
+ "query = FilterQuery(\n",
+ " filter_expression='@company_name:(Bank of Glasberliner)',\n",
+ " return_fields=[\"company_name\", \"description\"],\n",
+ ")\n",
+ "\n",
+ "results = company_index.search(query.query, query_params=query.params)\n",
+ "\n",
+ "print(f\"Found {len(results.docs)} results for 'Bank of Glasberliner':\")\n",
+ "for doc in results.docs:\n",
+ " print(f\" - {doc.company_name}: {doc.description}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| vector_distance | product_id | brief_description | category | price | vector_similarity | text_score | hybrid_score |
|---|
| 5.96046447754e-08 | prod_1 | comfortable running shoes for athletes | footwear | 89.99 | 0.999999970198 | 5.95398933304 | 2.48619677905 |
| 0.00985252857208 | prod_5 | basketball shoes with excellent ankle support | footwear | 139.99 | 0.995073735714 | 2.08531559363 | 1.32214629309 |
| 0.00985252857208 | prod_2 | lightweight running jacket with water resistance | outerwear | 129.99 | 0.995073735714 | 2.04100827745 | 1.30885409823 |
| 0.0038834810257 | prod_4 | yoga mat with extra cushioning for comfort | accessories | 39.99 | 0.998058259487 | 0 | 0.698640781641 |
| 0.236237406731 | prod_6 | swimming goggles with anti-fog coating | accessories | 24.99 | 0.881881296635 | 0 | 0.617316907644 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "source": [
+ "**Comparison: With vs Without Stopwords**\n",
+ "\n",
+ "If we had used the default stopwords (not specifying `stopwords` in the schema), the word \"of\" would be filtered out during indexing. This means:\n",
+ "\n",
+ "- ❌ Searching for `\"Bank of Glasberliner\"` might not find exact matches\n",
+ "- ❌ The phrase would be indexed as `\"Bank Glasberliner\"` (without \"of\")\n",
+ "- ✅ With `STOPWORDS 0`, all words including \"of\" are indexed\n",
+ "\n",
+ "**Custom Stopwords Example:**\n",
+ "\n",
+ "You can also provide a custom list of stopwords:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.808543Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.808418Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.810612Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.810083Z"
}
+ },
+ "outputs": [],
+ "source": [
+ "# Example: Create index with custom stopwords\n",
+ "custom_stopwords_schema = {\n",
+ " \"index\": {\n",
+ " \"name\": \"custom_stopwords_index\",\n",
+ " \"prefix\": \"custom:\",\n",
+ " \"stopwords\": [\"inc\", \"llc\", \"corp\"] # Filter out legal entity suffixes\n",
+ " },\n",
+ " \"fields\": [\n",
+ " {\"name\": \"name\", \"type\": \"text\"}\n",
+ " ]\n",
+ "}\n",
+ "\n",
+ "# This would create an index where \"inc\", \"llc\", \"corp\" are not indexed\n",
+ "print(\"Custom stopwords:\", custom_stopwords_schema[\"index\"][\"stopwords\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**YAML Format:**\n",
+ "\n",
+ "You can also define stopwords in YAML schema files:\n",
+ "\n",
+ "```yaml\n",
+ "version: '0.1.0'\n",
+ "\n",
+ "index:\n",
+ " name: company_index\n",
+ " prefix: company:\n",
+ " storage_type: hash\n",
+ " stopwords: [] # Disable stopwords (STOPWORDS 0)\n",
+ "\n",
+ "fields:\n",
+ " - name: company_name\n",
+ " type: text\n",
+ " - name: description\n",
+ " type: text\n",
+ "```\n",
+ "\n",
+ "Or with custom stopwords:\n",
+ "\n",
+ "```yaml\n",
+ "index:\n",
+ " stopwords:\n",
+ " - the\n",
+ " - a\n",
+ " - an\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {},
+ "source": [
+ "# Cleanup\n",
+ "company_index.delete(drop=True)\n",
+ "print(\"✓ Cleaned up company_index\")"
],
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Basic Aggregate Hybrid Query\n",
+ "\n",
+ "Let's search for \"running\" with both text and semantic search:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {},
"source": [
"from redisvl.query import AggregateHybridQuery\n",
"\n",
@@ -607,7 +745,9 @@
"\n",
"results = index.query(hybrid_query)\n",
"result_print(results)"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -623,29 +763,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Results with alpha=0.9 (vector-heavy):\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "| vector_distance | product_id | brief_description | vector_similarity | text_score | hybrid_score |
|---|
| -1.19209289551e-07 | prod_4 | yoga mat with extra cushioning for comfort | 1.0000000596 | 1.52680748736 | 1.05268080238 |
| 0.00136888027191 | prod_5 | basketball shoes with excellent ankle support | 0.999315559864 | 0 | 0.899384003878 |
| 0.00136888027191 | prod_2 | lightweight running jacket with water resistance | 0.999315559864 | 0 | 0.899384003878 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"# More emphasis on vector search (alpha=0.9)\n",
"vector_heavy_query = AggregateHybridQuery(\n",
@@ -661,7 +779,9 @@
"print(\"Results with alpha=0.9 (vector-heavy):\")\n",
"results = index.query(vector_heavy_query)\n",
"result_print(results)"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -674,22 +794,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| vector_distance | product_id | brief_description | category | price | vector_similarity | text_score | hybrid_score |
|---|
| -1.19209289551e-07 | prod_3 | professional tennis racket for competitive players | equipment | 199.99 | 1.0000000596 | 3.08640384161 | 1.62592119421 |
| 0.411657452583 | prod_5 | basketball shoes with excellent ankle support | footwear | 139.99 | 0.794171273708 | 0 | 0.555919891596 |
| 0.411657452583 | prod_2 | lightweight running jacket with water resistance | outerwear | 129.99 | 0.794171273708 | 0 | 0.555919891596 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"# Hybrid search with a price filter\n",
"filtered_hybrid_query = AggregateHybridQuery(\n",
@@ -704,7 +809,9 @@
"\n",
"results = index.query(filtered_hybrid_query)\n",
"result_print(results)"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -717,22 +824,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| vector_distance | product_id | brief_description | vector_similarity | text_score | hybrid_score |
|---|
| 0 | prod_5 | basketball shoes with excellent ankle support | 1 | 5 | 2.2 |
| 0 | prod_2 | lightweight running jacket with water resistance | 1 | 0 | 0.7 |
| 0.00136888027191 | prod_4 | yoga mat with extra cushioning for comfort | 0.999315559864 | 0 | 0.699520891905 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"# Aggregate Hybrid query with TFIDF scorer\n",
"hybrid_tfidf = AggregateHybridQuery(\n",
@@ -747,7 +839,9 @@
"\n",
"results = index.query(hybrid_tfidf)\n",
"result_print(results)"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -775,22 +869,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| distance_0 | distance_1 | product_id | brief_description | category | score_0 | score_1 | combined_score |
|---|
| 5.96046447754e-08 | 5.96046447754e-08 | prod_1 | comfortable running shoes for athletes | footwear | 0.999999970198 | 0.999999970198 | 0.999999970198 |
| 0.00985252857208 | 0.00266629457474 | prod_5 | basketball shoes with excellent ankle support | footwear | 0.995073735714 | 0.998666852713 | 0.996151670814 |
| 0.00985252857208 | 0.0118260979652 | prod_2 | lightweight running jacket with water resistance | outerwear | 0.995073735714 | 0.994086951017 | 0.994777700305 |
| 0.0038834810257 | 0.210647821426 | prod_4 | yoga mat with extra cushioning for comfort | accessories | 0.998058259487 | 0.894676089287 | 0.967043608427 |
| 0.236237406731 | 0.639005899429 | prod_6 | swimming goggles with anti-fog coating | accessories | 0.881881296635 | 0.680497050285 | 0.82146602273 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"from redisvl.query import MultiVectorQuery, Vector\n",
"\n",
@@ -818,7 +897,9 @@
"\n",
"results = index.query(multi_vector_query)\n",
"result_print(results)"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -831,29 +912,7 @@
},
{
"cell_type": "code",
- "execution_count": 42,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Results with emphasis on image similarity:\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "| distance_0 | distance_1 | product_id | brief_description | category | score_0 | score_1 | combined_score |
|---|
| -1.19209289551e-07 | 0 | prod_3 | professional tennis racket for competitive players | equipment | 1.0000000596 | 1 | 1.00000001192 |
| 0.14539372921 | 0.00900757312775 | prod_6 | swimming goggles with anti-fog coating | accessories | 0.927303135395 | 0.995496213436 | 0.981857597828 |
| 0.436696171761 | 0.219131231308 | prod_4 | yoga mat with extra cushioning for comfort | accessories | 0.78165191412 | 0.890434384346 | 0.868677890301 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"# More emphasis on image similarity\n",
"text_vec = Vector(\n",
@@ -879,7 +938,9 @@
"print(\"Results with emphasis on image similarity:\")\n",
"results = index.query(image_heavy_query)\n",
"result_print(results)"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -892,22 +953,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "| distance_0 | distance_1 | product_id | brief_description | category | price | score_0 | score_1 | combined_score |
|---|
| 5.96046447754e-08 | 5.96046447754e-08 | prod_1 | comfortable running shoes for athletes | footwear | 89.99 | 0.999999970198 | 0.999999970198 | 0.999999970198 |
| 0.00985252857208 | 0.00266629457474 | prod_5 | basketball shoes with excellent ankle support | footwear | 139.99 | 0.995073735714 | 0.998666852713 | 0.996510982513 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"# Multi-vector search with category filter\n",
"text_vec = Vector(\n",
@@ -933,7 +979,9 @@
"\n",
"results = index.query(filtered_multi_query)\n",
"result_print(results)"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -946,36 +994,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "TextQuery Results (keyword-based):\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "| score | product_id | brief_description |
|---|
| 2.8773943004779676 | prod_1 | comfortable running shoes for athletes |
| 2.085315593627535 | prod_5 | basketball shoes with excellent ankle support |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
"source": [
"# TextQuery - keyword-based search\n",
"text_q = TextQuery(\n",
@@ -988,40 +1007,22 @@
"print(\"TextQuery Results (keyword-based):\")\n",
"result_print(index.query(text_q))\n",
"print()"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "AggregateHybridQuery Results (text + vector):\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "| vector_distance | product_id | brief_description | vector_similarity | text_score | hybrid_score |
|---|
| 5.96046447754e-08 | prod_1 | comfortable running shoes for athletes | 0.999999970198 | 2.87739430048 | 1.56321826928 |
| 0.0038834810257 | prod_4 | yoga mat with extra cushioning for comfort | 0.998058259487 | 0 | 0.698640781641 |
| 0.00985252857208 | prod_2 | lightweight running jacket with water resistance | 0.995073735714 | 0 | 0.696551615 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.860414Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.860347Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.864887Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.864461Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"# AggregateHybridQuery - combines text and vector search\n",
"hybrid_q = AggregateHybridQuery(\n",
@@ -1040,29 +1041,16 @@
},
{
"cell_type": "code",
- "execution_count": 46,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "MultiVectorQuery Results (multiple vectors):\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "| distance_0 | distance_1 | product_id | brief_description | score_0 | score_1 | combined_score |
|---|
| 5.96046447754e-08 | 5.96046447754e-08 | prod_1 | comfortable running shoes for athletes | 0.999999970198 | 0.999999970198 | 0.999999970198 |
| 0.00985252857208 | 0.00266629457474 | prod_5 | basketball shoes with excellent ankle support | 0.995073735714 | 0.998666852713 | 0.996870294213 |
| 0.00985252857208 | 0.0118260979652 | prod_2 | lightweight running jacket with water resistance | 0.995073735714 | 0.994086951017 | 0.994580343366 |
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-21T00:42:13.865922Z",
+ "iopub.status.busy": "2025-11-21T00:42:13.865857Z",
+ "iopub.status.idle": "2025-11-21T00:42:13.869441Z",
+ "shell.execute_reply": "2025-11-21T00:42:13.868990Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"# MultiVectorQuery - searches multiple vector fields\n",
"mv_text = Vector(\n",
@@ -1118,13 +1106,13 @@
},
{
"cell_type": "code",
- "execution_count": 47,
"metadata": {},
- "outputs": [],
"source": [
"# Cleanup\n",
"index.delete()"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
}
],
"metadata": {
@@ -1142,8 +1130,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.13.0"
+ "pygments_lexer": "ipython3"
}
},
"nbformat": 4,
diff --git a/redisvl/index/index.py b/redisvl/index/index.py
index 3865879d..dcfaaab6 100644
--- a/redisvl/index/index.py
+++ b/redisvl/index/index.py
@@ -79,7 +79,9 @@
BaseVectorQuery,
CountQuery,
FilterQuery,
+ TextQuery,
)
+from redisvl.query.aggregate import AggregateHybridQuery
from redisvl.query.filter import FilterExpression
from redisvl.redis.connection import (
RedisConnectionFactory,
@@ -248,6 +250,34 @@ def _validate_query(self, query: BaseQuery) -> None:
"Vector field using 'flat' algorithm does not support EF_RUNTIME query parameter."
)
+ # Warn if using query-time stopwords with index-level STOPWORDS 0
+ if isinstance(query, (TextQuery, AggregateHybridQuery)):
+ index_stopwords = self.schema.index.stopwords
+ query_stopwords = query.stopwords
+
+ # Check if index has STOPWORDS 0 (empty list) and query has stopwords configured
+ # Note: query.stopwords is a set, and when any falsy value (None, False, '', 0, [], etc.)
+ # is passed to TextQuery/AggregateHybridQuery, it becomes an empty set. So we check if the set is non-empty.
+ if (
+ index_stopwords is not None
+ and len(index_stopwords) == 0
+ and len(query_stopwords) > 0
+ ):
+ query_type = (
+ "TextQuery"
+ if isinstance(query, TextQuery)
+ else "AggregateHybridQuery"
+ )
+ warnings.warn(
+ f"Query-time stopwords are configured but the index has STOPWORDS 0 (stopwords = []). "
+ "This is counterproductive: all words including common words like 'of', 'the', 'a' are indexed, "
+ "but your query-time stopwords will filter them from the search query. "
+ "This makes your search less precise than it could be. "
+ f"Consider setting stopwords=None (or any falsy value) in {query_type} to search for all indexed words.",
+ UserWarning,
+ stacklevel=3,
+ )
+
@property
def name(self) -> str:
"""The name of the Redis search index."""
@@ -601,17 +631,22 @@ def create(self, overwrite: bool = False, drop: bool = False) -> None:
definition = IndexDefinition(
prefix=[self.schema.index.prefix], index_type=self._storage.type
)
+ # Extract stopwords from schema
+ stopwords = self.schema.index.stopwords
+
if isinstance(self._redis_client, RedisCluster):
cluster_create_index(
index_name=self.name,
client=self._redis_client,
fields=redis_fields,
definition=definition,
+ stopwords=stopwords,
)
else:
self._redis_client.ft(self.name).create_index(
fields=redis_fields,
definition=definition,
+ stopwords=stopwords,
)
except redis.exceptions.RedisError as e:
raise RedisSearchError(
@@ -1384,17 +1419,22 @@ async def create(self, overwrite: bool = False, drop: bool = False) -> None:
definition = IndexDefinition(
prefix=[self.schema.index.prefix], index_type=self._storage.type
)
+ # Extract stopwords from schema
+ stopwords = self.schema.index.stopwords
+
if isinstance(client, AsyncRedisCluster):
await async_cluster_create_index(
index_name=self.schema.index.name,
client=client,
fields=redis_fields,
definition=definition,
+ stopwords=stopwords,
)
else:
await client.ft(self.schema.index.name).create_index(
fields=redis_fields,
definition=definition,
+ stopwords=stopwords,
)
except redis.exceptions.RedisError as e:
raise RedisSearchError(
diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py
index 89371849..299de0ce 100644
--- a/redisvl/query/aggregate.py
+++ b/redisvl/query/aggregate.py
@@ -128,6 +128,10 @@ def __init__(
provided then a default set of stopwords for that language will be used. if a list,
set, or tuple of strings is provided then those will be used as stopwords.
Defaults to "english". if set to "None" then no stopwords will be removed.
+
+ Note: This parameter controls query-time stopword filtering (client-side).
+ For index-level stopwords configuration (server-side), see IndexInfo.stopwords.
+ Using query-time stopwords with index-level STOPWORDS 0 is counterproductive.
dialect (int, optional): The Redis dialect version. Defaults to 2.
text_weights (Optional[Dict[str, float]]): The importance weighting of individual words
within the query text. Defaults to None, as no modifications will be made to the
diff --git a/redisvl/query/query.py b/redisvl/query/query.py
index d7443584..1237c07f 100644
--- a/redisvl/query/query.py
+++ b/redisvl/query/query.py
@@ -1061,10 +1061,14 @@ def __init__(
params (Optional[Dict[str, Any]], optional): The parameters for the query.
Defaults to None.
stopwords (Optional[Union[str, Set[str]]): The set of stop words to remove
- from the query text. If a language like 'english' or 'spanish' is provided
+ from the query text (client-side filtering). If a language like 'english' or 'spanish' is provided
a default set of stopwords for that language will be used. Users may specify
their own stop words by providing a List or Set of words. if set to None,
then no words will be removed. Defaults to 'english'.
+
+ Note: This parameter controls query-time stopword filtering (client-side).
+ For index-level stopwords configuration (server-side), see IndexInfo.stopwords.
+ Using query-time stopwords with index-level STOPWORDS 0 is counterproductive.
text_weights (Optional[Dict[str, float]]): The importance weighting of individual words
within the query text. Defaults to None, as no modifications will be made to the
text_scorer score.
diff --git a/redisvl/redis/connection.py b/redisvl/redis/connection.py
index 7b5951d5..6d8ff96f 100644
--- a/redisvl/redis/connection.py
+++ b/redisvl/redis/connection.py
@@ -204,6 +204,17 @@ def convert_index_info_to_schema(index_info: Dict[str, Any]) -> Dict[str, Any]:
prefixes = prefixes[0]
storage_type = index_info["index_definition"][1].lower()
+ # Parse stopwords if present in FT.INFO output
+ # stopwords_list is only present when explicitly set (STOPWORDS 0 or custom list)
+ # If not present, we use None to indicate default Redis behavior
+ stopwords = None
+ if "stopwords_list" in index_info:
+ # Convert bytes to strings if needed
+ stopwords_list = index_info["stopwords_list"]
+ stopwords = [
+ sw.decode("utf-8") if isinstance(sw, bytes) else sw for sw in stopwords_list
+ ]
+
index_fields = index_info["attributes"]
def parse_vector_attrs(attrs):
@@ -411,8 +422,12 @@ def parse_attrs(attrs, field_type=None):
# append field
schema_fields.append(field)
+ index_dict = {"name": index_name, "prefix": prefixes, "storage_type": storage_type}
+ if stopwords is not None:
+ index_dict["stopwords"] = stopwords
+
return {
- "index": {"name": index_name, "prefix": prefixes, "storage_type": storage_type},
+ "index": index_dict,
"fields": schema_fields,
}
diff --git a/redisvl/schema/schema.py b/redisvl/schema/schema.py
index c97d9708..8a9ec974 100644
--- a/redisvl/schema/schema.py
+++ b/redisvl/schema/schema.py
@@ -1,8 +1,7 @@
-import re
from collections.abc import Mapping, Sequence
from enum import Enum
from pathlib import Path
-from typing import Any, Dict, List, Literal, Union
+from typing import Any, Dict, List, Literal, Optional, Union
import yaml
from pydantic import BaseModel, Field, model_validator
@@ -31,7 +30,7 @@ class StorageType(Enum):
class IndexInfo(BaseModel):
"""Index info includes the essential details regarding index settings,
- such as its name, prefix, key separator, and storage type in Redis.
+ such as its name, prefix, key separator, storage type, and stopwords in Redis.
In yaml format, the index info section looks like:
@@ -42,6 +41,7 @@ class IndexInfo(BaseModel):
prefix: user
key_separtor: ':'
storage_type: json
+ stopwords: [] # Disable stopwords (STOPWORDS 0)
In dict format, the index info section looks like:
@@ -51,7 +51,8 @@ class IndexInfo(BaseModel):
"name": "user-index",
"prefix": "user",
"key_separator": ":",
- "storage_type": "json"
+ "storage_type": "json",
+ "stopwords": ["the", "a", "an"] # Custom stopwords
}}
"""
@@ -64,6 +65,9 @@ class IndexInfo(BaseModel):
"""The separator character used in designing Redis keys."""
storage_type: StorageType = StorageType.HASH
"""The storage type used in Redis (e.g., 'hash' or 'json')."""
+ stopwords: Optional[List[str]] = None
+ """Index-level stopwords configuration: None (default) uses the Redis default stopwords,
+ an empty list [] disables stopwords (STOPWORDS 0), and a non-empty list sets custom stopwords."""
class IndexSchema(BaseModel):
diff --git a/tests/integration/test_stopwords_integration.py b/tests/integration/test_stopwords_integration.py
new file mode 100644
index 00000000..14ebc742
--- /dev/null
+++ b/tests/integration/test_stopwords_integration.py
@@ -0,0 +1,192 @@
+"""Integration tests for stopwords support."""
+
+import pytest
+
+from redisvl.index import SearchIndex
+from redisvl.query import FilterQuery
+from redisvl.schema import IndexSchema
+
+
+@pytest.fixture
+def stopwords_disabled_schema():
+ """Schema with stopwords disabled (STOPWORDS 0)."""
+ return {
+ "index": {
+ "name": "test_stopwords_disabled",
+ "prefix": "test_sw_disabled:",
+ "storage_type": "hash",
+ "stopwords": [], # STOPWORDS 0
+ },
+ "fields": [
+ {"name": "title", "type": "text"},
+ {"name": "description", "type": "text"},
+ ],
+ }
+
+
+@pytest.fixture
+def custom_stopwords_schema():
+ """Schema with custom stopwords list."""
+ return {
+ "index": {
+ "name": "test_custom_stopwords",
+ "prefix": "test_sw_custom:",
+ "storage_type": "hash",
+ "stopwords": ["the", "a", "an"],
+ },
+ "fields": [
+ {"name": "title", "type": "text"},
+ ],
+ }
+
+
+@pytest.fixture
+def default_stopwords_schema():
+ """Schema with default stopwords (no stopwords field)."""
+ return {
+ "index": {
+ "name": "test_default_stopwords",
+ "prefix": "test_sw_default:",
+ "storage_type": "hash",
+ },
+ "fields": [
+ {"name": "title", "type": "text"},
+ ],
+ }
+
+
+@pytest.fixture
+def stopwords_disabled_index(client, stopwords_disabled_schema):
+ """Index fixture with stopwords disabled."""
+ schema = IndexSchema.from_dict(stopwords_disabled_schema)
+ index = SearchIndex(schema, redis_client=client)
+ index.create(overwrite=True, drop=True)
+
+ yield index
+
+ index.delete(drop=True)
+
+
+@pytest.fixture
+def custom_stopwords_index(client, custom_stopwords_schema):
+ """Index fixture with custom stopwords."""
+ schema = IndexSchema.from_dict(custom_stopwords_schema)
+ index = SearchIndex(schema, redis_client=client)
+ index.create(overwrite=True, drop=True)
+
+ yield index
+
+ index.delete(drop=True)
+
+
+@pytest.fixture
+def default_stopwords_index(client, default_stopwords_schema):
+ """Index fixture with default stopwords."""
+ schema = IndexSchema.from_dict(default_stopwords_schema)
+ index = SearchIndex(schema, redis_client=client)
+ index.create(overwrite=True, drop=True)
+
+ yield index
+
+ index.delete(drop=True)
+
+
+def test_create_index_with_stopwords_disabled(client, stopwords_disabled_index):
+ """Test creating an index with STOPWORDS 0."""
+ # Verify index was created
+ assert stopwords_disabled_index.exists()
+
+ # Get FT.INFO and verify stopwords_list is empty
+ info = client.ft(stopwords_disabled_index.name).info()
+ assert "stopwords_list" in info
+ assert info["stopwords_list"] == []
+
+
+def test_create_index_with_custom_stopwords(client, custom_stopwords_index):
+ """Test creating an index with custom stopwords list."""
+ # Verify index was created
+ assert custom_stopwords_index.exists()
+
+ # Get FT.INFO and verify stopwords_list matches
+ info = client.ft(custom_stopwords_index.name).info()
+ assert "stopwords_list" in info
+
+ # Convert bytes to strings for comparison
+ stopwords_list = [
+ sw.decode("utf-8") if isinstance(sw, bytes) else sw
+ for sw in info["stopwords_list"]
+ ]
+ assert set(stopwords_list) == {"the", "a", "an"}
+
+
+def test_create_index_with_default_stopwords(default_stopwords_index):
+ """Test creating an index with default stopwords (no STOPWORDS clause)."""
+ # Verify index was created
+ assert default_stopwords_index.exists()
+
+ # When no STOPWORDS clause is used, Redis doesn't include stopwords_list in FT.INFO
+ # (or it may include the default list depending on Redis version)
+ # We just verify the index was created successfully with default behavior
+
+
+def test_from_existing_preserves_stopwords_disabled(client, stopwords_disabled_index):
+ """Test that from_existing() correctly reconstructs stopwords=[] configuration."""
+ # Reconstruct from existing
+ reconstructed_index = SearchIndex.from_existing(
+ stopwords_disabled_index.name, redis_client=client
+ )
+
+ # Verify stopwords configuration was preserved
+ assert reconstructed_index.schema.index.stopwords == []
+
+
+def test_from_existing_preserves_custom_stopwords(client, custom_stopwords_index):
+ """Test that from_existing() correctly reconstructs custom stopwords configuration."""
+ # Reconstruct from existing
+ reconstructed_index = SearchIndex.from_existing(
+ custom_stopwords_index.name, redis_client=client
+ )
+
+ # Verify stopwords configuration was preserved
+ assert set(reconstructed_index.schema.index.stopwords) == {"the", "a", "an"}
+
+
+def test_from_existing_default_stopwords(client, default_stopwords_index):
+ """Test that from_existing() handles default stopwords (no stopwords_list in FT.INFO)."""
+ # Reconstruct from existing
+ reconstructed_index = SearchIndex.from_existing(
+ default_stopwords_index.name, redis_client=client
+ )
+
+ # Verify stopwords is None (default behavior)
+ assert reconstructed_index.schema.index.stopwords is None
+
+
+def test_stopwords_disabled_allows_searching_common_words(
+ client, stopwords_disabled_index
+):
+ """Test that STOPWORDS 0 allows searching for common stopwords like 'the', 'a', 'of'."""
+ # Add test data with common stopwords
+ test_data = [
+ {"title": "Bank of Glasberliner", "description": "A major bank"},
+ {"title": "The Great Gatsby", "description": "A classic novel"},
+ {
+ "title": "An Introduction to Python",
+ "description": "A programming guide",
+ },
+ ]
+
+ for i, data in enumerate(test_data):
+ key = f"test_sw_disabled:{i}"
+ client.hset(key, mapping=data)
+
+ # Search for "of" - should find "Bank of Glasberliner"
+ query = FilterQuery(
+ filter_expression="@title:(of)",
+ return_fields=["title"],
+ )
+ results = stopwords_disabled_index.search(query.query, query_params=query.params)
+
+ # With STOPWORDS 0, "of" should be indexed and searchable
+ assert len(results.docs) > 0
+ assert any("of" in doc.title.lower() for doc in results.docs)
diff --git a/tests/unit/test_convert_index_info.py b/tests/unit/test_convert_index_info.py
index c4cf0db1..2a4dc36d 100644
--- a/tests/unit/test_convert_index_info.py
+++ b/tests/unit/test_convert_index_info.py
@@ -1,7 +1,5 @@
"""Unit tests for convert_index_info_to_schema function."""
-import pytest
-
from redisvl.redis.connection import convert_index_info_to_schema
@@ -110,3 +108,67 @@ def test_convert_index_info_with_fields():
assert result["fields"][0]["type"] == "tag"
assert result["fields"][1]["name"] == "text"
assert result["fields"][1]["type"] == "text"
+
+
+def test_convert_index_info_stopwords_disabled():
+ """Test converting index info with STOPWORDS 0 (disabled stopwords)."""
+ index_info = {
+ "index_name": "test_stopwords_disabled",
+ "index_definition": [
+ "key_type",
+ "HASH",
+ "prefixes",
+ ["test_sw:"],
+ ],
+ "attributes": [],
+ "stopwords_list": [], # STOPWORDS 0
+ }
+
+ result = convert_index_info_to_schema(index_info)
+
+ assert result["index"]["name"] == "test_stopwords_disabled"
+ assert result["index"]["stopwords"] == []
+
+
+def test_convert_index_info_custom_stopwords():
+ """Test converting index info with custom stopwords list."""
+ index_info = {
+ "index_name": "test_custom_stopwords",
+ "index_definition": [
+ "key_type",
+ "HASH",
+ "prefixes",
+ ["test_csw:"],
+ ],
+ "attributes": [],
+ "stopwords_list": [b"the", b"a", b"an"], # Custom stopwords (as bytes)
+ }
+
+ result = convert_index_info_to_schema(index_info)
+
+ assert result["index"]["name"] == "test_custom_stopwords"
+ assert result["index"]["stopwords"] == ["the", "a", "an"]
+
+
+def test_convert_index_info_default_stopwords():
+ """Test converting index info with default stopwords (no stopwords_list key).
+
+ When no STOPWORDS clause is specified in FT.CREATE, Redis uses its default
+ stopwords list, and FT.INFO does not include a stopwords_list key.
+ """
+ index_info = {
+ "index_name": "test_default_stopwords",
+ "index_definition": [
+ "key_type",
+ "HASH",
+ "prefixes",
+ ["test_dsw:"],
+ ],
+ "attributes": [],
+ # No stopwords_list key - indicates default behavior
+ }
+
+ result = convert_index_info_to_schema(index_info)
+
+ assert result["index"]["name"] == "test_default_stopwords"
+ assert "stopwords" not in result["index"] # Should not be present
diff --git a/tests/unit/test_field_modifier_ordering.py b/tests/unit/test_field_modifier_ordering.py
index fad097fc..8f77a610 100644
--- a/tests/unit/test_field_modifier_ordering.py
+++ b/tests/unit/test_field_modifier_ordering.py
@@ -309,11 +309,11 @@ def test_empty_suffix(self):
assert field.args_suffix == []
-class TestMLPCommandsScenario:
- """Test the exact scenario from mlp_commands.txt."""
+class TestFieldModifierScenario:
+ """Test field modifier ordering scenario."""
def test_work_experience_summary_field(self):
- """Test TextField with INDEXMISSING SORTABLE UNF (mlp_commands.txt scenario)."""
+ """Test TextField with INDEXMISSING SORTABLE UNF (field modifier scenario)."""
field = TextField(
name="work_experience_summary",
attrs={"index_missing": True, "sortable": True, "unf": True},
@@ -321,11 +321,11 @@ def test_work_experience_summary_field(self):
redis_field = field.as_redis_field()
suffix = redis_field.args_suffix
- # Verify exact order from mlp_commands.txt
+ # Verify the modifiers are emitted in exactly this order
assert suffix == ["INDEXMISSING", "SORTABLE", "UNF"]
- def test_mlp_scenario_redis_args(self):
- """Test that redis_args() produces correct command for mlp_commands.txt scenario."""
+ def test_field_modifier_scenario_redis_args(self):
+ """Test that redis_args() produces the correct command for the field modifier scenario."""
field = TextField(
name="work_experience_summary",
attrs={"index_missing": True, "sortable": True, "unf": True},
diff --git a/tests/unit/test_stopwords_schema.py b/tests/unit/test_stopwords_schema.py
new file mode 100644
index 00000000..87c807a1
--- /dev/null
+++ b/tests/unit/test_stopwords_schema.py
@@ -0,0 +1,202 @@
+"""Unit tests for stopwords support in IndexSchema."""
+
+import tempfile
+
+import yaml
+
+from redisvl.schema import IndexSchema
+
+
+def test_index_schema_stopwords_none_default():
+ """Test IndexSchema with no stopwords specified (default behavior)."""
+ schema_dict = {
+ "index": {
+ "name": "test_index",
+ "prefix": "test",
+ "storage_type": "hash",
+ },
+ "fields": [
+ {"name": "title", "type": "text"},
+ ],
+ }
+
+ schema = IndexSchema.from_dict(schema_dict)
+
+ assert schema.index.name == "test_index"
+ assert schema.index.stopwords is None # Default
+
+
+def test_index_schema_stopwords_disabled():
+ """Test IndexSchema with stopwords disabled (STOPWORDS 0)."""
+ schema_dict = {
+ "index": {
+ "name": "test_index",
+ "prefix": "test",
+ "storage_type": "hash",
+ "stopwords": [], # Empty list = STOPWORDS 0
+ },
+ "fields": [
+ {"name": "title", "type": "text"},
+ ],
+ }
+
+ schema = IndexSchema.from_dict(schema_dict)
+
+ assert schema.index.name == "test_index"
+ assert schema.index.stopwords == []
+
+
+def test_index_schema_custom_stopwords():
+ """Test IndexSchema with custom stopwords list."""
+ schema_dict = {
+ "index": {
+ "name": "test_index",
+ "prefix": "test",
+ "storage_type": "hash",
+ "stopwords": ["the", "a", "an"],
+ },
+ "fields": [
+ {"name": "title", "type": "text"},
+ ],
+ }
+
+ schema = IndexSchema.from_dict(schema_dict)
+
+ assert schema.index.name == "test_index"
+ assert schema.index.stopwords == ["the", "a", "an"]
+
+
+def test_index_schema_stopwords_from_yaml_disabled():
+ """Test IndexSchema from YAML with stopwords disabled."""
+ yaml_content = """
+version: '0.1.0'
+
+index:
+ name: test_yaml_index
+ prefix: test_yaml
+ storage_type: hash
+ stopwords: []
+
+fields:
+ - name: title
+ type: text
+"""
+
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+ f.write(yaml_content)
+ yaml_path = f.name
+
+ try:
+ schema = IndexSchema.from_yaml(yaml_path)
+ assert schema.index.name == "test_yaml_index"
+ assert schema.index.stopwords == []
+ finally:
+ import os
+
+ os.unlink(yaml_path)
+
+
+def test_index_schema_stopwords_from_yaml_custom():
+ """Test IndexSchema from YAML with custom stopwords."""
+ yaml_content = """
+version: '0.1.0'
+
+index:
+ name: test_yaml_index
+ prefix: test_yaml
+ storage_type: hash
+ stopwords:
+ - the
+ - a
+ - an
+
+fields:
+ - name: title
+ type: text
+"""
+
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+ f.write(yaml_content)
+ yaml_path = f.name
+
+ try:
+ schema = IndexSchema.from_yaml(yaml_path)
+ assert schema.index.name == "test_yaml_index"
+ assert schema.index.stopwords == ["the", "a", "an"]
+ finally:
+ import os
+
+ os.unlink(yaml_path)
+
+
+def test_index_schema_to_dict_preserves_stopwords():
+ """Test that to_dict() preserves stopwords configuration."""
+ schema_dict = {
+ "index": {
+ "name": "test_index",
+ "prefix": "test",
+ "storage_type": "hash",
+ "stopwords": ["the", "a"],
+ },
+ "fields": [
+ {"name": "title", "type": "text"},
+ ],
+ }
+
+ schema = IndexSchema.from_dict(schema_dict)
+ result_dict = schema.to_dict()
+
+ assert result_dict["index"]["stopwords"] == ["the", "a"]
+
+
+def test_index_schema_to_dict_omits_none_stopwords():
+ """Test that to_dict() omits stopwords when None (default)."""
+ schema_dict = {
+ "index": {
+ "name": "test_index",
+ "prefix": "test",
+ "storage_type": "hash",
+ },
+ "fields": [
+ {"name": "title", "type": "text"},
+ ],
+ }
+
+ schema = IndexSchema.from_dict(schema_dict)
+ result_dict = schema.to_dict()
+
+ # stopwords should not be in the dict when None (default behavior)
+ assert "stopwords" not in result_dict["index"]
+
+
+def test_index_schema_to_yaml_preserves_stopwords():
+ """Test that to_yaml() preserves stopwords configuration."""
+ schema_dict = {
+ "index": {
+ "name": "test_index",
+ "prefix": "test",
+ "storage_type": "hash",
+ "stopwords": [], # STOPWORDS 0
+ },
+ "fields": [
+ {"name": "title", "type": "text"},
+ ],
+ }
+
+ schema = IndexSchema.from_dict(schema_dict)
+
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+ yaml_path = f.name
+
+ try:
+ schema.to_yaml(yaml_path)
+
+ # Read back and verify
+ with open(yaml_path, "r") as f:
+ yaml_data = yaml.safe_load(f)
+
+ assert yaml_data["index"]["stopwords"] == []
+ finally:
+ import os
+
+ os.unlink(yaml_path)