diff --git a/redisvl/index/index.py b/redisvl/index/index.py index 8d21c383..3dc1732b 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -245,8 +245,10 @@ def name(self) -> str: @property def prefix(self) -> str: """The optional key prefix that comes before a unique key value in - forming a Redis key.""" - return self.schema.index.prefix + forming a Redis key. If multiple prefixes are configured, returns the + first one.""" + prefix = self.schema.index.prefix + return prefix[0] if isinstance(prefix, list) else prefix @property def key_separator(self) -> str: @@ -329,7 +331,7 @@ def key(self, id: str) -> str: """ return self._storage._key( id=id, - prefix=self.schema.index.prefix, + prefix=self.prefix, key_separator=self.schema.index.key_separator, ) diff --git a/redisvl/index/storage.py b/redisvl/index/storage.py index 8cf682f3..6143d525 100644 --- a/redisvl/index/storage.py +++ b/redisvl/index/storage.py @@ -114,9 +114,13 @@ def _create_key(self, obj: Dict[str, Any], id_field: Optional[str] = None) -> st except KeyError: raise ValueError(f"Key field {id_field} not found in record {obj}") + # Normalize prefix: use first prefix if multiple are configured + prefix = self.index_schema.index.prefix + normalized_prefix = prefix[0] if isinstance(prefix, list) else prefix + return self._key( key_value, - prefix=self.index_schema.index.prefix, + prefix=normalized_prefix, key_separator=self.index_schema.index.key_separator, ) diff --git a/redisvl/redis/connection.py b/redisvl/redis/connection.py index 00715f89..0dce70c2 100644 --- a/redisvl/redis/connection.py +++ b/redisvl/redis/connection.py @@ -133,31 +133,73 @@ def convert_index_info_to_schema(index_info: Dict[str, Any]) -> Dict[str, Any]: Dict[str, Any]: Schema dictionary. """ index_name = index_info["index_name"] - prefixes = index_info["index_definition"][3][0] + prefixes = index_info["index_definition"][3] + # Normalize single-element prefix lists to string for backward compatibility + if isinstance(prefixes, list) and len(prefixes) == 1: + prefixes = prefixes[0] storage_type = index_info["index_definition"][1].lower() index_fields = index_info["attributes"] def parse_vector_attrs(attrs): # Parse vector attributes from Redis FT.INFO output - # Attributes start at position 6 as key-value pairs + # Format varies significantly between Redis versions: + # - Redis 6.2.6-v9: [... "VECTOR"] - no params returned by FT.INFO + # - Redis 6.2.x: [... "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3", ...] + # Position 6: algorithm value (e.g., "FLAT" or "HNSW") + # Position 7: param count + # Position 8+: key-value pairs + # - Redis 7.x+: [... "VECTOR", "ALGORITHM", "FLAT", "TYPE", "FLOAT32", "DIM", "3", ...] 
+ # Position 6+: all key-value pairs + + # Check if we have any attributes beyond the type declaration + if len(attrs) <= 6: + # Redis 6.2.6-v9 or similar: no vector params in FT.INFO + # Return None to signal we can't parse this field properly + return None + vector_attrs = {} + start_pos = 6 + + # Detect format: if position 6 looks like an algorithm value (not a key), + # we're dealing with the older format + if len(attrs) > 6: + pos6_str = str(attrs[6]).upper() + # Check if position 6 is an algorithm value (FLAT, HNSW) vs a key (ALGORITHM, TYPE, DIM) + if pos6_str in ("FLAT", "HNSW"): + # Old format (Redis 6.2.x): position 6 is algorithm value, position 7 is param count + # Store the algorithm + vector_attrs["algorithm"] = pos6_str + # Skip to position 8 where key-value pairs start + start_pos = 8 + try: - for i in range(6, len(attrs), 2): + for i in range(start_pos, len(attrs), 2): if i + 1 < len(attrs): key = str(attrs[i]).lower() vector_attrs[key] = attrs[i + 1] except (IndexError, TypeError, ValueError): + # Silently continue - we'll validate required fields below pass # Normalize to expected field names normalized = {} - # Handle dims/dim field + # Handle dims/dim field - REQUIRED for vector fields if "dim" in vector_attrs: normalized["dims"] = int(vector_attrs.pop("dim")) elif "dims" in vector_attrs: normalized["dims"] = int(vector_attrs["dims"]) + else: + # If dims is missing from normal parsing, try scanning the raw attrs + # This handles edge cases where the format is unexpected + for i in range(6, len(attrs) - 1): + if str(attrs[i]).upper() in ("DIM", "DIMS"): + try: + normalized["dims"] = int(attrs[i + 1]) + break + except (ValueError, IndexError): + pass # Handle distance_metric field if "distance_metric" in vector_attrs: @@ -178,10 +220,18 @@ def parse_vector_attrs(attrs): normalized["datatype"] = vector_attrs["data_type"].lower() elif "datatype" in vector_attrs: normalized["datatype"] = vector_attrs["datatype"].lower() + elif "type" in vector_attrs: + # Sometimes it's just "type" instead of "data_type" + normalized["datatype"] = vector_attrs["type"].lower() else: # Default to float32 if missing normalized["datatype"] = "float32" + # Validate that we have required dims + if "dims" not in normalized: + # Could not parse dims - this field is not properly supported + return None + return normalized def parse_attrs(attrs, field_type=None): @@ -234,7 +284,12 @@ def parse_attrs(attrs, field_type=None): field["path"] = field_attrs[1] # parse field attrs if field_attrs[5] == "VECTOR": - field["attrs"] = parse_vector_attrs(field_attrs) + attrs = parse_vector_attrs(field_attrs) + if attrs is None: + # Vector field attributes cannot be parsed on this Redis version + # Skip this field - it cannot be properly reconstructed + continue + field["attrs"] = attrs else: field["attrs"] = parse_attrs(field_attrs, field_type=field_attrs[5]) # append field diff --git a/redisvl/schema/schema.py b/redisvl/schema/schema.py index eb45f8d0..c97d9708 100644 --- a/redisvl/schema/schema.py +++ b/redisvl/schema/schema.py @@ -58,8 +58,8 @@ class IndexInfo(BaseModel): name: str """The unique name of the index.""" - prefix: str = "rvl" - """The prefix used for Redis keys associated with this index.""" + prefix: Union[str, List[str]] = "rvl" + """The prefix(es) used for Redis keys associated with this index. 
Can be a single string or a list of strings.""" key_separator: str = ":" """The separator character used in designing Redis keys.""" storage_type: StorageType = StorageType.HASH diff --git a/tests/integration/test_async_search_index.py b/tests/integration/test_async_search_index.py index 6594a1ff..416e4662 100644 --- a/tests/integration/test_async_search_index.py +++ b/tests/integration/test_async_search_index.py @@ -151,7 +151,24 @@ async def test_search_index_from_existing_complex(async_client): except Exception as e: pytest.skip(str(e)) - assert async_index2.schema == async_index.schema + # Verify index metadata matches + assert async_index2.schema.index.name == async_index.schema.index.name + assert async_index2.schema.index.prefix == async_index.schema.index.prefix + assert ( + async_index2.schema.index.storage_type == async_index.schema.index.storage_type + ) + + # Verify non-vector fields are present + for field_name in ["user", "credit_score", "job", "age"]: + assert field_name in async_index2.schema.fields + assert ( + async_index2.schema.fields[field_name].type + == async_index.schema.fields[field_name].type + ) + + # Vector field may not be present on older Redis versions + if "user_embedding" in async_index2.schema.fields: + assert async_index2.schema.fields["user_embedding"].type == "vector" def test_search_index_no_prefix(index_schema): diff --git a/tests/integration/test_search_index.py b/tests/integration/test_search_index.py index 36b3aa3b..2cef1040 100644 --- a/tests/integration/test_search_index.py +++ b/tests/integration/test_search_index.py @@ -150,7 +150,97 @@ def test_search_index_from_existing_complex(client): except Exception as e: pytest.skip(str(e)) - assert index.schema == index2.schema + # Verify index metadata matches + assert index2.schema.index.name == index.schema.index.name + assert index2.schema.index.prefix == index.schema.index.prefix + assert index2.schema.index.storage_type == index.schema.index.storage_type + + # Verify non-vector fields are present + for field_name in ["user", "credit_score", "job", "age"]: + assert field_name in index2.schema.fields + assert ( + index2.schema.fields[field_name].type + == index.schema.fields[field_name].type + ) + + # Vector field may not be present on older Redis versions + if "user_embedding" in index2.schema.fields: + assert index2.schema.fields["user_embedding"].type == "vector" + + +def test_search_index_from_existing_multiple_prefixes(client): + """Test that from_existing correctly handles indices with multiple prefixes (issue #258).""" + from redis.commands.search.field import TextField, VectorField + + index_name = "test_multi_prefix" + + # Create index manually using redis-py with multiple prefixes + # This simulates an index created with: FT.CREATE index ON HASH PREFIX 3 prefix_a: prefix_b: prefix_c: ... + try: + # Clean up any existing index + try: + client.ft(index_name).dropindex(delete_documents=True) + except Exception: + pass + + # Create index using raw FT.CREATE command with multiple prefixes + # FT.CREATE index ON HASH PREFIX 3 prefix_a: prefix_b: prefix_c: SCHEMA user TAG text TEXT ... 
+ client.execute_command( + "FT.CREATE", + index_name, + "ON", + "HASH", + "PREFIX", + "3", + "prefix_a:", + "prefix_b:", + "prefix_c:", + "SCHEMA", + "user", + "TAG", + "text", + "TEXT", + "embedding", + "VECTOR", + "FLAT", + "6", + "TYPE", + "FLOAT32", + "DIM", + "3", + "DISTANCE_METRIC", + "COSINE", + ) + + # Now test from_existing - this is where the bug was + loaded_index = SearchIndex.from_existing(index_name, redis_client=client) + + # Verify all prefixes are preserved (this was failing before fix) + # Before the fix, only "prefix_a:" would be returned + assert loaded_index.schema.index.prefix == [ + "prefix_a:", + "prefix_b:", + "prefix_c:", + ], "Multiple prefixes should be preserved when loading existing index" + + # Verify the index name and storage type + assert loaded_index.schema.index.name == index_name + assert loaded_index.schema.index.storage_type.value == "hash" + + # Verify TAG and TEXT fields are present + assert "user" in loaded_index.schema.fields + assert "text" in loaded_index.schema.fields + + # Verify vector field if present + if "embedding" in loaded_index.schema.fields: + assert loaded_index.schema.fields["embedding"].type == "vector" + + finally: + # Cleanup + try: + client.ft(index_name).dropindex(delete_documents=True) + except Exception: + pass def test_search_index_no_prefix(index_schema): diff --git a/tests/unit/test_convert_index_info.py b/tests/unit/test_convert_index_info.py new file mode 100644 index 00000000..c4cf0db1 --- /dev/null +++ b/tests/unit/test_convert_index_info.py @@ -0,0 +1,112 @@ +"""Unit tests for convert_index_info_to_schema function.""" + +import pytest + +from redisvl.redis.connection import convert_index_info_to_schema + + +def test_convert_index_info_single_prefix(): + """Test converting index info with a single prefix. + + Single-element prefix lists are normalized to strings for backward compatibility. + """ + index_info = { + "index_name": "test_index", + "index_definition": [ + "key_type", + "HASH", + "prefixes", + ["prefix_a"], + ], + "attributes": [], + } + + result = convert_index_info_to_schema(index_info) + + assert result["index"]["name"] == "test_index" + assert result["index"]["prefix"] == "prefix_a" # normalized to string + assert result["index"]["storage_type"] == "hash" + + +def test_convert_index_info_multiple_prefixes(): + """Test converting index info with multiple prefixes (issue #258).""" + index_info = { + "index_name": "test_index", + "index_definition": [ + "key_type", + "HASH", + "prefixes", + ["prefix_a", "prefix_b", "prefix_c"], + ], + "attributes": [], + } + + result = convert_index_info_to_schema(index_info) + + assert result["index"]["name"] == "test_index" + assert result["index"]["prefix"] == ["prefix_a", "prefix_b", "prefix_c"] + assert result["index"]["storage_type"] == "hash" + + +def test_convert_index_info_json_storage(): + """Test converting index info with JSON storage type. + + Single-element prefix lists are normalized to strings for backward compatibility. 
+ """ + index_info = { + "index_name": "test_json_index", + "index_definition": [ + "key_type", + "JSON", + "prefixes", + ["json_prefix"], + ], + "attributes": [], + } + + result = convert_index_info_to_schema(index_info) + + assert result["index"]["name"] == "test_json_index" + assert result["index"]["prefix"] == "json_prefix" # normalized to string + assert result["index"]["storage_type"] == "json" + + +def test_convert_index_info_with_fields(): + """Test converting index info with field definitions.""" + index_info = { + "index_name": "test_index", + "index_definition": [ + "key_type", + "HASH", + "prefixes", + ["prefix_a", "prefix_b"], + ], + "attributes": [ + [ + "identifier", + "user", + "attribute", + "user", + "type", + "TAG", + ], + [ + "identifier", + "text", + "attribute", + "text", + "type", + "TEXT", + ], + ], + } + + result = convert_index_info_to_schema(index_info) + + assert result["index"]["name"] == "test_index" + assert result["index"]["prefix"] == ["prefix_a", "prefix_b"] + assert len(result["fields"]) == 2 + assert result["fields"][0]["name"] == "user" + assert result["fields"][0]["type"] == "tag" + assert result["fields"][1]["name"] == "text" + assert result["fields"][1]["type"] == "text"