Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions redisvl/index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,10 @@ def name(self) -> str:
@property
def prefix(self) -> str:
"""The optional key prefix that comes before a unique key value in
forming a Redis key."""
return self.schema.index.prefix
forming a Redis key. If multiple prefixes are configured, returns the
first one."""
prefix = self.schema.index.prefix
return prefix[0] if isinstance(prefix, list) else prefix

@property
def key_separator(self) -> str:
Expand Down Expand Up @@ -329,7 +331,7 @@ def key(self, id: str) -> str:
"""
return self._storage._key(
id=id,
prefix=self.schema.index.prefix,
prefix=self.prefix,
key_separator=self.schema.index.key_separator,
)

Expand Down
6 changes: 5 additions & 1 deletion redisvl/index/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,13 @@ def _create_key(self, obj: Dict[str, Any], id_field: Optional[str] = None) -> st
except KeyError:
raise ValueError(f"Key field {id_field} not found in record {obj}")

# Normalize prefix: use first prefix if multiple are configured
prefix = self.index_schema.index.prefix
normalized_prefix = prefix[0] if isinstance(prefix, list) else prefix

return self._key(
key_value,
prefix=self.index_schema.index.prefix,
prefix=normalized_prefix,
key_separator=self.index_schema.index.key_separator,
)

Expand Down
65 changes: 60 additions & 5 deletions redisvl/redis/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,31 +133,73 @@ def convert_index_info_to_schema(index_info: Dict[str, Any]) -> Dict[str, Any]:
Dict[str, Any]: Schema dictionary.
"""
index_name = index_info["index_name"]
prefixes = index_info["index_definition"][3][0]
prefixes = index_info["index_definition"][3]
# Normalize single-element prefix lists to string for backward compatibility
if isinstance(prefixes, list) and len(prefixes) == 1:
prefixes = prefixes[0]
storage_type = index_info["index_definition"][1].lower()

index_fields = index_info["attributes"]

def parse_vector_attrs(attrs):
# Parse vector attributes from Redis FT.INFO output
# Attributes start at position 6 as key-value pairs
# Format varies significantly between Redis versions:
# - Redis 6.2.6-v9: [... "VECTOR"] - no params returned by FT.INFO
# - Redis 6.2.x: [... "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3", ...]
# Position 6: algorithm value (e.g., "FLAT" or "HNSW")
# Position 7: param count
# Position 8+: key-value pairs
# - Redis 7.x+: [... "VECTOR", "ALGORITHM", "FLAT", "TYPE", "FLOAT32", "DIM", "3", ...]
# Position 6+: all key-value pairs

# Check if we have any attributes beyond the type declaration
if len(attrs) <= 6:
# Redis 6.2.6-v9 or similar: no vector params in FT.INFO
# Return None to signal we can't parse this field properly
return None

vector_attrs = {}
start_pos = 6

# Detect format: if position 6 looks like an algorithm value (not a key),
# we're dealing with the older format
if len(attrs) > 6:
pos6_str = str(attrs[6]).upper()
# Check if position 6 is an algorithm value (FLAT, HNSW) vs a key (ALGORITHM, TYPE, DIM)
if pos6_str in ("FLAT", "HNSW"):
# Old format (Redis 6.2.x): position 6 is algorithm value, position 7 is param count
# Store the algorithm
vector_attrs["algorithm"] = pos6_str
# Skip to position 8 where key-value pairs start
start_pos = 8

try:
for i in range(6, len(attrs), 2):
for i in range(start_pos, len(attrs), 2):
if i + 1 < len(attrs):
key = str(attrs[i]).lower()
vector_attrs[key] = attrs[i + 1]
except (IndexError, TypeError, ValueError):
# Silently continue - we'll validate required fields below
pass

# Normalize to expected field names
normalized = {}

# Handle dims/dim field
# Handle dims/dim field - REQUIRED for vector fields
if "dim" in vector_attrs:
normalized["dims"] = int(vector_attrs.pop("dim"))
elif "dims" in vector_attrs:
normalized["dims"] = int(vector_attrs["dims"])
else:
# If dims is missing from normal parsing, try scanning the raw attrs
# This handles edge cases where the format is unexpected
for i in range(6, len(attrs) - 1):
if str(attrs[i]).upper() in ("DIM", "DIMS"):
try:
normalized["dims"] = int(attrs[i + 1])
break
except (ValueError, IndexError):
pass

# Handle distance_metric field
if "distance_metric" in vector_attrs:
Expand All @@ -178,10 +220,18 @@ def parse_vector_attrs(attrs):
normalized["datatype"] = vector_attrs["data_type"].lower()
elif "datatype" in vector_attrs:
normalized["datatype"] = vector_attrs["datatype"].lower()
elif "type" in vector_attrs:
# Sometimes it's just "type" instead of "data_type"
normalized["datatype"] = vector_attrs["type"].lower()
else:
# Default to float32 if missing
normalized["datatype"] = "float32"

# Validate that we have required dims
if "dims" not in normalized:
# Could not parse dims - this field is not properly supported
return None

return normalized

def parse_attrs(attrs, field_type=None):
Expand Down Expand Up @@ -234,7 +284,12 @@ def parse_attrs(attrs, field_type=None):
field["path"] = field_attrs[1]
# parse field attrs
if field_attrs[5] == "VECTOR":
field["attrs"] = parse_vector_attrs(field_attrs)
attrs = parse_vector_attrs(field_attrs)
if attrs is None:
# Vector field attributes cannot be parsed on this Redis version
# Skip this field - it cannot be properly reconstructed
continue
field["attrs"] = attrs
else:
field["attrs"] = parse_attrs(field_attrs, field_type=field_attrs[5])
# append field
Expand Down
4 changes: 2 additions & 2 deletions redisvl/schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ class IndexInfo(BaseModel):

name: str
"""The unique name of the index."""
prefix: str = "rvl"
"""The prefix used for Redis keys associated with this index."""
prefix: Union[str, List[str]] = "rvl"
"""The prefix(es) used for Redis keys associated with this index. Can be a single string or a list of strings."""
key_separator: str = ":"
"""The separator character used in designing Redis keys."""
storage_type: StorageType = StorageType.HASH
Expand Down
19 changes: 18 additions & 1 deletion tests/integration/test_async_search_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,24 @@ async def test_search_index_from_existing_complex(async_client):
except Exception as e:
pytest.skip(str(e))

assert async_index2.schema == async_index.schema
# Verify index metadata matches
assert async_index2.schema.index.name == async_index.schema.index.name
assert async_index2.schema.index.prefix == async_index.schema.index.prefix
assert (
async_index2.schema.index.storage_type == async_index.schema.index.storage_type
)

# Verify non-vector fields are present
for field_name in ["user", "credit_score", "job", "age"]:
assert field_name in async_index2.schema.fields
assert (
async_index2.schema.fields[field_name].type
== async_index.schema.fields[field_name].type
)

# Vector field may not be present on older Redis versions
if "user_embedding" in async_index2.schema.fields:
assert async_index2.schema.fields["user_embedding"].type == "vector"


def test_search_index_no_prefix(index_schema):
Expand Down
92 changes: 91 additions & 1 deletion tests/integration/test_search_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,97 @@ def test_search_index_from_existing_complex(client):
except Exception as e:
pytest.skip(str(e))

assert index.schema == index2.schema
# Verify index metadata matches
assert index2.schema.index.name == index.schema.index.name
assert index2.schema.index.prefix == index.schema.index.prefix
assert index2.schema.index.storage_type == index.schema.index.storage_type

# Verify non-vector fields are present
for field_name in ["user", "credit_score", "job", "age"]:
assert field_name in index2.schema.fields
assert (
index2.schema.fields[field_name].type
== index.schema.fields[field_name].type
)

# Vector field may not be present on older Redis versions
if "user_embedding" in index2.schema.fields:
assert index2.schema.fields["user_embedding"].type == "vector"


def test_search_index_from_existing_multiple_prefixes(client):
    """Test that from_existing correctly handles indices with multiple prefixes (issue #258).

    The index is created with a raw ``FT.CREATE`` command that declares three
    key prefixes, then loaded back through ``SearchIndex.from_existing``. The
    loaded schema must report every prefix, not just the first one.
    """
    index_name = "test_multi_prefix"
    expected_prefixes = ["prefix_a:", "prefix_b:", "prefix_c:"]

    def drop_index_if_present():
        # Best-effort cleanup: the index may legitimately not exist (first
        # run, or creation failed), so a failed drop is deliberately ignored.
        try:
            client.ft(index_name).dropindex(delete_documents=True)
        except Exception:
            pass

    try:
        # Start from a clean slate in case a previous run left the index behind.
        drop_index_if_present()

        # Equivalent to:
        # FT.CREATE index ON HASH PREFIX 3 prefix_a: prefix_b: prefix_c: SCHEMA ...
        # The prefix count and values come from expected_prefixes so the
        # command and the assertion below cannot drift apart.
        client.execute_command(
            "FT.CREATE",
            index_name,
            "ON",
            "HASH",
            "PREFIX",
            str(len(expected_prefixes)),
            *expected_prefixes,
            "SCHEMA",
            "user",
            "TAG",
            "text",
            "TEXT",
            "embedding",
            "VECTOR",
            "FLAT",
            "6",
            "TYPE",
            "FLOAT32",
            "DIM",
            "3",
            "DISTANCE_METRIC",
            "COSINE",
        )

        # Now test from_existing - this is where the bug was
        loaded_index = SearchIndex.from_existing(index_name, redis_client=client)
        loaded_schema = loaded_index.schema

        # Before the fix, only "prefix_a:" would be returned
        assert (
            loaded_schema.index.prefix == expected_prefixes
        ), "Multiple prefixes should be preserved when loading existing index"

        # Verify the index name and storage type
        assert loaded_schema.index.name == index_name
        assert loaded_schema.index.storage_type.value == "hash"

        # Verify TAG and TEXT fields are present
        assert "user" in loaded_schema.fields
        assert "text" in loaded_schema.fields

        # The vector field is only checked when present; this test does not
        # require it to be reconstructed.
        if "embedding" in loaded_schema.fields:
            assert loaded_schema.fields["embedding"].type == "vector"

    finally:
        drop_index_if_present()


def test_search_index_no_prefix(index_schema):
Expand Down
112 changes: 112 additions & 0 deletions tests/unit/test_convert_index_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""Unit tests for convert_index_info_to_schema function."""

import pytest

from redisvl.redis.connection import convert_index_info_to_schema


def test_convert_index_info_single_prefix():
    """Check conversion of index info that carries exactly one prefix.

    A one-element prefix list is expected to come back as a bare string
    (kept that way for backward compatibility).
    """
    definition = ["key_type", "HASH", "prefixes", ["prefix_a"]]
    raw_info = dict(
        index_name="test_index",
        index_definition=definition,
        attributes=[],
    )

    schema_index = convert_index_info_to_schema(raw_info)["index"]

    assert schema_index["name"] == "test_index"
    assert schema_index["prefix"] == "prefix_a"  # normalized to string
    assert schema_index["storage_type"] == "hash"


def test_convert_index_info_multiple_prefixes():
    """Check that several prefixes survive conversion as a list (issue #258)."""
    definition = [
        "key_type",
        "HASH",
        "prefixes",
        ["prefix_a", "prefix_b", "prefix_c"],
    ]
    raw_info = dict(
        index_name="test_index",
        index_definition=definition,
        attributes=[],
    )

    schema_index = convert_index_info_to_schema(raw_info)["index"]

    assert schema_index["name"] == "test_index"
    assert schema_index["prefix"] == ["prefix_a", "prefix_b", "prefix_c"]
    assert schema_index["storage_type"] == "hash"


def test_convert_index_info_json_storage():
    """Check conversion of index info whose key type is JSON.

    The storage type is expected in lowercase, and the one-element prefix
    list is expected to come back as a bare string (kept that way for
    backward compatibility).
    """
    definition = ["key_type", "JSON", "prefixes", ["json_prefix"]]
    raw_info = dict(
        index_name="test_json_index",
        index_definition=definition,
        attributes=[],
    )

    schema_index = convert_index_info_to_schema(raw_info)["index"]

    assert schema_index["name"] == "test_json_index"
    assert schema_index["prefix"] == "json_prefix"  # normalized to string
    assert schema_index["storage_type"] == "json"


def test_convert_index_info_with_fields():
    """Check that attribute entries are converted into field definitions."""
    # (field name, type as given in the raw attributes, type expected in the schema)
    field_specs = (
        ("user", "TAG", "tag"),
        ("text", "TEXT", "text"),
    )
    raw_attributes = [
        ["identifier", name, "attribute", name, "type", raw_type]
        for name, raw_type, _ in field_specs
    ]
    raw_info = dict(
        index_name="test_index",
        index_definition=["key_type", "HASH", "prefixes", ["prefix_a", "prefix_b"]],
        attributes=raw_attributes,
    )

    schema = convert_index_info_to_schema(raw_info)
    converted_fields = schema["fields"]

    assert schema["index"]["name"] == "test_index"
    assert schema["index"]["prefix"] == ["prefix_a", "prefix_b"]
    assert len(converted_fields) == 2
    # Fields must come back in the same order as the raw attributes.
    for converted, (name, _, schema_type) in zip(converted_fields, field_specs):
        assert converted["name"] == name
        assert converted["type"] == schema_type
Loading