Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 44 additions & 45 deletions dotnet/src/VectorData/Redis/RedisJsonMapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,70 +27,61 @@ internal sealed class RedisJsonMapper<TConsumerDataModel>(
/// <inheritdoc />
public (string Key, JsonNode Node) MapFromDataToStorageModel(TConsumerDataModel dataModel, int recordIndex, IReadOnlyList<Embedding>?[]? generatedEmbeddings)
{
// Convert the provided record into a JsonNode object and try to get the key field for it.
// Since we already checked that the key field is a string in the constructor, and that it exists on the model,
// the only edge case we have to be concerned about is if the key field is null.
var jsonNode = JsonSerializer.SerializeToNode(dataModel, jsonSerializerOptions)!.AsObject();
// Extract the key. The constructor has already validated that the key property is a string.
var keyValue = model.KeyProperty.GetValueAsObject(dataModel) as string
?? throw new InvalidOperationException($"Missing key field '{this._keyPropertyStorageName}' on provided record of type {typeof(TConsumerDataModel).FullName}.");

if (!(jsonNode.TryGetPropertyValue(this._keyPropertyStorageName, out var keyField) && keyField is JsonValue jsonValue))
// Build the JSON payload from the model's data properties only, so that properties on the POCO that are not
// part of the vector-store schema (no [VectorStoreData]/[VectorStoreVector]/[VectorStoreKey] attribute and not
// in the collection definition) are not persisted in Redis.
var jsonNode = new JsonObject();

foreach (var dataProperty in model.DataProperties)
{
throw new InvalidOperationException($"Missing key field '{this._keyPropertyStorageName}' on provided record of type {typeof(TConsumerDataModel).FullName}.");
var value = dataProperty.GetValueAsObject(dataModel);
jsonNode.Add(
dataProperty.StorageName,
value is null
? null
: JsonSerializer.SerializeToNode(value, dataProperty.Type, jsonSerializerOptions));
}

// Remove the key field from the JSON object since we don't want to store it in the redis payload.
var keyValue = jsonValue.ToString();
jsonNode.Remove(this._keyPropertyStorageName);

// Go over the vector properties and write each one as a plain JSON array, preferring a generated embedding over the value on the record.
// Embedding<T> values are flattened to a simple array too, since JsonSerializer would otherwise emit Embedding<T> as a complex object.
for (var i = 0; i < model.VectorProperties.Count; i++)
{
var property = model.VectorProperties[i];

Embedding? embedding = generatedEmbeddings?[i]?[recordIndex] is Embedding ge ? ge : null;
var vector = generatedEmbeddings?[i]?[recordIndex] is Embedding ge
? (object)ge
: property.GetValueAsObject(dataModel);

if (embedding is null)
if (vector is null)
{
switch (Nullable.GetUnderlyingType(property.Type) ?? property.Type)
{
case var t when t == typeof(ReadOnlyMemory<float>):
case var t2 when t2 == typeof(float[]):
case var t3 when t3 == typeof(ReadOnlyMemory<double>):
case var t4 when t4 == typeof(double[]):
// The .NET vector property is a ReadOnlyMemory<T> or T[] (not an Embedding), which means that JsonSerializer
// already serialized it correctly above.
// In addition, there's no generated embedding (which would be an Embedding which we'd need to handle manually).
// So there's nothing for us to do.
continue;

case var t when t == typeof(Embedding<float>):
case var t1 when t1 == typeof(Embedding<double>):
embedding = (Embedding)property.GetValueAsObject(dataModel)!;
break;

default:
throw new UnreachableException();
}
jsonNode[property.StorageName] = null;
continue;
}

var jsonArray = new JsonArray();

switch (embedding)
switch (vector)
{
case ReadOnlyMemory<float> m:
AppendVector(jsonArray, m.Span);
break;
case Embedding<float> e:
foreach (var item in e.Vector.Span)
{
jsonArray.Add(JsonValue.Create(item));
}
AppendVector(jsonArray, e.Vector.Span);
break;
case float[] a:
AppendVector(jsonArray, a.AsSpan());
break;
case ReadOnlyMemory<double> m:
AppendVector(jsonArray, m.Span);
break;

case Embedding<double> e:
foreach (var item in e.Vector.Span)
{
jsonArray.Add(JsonValue.Create(item));
}
AppendVector(jsonArray, e.Vector.Span);
break;
case double[] a:
AppendVector(jsonArray, a.AsSpan());
break;

default:
throw new UnreachableException();
}
Comment thread
jluocsa marked this conversation as resolved.
Expand Down Expand Up @@ -158,4 +149,12 @@ public TConsumerDataModel MapFromStorageToDataModel((string Key, JsonNode Node)

return JsonSerializer.Deserialize<TConsumerDataModel>(jsonObject, jsonSerializerOptions)!;
}

/// <summary>
/// Appends every element of a single-precision vector to <paramref name="jsonArray"/> as a JSON number.
/// </summary>
/// <param name="jsonArray">The array that receives the elements, in their original order.</param>
/// <param name="span">The vector elements to append; an empty span leaves the array unchanged.</param>
private static void AppendVector(JsonArray jsonArray, ReadOnlySpan<float> span)
{
    foreach (var item in span)
    {
        // A concrete element type binds to the primitive JsonValue.Create(float) overload
        // instead of the generic JsonValue.Create<T>, which goes through reflection-based serialization.
        jsonArray.Add(JsonValue.Create(item));
    }
}

/// <summary>
/// Appends every element of a double-precision vector to <paramref name="jsonArray"/> as a JSON number.
/// </summary>
/// <param name="jsonArray">The array that receives the elements, in their original order.</param>
/// <param name="span">The vector elements to append; an empty span leaves the array unchanged.</param>
private static void AppendVector(JsonArray jsonArray, ReadOnlySpan<double> span)
{
    foreach (var item in span)
    {
        // Binds to the primitive JsonValue.Create(double) overload (see the float overload).
        jsonArray.Add(JsonValue.Create(item));
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -305,10 +305,10 @@ public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition)
}

[Theory]
[InlineData(true, true, """{"data1_json_name":"data 1","data2":"data 2","vector1_json_name":[1,2,3,4],"vector2":[1,2,3,4],"notAnnotated":null}""")]
[InlineData(true, false, """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4],"NotAnnotated":null}""")]
[InlineData(false, true, """{"data1_json_name":"data 1","data2":"data 2","vector1_json_name":[1,2,3,4],"vector2":[1,2,3,4],"notAnnotated":null}""")]
[InlineData(false, false, """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4],"NotAnnotated":null}""")]
[InlineData(true, true, """{"data1_json_name":"data 1","data2":"data 2","vector1_json_name":[1,2,3,4],"vector2":[1,2,3,4]}""")]
[InlineData(true, false, """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4]}""")]
[InlineData(false, true, """{"data1_json_name":"data 1","data2":"data 2","vector1_json_name":[1,2,3,4],"vector2":[1,2,3,4]}""")]
[InlineData(false, false, """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4]}""")]
public async Task CanUpsertRecordAsync(bool useDefinition, bool useCustomJsonSerializerOptions, string expectedUpsertedJson)
{
// Arrange
Expand All @@ -320,7 +320,6 @@ public async Task CanUpsertRecordAsync(bool useDefinition, bool useCustomJsonSer
await sut.UpsertAsync(model);

// Assert
// TODO: Fix issue where NotAnnotated is being included in the JSON.
var expectedArgs = new object[] { TestRecordKey1, "$", expectedUpsertedJson };
this._redisDatabaseMock
.Verify(
Expand All @@ -346,8 +345,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition)
await sut.UpsertAsync([model1, model2]);

// Assert
// TODO: Fix issue where NotAnnotated is being included in the JSON.
var expectedArgs = new object[] { TestRecordKey1, "$", """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4],"NotAnnotated":null}""", TestRecordKey2, "$", """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4],"NotAnnotated":null}""" };
var expectedArgs = new object[] { TestRecordKey1, "$", """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4]}""", TestRecordKey2, "$", """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4]}""" };
this._redisDatabaseMock
.Verify(
x => x.ExecuteAsync(
Expand Down
20 changes: 12 additions & 8 deletions dotnet/test/VectorData/Redis.UnitTests/RedisJsonMapperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@ public void MapsAllFieldsFromDataToStorageModel()
Assert.NotNull(actual.Node);
Assert.Equal("test key", actual.Key);
var jsonObject = actual.Node.AsObject();
Assert.Equal("data 1", jsonObject?["Data1"]?.ToString());
Assert.Equal("data 2", jsonObject?["Data2"]?.ToString());
Assert.Equal(new float[] { 1, 2, 3, 4 }, jsonObject?["Vector1"]?.AsArray().GetValues<float>().ToArray());
Assert.Equal(new float[] { 5, 6, 7, 8 }, jsonObject?["Vector2"]?.AsArray().GetValues<float>().ToArray());
Assert.NotNull(jsonObject);
Assert.Equal("data 1", jsonObject["Data1"]?.ToString());
Assert.Equal("data 2", jsonObject["Data2"]?.ToString());
Assert.Equal(new float[] { 1, 2, 3, 4 }, jsonObject["Vector1"]?.AsArray().GetValues<float>().ToArray());
Assert.Equal(new float[] { 5, 6, 7, 8 }, jsonObject["Vector2"]?.AsArray().GetValues<float>().ToArray());
Assert.False(jsonObject.ContainsKey("NotAnnotated"));
}

[Fact]
Expand All @@ -52,10 +54,12 @@ public void MapsAllFieldsFromDataToStorageModelWithCustomSerializerOptions()
Assert.NotNull(actual.Node);
Assert.Equal("test key", actual.Key);
var jsonObject = actual.Node.AsObject();
Assert.Equal("data 1", jsonObject?["data1"]?.ToString());
Assert.Equal("data 2", jsonObject?["data2"]?.ToString());
Assert.Equal(new float[] { 1, 2, 3, 4 }, jsonObject?["vector1"]?.AsArray().GetValues<float>().ToArray());
Assert.Equal(new float[] { 5, 6, 7, 8 }, jsonObject?["vector2"]?.AsArray().GetValues<float>().ToArray());
Assert.NotNull(jsonObject);
Assert.Equal("data 1", jsonObject["data1"]?.ToString());
Assert.Equal("data 2", jsonObject["data2"]?.ToString());
Assert.Equal(new float[] { 1, 2, 3, 4 }, jsonObject["vector1"]?.AsArray().GetValues<float>().ToArray());
Assert.Equal(new float[] { 5, 6, 7, 8 }, jsonObject["vector2"]?.AsArray().GetValues<float>().ToArray());
Assert.False(jsonObject.ContainsKey("notAnnotated"));
}

[Fact]
Expand Down
Loading