Skip to content

Commit

Permalink
Change separator symbol from "_" to "-" (#11)
Browse files Browse the repository at this point in the history
KM uses the dash "-" symbol as the standard char across different
storage connectors, so this change aligns Postgres behavior with KM.
This is mostly visible when creating and fetching index names, because
"_" is automatically turned into "-", so the index name used in input
might differ from the one received in output, e.g. when listing indexes.

Other:
- bump version to 0.3
- auto normalize "_" to "-" also in table name prefix config
- update docs and configs changing "km_" to "km-"
- clean up some test tables left over after running functional tests
  • Loading branch information
dluc committed Dec 24, 2023
1 parent 82b5e3b commit 99dae8f
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 11 deletions.
2 changes: 1 addition & 1 deletion NUGET.md
Expand Up @@ -79,7 +79,7 @@ DB table for each memory index.

Table names have a configurable **prefix**, used to filter out other tables that
might be present. The prefix is mandatory, cannot be empty, we suggest using
the default `km_` prefix.
the default `km-` prefix.

Overall we recommend not mixing external tables in the same DB used for
Kernel Memory.
Expand Down
2 changes: 1 addition & 1 deletion PostgresMemoryStorage/Db/PostgresSchema.cs
Expand Up @@ -8,7 +8,7 @@ internal static class PostgresSchema
{
public const string PlaceholdersTags = "{{$tags}}";

private static readonly Regex s_validNameRegex = new(@"^[a-zA-Z0-9_]+$");
private static readonly Regex s_validNameRegex = new(@"^[a-zA-Z0-9\-]+$");

public static void ValidateSchemaName(string name)
{
Expand Down
4 changes: 2 additions & 2 deletions PostgresMemoryStorage/PostgresConfig.cs
Expand Up @@ -44,7 +44,7 @@ public class PostgresConfig
/// <summary>
/// Default prefix used for table names
/// </summary>
public const string DefaultTableNamePrefix = "km_";
public const string DefaultTableNamePrefix = "km-";

/// <summary>
/// Connection string required to connect to Postgres
Expand All @@ -60,7 +60,7 @@ public class PostgresConfig
/// Mandatory prefix to add to tables created by KM.
/// This is used to distinguish KM tables from others in the same schema.
/// </summary>
/// <remarks>Default value is set to "km_" but can be override when creating a config.</remarks>
/// <remarks>Default value is set to "km-" but can be override when creating a config.</remarks>
public string TableNamePrefix { get; set; } = DefaultTableNamePrefix;

/// <summary>
Expand Down
20 changes: 17 additions & 3 deletions PostgresMemoryStorage/PostgresMemory.cs
Expand Up @@ -45,6 +45,9 @@ public class PostgresMemory : IMemoryDb, IDisposable
throw new PostgresException("Embedding generator not configured");
}

// Normalize underscore and check for invalid symbols
config.TableNamePrefix = NormalizeTableNamePrefix(config.TableNamePrefix);

this._db = new PostgresDbClient(config, this._log);
}

Expand Down Expand Up @@ -230,8 +233,9 @@ protected virtual void Dispose(bool disposing)

#region private ================================================================================

// Note: "-" is allowed in Postgres, but we normalize it to "_" for consistency with other DBs
private static readonly Regex s_replaceIndexNameCharsRegex = new(@"[\s|\\|/|.|\-|:]");
// Note: "_" is allowed in Postgres, but we normalize it to "-" for consistency with other DBs
private static readonly Regex s_replaceIndexNameCharsRegex = new(@"[\s|\\|/|.|_|:]");
private const string ValidSeparator = "-";

private static string NormalizeIndexName(string index)
{
Expand All @@ -240,13 +244,23 @@ private static string NormalizeIndexName(string index)
index = Constants.DefaultIndex;
}

index = s_replaceIndexNameCharsRegex.Replace(index.Trim().ToLowerInvariant(), "_");
index = s_replaceIndexNameCharsRegex.Replace(index.Trim().ToLowerInvariant(), ValidSeparator);

PostgresSchema.ValidateTableName(index);

return index;
}

private static string NormalizeTableNamePrefix(string? name)
{
if (name == null) { return string.Empty; }

name = s_replaceIndexNameCharsRegex.Replace(name.Trim().ToLowerInvariant(), ValidSeparator);
PostgresSchema.ValidateTableNamePrefix(name);

return name;
}

private (string sql, Dictionary<string, object> unsafeSqlUserValues) PrepareSql(
ICollection<MemoryFilter>? filters = null)
{
Expand Down
4 changes: 3 additions & 1 deletion README.md
Expand Up @@ -103,7 +103,9 @@ DB table for each memory index.

Table names have a configurable **prefix**, used to filter out other tables that
might be present. The prefix is mandatory, cannot be empty, we suggest using
the default `km_` prefix.
the default `km-` prefix. Note that the Postgres connector automatically converts
`_` underscores to `-` dashes to have a consistent behavior with other storage
types supported by Kernel Memory.

Overall we recommend not mixing external tables in the same DB used for
Kernel Memory.
Expand Down
2 changes: 1 addition & 1 deletion nuget-package.props
@@ -1,7 +1,7 @@
<Project>
<PropertyGroup>
<!-- Central version prefix - applies to all nuget packages. -->
<Version>0.2.0</Version>
<Version>0.3.0</Version>

<!-- Default description and tags. Packages can override. -->
<Authors>Microsoft</Authors>
Expand Down
2 changes: 1 addition & 1 deletion tests/FunctionalTests/ConcurrencyTests.cs
Expand Up @@ -93,8 +93,8 @@ public async Task CreateDeleteIndexConcurrencyTest(string type)
public async Task UpsertConcurrencyTest()
{
var concurrency = 20;
var indexName = "upsert_test";
var vectorSize = 4;
var indexName = "upsert_test" + Guid.NewGuid().ToString("D");

using var target = new PostgresMemory(this.PostgresConfiguration, new FakeEmbeddingGenerator());

Expand Down
38 changes: 38 additions & 0 deletions tests/FunctionalTests/IndexCreationTests.cs
Expand Up @@ -30,5 +30,43 @@ public async Task ItNormalizesIndexNames()
// Act - Assert no exception occurs
await memory.ImportTextAsync("something", index: indexNameWithDashes);
await memory.ImportTextAsync("something", index: indexNameWithUnderscores);

// Cleanup
await memory.DeleteIndexAsync(indexNameWithDashes);
await memory.DeleteIndexAsync(indexNameWithUnderscores);
}

[Theory]
[InlineData("postgres")]
[InlineData("simple_volatile")]
[InlineData("az_ai_search")]
public async Task ItListsIndexes(string memoryType)
{
// Arrange
string indexNameWithDashes = "name-with-dashes";
string indexNameWithUnderscores = "name_with_underscore";
string indexNameWithUnderscoresNormalized = "name-with-underscore";

var memory = new KernelMemoryBuilder()
.WithPostgres(this.PostgresConfiguration)
.WithSimpleFileStorage(new SimpleFileStorageConfig { StorageType = FileSystemTypes.Volatile, Directory = "_files" })
// .WithOpenAI(this.OpenAIConfiguration)
.WithAzureOpenAITextGeneration(this.AzureOpenAITextConfiguration)
.WithAzureOpenAITextEmbeddingGeneration(this.AzureOpenAIEmbeddingConfiguration)
.Build();

// Act
await memory.ImportTextAsync("something", index: indexNameWithDashes);
await memory.ImportTextAsync("something", index: indexNameWithUnderscores);
var list = (await memory.ListIndexesAsync()).ToList();

// Clean up before exceptions can occur
await memory.DeleteIndexAsync(indexNameWithDashes);
await memory.DeleteIndexAsync(indexNameWithUnderscores);

// Assert
Assert.True(list.Any(x => x.Name == indexNameWithDashes));
Assert.False(list.Any(x => x.Name == indexNameWithUnderscores));
Assert.True(list.Any(x => x.Name == indexNameWithUnderscoresNormalized));
}
}
2 changes: 1 addition & 1 deletion tests/TestApplication/appsettings.json
Expand Up @@ -10,7 +10,7 @@
"ConnectionString": "Host=localhost;Port=5432;Username=postgres;Password=",
// Mandatory prefix to add to the name of table managed by KM,
// e.g. to exclude other tables in the same schema.
"TableNamePrefix": "km_",
"TableNamePrefix": "km-",
},
"AzureOpenAIText": {
// "ApiKey" or "AzureIdentity"
Expand Down

0 comments on commit 99dae8f

Please sign in to comment.