Skip to content

Commit

Permalink
Merged PR 728101: BlobLifetimeManager supports multiple universes/nam…
Browse files Browse the repository at this point in the history
…espaces, reading from the change feed, and checkpointing

As preparation for checkpointing, which requires that garbage collection happens for the whole storage account at once, it makes sense to first support garbage collecting multiple namespaces/universes in a single garbage collection run. The idea is that instead of accessing the DB directly, there is an IAccessor, which limits the view of the database to only a given namespace. In practice, what this means is that each accessor will have a unique set of RocksDb column families that it accesses. Other than that, the logic to create/manage the database stays the same.

Another change is that we can now update our view of the world in subsequent runs via reading Azure Storage's change feed. This is extremely important since otherwise, nothing works: on the first run, since we touch everything, nothing is evictable; and on the second run, such a long time has passed that without updating our view of things, we might be deleting blobs with new references.

Finally, after both these changes, I also implemented checkpointing. The checkpoint and all its data will live in different containers in the 0th shard of the cache, as different-sized caches _are different caches_, regardless of whether they share accounts. Ideally, this situation would never arise, since we're the ones resharding, but even today we already have that problem since some of our tests are not using all 100 accounts we've provisioned.
  • Loading branch information
JuanCarlosGI committed Aug 15, 2023
1 parent b64183f commit 911efd8
Show file tree
Hide file tree
Showing 37 changed files with 2,184 additions and 625 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ export function getAzureBlobStorageSdkPackagesWithoutNetStandard() : (Managed.Ma
importFrom("Azure.Storage.Common").pkg,
importFrom("Azure.Core").pkg,
importFrom("Azure.Storage.Blobs.Batch").pkg,
importFrom("Azure.Storage.Blobs.ChangeFeed").pkg,
];
}

Expand Down
30 changes: 30 additions & 0 deletions Public/Src/Cache/ContentStore/Distributed/Blob/AbsoluteBlobPath.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using System;
using System.Text.RegularExpressions;

namespace BuildXL.Cache.ContentStore.Distributed.Blob
{
/// <summary>
/// Absolute location of a blob, as obtained from the Azure Blob change feed: the storage account, the container
/// and the path of the blob. It uniquely identifies a blob within the cache.
/// </summary>
public readonly record struct AbsoluteBlobPath(BlobCacheStorageAccountName Account, BlobCacheContainerName Container, BlobPath Path)
{
    /// <summary>
    /// Captures the container name and the blob path out of the subject of a change feed event.
    /// </summary>
    private static readonly Regex BlobChangeFeedEventSubjectRegex = new(
        @"/blobServices/default/containers/(?<container>[^/]+)/blobs/(?<path>.+)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Builds an <see cref="AbsoluteBlobPath"/> from the subject of a change feed event.
    /// </summary>
    /// <param name="account">Storage account the event belongs to; copied into the result as-is.</param>
    /// <param name="subject">Event subject containing the <c>/blobServices/default/containers/.../blobs/...</c> segment.</param>
    /// <returns>The account together with the container and (non-relative) blob path found in the subject.</returns>
    /// <exception cref="ArgumentException">The subject does not match the expected pattern.</exception>
    public static AbsoluteBlobPath ParseFromChangeEventSubject(BlobCacheStorageAccountName account, string subject)
    {
        var parsed = BlobChangeFeedEventSubjectRegex.Match(subject);
        if (parsed.Success)
        {
            var groups = parsed.Groups;
            return new AbsoluteBlobPath(
                Account: account,
                Container: BlobCacheContainerName.Parse(groups["container"].Value),
                Path: new BlobPath(groups["path"].Value, relative: false));
        }

        throw new ArgumentException($"Failed to match {nameof(BlobChangeFeedEventSubjectRegex)} to {subject}", nameof(subject));
    }
}
}
14 changes: 14 additions & 0 deletions Public/Src/Cache/ContentStore/Distributed/Blob/BlobNamespaceId.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace BuildXL.Cache.ContentStore.Distributed.Blob
{
/// <summary>
/// Identifies one namespace of a blob cache through its universe and namespace names. Every namespace is
/// garbage-collected as a separate cache from the other namespaces.
/// </summary>
public readonly record struct BlobNamespaceId(string Universe, string Namespace)
{
    /// <summary>
    /// Renders the identifier as the universe and the namespace joined by a dash.
    /// </summary>
    public override string ToString()
    {
        return string.Concat(Universe, "-", Namespace);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using System;
using System.Collections.Generic;
using System.Linq;
using BuildXL.Cache.ContentStore.Interfaces.Secrets;

#nullable enable

namespace BuildXL.Cache.ContentStore.Distributed.Blob
{
/// <summary>
/// A <see cref="StaticBlobCacheSecretsProvider"/> whose credentials are read, at construction time, from an
/// environment variable that holds a comma-separated list of storage connection strings.
/// </summary>
public class EnvironmentVariableCacheSecretsProvider : StaticBlobCacheSecretsProvider
{
    /// <summary>
    /// Creates the provider from the connection strings stored in <paramref name="environmentVariableName"/>.
    /// </summary>
    /// <param name="environmentVariableName">Name of the environment variable holding the connection strings.</param>
    /// <exception cref="ArgumentException">The variable is unset, blank, or contains no connection string.</exception>
    public EnvironmentVariableCacheSecretsProvider(string environmentVariableName)
        : base(ExtractCredsFromEnvironmentVariable(environmentVariableName))
    {
    }

    /// <summary>
    /// Reads the given environment variable, splits its value on commas and builds one set of credentials per
    /// connection string, keyed by the account name reported by the credentials.
    /// </summary>
    /// <param name="environmentVariableName">Name of the environment variable holding the connection strings.</param>
    /// <returns>The credentials found in the variable, indexed by storage account.</returns>
    /// <exception cref="ArgumentException">
    /// The variable is unset, blank, contains no connection string, or two connection strings resolve to the
    /// same account (raised by <c>ToDictionary</c> on the duplicate key).
    /// </exception>
    public static Dictionary<BlobCacheStorageAccountName, AzureStorageCredentials> ExtractCredsFromEnvironmentVariable(string environmentVariableName)
    {
        var connectionStringsString = Environment.GetEnvironmentVariable(environmentVariableName);
        if (string.IsNullOrWhiteSpace(connectionStringsString))
        {
            throw new ArgumentException($"Connection strings for the L3 cache must be provided via the {environmentVariableName} environment variable " +
                $"in the format of comma-separated strings.");
        }

        // Tolerate whitespace around the separators and stray commas (e.g. a trailing one): without this, such
        // entries would be handed over as (invalid) connection strings and fail much later with an unrelated error.
        var connectionStrings = connectionStringsString
            .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
            .Select(connString => connString.Trim())
            .Where(connString => connString.Length > 0)
            .ToArray();

        if (connectionStrings.Length == 0)
        {
            throw new ArgumentException($"The {environmentVariableName} environment variable does not contain any connection string. " +
                $"Connection strings must be provided in the format of comma-separated strings.");
        }

        var creds = connectionStrings.Select(connString => new AzureStorageCredentials(new PlainTextSecret(connString))).ToArray();
        return creds.ToDictionary(
            cred => BlobCacheStorageAccountName.Parse(cred.GetAccountName()),
            cred => cred);
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Threading.Tasks;
using BuildXL.Cache.ContentStore.Interfaces.Secrets;
using BuildXL.Cache.ContentStore.Tracing.Internal;
using BuildXL.Utilities.Collections;

#nullable enable

Expand All @@ -19,6 +21,5 @@ public interface IBlobCacheSecretsProvider
/// </summary>
public Task<AzureStorageCredentials> RetrieveBlobCredentialsAsync(
OperationContext context,
BlobCacheStorageAccountName account,
BlobCacheContainerName container);
BlobCacheStorageAccountName account);
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ internal static BlobCacheContainerName[] GenerateContainerNames(string universe,
}).ToArray();
}

internal static (string Metadata, string Content) GenerateMatrix(ShardingScheme scheme)
public static (string Metadata, string Content) GenerateMatrix(ShardingScheme scheme)
{
// The matrix here ensures that metadata does not overlap across sharding schemes. Basically, whenever we add
// or remove shards (or change the sharding algorithm), we will get a new salt. This salt will force us to use
Expand Down Expand Up @@ -206,7 +206,7 @@ private Task<Result<BlobContainerClient>> CreateClientAsync(OperationContext con
Tracer,
async context =>
{
var credentials = await _configuration.SecretsProvider.RetrieveBlobCredentialsAsync(context, account, container);
var credentials = await _configuration.SecretsProvider.RetrieveBlobCredentialsAsync(context, account);
BlobClientOptions blobClientOptions = new(BlobClientOptions.ServiceVersion.V2021_02_12)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public enum ShardingAlgorithm
/// <summary>
/// Specifies a sharding scheme.
/// </summary>
public record ShardingScheme(ShardingAlgorithm Scheme, List<BlobCacheStorageAccountName> Accounts)
public record ShardingScheme(ShardingAlgorithm Scheme, IReadOnlyList<BlobCacheStorageAccountName> Accounts)
{
public IShardingScheme<int, BlobCacheStorageAccountName> Create()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#nullable enable
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using BuildXL.Cache.ContentStore.Interfaces.Secrets;
using BuildXL.Cache.ContentStore.Tracing;
Expand All @@ -17,23 +18,28 @@ public class StaticBlobCacheSecretsProvider : IBlobCacheSecretsProvider
{
protected static Tracer Tracer { get; } = new(nameof(StaticBlobCacheSecretsProvider));

public IReadOnlyList<BlobCacheStorageAccountName> ConfiguredAccounts => _accounts;

private readonly AzureStorageCredentials? _fallback;
private readonly IReadOnlyDictionary<BlobCacheStorageAccountName, AzureStorageCredentials> _credentials = new Dictionary<BlobCacheStorageAccountName, AzureStorageCredentials>();
private readonly IReadOnlyList<BlobCacheStorageAccountName> _accounts;

public StaticBlobCacheSecretsProvider(IReadOnlyDictionary<BlobCacheStorageAccountName, AzureStorageCredentials> credentials, AzureStorageCredentials? fallback = null)
{
_credentials = credentials;
_accounts = _credentials.Keys.ToArray();
_fallback = fallback;
}

public StaticBlobCacheSecretsProvider(AzureStorageCredentials fallback)
{
_fallback = fallback;
_accounts = _credentials.Keys.ToArray();
}

public Task<AzureStorageCredentials> RetrieveBlobCredentialsAsync(OperationContext context, BlobCacheStorageAccountName account, BlobCacheContainerName container)
public Task<AzureStorageCredentials> RetrieveBlobCredentialsAsync(OperationContext context, BlobCacheStorageAccountName account)
{
Tracer.Info(context, $"Fetching credentials. Account=[{account}] Container=[{container}]");
Tracer.Info(context, $"Fetching credentials. Account=[{account}]");

if (_credentials.TryGetValue(account, out var credentials))
{
Expand All @@ -45,6 +51,6 @@ public Task<AzureStorageCredentials> RetrieveBlobCredentialsAsync(OperationConte
return Task.FromResult(_fallback);
}

throw new KeyNotFoundException($"Credentials are unavailable for storage account {account} and container {container}");
throw new KeyNotFoundException($"Credentials are unavailable for storage account {account}");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public sealed class CheckpointManager : StartupShutdownComponentBase

/// <inheritdoc />
public CheckpointManager(
ContentLocationDatabase database,
ICheckpointable database,
ICheckpointRegistry checkpointRegistry,
CentralStorage storage,
CheckpointManagerConfiguration configuration,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Text.RegularExpressions;
using Azure;
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.ChangeFeed;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Auth;
using Microsoft.WindowsAzure.Storage.Blob;
Expand Down Expand Up @@ -130,6 +131,28 @@ public BlobServiceClient CreateBlobServiceClient(BlobClientOptions? blobClientOp
};
}

/// <summary>
/// Creates a <see cref="BlobChangeFeedClient"/> authenticated with the secret held by this instance.
/// </summary>
/// <param name="blobClientOptions">Blob client options; when null, options targeting service version V2021_02_12 are used.</param>
/// <param name="changeFeedClientOptions">Change feed client options; when null, a default-constructed instance is used.</param>
/// <exception cref="NotImplementedException">The secret is neither a plain-text secret nor an updating SAS token.</exception>
public BlobChangeFeedClient CreateBlobChangeFeedClient(BlobClientOptions? blobClientOptions = null, BlobChangeFeedClientOptions? changeFeedClientOptions = null)
{
    // The default service version is pinned because tests run against the Azurite emulator, which doesn't
    // currently support any higher version. The emulator won't be upgraded because its build process is
    // unusual and no prebuilt binaries are provided.
    blobClientOptions ??= new BlobClientOptions(BlobClientOptions.ServiceVersion.V2021_02_12);
    changeFeedClientOptions ??= new BlobChangeFeedClientOptions();

    switch (_secret)
    {
        case PlainTextSecret plainText:
        {
            return new BlobChangeFeedClient(connectionString: plainText.Secret, blobClientOptions, changeFeedClientOptions);
        }

        case UpdatingSasToken sasToken:
        {
            // SAS tokens carry no endpoint, so the service URI is derived from the storage account name.
            var accountUri = new Uri($"https://{sasToken.Token.StorageAccount}.blob.core.windows.net/");
            var sasCredential = CreateV12StorageCredentialsFromSasToken(sasToken);
            return new BlobChangeFeedClient(
                serviceUri: accountUri,
                credential: sasCredential,
                blobClientOptions,
                changeFeedClientOptions);
        }

        default:
            throw new NotImplementedException($"Unknown secret type `{_secret.GetType()}`");
    }
}

/// <nodoc />
public BlobContainerClient CreateContainerClient(string containerName, BlobClientOptions? blobClientOptions = null)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

namespace App {
@@public
export const exe = !BuildXLSdk.Flags.isMicrosoftInternal ? undefined : BuildXLSdk.executable({
export const exe = BuildXLSdk.executable({
assemblyName: "BuildXL.Cache.BlobLifetimeManager",
sources: globR(d`.`,"*.cs"),
references: [
Expand All @@ -12,10 +12,11 @@ namespace App {

importFrom("BuildXL.Cache.ContentStore").Distributed.dll,
importFrom("BuildXL.Cache.ContentStore").Interfaces.dll,
importFrom("BuildXL.Cache.ContentStore").Hashing.dll,
importFrom("BuildXL.Cache.ContentStore").Library.dll,
importFrom("BuildXL.Cache.ContentStore").UtilitiesCore.dll,
importFrom("BuildXL.Cache.MemoizationStore").Interfaces.dll,

importFrom("BuildXL.Utilities").dll,

...importFrom("BuildXL.Cache.ContentStore").getAzureBlobStorageSdkPackages(true),
],
tools: {
csc: {
Expand Down
Loading

0 comments on commit 911efd8

Please sign in to comment.