Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
namespace Microsoft.ComponentDetection.Common;

using System;
using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Text.RegularExpressions;
using Microsoft.ComponentDetection.Contracts;
Expand All @@ -40,20 +42,39 @@ public static class DockerReferenceUtility
private const string LEGACYDEFAULTDOMAIN = "index.docker.io";
private const string OFFICIALREPOSITORYNAME = "library";

// Delimiters that only appear in an image reference as part of an unresolved templating
// token: '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR},
// {{ .Values.tag }}). These are recognized templating syntaxes expected in un-rendered
// manifests, so TryParseImageReference skips them (logging a warning) rather than treating
// them as invalid.
// '#' and '!' are deliberately NOT listed here: a token wrapped in a matching pair of them
// is recognized by DelimiterWrappedTokenRegex, while a stray, unpaired '#' or '!' is reported
// through GetInvalidReferenceCharacters.
private static readonly char[] TemplateDelimiters = ['$', '{', '}'];

// Matches token-replacement placeholders that wrap an identifier in double underscores,
// e.g. __IMAGE_TAG__ or __MCR_ENDPOINT__. Without this they parse as an uppercase repository
// name and surface as a noisy parse failure instead of being skipped as a templated value.
private static readonly Regex DoubleUnderscoreTokenRegex = new(@"__\w+__");

// Matches token-replacement placeholders wrapped in a matching '#' or '!', e.g. #imageTag#,
// #cs_containerRegistryLoginServerUrl#, or !imageTag!. A string surrounded by the same '#' or
// '!' delimiter is almost always an unsubstituted template variable (Azure DevOps token
// replacement and similar), so it is skipped (and may be logged as a warning) instead of
// surfacing as a misleading docker-reference parse failure. The backreference requires the closing delimiter to match
// the opening one, so a mismatched stray '#' or '!' is left to GetInvalidReferenceCharacters.
Comment thread
jpinz marked this conversation as resolved.
private static readonly Regex DelimiterWrappedTokenRegex = new(@"([#!])[^#!]+\1");

// Every character permitted anywhere in a docker reference per the grammar at the top of this
// file: alphanumerics, the separators '.', '_' and '-', the path separator '/', the tag/port
// and digest separators ':' and '@', and the digest-algorithm separator '+'. Anything else
// (e.g. '#', '!') comes from unsubstituted template tokens and is reported as invalid.
Comment thread
grvillic marked this conversation as resolved.
private static readonly SearchValues<char> ValidReferenceChars = SearchValues.Create(
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-/:@+");

/// <summary>
/// Returns true if the reference contains unresolved variable or templating placeholders,
/// e.g. <c>${VAR}</c>, <c>{{ .Values.tag }}</c>, <c>__IMAGE_TAG__</c>, <c>#imageTag#</c>, or
/// <c>!imageTag!</c>.
/// Such references are not real, resolvable images, so they should be skipped before calling
/// <see cref="ParseFamiliarName"/> or <see cref="ParseQualifiedName"/> and treated as
/// unresolved values rather than reported as parse failures.
Expand All @@ -62,11 +83,14 @@ public static class DockerReferenceUtility
/// <returns><c>true</c> if the reference contains variable placeholder characters; otherwise <c>false</c>.</returns>
public static bool HasUnresolvedVariables(string reference) =>

    // Any '$', '{' or '}' marks a shell / Helm / Go-template placeholder.
    reference.IndexOfAny(TemplateDelimiters) >= 0 ||

    // Double-underscore tokens such as __IMAGE_TAG__.
    DoubleUnderscoreTokenRegex.IsMatch(reference) ||

    // Tokens wrapped in a matching pair of '#' or '!', such as #imageTag# or !imageTag!.
    DelimiterWrappedTokenRegex.IsMatch(reference);

/// <summary>
/// Attempts to parse an image reference string into a <see cref="DockerReference"/>.
/// Returns <c>null</c> if the reference contains unresolved variables, contains characters that
/// are not valid in a docker reference, or otherwise cannot be parsed. A warning is logged in
/// every skip/failure case so that references which are not scanned remain visible in logs.
/// </summary>
/// <param name="imageReference">The image reference string to parse.</param>
/// <param name="logger">Optional logger for recording parse failures.</param>
Expand All @@ -75,6 +99,19 @@ public static bool HasUnresolvedVariables(string reference) =>
{
if (HasUnresolvedVariables(imageReference))
{
logger?.LogWarning(
"Skipping image reference '{ImageReference}' because it contains one or more unresolved template tokens or variable placeholders.",
imageReference);
return null;
}

var invalidCharacters = GetInvalidReferenceCharacters(imageReference);
if (invalidCharacters.Length > 0)
{
logger?.LogWarning(
"Skipping image reference '{ImageReference}' because it contains character(s) that are not valid in a docker reference: {InvalidCharacters}",
imageReference,
invalidCharacters);
return null;
}

Expand All @@ -92,7 +129,7 @@ public static bool HasUnresolvedVariables(string reference) =>
/// <summary>
/// Parses an image reference and registers it with the recorder if valid.
/// Skips references with unresolved variables or that cannot be parsed,
/// logging a warning in each skipped case so that remaining entries continue to be processed.
/// </summary>
/// <param name="imageReference">The image reference string to parse.</param>
/// <param name="recorder">The component recorder to register the image with.</param>
Expand Down Expand Up @@ -244,6 +281,44 @@ public static DockerReference ParseAll(string name)
return ParseFamiliarName(name);
}

/// <summary>
/// Collects the characters of <paramref name="reference"/> that are not members of
/// <c>ValidReferenceChars</c> and formats them for display. Each offending character is
/// listed once, wrapped in single quotes, in ascending character order, separated by
/// <c>", "</c>. Stray characters such as <c>#</c> or <c>!</c> are surfaced this way so the
/// caller can name them explicitly instead of relying on a generic parse error.
/// </summary>
/// <param name="reference">The image reference string to inspect.</param>
/// <returns>
/// The quoted, comma-separated invalid characters, or <see cref="string.Empty"/> when the
/// reference contains only valid characters.
/// </returns>
private static string GetInvalidReferenceCharacters(string reference)
{
    var remaining = reference.AsSpan();

    // Fast path: an all-valid reference has no offending index, so nothing is collected.
    var offset = remaining.IndexOfAnyExcept(ValidReferenceChars);
    if (offset < 0)
    {
        return string.Empty;
    }

    // Hop from one offending character to the next; the sorted set both de-duplicates
    // the characters and fixes their output order.
    var offenders = new SortedSet<char>();
    while (offset >= 0)
    {
        offenders.Add(remaining[offset]);
        remaining = remaining[(offset + 1)..];
        offset = remaining.IndexOfAnyExcept(ValidReferenceChars);
    }

    var quoted = new string[offenders.Count];
    var next = 0;
    foreach (var offender in offenders)
    {
        quoted[next++] = $"'{offender}'";
    }

    return string.Join(", ", quoted);
}

private static DockerReference CreateDockerReference(Reference options)
{
return DockerReference.CreateDockerReference(options.Repository, options.Domain, options.Digest, options.Tag);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,19 @@ public void HasUnresolvedVariables_ReturnsTrueForDoubleUnderscoreTokens()
[TestMethod]
public void HasUnresolvedVariables_ReturnsTrueForHashDelimitedTokens()
{
// A token wrapped in a matching pair of '#' (e.g. #imageTag#) must be classified as an
// unresolved template variable, not as a reference containing stray invalid characters.
DockerReferenceUtility.HasUnresolvedVariables("#cs_containerRegistryLoginServerUrl#/coreservicesaksservice_#cs_aks_workloadName#_#cs_aks_serviceTrackIdentifier#/#serviceName#:#imageTag#").Should().BeTrue();
}

[TestMethod]
public void HasUnresolvedVariables_ReturnsTrueForExclamationDelimitedTokens()
{
// A token wrapped in a matching pair of '!' (e.g. !imageTag!) must be classified as an
// unresolved template variable, not as a reference containing stray invalid characters.
DockerReferenceUtility.HasUnresolvedVariables("!cs_containerRegistryLoginServerUrl!/coreservicesaksservice_!cs_aks_workloadName!/!serviceName!:!imageTag!").Should().BeTrue();
}

[TestMethod]
public void HasUnresolvedVariables_ReturnsFalseForPlainReference()
{
Expand Down Expand Up @@ -327,7 +337,13 @@ public void TryParseImageReference_ReturnsNullForHashDelimitedTokens()
}

[TestMethod]
public void TryParseImageReference_ReturnsNullForExclamationDelimitedTokens()
{
    // A reference built from !token! placeholders must not produce a parsed reference.
    DockerReferenceUtility.TryParseImageReference("!cs_containerRegistryLoginServerUrl!/svc/!serviceName!:!imageTag!").Should().BeNull();
}

[TestMethod]
public void TryParseImageReference_LogsWarningForTemplatedReference()
{
var logger = new Mock<ILogger>();

Expand All @@ -336,12 +352,94 @@ public void TryParseImageReference_DoesNotLogWarningForTemplatedReference()
result.Should().BeNull();
logger.Verify(
l => l.Log(
It.IsAny<LogLevel>(),
LogLevel.Warning,
It.IsAny<EventId>(),
It.IsAny<It.IsAnyType>(),
It.IsAny<Exception>(),
It.IsAny<Func<It.IsAnyType, Exception, string>>()),
Times.Once);
}

[TestMethod]
public void TryParseImageReference_LogsWarningForHashDelimitedTokens()
{
    // Arrange: the registry, repository name and tag are unsubstituted #token# placeholders.
    var templatedReference = "#cs_containerRegistryLoginServerUrl#/svc/#serviceName#:#imageTag#";
    var loggerMock = new Mock<ILogger>();

    // Act
    var parsed = DockerReferenceUtility.TryParseImageReference(templatedReference, loggerMock.Object);

    // Assert: nothing is parsed and exactly one warning-level entry is written.
    parsed.Should().BeNull();
    loggerMock.Verify(
        log => log.Log(
            LogLevel.Warning,
            It.IsAny<EventId>(),
            It.IsAny<It.IsAnyType>(),
            It.IsAny<Exception>(),
            It.IsAny<Func<It.IsAnyType, Exception, string>>()),
        Times.Once);
}

[TestMethod]
public void TryParseImageReference_LogsWarningForExclamationDelimitedTokens()
{
    var logger = new Mock<ILogger>();

    var result = DockerReferenceUtility.TryParseImageReference(
        "!cs_containerRegistryLoginServerUrl!/svc/!serviceName!:!imageTag!",
        logger.Object);

    // The templated reference is skipped, and the skip must be visible as exactly one
    // warning-level log entry (matching the '#'-delimited counterpart of this test).
    result.Should().BeNull();
    logger.Verify(
        l => l.Log(
            LogLevel.Warning,
            It.IsAny<EventId>(),
            It.IsAny<It.IsAnyType>(),
            It.IsAny<Exception>(),
            It.IsAny<Func<It.IsAnyType, Exception, string>>()),
        Times.Once);
}

[TestMethod]
public void TryParseImageReference_ReturnsNullForExclamationCharacter()
{
    // A single stray '!' in the repository name must prevent the reference from parsing.
    var referenceWithStrayBang = "docker.io/library/nginx!:latest";

    var parsed = DockerReferenceUtility.TryParseImageReference(referenceWithStrayBang);

    parsed.Should().BeNull();
}

[TestMethod]
public void TryParseImageReference_LogsWarningForExclamationCharacter()
{
    // Arrange: a reference with one stray '!' in the repository name.
    var referenceWithStrayBang = "docker.io/library/nginx!:latest";
    var loggerMock = new Mock<ILogger>();

    // Act
    var parsed = DockerReferenceUtility.TryParseImageReference(referenceWithStrayBang, loggerMock.Object);

    // Assert: the reference is rejected and exactly one warning-level entry is written.
    parsed.Should().BeNull();
    loggerMock.Verify(
        log => log.Log(
            LogLevel.Warning,
            It.IsAny<EventId>(),
            It.IsAny<It.IsAnyType>(),
            It.IsAny<Exception>(),
            It.IsAny<Func<It.IsAnyType, Exception, string>>()),
        Times.Once);
}

[TestMethod]
public void TryParseImageReference_LogsWarningForInvalidCharacterInTag()
{
    // Arrange: an otherwise ordinary reference whose tag contains a single '#'.
    var referenceWithHashInTag = "mcr.microsoft.com/dotnet/sdk:8.0#preview";
    var loggerMock = new Mock<ILogger>();

    // Act
    var parsed = DockerReferenceUtility.TryParseImageReference(referenceWithHashInTag, loggerMock.Object);

    // Assert: the reference is rejected and exactly one warning-level entry is written.
    parsed.Should().BeNull();
    loggerMock.Verify(
        log => log.Log(
            LogLevel.Warning,
            It.IsAny<EventId>(),
            It.IsAny<It.IsAnyType>(),
            It.IsAny<Exception>(),
            It.IsAny<Func<It.IsAnyType, Exception, string>>()),
        Times.Once);
}

[TestMethod]
Expand Down
Loading