diff --git a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs index 83eba3413..e3558cb6c 100644 --- a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs +++ b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs @@ -27,6 +27,8 @@ namespace Microsoft.ComponentDetection.Common; using System; +using System.Buffers; +using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Text.RegularExpressions; using Microsoft.ComponentDetection.Contracts; @@ -40,20 +42,39 @@ public static class DockerReferenceUtility private const string LEGACYDEFAULTDOMAIN = "index.docker.io"; private const string OFFICIALREPOSITORYNAME = "library"; - // Characters that only appear in an image reference as part of an unresolved templating - // token. '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR}, - // {{ .Values.tag }}); '#' covers Azure DevOps and other token-replacement placeholders - // (e.g. #imageTag#) and is never valid in a resolved docker reference. - private static readonly char[] TemplateDelimiters = ['$', '{', '}', '#']; + // Delimiters that only appear in an image reference as part of an unresolved templating + // token: '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR}, + // {{ .Values.tag }}). These are recognized templating syntaxes expected in un-rendered manifests, + // so TryParseImageReference skips them (logging a warning) rather than treating them as invalid. + // A token wrapped in matching '#' or '!' (handled by DelimiterWrappedTokenRegex) is treated the same way. + // When no templating token is present, stray invalid characters (e.g. a single '#' or '!') are reported + // via GetInvalidReferenceCharacters. + private static readonly char[] TemplateDelimiters = ['$', '{', '}']; // Matches token-replacement placeholders that wrap an identifier in double underscores, // e.g. __IMAGE_TAG__ or __MCR_ENDPOINT__. Without this they parse as an uppercase repository // name and surface as a noisy parse failure instead of being skipped as a templated value. private static readonly Regex DoubleUnderscoreTokenRegex = new(@"__\w+__"); + // Matches token-replacement placeholders wrapped in a matching '#' or '!', e.g. #imageTag#, + // #cs_containerRegistryLoginServerUrl#, or !imageTag!. A string surrounded by the same '#' or + // '!' delimiter is almost always an unsubstituted template variable (Azure DevOps token + // replacement and similar), so it is skipped (and may be logged as a warning) instead of + // surfacing as a misleading docker-reference parse failure. The backreference requires the closing delimiter to match + // the opening one, so a mismatched stray '#' or '!' is left to GetInvalidReferenceCharacters. + private static readonly Regex DelimiterWrappedTokenRegex = new(@"([#!])[^#!]+\1"); + + // Every character permitted anywhere in a docker reference per the grammar at the top of this + // file: alphanumerics, the separators '.', '_' and '-', the path separator '/', the tag/port + // and digest separators ':' and '@', and the digest-algorithm separator '+'. Anything else + // (e.g. '#', '!') comes from unsubstituted template tokens and is reported as invalid. + private static readonly SearchValues ValidReferenceChars = SearchValues.Create( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-/:@+"); + /// /// Returns true if the reference contains unresolved variable or templating placeholders, - /// e.g. ${VAR}, {{ .Values.tag }}, #imageTag#, or __IMAGE_TAG__. + /// e.g. ${VAR}, {{ .Values.tag }}, __IMAGE_TAG__, #imageTag#, or + /// !imageTag!. /// Such references are not real, resolvable images, so they should be skipped before calling /// or and treated as /// unresolved values rather than reported as parse failures. @@ -62,11 +83,14 @@ public static class DockerReferenceUtility /// true if the reference contains variable placeholder characters; otherwise false. public static bool HasUnresolvedVariables(string reference) => reference.IndexOfAny(TemplateDelimiters) >= 0 || - DoubleUnderscoreTokenRegex.IsMatch(reference); + DoubleUnderscoreTokenRegex.IsMatch(reference) || + DelimiterWrappedTokenRegex.IsMatch(reference); /// /// Attempts to parse an image reference string into a . - /// Returns null if the reference contains unresolved variables or cannot be parsed. + /// Returns null if the reference contains unresolved variables, contains characters that + /// are not valid in a docker reference, or otherwise cannot be parsed. A warning is logged in + /// every skip/failure case so that references which are not scanned remain visible in logs. /// /// The image reference string to parse. /// Optional logger for recording parse failures. @@ -75,6 +99,19 @@ public static bool HasUnresolvedVariables(string reference) => { if (HasUnresolvedVariables(imageReference)) { + logger?.LogWarning( + "Skipping image reference '{ImageReference}' because it contains one or more unresolved template tokens or variable placeholders.", + imageReference); + return null; + } + + var invalidCharacters = GetInvalidReferenceCharacters(imageReference); + if (invalidCharacters.Length > 0) + { + logger?.LogWarning( + "Skipping image reference '{ImageReference}' because it contains character(s) that are not valid in a docker reference: {InvalidCharacters}", + imageReference, + invalidCharacters); return null; } @@ -92,7 +129,7 @@ public static bool HasUnresolvedVariables(string reference) => /// /// Parses an image reference and registers it with the recorder if valid. /// Skips references with unresolved variables or that cannot be parsed, - /// logging a warning for parse failures so that remaining entries continue to be processed. + /// logging a warning in each skipped case so that remaining entries continue to be processed. /// /// The image reference string to parse. /// The component recorder to register the image with. @@ -244,6 +281,44 @@ public static DockerReference ParseAll(string name) return ParseFamiliarName(name); } + /// + /// Returns the distinct characters in that are not valid in any + /// part of a docker reference (domain, repository, tag, or digest) as a comma-separated string, + /// or an empty string when every character is valid. Characters such as # and ! + /// commonly appear in unsubstituted template tokens and otherwise surface as misleading + /// "must be lowercase" or "invalid reference format" parse errors. + /// + /// The image reference string to inspect. + /// A comma-separated list of invalid characters, or an empty string if there are none. + private static string GetInvalidReferenceCharacters(string reference) + { + // Vectorized happy-path check: the overwhelmingly common case is an all-valid reference, + // for which this returns without allocating. Only gather the offending characters when + // at least one is present. + var span = reference.AsSpan(); + if (!span.ContainsAnyExcept(ValidReferenceChars)) + { + return string.Empty; + } + + SortedSet invalid = []; + foreach (var c in span) + { + if (!ValidReferenceChars.Contains(c)) + { + invalid.Add(c); + } + } + + var invalidStrings = new List(invalid.Count); + foreach (var c in invalid) + { + invalidStrings.Add($"'{c}'"); + } + + return string.Join(", ", invalidStrings); + } + private static DockerReference CreateDockerReference(Reference options) { return DockerReference.CreateDockerReference(options.Repository, options.Domain, options.Digest, options.Tag); diff --git a/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs b/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs index 902e80b9d..f1bf880de 100644 --- a/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs +++ b/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs @@ -293,9 +293,19 @@ public void HasUnresolvedVariables_ReturnsTrueForDoubleUnderscoreTokens() [TestMethod] public void HasUnresolvedVariables_ReturnsTrueForHashDelimitedTokens() { + // A token wrapped in matching '#' (e.g. #imageTag#) is treated as an unresolved template + // variable and skipped silently rather than reported as an invalid character. DockerReferenceUtility.HasUnresolvedVariables("#cs_containerRegistryLoginServerUrl#/coreservicesaksservice_#cs_aks_workloadName#_#cs_aks_serviceTrackIdentifier#/#serviceName#:#imageTag#").Should().BeTrue(); } + [TestMethod] + public void HasUnresolvedVariables_ReturnsTrueForExclamationDelimitedTokens() + { + // A token wrapped in matching '!' (e.g. !imageTag!) is treated as an unresolved template + // variable and skipped silently rather than reported as an invalid character. + DockerReferenceUtility.HasUnresolvedVariables("!cs_containerRegistryLoginServerUrl!/coreservicesaksservice_!cs_aks_workloadName!/!serviceName!:!imageTag!").Should().BeTrue(); + } + [TestMethod] public void HasUnresolvedVariables_ReturnsFalseForPlainReference() { @@ -327,7 +337,13 @@ public void TryParseImageReference_ReturnsNullForHashDelimitedTokens() } [TestMethod] - public void TryParseImageReference_DoesNotLogWarningForTemplatedReference() + public void TryParseImageReference_ReturnsNullForExclamationDelimitedTokens() + { + DockerReferenceUtility.TryParseImageReference("!cs_containerRegistryLoginServerUrl!/svc/!serviceName!:!imageTag!").Should().BeNull(); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForTemplatedReference() { var logger = new Mock(); @@ -336,12 +352,94 @@ public void TryParseImageReference_DoesNotLogWarningForTemplatedReference() result.Should().BeNull(); logger.Verify( l => l.Log( - It.IsAny(), + LogLevel.Warning, + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny>()), + Times.Once); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForHashDelimitedTokens() + { + var logger = new Mock(); + + var result = DockerReferenceUtility.TryParseImageReference( + "#cs_containerRegistryLoginServerUrl#/svc/#serviceName#:#imageTag#", + logger.Object); + + result.Should().BeNull(); + logger.Verify( + l => l.Log( + LogLevel.Warning, + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny>()), + Times.Once); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForExclamationDelimitedTokens() + { + var logger = new Mock(); + + var result = DockerReferenceUtility.TryParseImageReference( + "!cs_containerRegistryLoginServerUrl!/svc/!serviceName!:!imageTag!", + logger.Object); + + result.Should().BeNull(); + logger.Verify( + l => l.Log( + LogLevel.Warning, It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny>()), - Times.Never); + Times.Once); + } + + [TestMethod] + public void TryParseImageReference_ReturnsNullForExclamationCharacter() + { + DockerReferenceUtility.TryParseImageReference("docker.io/library/nginx!:latest").Should().BeNull(); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForExclamationCharacter() + { + var logger = new Mock(); + + var result = DockerReferenceUtility.TryParseImageReference("docker.io/library/nginx!:latest", logger.Object); + + result.Should().BeNull(); + logger.Verify( + l => l.Log( + LogLevel.Warning, + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny>()), + Times.Once); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForInvalidCharacterInTag() + { + var logger = new Mock(); + + var result = DockerReferenceUtility.TryParseImageReference("mcr.microsoft.com/dotnet/sdk:8.0#preview", logger.Object); + + result.Should().BeNull(); + logger.Verify( + l => l.Log( + LogLevel.Warning, + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny>()), + Times.Once); } [TestMethod]