From bf47244b279c97309add379d0a4b83927568660d Mon Sep 17 00:00:00 2001 From: Julian Pinzer Date: Fri, 5 Jun 2026 16:00:51 -0400 Subject: [PATCH 1/4] Enhance DockerReferenceUtility to handle invalid characters and update tests for unresolved variables --- .../DockerReference/DockerReferenceUtility.cs | 87 +++++++++++++-- .../DockerReferenceUtilityTests.cs | 104 +++++++++++++++++- 2 files changed, 179 insertions(+), 12 deletions(-) diff --git a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs index 83eba3413..368341b97 100644 --- a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs +++ b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs @@ -27,6 +27,8 @@ namespace Microsoft.ComponentDetection.Common; using System; +using System.Buffers; +using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Text.RegularExpressions; using Microsoft.ComponentDetection.Contracts; @@ -40,20 +42,39 @@ public static class DockerReferenceUtility private const string LEGACYDEFAULTDOMAIN = "index.docker.io"; private const string OFFICIALREPOSITORYNAME = "library"; - // Characters that only appear in an image reference as part of an unresolved templating - // token. '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR}, - // {{ .Values.tag }}); '#' covers Azure DevOps and other token-replacement placeholders - // (e.g. #imageTag#) and is never valid in a resolved docker reference. - private static readonly char[] TemplateDelimiters = ['$', '{', '}', '#']; + // Delimiters that only appear in an image reference as part of an unresolved templating + // token: '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR}, + // {{ .Values.tag }}). These are recognized templating syntaxes that are expected in + // un-rendered manifests, so they are skipped silently rather than reported. A token wrapped + // in matching '#' or '!' (handled by DelimiterWrappedTokenRegex) is treated the same way. A + // stray invalid character that is not part of such a token (e.g. a single '#' or '!') is still + // reported via GetInvalidReferenceCharacters. + private static readonly char[] TemplateDelimiters = ['$', '{', '}']; // Matches token-replacement placeholders that wrap an identifier in double underscores, // e.g. __IMAGE_TAG__ or __MCR_ENDPOINT__. Without this they parse as an uppercase repository // name and surface as a noisy parse failure instead of being skipped as a templated value. private static readonly Regex DoubleUnderscoreTokenRegex = new(@"__\w+__"); + // Matches token-replacement placeholders wrapped in a matching '#' or '!', e.g. #imageTag#, + // #cs_containerRegistryLoginServerUrl#, or !imageTag!. A string surrounded by the same '#' or + // '!' delimiter is almost always an unsubstituted template variable (Azure DevOps token + // replacement and similar), so it is skipped silently instead of surfacing as a misleading + // docker-reference parse failure. The backreference requires the closing delimiter to match + // the opening one, so a mismatched stray '#' or '!' is left to GetInvalidReferenceCharacters. + private static readonly Regex DelimiterWrappedTokenRegex = new(@"([#!])[^#!]+\1"); + + // Every character permitted anywhere in a docker reference per the grammar at the top of this + // file: alphanumerics, the separators '.', '_' and '-', the path separator '/', the tag/port + // and digest separators ':' and '@', and the digest-algorithm separator '+'. Anything else + // (e.g. '#', '!') comes from unsubstituted template tokens and is reported as invalid. + private static readonly SearchValues ValidReferenceChars = SearchValues.Create( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-/:@+"); + /// /// Returns true if the reference contains unresolved variable or templating placeholders, - /// e.g. ${VAR}, {{ .Values.tag }}, #imageTag#, or __IMAGE_TAG__. + /// e.g. ${VAR}, {{ .Values.tag }}, __IMAGE_TAG__, #imageTag#, or + /// !imageTag!. /// Such references are not real, resolvable images, so they should be skipped before calling /// or and treated as /// unresolved values rather than reported as parse failures. @@ -62,11 +83,14 @@ public static class DockerReferenceUtility /// true if the reference contains variable placeholder characters; otherwise false. public static bool HasUnresolvedVariables(string reference) => reference.IndexOfAny(TemplateDelimiters) >= 0 || - DoubleUnderscoreTokenRegex.IsMatch(reference); + DoubleUnderscoreTokenRegex.IsMatch(reference) || + DelimiterWrappedTokenRegex.IsMatch(reference); /// /// Attempts to parse an image reference string into a . - /// Returns null if the reference contains unresolved variables or cannot be parsed. + /// Returns null if the reference contains unresolved variables, contains characters that + /// are not valid in a docker reference, or otherwise cannot be parsed. A warning is logged in + /// every skip/failure case so that references which are not scanned remain visible in logs. /// /// The image reference string to parse. /// Optional logger for recording parse failures. @@ -75,6 +99,19 @@ public static bool HasUnresolvedVariables(string reference) => { if (HasUnresolvedVariables(imageReference)) { + logger?.LogWarning( + "Skipping image reference '{ImageReference}' because it contains one or more unresolved template tokens or variable placeholders.", + imageReference); + return null; + } + + var invalidCharacters = GetInvalidReferenceCharacters(imageReference); + if (invalidCharacters.Length > 0) + { + logger?.LogWarning( + "Skipping image reference '{ImageReference}' because it contains character(s) that are not valid in a docker reference: {InvalidCharacters}", + imageReference, + invalidCharacters); return null; } @@ -92,7 +129,7 @@ public static bool HasUnresolvedVariables(string reference) => /// /// Parses an image reference and registers it with the recorder if valid. /// Skips references with unresolved variables or that cannot be parsed, - /// logging a warning for parse failures so that remaining entries continue to be processed. + /// logging a warning in each skipped case so that remaining entries continue to be processed. /// /// The image reference string to parse. /// The component recorder to register the image with. @@ -244,6 +281,38 @@ public static DockerReference ParseAll(string name) return ParseFamiliarName(name); } + /// + /// Returns the distinct characters in that are not valid in any + /// part of a docker reference (domain, repository, tag, or digest) as a comma-separated string, + /// or an empty string when every character is valid. Characters such as # and ! + /// commonly appear in unsubstituted template tokens and otherwise surface as misleading + /// "must be lowercase" or "invalid reference format" parse errors. + /// + /// The image reference string to inspect. + /// A comma-separated list of invalid characters, or an empty string if there are none. + private static string GetInvalidReferenceCharacters(string reference) + { + // Vectorized happy-path check: the overwhelmingly common case is an all-valid reference, + // for which this returns without allocating. Only gather the offending characters when + // at least one is present. + var span = reference.AsSpan(); + if (!span.ContainsAnyExcept(ValidReferenceChars)) + { + return string.Empty; + } + + SortedSet invalid = []; + foreach (var c in span) + { + if (!ValidReferenceChars.Contains(c)) + { + invalid.Add(c); + } + } + + return string.Join(", ", invalid); + } + private static DockerReference CreateDockerReference(Reference options) { return DockerReference.CreateDockerReference(options.Repository, options.Domain, options.Digest, options.Tag); diff --git a/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs b/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs index 902e80b9d..f1bf880de 100644 --- a/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs +++ b/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs @@ -293,9 +293,19 @@ public void HasUnresolvedVariables_ReturnsTrueForDoubleUnderscoreTokens() [TestMethod] public void HasUnresolvedVariables_ReturnsTrueForHashDelimitedTokens() { + // A token wrapped in matching '#' (e.g. #imageTag#) is treated as an unresolved template + // variable and skipped silently rather than reported as an invalid character. DockerReferenceUtility.HasUnresolvedVariables("#cs_containerRegistryLoginServerUrl#/coreservicesaksservice_#cs_aks_workloadName#_#cs_aks_serviceTrackIdentifier#/#serviceName#:#imageTag#").Should().BeTrue(); } + [TestMethod] + public void HasUnresolvedVariables_ReturnsTrueForExclamationDelimitedTokens() + { + // A token wrapped in matching '!' (e.g. !imageTag!) is treated as an unresolved template + // variable and skipped silently rather than reported as an invalid character. + DockerReferenceUtility.HasUnresolvedVariables("!cs_containerRegistryLoginServerUrl!/coreservicesaksservice_!cs_aks_workloadName!/!serviceName!:!imageTag!").Should().BeTrue(); + } + [TestMethod] public void HasUnresolvedVariables_ReturnsFalseForPlainReference() { @@ -327,7 +337,13 @@ public void TryParseImageReference_ReturnsNullForHashDelimitedTokens() } [TestMethod] - public void TryParseImageReference_DoesNotLogWarningForTemplatedReference() + public void TryParseImageReference_ReturnsNullForExclamationDelimitedTokens() + { + DockerReferenceUtility.TryParseImageReference("!cs_containerRegistryLoginServerUrl!/svc/!serviceName!:!imageTag!").Should().BeNull(); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForTemplatedReference() { var logger = new Mock(); @@ -336,12 +352,94 @@ public void TryParseImageReference_DoesNotLogWarningForTemplatedReference() result.Should().BeNull(); logger.Verify( l => l.Log( - It.IsAny(), + LogLevel.Warning, + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny>()), + Times.Once); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForHashDelimitedTokens() + { + var logger = new Mock(); + + var result = DockerReferenceUtility.TryParseImageReference( + "#cs_containerRegistryLoginServerUrl#/svc/#serviceName#:#imageTag#", + logger.Object); + + result.Should().BeNull(); + logger.Verify( + l => l.Log( + LogLevel.Warning, + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny>()), + Times.Once); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForExclamationDelimitedTokens() + { + var logger = new Mock(); + + var result = DockerReferenceUtility.TryParseImageReference( + "!cs_containerRegistryLoginServerUrl!/svc/!serviceName!:!imageTag!", + logger.Object); + + result.Should().BeNull(); + logger.Verify( + l => l.Log( + LogLevel.Warning, It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny>()), - Times.Never); + Times.Once); + } + + [TestMethod] + public void TryParseImageReference_ReturnsNullForExclamationCharacter() + { + DockerReferenceUtility.TryParseImageReference("docker.io/library/nginx!:latest").Should().BeNull(); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForExclamationCharacter() + { + var logger = new Mock(); + + var result = DockerReferenceUtility.TryParseImageReference("docker.io/library/nginx!:latest", logger.Object); + + result.Should().BeNull(); + logger.Verify( + l => l.Log( + LogLevel.Warning, + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny>()), + Times.Once); + } + + [TestMethod] + public void TryParseImageReference_LogsWarningForInvalidCharacterInTag() + { + var logger = new Mock(); + + var result = DockerReferenceUtility.TryParseImageReference("mcr.microsoft.com/dotnet/sdk:8.0#preview", logger.Object); + + result.Should().BeNull(); + logger.Verify( + l => l.Log( + LogLevel.Warning, + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny>()), + Times.Once); } [TestMethod] From e105781d24d1a20791bf24f67e3b4fda8f85e57e Mon Sep 17 00:00:00 2001 From: Julian Date: Fri, 5 Jun 2026 16:12:26 -0400 Subject: [PATCH 2/4] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../DockerReference/DockerReferenceUtility.cs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs index 368341b97..98dc059f6 100644 --- a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs +++ b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs @@ -44,11 +44,11 @@ public static class DockerReferenceUtility // Delimiters that only appear in an image reference as part of an unresolved templating // token: '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR}, - // {{ .Values.tag }}). These are recognized templating syntaxes that are expected in - // un-rendered manifests, so they are skipped silently rather than reported. A token wrapped - // in matching '#' or '!' (handled by DelimiterWrappedTokenRegex) is treated the same way. A - // stray invalid character that is not part of such a token (e.g. a single '#' or '!') is still - // reported via GetInvalidReferenceCharacters. + // {{ .Values.tag }}). These are recognized templating syntaxes expected in un-rendered manifests, + // so TryParseImageReference skips them (logging a warning) rather than treating them as invalid. + // A token wrapped in matching '#' or '!' (handled by DelimiterWrappedTokenRegex) is treated the same way. + // When no templating token is present, stray invalid characters (e.g. a single '#' or '!') are reported + // via GetInvalidReferenceCharacters. private static readonly char[] TemplateDelimiters = ['$', '{', '}']; // Matches token-replacement placeholders that wrap an identifier in double underscores, @@ -310,8 +310,13 @@ private static string GetInvalidReferenceCharacters(string reference) } } - return string.Join(", ", invalid); - } + var invalidStrings = new List(invalid.Count); + foreach (var c in invalid) + { + invalidStrings.Add($"'{c}'"); + } + + return string.Join(", ", invalidStrings); private static DockerReference CreateDockerReference(Reference options) { From ffe222dc455e0774bf788384c3fd98f98a1b39ed Mon Sep 17 00:00:00 2001 From: Julian Pinzer Date: Fri, 5 Jun 2026 16:15:43 -0400 Subject: [PATCH 3/4] fix missing closing curly brace --- .../DockerReference/DockerReferenceUtility.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs index 98dc059f6..684f4d742 100644 --- a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs +++ b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs @@ -317,6 +317,7 @@ private static string GetInvalidReferenceCharacters(string reference) } return string.Join(", ", invalidStrings); + } private static DockerReference CreateDockerReference(Reference options) { From 397444b3cc8a489361889c6c0fffecd3ed3653c8 Mon Sep 17 00:00:00 2001 From: Julian Date: Fri, 5 Jun 2026 16:16:22 -0400 Subject: [PATCH 4/4] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../DockerReference/DockerReferenceUtility.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs index 684f4d742..e3558cb6c 100644 --- a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs +++ b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs @@ -59,8 +59,8 @@ public static class DockerReferenceUtility // Matches token-replacement placeholders wrapped in a matching '#' or '!', e.g. #imageTag#, // #cs_containerRegistryLoginServerUrl#, or !imageTag!. A string surrounded by the same '#' or // '!' delimiter is almost always an unsubstituted template variable (Azure DevOps token - // replacement and similar), so it is skipped silently instead of surfacing as a misleading - // docker-reference parse failure. The backreference requires the closing delimiter to match + // replacement and similar), so it is skipped (and may be logged as a warning) instead of + // surfacing as a misleading docker-reference parse failure. The backreference requires the closing delimiter to match // the opening one, so a mismatched stray '#' or '!' is left to GetInvalidReferenceCharacters. private static readonly Regex DelimiterWrappedTokenRegex = new(@"([#!])[^#!]+\1");