From 62bf783fb2b25ffea9fecfb2ae2dc70bb7f3161f Mon Sep 17 00:00:00 2001 From: Joe Mayo Date: Mon, 27 Apr 2026 17:15:19 -0700 Subject: [PATCH 1/4] Improve archive entry filename chars to allow unicode chars - add `PaArchivePath.IsValidSegment`, `TryValidateSegment`, `TryMakeValidSegment` - remove unneeded `IMsappArchive` - remove `MsappArchive.TryMakeSafeForEntryPathSegment` and `IsSafeForEntryPathSegment` as these are moved to `PaArchivePath`. - added additional tests and cases to cover allowing more characters Co-authored-by: Copilot --- .../Compression/PaArchivePathTests.cs | 147 ++++++++++- .../MsApp/MsappArchiveTests.cs | 118 --------- src/Persistence/Compression/PaArchivePath.cs | 234 +++++++++++++++++- .../Compression/PaArchivePathInvalidReason.cs | 9 +- src/Persistence/MsApp/IMsappArchive.cs | 41 --- src/Persistence/MsApp/MsappArchive.cs | 69 +----- 6 files changed, 378 insertions(+), 240 deletions(-) delete mode 100644 src/Persistence/MsApp/IMsappArchive.cs diff --git a/src/Persistence.Tests/Compression/PaArchivePathTests.cs b/src/Persistence.Tests/Compression/PaArchivePathTests.cs index ec23ab77..f12752ce 100644 --- a/src/Persistence.Tests/Compression/PaArchivePathTests.cs +++ b/src/Persistence.Tests/Compression/PaArchivePathTests.cs @@ -161,12 +161,13 @@ public void ValidDirectoryPathTest(string fullPath, string expectedRelativePathW [DataRow(@"dir1\file3 \", PaArchivePathInvalidReason.SegmentWithLeadingOrTrailingWhitespace)] [DataRow(@"dir1\ file3 ", PaArchivePathInvalidReason.SegmentWithLeadingOrTrailingWhitespace)] // Relative directory segments are illegal (See: Zip Path Traversal vulnerability) - [DataRow(@"parent\..\dir", PaArchivePathInvalidReason.IllegalSegment)] - [DataRow(@"malicious\..\..\..\path", PaArchivePathInvalidReason.IllegalSegment)] - [DataRow("""..\..\..\Windows\System32\drivers\etc\hosts""", PaArchivePathInvalidReason.IllegalSegment)] - [DataRow(@"./current/dir", PaArchivePathInvalidReason.IllegalSegment)] - [DataRow(@"current/./dir", PaArchivePathInvalidReason.IllegalSegment)] - [DataRow(@"illegal/win/filename.", PaArchivePathInvalidReason.IllegalSegment)] // Windows: cannot end with a period + [DataRow(@"parent\..\dir", PaArchivePathInvalidReason.RelativeSegment)] + [DataRow(@"malicious\..\..\..\path", PaArchivePathInvalidReason.RelativeSegment)] + [DataRow("""..\..\..\Windows\System32\drivers\etc\hosts""", PaArchivePathInvalidReason.RelativeSegment)] + [DataRow(@"./current/dir", PaArchivePathInvalidReason.RelativeSegment)] + [DataRow(@"current/./dir", PaArchivePathInvalidReason.RelativeSegment)] + [DataRow(@"illegal/win/filename.", PaArchivePathInvalidReason.SegmentEndsWithDot)] // Windows: cannot end with a period + [DataRow(@"illegal/win/segment./filename", PaArchivePathInvalidReason.SegmentEndsWithDot)] // Windows: cannot end with a period // extended-length paths are not allowed: [DataRow("""\\?\some\long\file\path""", PaArchivePathInvalidReason.InvalidPathChars)] public void InvalidPathTest(string fullName, PaArchivePathInvalidReason expectedReason) @@ -189,6 +190,9 @@ public void GetInvalidPathCharsTest() invalidChars.Should().NotBeEmpty(); invalidChars.Should().Contain(Path.GetInvalidPathChars(), "because all chars returned by Path.GetInvalidPathChars for the current platform should be included"); invalidChars.Should().Contain(Path.GetInvalidFileNameChars().Where(c => !PaArchivePath.GetAllSeparatorChars().Contains(c)), "because all chars returned by Path.GetInvalidFileNameChars (except for separator chars) for the current platform should be included"); + invalidChars.Should().Contain('\b', "BS char should not be allowed"); + invalidChars.Should().Contain('\t', "HT char should not be allowed"); + invalidChars.Should().Contain('\x007F', "DEL char should not be allowed, even though the Path.GetInvalidPathChars doesn't have it"); PaArchivePath.GetInvalidPathChars().Should().NotBeSameAs(invalidChars, "because we should be returning a new array instance each time to prevent callers from modifying the cached array"); } @@ -202,6 +206,137 @@ public void GetAllSeparatorCharsTest() PaArchivePath.GetAllSeparatorChars().Should().NotBeSameAs(allSeparatorChars, "because we should be returning a new array instance each time to prevent callers from modifying the cached array"); } + [TestMethod] + public void GetInvalidSegmentCharsTest() + { + var invalidSegmentChars = PaArchivePath.GetInvalidSegmentChars(); + invalidSegmentChars.Should().NotBeEmpty(); + invalidSegmentChars.Should().Contain(PaArchivePath.GetInvalidPathChars(), "because all chars from GetInvalidPathChars should be included"); + invalidSegmentChars.Should().Contain(PaArchivePath.GetAllSeparatorChars(), "because separator chars delimit segments and are therefore not valid within a segment"); + invalidSegmentChars.Should().Contain(Path.GetInvalidPathChars(), "because all chars returned by Path.GetInvalidPathChars for the current platform should be included"); + invalidSegmentChars.Should().Contain(Path.GetInvalidFileNameChars(), "because all chars returned by Path.GetInvalidFileNameChars for the current platform should be included"); + PaArchivePath.GetInvalidSegmentChars().Should().NotBeSameAs(invalidSegmentChars, "because we should be returning a new array instance each time to prevent callers from modifying the cached array"); + } + + [TestMethod] + // Valid segments + [DataRow("file.txt")] + [DataRow("SomeName")] + [DataRow(".hidden")] + [DataRow("some-dir_with555.common.chars")] + // Whitespace only + [DataRow("", PaArchivePathInvalidReason.WhitespaceOnlySegment)] + [DataRow(" ", PaArchivePathInvalidReason.WhitespaceOnlySegment)] + [DataRow(" ", PaArchivePathInvalidReason.WhitespaceOnlySegment)] + [DataRow("\t", PaArchivePathInvalidReason.WhitespaceOnlySegment)] + // Invalid chars (including separator chars) + [DataRow("foo|", null)] + // Illegal segments + [DataRow(".", null)] + [DataRow("..", null)] + [DataRow("...", null)] + [DataRow(" . ", null)] + [DataRow(" .. ", null)] + [DataRow(" ... ", null)] + [DataRow("ends-with-dot.", "ends-with-dot")] + [DataRow("ends-with-dots...", "ends-with-dots")] + public void TryMakeValidSegmentTest(string segment, string? expectedValidSegment) + { + PaArchivePath.TryMakeValidSegment(segment, out var validSegment) + .Should().Be(expectedValidSegment is not null); + validSegment.Should().Be(expectedValidSegment); + } + + [TestMethod] + // Already valid (returned as-is) + [DataRow("file.txt", "file.txt")] + [DataRow(".hidden", ".hidden")] + [DataRow("foo- -bar.txt", "foo- -bar.txt")] + [DataRow("foo-éñü-bar.txt", "foo-éñü-bar.txt")] + [DataRow("foo-あア-bar.txt", "foo-あア-bar.txt")] + [DataRow("foo-中文-bar.txt", "foo-中文-bar.txt")] + // Invalid chars are replaced + [DataRow("foo|", "___")] + // Illegal segments + [DataRow(".", "_")] + [DataRow("..", "._")] + [DataRow("...", ".._")] + [DataRow(" . ", "_._")] + [DataRow(" .. ", "_.._")] + [DataRow(" ... ", "_..._")] + [DataRow("ends-with-dot.", "ends-with-dot_")] + [DataRow("ends-with-dots...", "ends-with-dots.._")] + public void TryMakeValidSegment_WithReplacementCharTest(string segment, string? expectedValidSegment) + { + PaArchivePath.TryMakeValidSegment(segment, out var validSegment, invalidCharReplacement: "_") + .Should().Be(expectedValidSegment is not null); + validSegment.Should().Be(expectedValidSegment); + } [TestMethod] public void GetHashCodeTest() { diff --git a/src/Persistence.Tests/MsApp/MsappArchiveTests.cs b/src/Persistence.Tests/MsApp/MsappArchiveTests.cs index a1d109f9..c9edf9af 100644 --- a/src/Persistence.Tests/MsApp/MsappArchiveTests.cs +++ b/src/Persistence.Tests/MsApp/MsappArchiveTests.cs @@ -133,122 +133,4 @@ private static void SaveNewMinMsappWithHeaderOnly(MemoryStream archiveStream, st var headerFilePath = Path.Combine("_TestData", "headers", headerFileName); writeToArchive.CreateEntryFromFile(headerFilePath, "Header.json"); } - - [TestMethod] - [DataRow(":%/\\?!", false, DisplayName = "Unsafe chars only")] - [DataRow(" :%/\\ ?! ", false, DisplayName = "Unsafe and whitespace chars only")] - [DataRow("", false, DisplayName = "empty string")] - [DataRow(" ", false, DisplayName = "whitespace chars only")] - [DataRow("Foo.Bar", true, "Foo.Bar")] - [DataRow(" Foo Bar ", true, "Foo Bar", DisplayName = "with leading/trailing whitespace")] - [DataRow("Foo:%/\\-?!Bar", true, "Foo-Bar")] - public void TryMakeSafeForEntryPathSegmentWithDefaultReplacementTests(string unsafeName, bool expectedReturn, string? expectedSafeName = null) - { - MsappArchive.TryMakeSafeForEntryPathSegment(unsafeName, out var safeName).Should().Be(expectedReturn); - if (expectedReturn) - { - safeName.ShouldNotBeNull(); - if (expectedSafeName != null) - { - safeName.Should().Be(expectedSafeName); - } - } - else - { - safeName.Should().BeNull(); - } - } - - [TestMethod] - [DataRow(":%/\\?!", true, "______", DisplayName = "Unsafe chars only")] - [DataRow(" :%/\\ ?! ", true, "____ __", DisplayName = "Unsafe and whitespace chars only")] - [DataRow("", false, DisplayName = "empty string")] - [DataRow(" ", false, DisplayName = "whitespace chars only")] - [DataRow("Foo.Bar", true, "Foo.Bar")] - [DataRow(" Foo Bar ", true, "Foo Bar", DisplayName = "with leading/trailing whitespace")] - [DataRow("Foo:%/\\-?!Bar", true, "Foo____-__Bar")] - public void TryMakeSafeForEntryPathSegmentWithUnderscoreReplacementTests(string unsafeName, bool expectedReturn, string? expectedSafeName = null) - { - MsappArchive.TryMakeSafeForEntryPathSegment(unsafeName, out var safeName, unsafeCharReplacementText: "_").Should().Be(expectedReturn); - if (expectedReturn) - { - safeName.ShouldNotBeNull(); - if (expectedSafeName != null) - { - safeName.Should().Be(expectedSafeName); - } - } - else - { - safeName.Should().BeNull(); - } - } - - [TestMethod] - public void TryMakeSafeForEntryPathSegmentWhereInputContainsPathSeparatorCharsTests() - { - MsappArchive.TryMakeSafeForEntryPathSegment("Foo\\Bar.pa.yaml", out var safeName).Should().BeTrue(); - safeName.Should().Be("FooBar.pa.yaml"); - MsappArchive.TryMakeSafeForEntryPathSegment("Foo/Bar.pa.yaml", out safeName).Should().BeTrue(); - safeName.Should().Be("FooBar.pa.yaml"); - - // with replacement - MsappArchive.TryMakeSafeForEntryPathSegment("Foo\\Bar.pa.yaml", out safeName, unsafeCharReplacementText: "_").Should().BeTrue(); - safeName.Should().Be("Foo_Bar.pa.yaml"); - MsappArchive.TryMakeSafeForEntryPathSegment("Foo/Bar.pa.yaml", out safeName, unsafeCharReplacementText: "-").Should().BeTrue(); - safeName.Should().Be("Foo-Bar.pa.yaml"); - } - - [TestMethod] - public void TryMakeSafeForEntryPathSegmentWhereInputContainsInvalidPathCharTests() - { - var invalidChars = Path.GetInvalidPathChars() - .Union(Path.GetInvalidFileNameChars()); - foreach (var c in invalidChars) - { - // Default behavior should remove invalid chars - MsappArchive.TryMakeSafeForEntryPathSegment($"Foo{c}Bar.pa.yaml", out var safeName).Should().BeTrue(); - safeName.Should().Be("FooBar.pa.yaml"); - - // Replacement char should be used for invalid chars - MsappArchive.TryMakeSafeForEntryPathSegment($"Foo{c}Bar.pa.yaml", out safeName, unsafeCharReplacementText: "_").Should().BeTrue(); - safeName.Should().Be("Foo_Bar.pa.yaml"); - - // When input results in only whitespace or empty, return value should be false - MsappArchive.TryMakeSafeForEntryPathSegment($"{c}", out _).Should().BeFalse("because safe segment is empty string"); - MsappArchive.TryMakeSafeForEntryPathSegment($" {c} ", out _).Should().BeFalse("because safe segment is whitespace"); - MsappArchive.TryMakeSafeForEntryPathSegment($"{c} {c}", out _).Should().BeFalse("because safe segment is whitespace"); - } - } - - [TestMethod] - public void IsSafeForEntryPathSegmentTests() - { - MsappArchive.IsSafeForEntryPathSegment("Foo.pa.yaml").Should().BeTrue(); - - // Path separator chars should not be used for path segments - MsappArchive.IsSafeForEntryPathSegment("Foo/Bar.pa.yaml").Should().BeFalse("separator chars should not be used for path segments"); - MsappArchive.IsSafeForEntryPathSegment("/Foo.pa.yaml").Should().BeFalse("separator chars should not be used for path segments"); - MsappArchive.IsSafeForEntryPathSegment("Foo\\Bar.pa.yaml").Should().BeFalse("separator chars should not be used for path segments"); - MsappArchive.IsSafeForEntryPathSegment("\\Foo.pa.yaml").Should().BeFalse("separator chars should not be used for path segments"); - - MsappArchive.IsSafeForEntryPathSegment("Foo/\t.pa.yaml").Should().BeFalse("control chars should not be allowed"); - - // Currently, chars outside of ascii range are not allowed - MsappArchive.IsSafeForEntryPathSegment("Foo/éñü.pa.yaml").Should().BeFalse("latin chars are currently not allowed"); - MsappArchive.IsSafeForEntryPathSegment("Foo/あア.pa.yaml").Should().BeFalse("Japanese chars are currently not allowed"); - MsappArchive.IsSafeForEntryPathSegment("Foo/中文.pa.yaml").Should().BeFalse("CJK chars are currently not allowed"); - } - - [TestMethod] - public void IsSafeForEntryPathSegmentShouldNotAllowInvalidPathCharsTests() - { - var invalidChars = Path.GetInvalidPathChars() - .Union(Path.GetInvalidFileNameChars()); - - foreach (var c in invalidChars) - { - MsappArchive.IsSafeForEntryPathSegment($"Foo{c}Bar.pa.yaml").Should().BeFalse($"Invalid char '{c}' should not be allowed for path segments"); - } - } } diff --git a/src/Persistence/Compression/PaArchivePath.cs b/src/Persistence/Compression/PaArchivePath.cs index aa28c42b..a6771a16 100644 --- a/src/Persistence/Compression/PaArchivePath.cs +++ b/src/Persistence/Compression/PaArchivePath.cs @@ -45,12 +45,13 @@ public partial class PaArchivePath : IEquatable, IEquatable public static char[] GetInvalidPathChars() => [ - // ASCII Control Chars: + // ASCII Control Chars (0x00-0x1F, 0x7F): '\0', // NULL (char)1, (char)2, (char)3, (char)4, (char)5, (char)6, (char)7, (char)8, (char)9, (char)10, (char)11, (char)12, (char)13, (char)14, (char)15, (char)16, (char)17, (char)18, (char)19, (char)20, (char)21, (char)22, (char)23, (char)24, (char)25, (char)26, (char)27, (char)28, (char)29, (char)30, (char)31, + (char)127, // DEL // non-control chars not allowed in Windows filenames are added here: // We are explicitly only wanting to support valid relative paths, so we don't allow ':' for drive letters etc. @@ -58,11 +59,20 @@ public static char[] GetInvalidPathChars() => [ ':', '*', '?', ]; + /// + /// Characters that are invalid for use within a single path segment (file or directory name) in a . + /// + public static char[] GetInvalidSegmentChars() => [.. GetInvalidPathChars(), .. GetAllSeparatorChars()]; + #if NET8_0_OR_GREATER private static readonly System.Buffers.SearchValues _invalidPathChars = System.Buffers.SearchValues.Create(GetInvalidPathChars()); + private static readonly System.Buffers.SearchValues _invalidSegmentChars = System.Buffers.SearchValues.Create(GetInvalidSegmentChars()); #else private static readonly char[] _invalidPathChars = GetInvalidPathChars(); + private static readonly char[] _invalidSegmentChars = GetInvalidSegmentChars(); #endif + private static readonly char[] _whitespaceChars = [' ', '\t', '\n', '\v', '\f', '\r', '\u00A0', '\u0085']; + private static readonly char[] _whitespaceAndPeriodChars = [.. _whitespaceChars, '.']; /// /// Creates a new instance. @@ -228,7 +238,8 @@ public static string GetInvalidReasonExceptionMessage(PaArchivePathInvalidReason PaArchivePathInvalidReason.InvalidPathChars => $"The path contains invalid characters. Example invalid chars are ASCII control characters and {string.Join(string.Empty, GetInvalidPathChars().Where(static c => !Char.IsControl(c)))}.", PaArchivePathInvalidReason.WhitespaceOnlySegment => "The path contains a segment that is whitespace only, which is not allowed.", PaArchivePathInvalidReason.SegmentWithLeadingOrTrailingWhitespace => "The path contains a segment with leading or trailing whitespace, which is not allowed.", - PaArchivePathInvalidReason.IllegalSegment => "The path contains an illegal segment (e.g. '.' or '..'), which is not allowed.", + PaArchivePathInvalidReason.RelativeSegment => "The path contains an illegal relative segment (e.g. '.' or '..'), which is not allowed.", + PaArchivePathInvalidReason.SegmentEndsWithDot => "The path contains a segment that ends with a period ('.'), which is not allowed.", _ => $"The path is invalid. (Reason: {reason})" }; } @@ -339,17 +350,220 @@ private static bool TryValidatePath( { foreach (var segment in segments) { - if (string.IsNullOrWhiteSpace(segment)) - return PaArchivePathInvalidReason.WhitespaceOnlySegment; - if (segment == ".." || segment == "." - // In Windows, a filename cannot end with a period - || segment[^1] == '.') - return PaArchivePathInvalidReason.IllegalSegment; - if (segment.Trim().Length != segment.Length) - return PaArchivePathInvalidReason.SegmentWithLeadingOrTrailingWhitespace; + if (!TryValidateSegment(segment, out var reason2)) + return reason2; } return null; } } + + /// + /// Determines whether a single path segment (file or directory name) is valid for use in a . + /// + /// The path segment to validate. + /// true when the segment is valid; otherwise false. + public static bool IsValidSegment(string segment) + { + return TryValidateSegment(segment, out _); + } + + /// + /// Validates that a single path segment (file or directory name) is valid for use in a . + /// + /// The path segment to validate. + /// When this method returns false, contains the reason why the segment is invalid; otherwise null. + /// true when the segment is valid; otherwise false. + public static bool TryValidateSegment(string segment, [NotNullWhen(false)] out PaArchivePathInvalidReason? reason) + { + ThrowIfNull(segment); + + return TryValidateSegment(segment.AsSpan(), out reason); + } + + /// + /// Validates that a single path segment (file or directory name) is valid for use in a . + /// + /// The path segment to validate. + /// When this method returns false, contains the reason why the segment is invalid; otherwise null. + /// true when the segment is valid; otherwise false. + public static bool TryValidateSegment(ReadOnlySpan segment, [NotNullWhen(false)] out PaArchivePathInvalidReason? reason) + { + if (segment.IsWhiteSpace()) + { + reason = PaArchivePathInvalidReason.WhitespaceOnlySegment; + return false; + } + + if (segment.IndexOfAny(_invalidSegmentChars) >= 0) + { + reason = PaArchivePathInvalidReason.InvalidPathChars; + return false; + } + + if (segment.Trim().Length != segment.Length) + { + reason = PaArchivePathInvalidReason.SegmentWithLeadingOrTrailingWhitespace; + return false; + } + + // Detect relative segments + if (segment.SequenceEqual("..".AsSpan()) || segment.SequenceEqual(".".AsSpan())) + { + reason = PaArchivePathInvalidReason.RelativeSegment; + return false; + } + + // In Windows, a filename cannot end with a period + if (segment[^1] == '.') + { + reason = PaArchivePathInvalidReason.SegmentEndsWithDot; + return false; + } + + // REVIEW: Should we also limit path segments to 255 chars long? + + reason = null; + return true; + } + + /// + /// Attempts to make a path segment valid for use in a by removing invalid chars or problematic segments. + /// + /// The segment to make valid. + /// When this method returns true, contains the valid segment. + /// true when the segment was converted to a valid segment; otherwise false. + public static bool TryMakeValidSegment(string segment, [NotNullWhen(true)] out string? validSegment) + { + ThrowIfNull(segment); + + return TryMakeValidSegmentCore(segment, out validSegment, createProposedSegment: (segmentSpan, buffer) => + { + var writtenLen = 0; + foreach (var c in segmentSpan) + { + if (IsValidSegmentChar(c)) + buffer[writtenLen++] = c; + } + + // Slice to written length, then Trim returns another slice (no allocation) + var proposed = ((ReadOnlySpan)buffer[..writtenLen]) + .TrimStart() + .TrimEnd(_whitespaceAndPeriodChars); + return proposed; + }); + } + + /// + /// Attempts to make a path segment valid for use in a by replacing invalid characters. + /// + /// The segment to make valid. + /// When this method returns true, contains the valid segment. + /// Invalid characters will be replaced with this string. null or empty string effectively just removes chars. + /// true when the segment was converted to a valid segment; otherwise false. + public static bool TryMakeValidSegment(string segment, [NotNullWhen(true)] out string? validSegment, string? invalidCharReplacement) + { + ThrowIfNull(segment); + + if (StringTfmAdapter.IsNullOrEmpty(invalidCharReplacement)) + return TryMakeValidSegment(segment, out validSegment); + if (invalidCharReplacement.Length > 1) + throw new ArgumentException("Replacement text must be at most one character.", nameof(invalidCharReplacement)); + + var replacementChar = invalidCharReplacement[0]; + if (char.IsWhiteSpace(replacementChar)) + throw new ArgumentException("Replacement must not be a whitespace character.", nameof(invalidCharReplacement)); + if (IsInvalidSegmentChar(replacementChar)) + throw new ArgumentException("Replacement must be a valid segment character.", nameof(invalidCharReplacement)); + if (replacementChar == '.') + throw new ArgumentException("Replacement must not be a period ('.').", nameof(invalidCharReplacement)); + + return TryMakeValidSegmentCore(segment, out validSegment, createProposedSegment: (segmentSpan, buffer) => + { + // We know we'll have a string that's the same length as the output + // Set now, so we'll get some runtime validation if indexes get out of range. + var segmentLen = segmentSpan.Length; + buffer = buffer[..segmentLen]; + + // Replace trailing whitespace with replacementChar + var firstTrailingWhitespaceIdx = segmentLen; + for (int i = segmentLen - 1; i >= 0 && char.IsWhiteSpace(segmentSpan[i]); i--) + { + buffer[i] = replacementChar; + firstTrailingWhitespaceIdx = i; + } + + var lastLeadingWhitespaceIdx = -1; + for (int i = 0; i < firstTrailingWhitespaceIdx && char.IsWhiteSpace(segmentSpan[i]); i++) + { + buffer[i] = replacementChar; + lastLeadingWhitespaceIdx = i; + } + + // replace invalid chars not covered by leading/trailing whitespace + for (int i = lastLeadingWhitespaceIdx + 1; i < firstTrailingWhitespaceIdx; i++) + { + var c = segmentSpan[i]; + buffer[i] = IsInvalidSegmentChar(c) ? replacementChar : c; + } + + // Handle segment ending with a '.' + if (segmentSpan[^1] == '.') + buffer[^1] = replacementChar; + + // return the full buffer + return buffer; + }); + } + + private delegate ReadOnlySpan CreateProposedSegment(ReadOnlySpan segmentSpan, Span buffer); + + private static bool TryMakeValidSegmentCore(string segment, [NotNullWhen(true)] out string? validSegment, CreateProposedSegment createProposedSegment) + { + var segmentSpan = segment.AsSpan(); + if (TryValidateSegment(segmentSpan, out _)) + { + validSegment = segment; + return true; + } + + // Trivially handle when input segment is empty, as we can't fix it + if (segmentSpan.Length == 0) + { + validSegment = null; + return false; + } + + // Filter invalid chars into a stack buffer (fall back to pooled array for large inputs) + const int StackThreshold = 256; + char[]? rented = null; + Span buffer = segmentSpan.Length <= StackThreshold + ? stackalloc char[StackThreshold] + : (rented = System.Buffers.ArrayPool.Shared.Rent(segmentSpan.Length)); + + try + { + var proposed = createProposedSegment(segmentSpan, buffer); + + // If we're left with invalid segment still, then don't return anything + if (TryValidateSegment(proposed, out _)) + { + validSegment = proposed.ToString(); + return true; + } + else + { + validSegment = null; + return false; + } + } + finally + { + if (rented is not null) + System.Buffers.ArrayPool.Shared.Return(rented); + } + } + + private static bool IsValidSegmentChar(char c) => !_invalidSegmentChars.Contains(c); + private static bool IsInvalidSegmentChar(char c) => _invalidSegmentChars.Contains(c); } diff --git a/src/Persistence/Compression/PaArchivePathInvalidReason.cs b/src/Persistence/Compression/PaArchivePathInvalidReason.cs index ed03cdd8..c80589ac 100644 --- a/src/Persistence/Compression/PaArchivePathInvalidReason.cs +++ b/src/Persistence/Compression/PaArchivePathInvalidReason.cs @@ -12,5 +12,12 @@ public enum PaArchivePathInvalidReason InvalidPathChars, WhitespaceOnlySegment, SegmentWithLeadingOrTrailingWhitespace, - IllegalSegment, + /// + /// Relative segments like "." or ".." are not allowed, as they can lead to confusion and potential security issues when extracting archives. + /// + RelativeSegment, + /// + /// On Windows, filenames cannot end with ".". So we exclude on all platforms. + /// + SegmentEndsWithDot, } diff --git a/src/Persistence/MsApp/IMsappArchive.cs b/src/Persistence/MsApp/IMsappArchive.cs deleted file mode 100644 index 4d8bbedf..00000000 --- a/src/Persistence/MsApp/IMsappArchive.cs +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -using Microsoft.PowerPlatform.PowerApps.Persistence.Compression; -using System.Diagnostics.CodeAnalysis; -using System.IO.Compression; - -namespace Microsoft.PowerPlatform.PowerApps.Persistence.MsApp; - -/// -/// base interface for MsappArchive -/// -public interface IMsappArchive : IPaArchive, IDisposable -{ - Version MSAppStructureVersion { get; } - - Version DocVersion { get; } - - /// - /// Provides access to the underlying zip archive. - /// Attention: This property might be removed in the future. - /// - [Obsolete("We shouldn't expose the underlying ZipArchive instance, as modifying it will make this instance inconsistent")] - ZipArchive ZipArchive { get; } - - /// - /// Attempts to generate a unique entry path in the specified directory, based on a starter file name and extension. - /// - /// The directory path for the entry; or null. Note: Directory path is expected to already be safe, as it is expected to not contain customer data. - /// The name of the file, without an extension. This file name should already have been made 'safe' by the caller. If needed, use to make it safe. - /// The file extension to add for the file path or null for no extension. Note: This should already contain only safe chars, as it is expected to not contain customer data. - /// The string added just before the unique file number and extension. Default is empty string. - /// The entry path which is unique in the specified . - /// directory is empty or whitespace. - /// A unique filename entry could not be generated. - string GenerateUniqueEntryPath( - string? directory, - string fileNameNoExtension, - string? extension, - string uniqueSuffixSeparator = ""); -} diff --git a/src/Persistence/MsApp/MsappArchive.cs b/src/Persistence/MsApp/MsappArchive.cs index cbb6a0a4..68b95f7a 100644 --- a/src/Persistence/MsApp/MsappArchive.cs +++ b/src/Persistence/MsApp/MsappArchive.cs @@ -19,20 +19,17 @@ namespace Microsoft.PowerPlatform.PowerApps.Persistence.MsApp; /// Represents a .msapp file. /// /// true to leave the stream open after the System.IO.Compression.ZipArchive object is disposed; otherwise, false -public partial class MsappArchive( +public class MsappArchive( Stream stream, ZipArchiveMode mode, bool leaveOpen = false, ILogger? logger = null) - : PaArchive(stream, mode, leaveOpen, entryNameEncoding: null, logger), IMsappArchive + : PaArchive(stream, mode, leaveOpen, entryNameEncoding: null, logger) { private HeaderJson? _header; private bool _packedJsonLoaded; private PackedJson? _packedJson; - /// - public ZipArchive ZipArchive => InnerZipArchive; - internal HeaderJson Header => _header ??= LoadHeader(); /// @@ -55,13 +52,10 @@ public string GenerateUniqueEntryPath( { var directoryPath = PaArchivePath.AsDirectoryOrRoot(directory); ThrowIfNull(fileNameNoExtension); - if (!IsSafeForEntryPathSegment(fileNameNoExtension)) - { - throw new ArgumentException($"The {nameof(fileNameNoExtension)} must be safe for use as an entry path segment. Prevalidate using {nameof(TryMakeSafeForEntryPathSegment)} first.", nameof(fileNameNoExtension)); - } - if (extension != null && !IsSafeForEntryPathSegment(extension)) + + if (!PaArchivePath.IsValidSegment($"{fileNameNoExtension}{extension}")) { - throw new ArgumentException("The extension can be null, but cannot be empty or whitespace only, and must be a valid entry path segment.", nameof(directory)); + throw new ArgumentException($"The {nameof(fileNameNoExtension)} combined with {nameof(extension)} must be safe for use as an entry path segment. Prevalidate using {nameof(PaArchivePath)}.{nameof(PaArchivePath.TryMakeValidSegment)} first.", nameof(fileNameNoExtension)); } var entryPathPrefix = $"{directoryPath.FullName}{fileNameNoExtension}"; @@ -96,57 +90,4 @@ private HeaderJson LoadHeader() _packedJsonLoaded = true; return _packedJson; } - - /// - /// Regular expression that matches any characters that are unsafe for entry filenames.
- /// Note: we don't allow any sort of directory separator chars for filenames to remove cross-platform issues. - ///
-#if NET7_0_OR_GREATER - [GeneratedRegex("[^a-zA-Z0-9 ._-]")] - private static partial Regex UnsafeFileNameCharactersRegex(); -#else - private static readonly Regex s_unsafeFileNameCharactersRegex = new("[^a-zA-Z0-9 ._-]", RegexOptions.Compiled); - private static Regex UnsafeFileNameCharactersRegex() => s_unsafeFileNameCharactersRegex; -#endif - - /// - /// Makes a user-provided name safe for use as an entry path segment in the archive. - /// After making the name safe, it will be trimmed and empty strings will result in a false return value.
- /// Note: The set of allowed chars is currently more limited than what is actually allowed in a . - ///
- /// An unsafe name which may contain invalid chars for usage in an entry path segment (e.g. directory name or file name). - /// Unsafe characters in the name will be replaced with this string. Default is empty string. - /// true, when was converted to a safe, non-empty string; otherwise, false indicates that input could not be turned into a safe, non-empty string. - public static bool TryMakeSafeForEntryPathSegment( - string unsafeName, - [NotNullWhen(true)] - out string? safeName, - string unsafeCharReplacementText = "") - { - ThrowIfNull(unsafeName); - ThrowIfNull(unsafeCharReplacementText); - - safeName = UnsafeFileNameCharactersRegex() - .Replace(unsafeName, unsafeCharReplacementText) - .Trim() - .EmptyToNull(); - - return safeName != null; - } - - /// - /// Used to verify that a name is safe for use as a single path segment for an entry. - /// Directory separator chars are not allowed in a path segment.
- /// Note: The set of allowed chars is currently more limited than what is actually allowed in a . - ///
- /// The proposed path segment name. - /// false when is null, empty, whitespace only, has leading or trailing whitespace, contains path separator chars or contains any other invalid chars. - public static bool IsSafeForEntryPathSegment(string name) - { - ThrowIfNull(name); - - return !string.IsNullOrWhiteSpace(name) - && !UnsafeFileNameCharactersRegex().IsMatch(name) - && name.Trim().Length == name.Length; // No leading or trailing whitespace - } } From c6010fa3c781a7a8f50fce6af3c2e102dca71f67 Mon Sep 17 00:00:00 2001 From: Joe Mayo Date: Mon, 27 Apr 2026 17:58:50 -0700 Subject: [PATCH 2/4] fix whitespace detection --- src/Persistence/Compression/PaArchivePath.cs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Persistence/Compression/PaArchivePath.cs b/src/Persistence/Compression/PaArchivePath.cs index a6771a16..e83d21af 100644 --- a/src/Persistence/Compression/PaArchivePath.cs +++ b/src/Persistence/Compression/PaArchivePath.cs @@ -71,8 +71,6 @@ public static char[] GetInvalidPathChars() => [ private static readonly char[] _invalidPathChars = GetInvalidPathChars(); private static readonly char[] _invalidSegmentChars = GetInvalidSegmentChars(); #endif - private static readonly char[] _whitespaceChars = [' ', '\t', '\n', '\v', '\f', '\r', '\u00A0', '\u0085']; - private static readonly char[] _whitespaceAndPeriodChars = [.. _whitespaceChars, '.']; /// /// Creates a new instance. @@ -448,9 +446,16 @@ public static bool TryMakeValidSegment(string segment, [NotNullWhen(true)] out s // Slice to written length, then Trim returns another slice (no allocation) var proposed = ((ReadOnlySpan)buffer[..writtenLen]) - .TrimStart() - .TrimEnd(_whitespaceAndPeriodChars); - return proposed; + .TrimStart(); + + // Trip trailing whitespace and '.' + var lenToKeep = proposed.Length; + for (int i = proposed.Length - 1; i >= 0 && (proposed[i] == '.' || char.IsWhiteSpace(proposed[i])); i--) + { + lenToKeep = i; + } + + return proposed[..lenToKeep]; }); } From a519087e2bcbd16a16b1db3c09b2b316750911e8 Mon Sep 17 00:00:00 2001 From: Joe Mayo Date: Mon, 27 Apr 2026 18:05:01 -0700 Subject: [PATCH 3/4] fix type for 'replacementChar' --- .../Compression/PaArchivePathTests.cs | 2 +- src/Persistence/Compression/PaArchivePath.cs | 18 ++++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/Persistence.Tests/Compression/PaArchivePathTests.cs b/src/Persistence.Tests/Compression/PaArchivePathTests.cs index f12752ce..638be0cf 100644 --- a/src/Persistence.Tests/Compression/PaArchivePathTests.cs +++ b/src/Persistence.Tests/Compression/PaArchivePathTests.cs @@ -333,7 +333,7 @@ public void TryMakeValidSegmentTest(string segment, string? expectedValidSegment [DataRow("ends-with-dots...", "ends-with-dots.._")] public void TryMakeValidSegment_WithReplacementCharTest(string segment, string? expectedValidSegment) { - PaArchivePath.TryMakeValidSegment(segment, out var validSegment, invalidCharReplacement: "_") + PaArchivePath.TryMakeValidSegment(segment, out var validSegment, replacementChar: '_') .Should().Be(expectedValidSegment is not null); validSegment.Should().Be(expectedValidSegment); } diff --git a/src/Persistence/Compression/PaArchivePath.cs b/src/Persistence/Compression/PaArchivePath.cs index e83d21af..af5dabda 100644 --- a/src/Persistence/Compression/PaArchivePath.cs +++ b/src/Persistence/Compression/PaArchivePath.cs @@ -464,24 +464,18 @@ public static bool TryMakeValidSegment(string segment, [NotNullWhen(true)] out s /// /// The segment to make valid. /// When this method returns true, contains the valid segment. - /// Invalid characters will be replaced with this string. null or empty string effectively just removes chars. + /// Invalid characters will be replaced with this character. /// true when the segment was converted to a valid segment; otherwise false. - public static bool TryMakeValidSegment(string segment, [NotNullWhen(true)] out string? validSegment, string? invalidCharReplacement) + public static bool TryMakeValidSegment(string segment, [NotNullWhen(true)] out string? validSegment, char replacementChar) { ThrowIfNull(segment); - if (StringTfmAdapter.IsNullOrEmpty(invalidCharReplacement)) - return TryMakeValidSegment(segment, out validSegment); - if (invalidCharReplacement.Length > 1) - throw new ArgumentException("Replacement text must be at most one character.", nameof(invalidCharReplacement)); - - var replacementChar = invalidCharReplacement[0]; - if (char.IsWhiteSpace(replacementChar)) - throw new ArgumentException("Replacement must not be a whitespace character.", nameof(invalidCharReplacement)); + if (char.IsWhiteSpace((char)replacementChar)) + throw new ArgumentException("Replacement must not be a whitespace character.", nameof(replacementChar)); if (IsInvalidSegmentChar(replacementChar)) - throw new ArgumentException("Replacement must be a valid segment character.", nameof(invalidCharReplacement)); + throw new ArgumentException("Replacement must be a valid segment character.", nameof(replacementChar)); if (replacementChar == '.') - throw new ArgumentException("Replacement must not be a period ('.').", nameof(invalidCharReplacement)); + throw new ArgumentException("Replacement must not be a period ('.').", nameof(replacementChar)); return TryMakeValidSegmentCore(segment, out validSegment, createProposedSegment: (segmentSpan, buffer) => { From 6f1c404579459e0cb6ec2830ec39f6ea1e84487c Mon Sep 17 00:00:00 2001 From: Joe Mayo Date: Tue, 28 Apr 2026 07:17:52 -0700 Subject: [PATCH 4/4] address feedback --- src/Persistence/Compression/PaArchivePath.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Persistence/Compression/PaArchivePath.cs b/src/Persistence/Compression/PaArchivePath.cs index af5dabda..6b791b17 100644 --- a/src/Persistence/Compression/PaArchivePath.cs +++ b/src/Persistence/Compression/PaArchivePath.cs @@ -448,7 +448,7 @@ public static bool TryMakeValidSegment(string segment, [NotNullWhen(true)] out s var proposed = ((ReadOnlySpan)buffer[..writtenLen]) .TrimStart(); - // Trip trailing whitespace and '.' + // Trim trailing whitespace and '.' var lenToKeep = proposed.Length; for (int i = proposed.Length - 1; i >= 0 && (proposed[i] == '.' || char.IsWhiteSpace(proposed[i])); i--) { @@ -470,7 +470,7 @@ public static bool TryMakeValidSegment(string segment, [NotNullWhen(true)] out s { ThrowIfNull(segment); - if (char.IsWhiteSpace((char)replacementChar)) + if (char.IsWhiteSpace(replacementChar)) throw new ArgumentException("Replacement must not be a whitespace character.", nameof(replacementChar)); if (IsInvalidSegmentChar(replacementChar)) throw new ArgumentException("Replacement must be a valid segment character.", nameof(replacementChar));