From 661b4c7cd5c15e0e0b5c4a3965a38394ff9baffd Mon Sep 17 00:00:00 2001 From: Erik Mavrinac Date: Thu, 14 Dec 2023 10:28:04 -0800 Subject: [PATCH] Fix for chained copies within the same task session Previous logic did not understand chaining and sometimes would not copy some multi-hop files depending on parallel I/O ordering. Follow the chain back to the root and copy that forward to the destination in parallel. --- src/Artifacts.UnitTests/RobocopyTests.cs | 267 +++++++++++++++++- src/Artifacts/FileSystem.cs | 6 + src/Artifacts/Tasks/Robocopy.cs | 232 +++++++++------ src/Artifacts/Tasks/RobocopyMetadata.cs | 4 +- .../build/Microsoft.Build.Artifacts.targets | 4 +- src/TestShared/MSBuildSdkTestBase.cs | 2 +- 6 files changed, 418 insertions(+), 97 deletions(-) diff --git a/src/Artifacts.UnitTests/RobocopyTests.cs b/src/Artifacts.UnitTests/RobocopyTests.cs index f1a7eeb..45ec77a 100644 --- a/src/Artifacts.UnitTests/RobocopyTests.cs +++ b/src/Artifacts.UnitTests/RobocopyTests.cs @@ -21,16 +21,6 @@ namespace Microsoft.Build.Artifacts.UnitTests { public class RobocopyTests : MSBuildSdkTestBase { - [Fact] - public void DedupKeyOsDifferences() - { - var lowercase = new Robocopy.CopyFileDedupKey("foo", "bar"); - var uppercase = new Robocopy.CopyFileDedupKey("FOO", "BAR"); - Robocopy.CopyFileDedupKey.ComparerInstance.Equals(lowercase, uppercase).ShouldBe(IsWindows); - (Robocopy.CopyFileDedupKey.ComparerInstance.GetHashCode(lowercase) == - Robocopy.CopyFileDedupKey.ComparerInstance.GetHashCode(uppercase)).ShouldBe(IsWindows); - } - [Fact] public void NonRecursiveWildcards() { @@ -296,9 +286,10 @@ public void DuplicatedItemsShouldResultInOneCopy() }; copyArtifacts.Execute().ShouldBeTrue(buildEngine.GetConsoleLog()); + string consoleLog = buildEngine.GetConsoleLog(LoggerVerbosity.Diagnostic); copyArtifacts.NumFilesCopied.ShouldBe(1); copyArtifacts.NumErrors.ShouldBe(0); - copyArtifacts.NumFilesSkipped.ShouldBe(0); + copyArtifacts.NumFilesSkipped.ShouldBe(0, consoleLog); copyArtifacts.NumDuplicateDestinationDelayedJobs.ShouldBe(0); fs.NumCloneFileCalls.ShouldBe(1); fs.NumCopyFileCalls.ShouldBe(1); @@ -434,12 +425,264 @@ public void DifferentSourcesSameDestinationShouldRunDuplicatesSeparately() fs.NumCloneFileCalls.ShouldBe(3); } + [Theory] + [InlineData("*", 10)] + [InlineData("*.txt", 10)] + [InlineData("*", 100)] + [InlineData("*.txt", 100)] + [InlineData("*", 1000)] + [InlineData("*.txt", 1000)] + public void SingleAndLongChainCopiesParallelCopyOriginalSourceFile(string match, int longChainLength) + { + string longChainFileName = $"chain{longChainLength}.txt"; + DirectoryInfo source = CreateFiles( + "source", + "chain1.txt", + "chain2.txt", + longChainFileName); + + DirectoryInfo preexistingDest = CreateFiles( + "preexistingDest", + "preexistingDest1.txt", + "differentExtension.other", + Path.Combine("preexistingDestSubdir", "preexistingDestSubdir1.txt"), + Path.Combine("preexistingDestSubdir", "preexistingDestSubdir1.excludeme")); + + var destinationDirs = new DirectoryInfo[longChainLength]; + for (int i = 0; i < longChainLength; i++) + { + destinationDirs[i] = new DirectoryInfo(Path.Combine(TestRootPath, $"destination{i + 1}")); + } + + DirectoryInfo wildcardDestination1 = new DirectoryInfo(Path.Combine(TestRootPath, "wildcardDestination1")); + + BuildEngine buildEngine = BuildEngine.Create(); + MockFileSystem fs = new MockFileSystem(); + + // Note that Robocopy semantics are that when the source is a directory it must exist at the start of the call, + // else the item is assumed to be a file and will fail on nonexistence. + // Don't need to test or support chained directory copies where destination dirs don't exist yet. + List sources = new (longChainLength + 7) + { + new MockTaskItem(Path.Combine(source.FullName, "chain1.txt")) + { + ["DestinationFolder"] = destinationDirs[0].FullName, + ["AlwaysCopy"] = "true", // Bypass timestamp check. + }, + new MockTaskItem(Path.Combine(destinationDirs[0].FullName, "chain1.txt")) + { + ["DestinationFolder"] = destinationDirs[1].FullName, + ["AlwaysCopy"] = "true", + }, + + new MockTaskItem(Path.Combine(source.FullName, "chain2.txt")) + { + ["DestinationFolder"] = destinationDirs[0].FullName, + ["AlwaysCopy"] = "true", + }, + new MockTaskItem(Path.Combine(destinationDirs[0].FullName, "chain2.txt")) + { + ["DestinationFolder"] = destinationDirs[1].FullName, + ["AlwaysCopy"] = "true", + }, + new MockTaskItem(Path.Combine(destinationDirs[1].FullName, "chain2.txt")) + { + ["DestinationFolder"] = destinationDirs[2].FullName, + ["AlwaysCopy"] = "true", + }, + + // Case where the source is a pre-existing destination for another copy. + new MockTaskItem(Path.Combine(source.FullName, "chain1.txt")) + { + ["DestinationFolder"] = preexistingDest.FullName, + ["AlwaysCopy"] = "true", // Bypass timestamp check. + }, + new MockTaskItem(preexistingDest.FullName) + { + ["DestinationFolder"] = wildcardDestination1.FullName, + ["FileMatch"] = match, + ["IsRecursive"] = "true", + ["AlwaysCopy"] = "true", + ["FileExclude"] = "*.excludeme", + }, + + new MockTaskItem(Path.Combine(source.FullName, longChainFileName)) + { + ["DestinationFolder"] = destinationDirs[0].FullName, + ["AlwaysCopy"] = "true", + }, + }; + + for (int i = 1; i < longChainLength; i++) + { + sources.Add(new MockTaskItem(Path.Combine(destinationDirs[i - 1].FullName, longChainFileName)) + { + ["DestinationFolder"] = destinationDirs[i].FullName, + ["AlwaysCopy"] = "true", + }); + } + + Robocopy copyArtifacts = new Robocopy + { + BuildEngine = buildEngine, + Sleep = _ => { }, + FileSystem = fs, + Sources = sources.ToArray(), + }; + + bool expectOtherExtensionCopy = match == "*"; + int expectedCopies = longChainLength + (expectOtherExtensionCopy ? 1 : 0) + 9; + copyArtifacts.Execute().ShouldBeTrue(buildEngine.GetConsoleLog()); + string consoleLog = buildEngine.GetConsoleLog(LoggerVerbosity.Diagnostic); + copyArtifacts.NumFilesCopied.ShouldBe(expectedCopies, consoleLog); + copyArtifacts.NumErrors.ShouldBe(0, consoleLog); + copyArtifacts.NumFilesSkipped.ShouldBe(0, consoleLog); + copyArtifacts.NumDuplicateDestinationDelayedJobs.ShouldBe(0, consoleLog); // Every file in the chain should have been copied in parallel + fs.NumCloneFileCalls.ShouldBe(expectedCopies, consoleLog); + + Assert.True(File.Exists(Path.Combine(destinationDirs[0].FullName, "chain1.txt"))); + Assert.True(File.Exists(Path.Combine(destinationDirs[0].FullName, "chain2.txt"))); + Assert.True(File.Exists(Path.Combine(destinationDirs[0].FullName, longChainFileName))); + + Assert.True(File.Exists(Path.Combine(destinationDirs[1].FullName, "chain1.txt"))); + Assert.True(File.Exists(Path.Combine(destinationDirs[1].FullName, "chain2.txt"))); + Assert.True(File.Exists(Path.Combine(destinationDirs[1].FullName, longChainFileName))); + + Assert.False(File.Exists(Path.Combine(destinationDirs[2].FullName, "chain1.txt"))); + Assert.True(File.Exists(Path.Combine(destinationDirs[2].FullName, "chain2.txt"))); + Assert.True(File.Exists(Path.Combine(destinationDirs[2].FullName, longChainFileName))); + + for (int i = 3; i < longChainLength; i++) + { + Assert.False(File.Exists(Path.Combine(destinationDirs[i].FullName, "chain1.txt"))); + Assert.False(File.Exists(Path.Combine(destinationDirs[i].FullName, "chain2.txt"))); + Assert.True(File.Exists(Path.Combine(destinationDirs[i].FullName, longChainFileName))); + } + + Assert.True(File.Exists(Path.Combine(wildcardDestination1.FullName, "preexistingDest1.txt"))); + Assert.Equal(expectOtherExtensionCopy, File.Exists(Path.Combine(wildcardDestination1.FullName, "differentExtension.other"))); + Assert.True(File.Exists(Path.Combine(wildcardDestination1.FullName, "preexistingDestSubdir", "preexistingDestSubdir1.txt"))); + Assert.False(File.Exists(Path.Combine(wildcardDestination1.FullName, "preexistingDestSubdir", "preexistingDestSubdir1.excludeme"))); + } + + [Theory] + [InlineData("*")] + [InlineData("*.txt")] + public void ChainedDestinationDirectoryEnumerations(string match) + { + DirectoryInfo source = CreateFiles( + "source", + "source1.txt", + "source2.other", + "source.excludeme.txt", + "zzz_lastsource.txt"); + + DirectoryInfo preexistingDest = CreateFiles( + "preexistingDest", + "preexistingDest1.txt", + "differentExtension.other", + Path.Combine("preexistingDestSubdir", "preexistingDestSubdir1.txt"), + Path.Combine("preexistingDestSubdir", "preexistingDestSubdir1.excludeme.txt")); + + DirectoryInfo wildcardDestination1 = new DirectoryInfo(Path.Combine(TestRootPath, "wildcardDestination1")); + DirectoryInfo preexistingWildcardDestination2 = new DirectoryInfo(Path.Combine(TestRootPath, "preexistingWildcardDestination2")); + preexistingWildcardDestination2.Create(); + DirectoryInfo wildcardDestination3 = new DirectoryInfo(Path.Combine(TestRootPath, "wildcardDestination3")); + + BuildEngine buildEngine = BuildEngine.Create(); + MockFileSystem fs = new MockFileSystem(); + + Robocopy copyArtifacts = new Robocopy + { + BuildEngine = buildEngine, + Sleep = _ => { }, + FileSystem = fs, + Sources = new ITaskItem[] + { + // Note that Robocopy semantics are that when the source is a directory it must exist at the start of the call, + // else the item is assumed to be a file and will fail on nonexistence. + // Don't need to test or support chained directory copies where destination dirs don't exist yet. + + // Case where the source is a pre-existing destination for another copy - should be delayed to after the initial parallel copy wave, + // and all files (copied or pre-existing) should be copied. + new MockTaskItem(source.FullName) + { + ["DestinationFolder"] = preexistingDest.FullName, + ["FileMatch"] = match, + ["AlwaysCopy"] = "true", // Bypass timestamp check. + }, + new MockTaskItem(preexistingDest.FullName) + { + ["DestinationFolder"] = wildcardDestination1.FullName, + ["FileMatch"] = match, + ["AlwaysCopy"] = "true", + ["IsRecursive"] = "true", + ["FileExclude"] = "*.excludeme.*", + }, + new MockTaskItem(preexistingDest.FullName) + { + ["DestinationFolder"] = preexistingWildcardDestination2.FullName, + ["FileMatch"] = match, + ["AlwaysCopy"] = "true", + ["IsRecursive"] = "true", + ["FileExclude"] = "*.excludeme.*", + }, + new MockTaskItem(preexistingWildcardDestination2.FullName) + { + ["DestinationFolder"] = wildcardDestination3.FullName, + ["FileMatch"] = match, + ["AlwaysCopy"] = "true", + ["IsRecursive"] = "false", + ["FileExclude"] = "*.excludeme.*", + }, + }, + }; + + bool expectOtherExtensionCopy = match == "*"; + int expectedCopies = (expectOtherExtensionCopy ? 7 : 0) + 14; + copyArtifacts.Execute().ShouldBeTrue(buildEngine.GetConsoleLog()); + string consoleLog = buildEngine.GetConsoleLog(LoggerVerbosity.Diagnostic); + copyArtifacts.NumFilesCopied.ShouldBe(expectedCopies, consoleLog); + copyArtifacts.NumErrors.ShouldBe(0, consoleLog); + copyArtifacts.NumFilesSkipped.ShouldBe(0, consoleLog); + copyArtifacts.NumDuplicateDestinationDelayedJobs.ShouldBe(0, consoleLog); // Every file in the chain should have been copied in parallel + fs.NumCloneFileCalls.ShouldBe(expectedCopies, consoleLog); + + Assert.True(File.Exists(Path.Combine(preexistingDest.FullName, "source1.txt"))); + Assert.Equal(expectOtherExtensionCopy, File.Exists(Path.Combine(preexistingDest.FullName, "source2.other"))); + Assert.True(File.Exists(Path.Combine(preexistingDest.FullName, "source.excludeme.txt"))); + Assert.True(File.Exists(Path.Combine(preexistingDest.FullName, "zzz_lastsource.txt"))); + + Assert.True(File.Exists(Path.Combine(wildcardDestination1.FullName, "source1.txt"))); + Assert.True(File.Exists(Path.Combine(wildcardDestination1.FullName, "zzz_lastsource.txt"))); + Assert.True(File.Exists(Path.Combine(wildcardDestination1.FullName, "preexistingDest1.txt"))); + Assert.Equal(expectOtherExtensionCopy, File.Exists(Path.Combine(wildcardDestination1.FullName, "differentExtension.other"))); + Assert.Equal(expectOtherExtensionCopy, File.Exists(Path.Combine(wildcardDestination1.FullName, "source2.other"))); + Assert.True(File.Exists(Path.Combine(wildcardDestination1.FullName, "preexistingDestSubdir", "preexistingDestSubdir1.txt"))); + Assert.False(File.Exists(Path.Combine(wildcardDestination1.FullName, "preexistingDestSubdir", "preexistingDestSubdir1.excludeme.txt"))); + + Assert.True(File.Exists(Path.Combine(preexistingWildcardDestination2.FullName, "source1.txt"))); + Assert.True(File.Exists(Path.Combine(preexistingWildcardDestination2.FullName, "zzz_lastsource.txt"))); + Assert.True(File.Exists(Path.Combine(preexistingWildcardDestination2.FullName, "preexistingDest1.txt"))); + Assert.Equal(expectOtherExtensionCopy, File.Exists(Path.Combine(preexistingWildcardDestination2.FullName, "differentExtension.other"))); + Assert.Equal(expectOtherExtensionCopy, File.Exists(Path.Combine(preexistingWildcardDestination2.FullName, "source2.other"))); + Assert.True(File.Exists(Path.Combine(preexistingWildcardDestination2.FullName, "preexistingDestSubdir", "preexistingDestSubdir1.txt"))); + Assert.False(File.Exists(Path.Combine(preexistingWildcardDestination2.FullName, "preexistingDestSubdir", "preexistingDestSubdir1.excludeme.txt"))); + + Assert.True(File.Exists(Path.Combine(wildcardDestination3.FullName, "source1.txt"))); + Assert.True(File.Exists(Path.Combine(wildcardDestination3.FullName, "zzz_lastsource.txt"))); + Assert.True(File.Exists(Path.Combine(wildcardDestination3.FullName, "preexistingDest1.txt"))); + Assert.Equal(expectOtherExtensionCopy, File.Exists(Path.Combine(wildcardDestination3.FullName, "differentExtension.other"))); + Assert.Equal(expectOtherExtensionCopy, File.Exists(Path.Combine(wildcardDestination3.FullName, "source2.other"))); + Assert.False(Directory.Exists(Path.Combine(wildcardDestination3.FullName, "preexistingDestSubdir")), consoleLog); + } + [Fact] public void CoWSuccessDoesNotCopy() { DirectoryInfo source = CreateFiles( "source", - @"foo.txt"); + "foo.txt"); DirectoryInfo destination = new DirectoryInfo(Path.Combine(TestRootPath, "destination")); BuildEngine buildEngine = BuildEngine.Create(); diff --git a/src/Artifacts/FileSystem.cs b/src/Artifacts/FileSystem.cs index b1fce68..026ddbe 100644 --- a/src/Artifacts/FileSystem.cs +++ b/src/Artifacts/FileSystem.cs @@ -8,6 +8,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Text.RegularExpressions; #nullable enable @@ -35,6 +36,11 @@ private FileSystem() /// public static StringComparer PathComparer { get; } = IsWindows ? StringComparer.OrdinalIgnoreCase : StringComparer.Ordinal; + /// + /// Gets the OS-specific Regex options for path regex matching. + /// + public static RegexOptions PathRegexOptions { get; } = IsWindows ? RegexOptions.IgnoreCase : RegexOptions.None; + /// /// Gets a singleton instance of this class. /// diff --git a/src/Artifacts/Tasks/Robocopy.cs b/src/Artifacts/Tasks/Robocopy.cs index 4881b9d..9fc6049 100644 --- a/src/Artifacts/Tasks/Robocopy.cs +++ b/src/Artifacts/Tasks/Robocopy.cs @@ -11,6 +11,7 @@ using System.ComponentModel; using System.IO; using System.Linq; +using System.Text.RegularExpressions; using System.Threading; using System.Threading.Tasks.Dataflow; @@ -31,10 +32,11 @@ public class Robocopy : Task private static readonly ExecutionDataflowBlockOptions ActionBlockOptions = new () { MaxDegreeOfParallelism = MsBuildCopyParallelism, EnsureOrdered = MsBuildCopyParallelism == 1 }; private readonly ConcurrentDictionary _dirsCreated = new (Artifacts.FileSystem.PathComparer); - private readonly HashSet _destinationPathsStarted = new (Artifacts.FileSystem.PathComparer); // Destination paths that were dispatched to copy. Extra copies to the same destination are copied single-threaded in a second wave. private readonly List _duplicateDestinationDelayedJobs = new (); // Jobs that were delayed because they were to a destination path that was already dispatched to copy. + private readonly ConcurrentDictionary> _destinationDirectoryFilesCopying = new (concurrencyLevel: 1, capacity: 31, Artifacts.FileSystem.PathComparer); // Map for destination directories to track files being copied there in parallel portion of copy. Concurrent dictionaries to get TryAdd(), GetOrAdd(). private readonly ActionBlock _copyFileBlock; - private readonly HashSet _filesCopied = new (CopyFileDedupKey.ComparerInstance); + private readonly HashSet _sourceFilesEncountered = new (Artifacts.FileSystem.PathComparer); // Reusable scratch space + private TimeSpan _retryWaitInMilliseconds = TimeSpan.Zero; private int _numFilesCopied; private int _numFilesSkipped; @@ -46,7 +48,7 @@ public Robocopy() { // Break from synchronous thread context of caller to get onto global thread pool thread for synchronous copy operations. await System.Threading.Tasks.Task.Yield(); - CopyFileImpl(job.SourceFile, job.DestFile, job.Metadata); + CopyFileImpl(job.SourceFile, job.DestFile, job.Metadata, job.ReplacementSourceFile); }, ActionBlockOptions); } @@ -104,16 +106,18 @@ public override bool Execute() CopyItems(bucket); } + // Complete the parallel part of the copy job. _copyFileBlock.Complete(); _copyFileBlock.Completion.GetAwaiter().GetResult(); + // Remaining jobs must run single-threaded in order to ensure ordering of filesystem changes. if (_duplicateDestinationDelayedJobs.Count > 0) { - Log.LogMessage("Finishing {0} delayed copies to same destinations as single-threaded copies", _duplicateDestinationDelayedJobs.Count); + Log.LogMessage($"Finishing {_duplicateDestinationDelayedJobs.Count} delayed copies to same destinations as single-threaded copies"); foreach (CopyJob job in _duplicateDestinationDelayedJobs) { job.DestFile.Refresh(); - CopyFileImpl(job.SourceFile, job.DestFile, job.Metadata); + CopyFileImpl(job.SourceFile, job.DestFile, job.Metadata, job.ReplacementSourceFile); } } @@ -126,6 +130,25 @@ public override bool Execute() return !Log.HasLoggedErrors; } + /// + /// Converts a wildcard file pattern to a regular expression string. + /// + internal static string WildcardToRegexStr(string pattern) + { + pattern = Regex.Escape(pattern); + pattern = pattern.Replace("\\*", ".*"); + pattern = pattern.Replace("\\?", ".?"); + return pattern; + } + + /// + /// Converts a wildcard file pattern to a regular expression. + /// + private static Regex WildcardToRegex(string pattern) + { + return new Regex(WildcardToRegexStr(pattern), Artifacts.FileSystem.PathRegexOptions); + } + private static int GetMsBuildCopyTaskParallelism() { // Use the MSBuild Copy task override parallelism setting if present. @@ -139,47 +162,78 @@ private static int GetMsBuildCopyTaskParallelism() return parallelism; } + /// + /// Intended to be called during the parallel portion of the copy job. It may kick work out to the single-threaded portion of the job. + /// During single-threaded post-processing use + /// private void CopyFile(FileInfo sourceFile, FileInfo destFile, RobocopyMetadata metadata) { - // When multiple copies are targeted to the same destination, copy the 2nd and subsequent copies single-threaded. - // Note this will not detect the same destination via symlinks or junctions. - if (_destinationPathsStarted.Add(destFile.FullName)) + // There may already be a copy to this source file path underway, indicating a link in a chained copy. + // This can be copied in parallel from the original source as long as the destination is unique. + string sourceDir = sourceFile.DirectoryName ?? string.Empty; + FileInfo? replacementSourceFile; + if (_destinationDirectoryFilesCopying.TryGetValue(sourceDir, out Dictionary? copiesUnderwayInSourceDir)) { - if (_filesCopied.Add(new CopyFileDedupKey(sourceFile.FullName, destFile.FullName))) - { - _copyFileBlock.Post(new CopyJob(sourceFile, destFile, metadata)); - } - else - { - Log.LogMessage(MessageImportance.Low, "Skipped {0} to {1} as duplicate copy", sourceFile.FullName, destFile.FullName); - } - } - else if (!_filesCopied.Contains(new CopyFileDedupKey(sourceFile.FullName, destFile.FullName))) - { - Log.LogMessage("Delaying copying {0} to {1} as duplicate destination", sourceFile.FullName, destFile.FullName); - _duplicateDestinationDelayedJobs.Add(new CopyJob(sourceFile, destFile, metadata)); + copiesUnderwayInSourceDir.TryGetValue(sourceFile.FullName, out replacementSourceFile); } else { - Log.LogMessage(MessageImportance.Low, "Skipped {0} to {1} as duplicate copy", sourceFile.FullName, destFile.FullName); + replacementSourceFile = null; } + + CopyFile(sourceFile, destFile, metadata, replacementSourceFile); } - private void CopyFileImpl(FileInfo sourceFile, FileInfo destFile, RobocopyMetadata metadata) + private void CopyFile(FileInfo sourceFile, FileInfo destFile, RobocopyMetadata metadata, FileInfo? replacementSourceFile) { - if (destFile.DirectoryName is not null) + string destFilePath = destFile.FullName; + string destDir = destFile.DirectoryName ?? string.Empty; + Dictionary copiesUnderwayInDestDir = _destinationDirectoryFilesCopying.GetOrAdd( + destDir, + _ => new Dictionary(Artifacts.FileSystem.PathComparer)); + if (!copiesUnderwayInDestDir.TryGetValue(destFilePath, out FileInfo? sourceFileUnderway)) { - CreateDirectoryWithRetries(destFile.DirectoryName); + // Create the destination directory before posting to support enumerating destination directories in CopySearch(). + if (destFile.DirectoryName is not null) + { + CreateDirectoryWithRetries(destFile.DirectoryName); + } + + // No other copies to this destination underway, kick off parallel copy. + copiesUnderwayInDestDir[destFilePath] = replacementSourceFile ?? sourceFile; + _copyFileBlock.Post(new CopyJob(sourceFile, destFile, metadata, replacementSourceFile)); + } + else + { + string sourceFilePath = sourceFile.FullName; + if (!string.Equals(sourceFilePath, sourceFileUnderway.FullName, Artifacts.FileSystem.PathComparison)) + { + // When multiple copies are targeted to the same destination, copy the 2nd and subsequent copies single-threaded. + // Note this will not detect the same destination via symlinks or junctions. + Log.LogMessage("Delaying copying {0} to {1} as duplicate destination", sourceFile.FullName, destFilePath); + _duplicateDestinationDelayedJobs.Add(new CopyJob(sourceFile, destFile, metadata, replacementSourceFile)); + } + else + { + Log.LogMessage(MessageImportance.Low, "Skipped {0} to {1} as duplicate copy", sourceFilePath, destFilePath); + } } + } - string sourcePath = sourceFile.FullName; + /// + /// Used for copying within either a parallel context in the action block or a single-threaded context in the single-threaded phase. + /// + private void CopyFileImpl(FileInfo sourceFile, FileInfo destFile, RobocopyMetadata metadata, FileInfo? replacementSourceFile) + { + string originalSourcePath = sourceFile.FullName; string destPath = destFile.FullName; for (int retry = 0; retry <= RetryCount; ++retry) { try { - if (metadata.ShouldCopy(FileSystem, sourceFile, destFile)) + FileInfo sourceToActuallyCopy = replacementSourceFile ?? sourceFile; + if (metadata.ShouldCopy(FileSystem, sourceToActuallyCopy, destFile)) { bool cowLinked = false; if (!DisableCopyOnWrite) @@ -188,7 +242,7 @@ private void CopyFileImpl(FileInfo sourceFile, FileInfo destFile, RobocopyMetada // On any problem fall back to real copy. try { - cowLinked = FileSystem.TryCloneFile(sourceFile, destFile); + cowLinked = FileSystem.TryCloneFile(sourceToActuallyCopy, destFile); if (cowLinked) { destFile.Refresh(); @@ -196,27 +250,27 @@ private void CopyFileImpl(FileInfo sourceFile, FileInfo destFile, RobocopyMetada } catch (Win32Exception win32Ex) when (win32Ex.NativeErrorCode == ErrorSharingViolation) { - Log.LogMessage("Sharing violation creating copy-on-write link from {0} to {1}, retrying with copy", sourcePath, destPath); + Log.LogMessage("Sharing violation creating copy-on-write link from {0} to {1}, retrying with copy", originalSourcePath, destPath); } catch (Exception ex) { - Log.LogMessage("Exception creating copy-on-write link from {0} to {1}, retrying with copy: {2}", sourcePath, destPath, ex); + Log.LogMessage("Exception creating copy-on-write link from {0} to {1}, retrying with copy: {2}", originalSourcePath, destPath, ex); } } if (!cowLinked) { - destFile = FileSystem.CopyFile(sourceFile, destPath, overwrite: true); + destFile = FileSystem.CopyFile(sourceToActuallyCopy, destPath, overwrite: true); } destFile.Attributes = FileAttributes.Normal; destFile.LastWriteTimeUtc = sourceFile.LastWriteTimeUtc; - Log.LogMessage("{0} {1} to {2}", cowLinked ? "Created copy-on-write link" : "Copied", sourcePath, destPath); + Log.LogMessage("{0} {1}{2} to {3}", cowLinked ? "Created copy-on-write link" : "Copied", originalSourcePath, replacementSourceFile is null ? string.Empty : $" (actually {replacementSourceFile.FullName})", destPath); Interlocked.Increment(ref _numFilesCopied); } else { - Log.LogMessage(MessageImportance.Low, "Skipped copying {0} to {1}", sourcePath, destPath); + Log.LogMessage(MessageImportance.Low, "Skipped copying {0} to {1}", originalSourcePath, destPath); Interlocked.Increment(ref _numFilesSkipped); } @@ -225,9 +279,9 @@ private void CopyFileImpl(FileInfo sourceFile, FileInfo destFile, RobocopyMetada catch (IOException e) { // Avoid issuing an error if the paths are actually to the same file. - if (!CopyExceptionHandling.FullPathsAreIdentical(sourcePath, destPath)) + if (!CopyExceptionHandling.FullPathsAreIdentical(originalSourcePath, destPath)) { - LogCopyFailureAndSleep(retry, "Failed to copy {0} to {1}. {2}", sourcePath, destPath, e.Message); + LogCopyFailureAndSleep(retry, "Failed to copy {0} to {1}. {2}", originalSourcePath, destPath, e.Message); } } } @@ -244,7 +298,7 @@ private void CopyItems(IList items) if (hasWildcards || isRecursive) { string match = GetMatchString(items); - CopySearch(items, isRecursive, match, source, null); + CopySearch(items, isRecursive, match, matchRegex: null, source, subDirectory: null); } else { @@ -263,7 +317,8 @@ private void CopyItems(IList items, DirectoryInfo source) { string sourcePath = Path.Combine(sourceDir, file); FileInfo sourceFile = new FileInfo(sourcePath); - if (Verify(sourceFile, item.VerifyExists)) + + if (VerifyExistence(sourceFile, item.VerifyExists)) { foreach (string destDir in item.DestinationFolders) { @@ -276,11 +331,48 @@ private void CopyItems(IList items, DirectoryInfo source) } } - private void CopySearch(IList bucket, bool isRecursive, string match, DirectoryInfo source, string? subDirectory) + /// + /// Enumerates a directory, adding and substituting file entries where a copy is in progress into the directory. + /// This allows full copy parallelism - we can copy from the original source file instead of having to wait for the + /// first copy to complete. + /// + private IEnumerable<(FileInfo, FileInfo?)> EnumerateCurrentAndInProgressFilesInSourceDir(DirectoryInfo sourceDir, string match, Regex matchRegex) + { + string sourceDirPath = sourceDir.FullName; + Dictionary copiesUnderwayIntoDir = _destinationDirectoryFilesCopying.GetOrAdd( + sourceDirPath, + _ => new Dictionary(Artifacts.FileSystem.PathComparer)); + foreach (FileInfo sourceFile in FileSystem.EnumerateFiles(sourceDir, match)) + { + // If this is a direct match for an in-progress copy, supply the original source to be used as a replacement. + string sourceFilePath = sourceFile.FullName; + copiesUnderwayIntoDir.TryGetValue(sourceFilePath, out FileInfo? replacementSourceFile); + yield return (sourceFile, replacementSourceFile); + _sourceFilesEncountered.Add(sourceFilePath); + } + + // Next enumerate the in-progress copies that match the search but may not have begun to actually copy into the + // destination yet because they are in the copy queue. + if (copiesUnderwayIntoDir.Count > 0) + { + foreach (KeyValuePair kvp in copiesUnderwayIntoDir + .Where(kvp => !_sourceFilesEncountered.Contains(kvp.Key) && + matchRegex.IsMatch(Path.GetFileName(kvp.Key)))) + { + // The FileInfo will show the file missing initially but fulfills the needs of logging the file path. + yield return (new FileInfo(kvp.Key), kvp.Value); + } + } + + _sourceFilesEncountered.Clear(); + } + + private void CopySearch(IList bucket, bool isRecursive, string match, Regex? matchRegex, DirectoryInfo source, string? subDirectory) { bool hasSubDirectory = !string.IsNullOrEmpty(subDirectory); + matchRegex ??= WildcardToRegex(match); - foreach (FileInfo sourceFile in FileSystem.EnumerateFiles(source, match)) + foreach ((FileInfo sourceFile, FileInfo? replacementSourceFile) in EnumerateCurrentAndInProgressFilesInSourceDir(source, match, matchRegex)) { foreach (RobocopyMetadata item in bucket) { @@ -292,15 +384,18 @@ private void CopySearch(IList bucket, bool isRecursive, string string destDir = hasSubDirectory ? Path.Combine(destinationDir, subDirectory!) : destinationDir; string destPath = Path.Combine(destDir, fileName); FileInfo destFile = new FileInfo(destPath); - CopyFile(sourceFile, destFile, item); + CopyFile(sourceFile, destFile, item, replacementSourceFile); } } } } - // Doing recursion manually so we can consider DirExcludes + // Doing recursion manually so we can consider DirExcludes. if (isRecursive) { + // For correctness when copying from another destination directory we rely on + // destination directories being created before launching an async copy, so that we + // can enumerate a real, but possibly empty or partially copied-to, directory. foreach (DirectoryInfo childSource in FileSystem.EnumerateDirectories(source)) { // per dir we need to re-items for those items excluding a specific dir @@ -314,7 +409,7 @@ private void CopySearch(IList bucket, bool isRecursive, string } } - CopySearch(subBucket, isRecursive: true, match, childSource, childSubDirectory); + CopySearch(subBucket, isRecursive: true, match, matchRegex, childSource, childSubDirectory); } } } @@ -368,11 +463,7 @@ private IEnumerable> GetBuckets() RobocopyMetadata first = allSources.First(); allSources.RemoveAt(0); - List bucket = new List - { - first, - }; - + List bucket = new List { first }; allBuckets.Add(bucket); if (ShowDiagnosticTrace) @@ -408,7 +499,7 @@ private string GetMatchString(IList bucket) string match = "*"; if (bucket.Count == 1) { - bucket[0].GetMatchString(); + match = bucket[0].GetMatchString(); } return match; @@ -439,58 +530,37 @@ private void LogCopyFailureAndSleep(int attempt, string message, params object[] } } - private bool Verify(FileInfo file, bool verifyExists) + private bool VerifyExistence(FileInfo file, bool verifyExists) { if (FileSystem.FileExists(file)) { return true; } - if (verifyExists) + // Virtual existence: If a copy operation to this source file is already underway, + // the copy logic can short-circuit to copy from the original source in parallel. + if (_destinationDirectoryFilesCopying.TryGetValue(file.DirectoryName, out Dictionary copiesInProgressToFileDir) && + copiesInProgressToFileDir.ContainsKey(file.FullName)) { - Log.LogError("Copy failed - file does not exist [{0}]", file.FullName); + return true; } - return false; - } - - // Internal for unit testing. - internal readonly struct CopyFileDedupKey - { - private readonly string _sourcePath; - private readonly string _destPath; - - public CopyFileDedupKey(string source, string dest) + if (verifyExists) { - _sourcePath = source; - _destPath = dest; + Log.LogError("Copy failed - file does not exist [{0}]", file.FullName); } - public static Comparer ComparerInstance { get; } = new (); - - public sealed class Comparer : IEqualityComparer - { - public bool Equals(CopyFileDedupKey x, CopyFileDedupKey y) - { - return x._sourcePath.Equals(y._sourcePath, Artifacts.FileSystem.PathComparison) && - x._destPath.Equals(y._destPath, Artifacts.FileSystem.PathComparison); - } - - public int GetHashCode(CopyFileDedupKey obj) - { - return Artifacts.FileSystem.PathComparer.GetHashCode(obj._destPath) ^ - Artifacts.FileSystem.PathComparer.GetHashCode(obj._sourcePath); - } - } + return false; } private sealed class CopyJob { - public CopyJob(FileInfo sourceFile, FileInfo destFile, RobocopyMetadata metadata) + public CopyJob(FileInfo sourceFile, FileInfo destFile, RobocopyMetadata metadata, FileInfo? replacementSourceFile) { SourceFile = sourceFile; DestFile = destFile; Metadata = metadata; + ReplacementSourceFile = replacementSourceFile; } public FileInfo SourceFile { get; } @@ -498,6 +568,8 @@ public CopyJob(FileInfo sourceFile, FileInfo destFile, RobocopyMetadata metadata public FileInfo DestFile { get; } public RobocopyMetadata Metadata { get; } + + public FileInfo? ReplacementSourceFile { get; } } } } diff --git a/src/Artifacts/Tasks/RobocopyMetadata.cs b/src/Artifacts/Tasks/RobocopyMetadata.cs index 2f3fb68..a7d13ca 100644 --- a/src/Artifacts/Tasks/RobocopyMetadata.cs +++ b/src/Artifacts/Tasks/RobocopyMetadata.cs @@ -334,8 +334,8 @@ private void SplitItems(string metadata, ITaskItem taskItem) else if (item.IndexOfAny(Wildcards) >= 0) { preRegex.Add(item); - string regexString = item.Replace(@"\", @"\\").Replace(".", "[.]").Replace("?", ".").Replace("*", ".*"); - regularExpressions.Add(new Regex($"^{regexString}$", RegexOptions.IgnoreCase)); + string regexString = Robocopy.WildcardToRegexStr(item); + regularExpressions.Add(new Regex($"^{regexString}$", FileSystem.PathRegexOptions)); } else { diff --git a/src/Artifacts/build/Microsoft.Build.Artifacts.targets b/src/Artifacts/build/Microsoft.Build.Artifacts.targets index d7e1872..da6f18b 100644 --- a/src/Artifacts/build/Microsoft.Build.Artifacts.targets +++ b/src/Artifacts/build/Microsoft.Build.Artifacts.targets @@ -29,9 +29,9 @@