diff --git a/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs b/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs index 9d77004..2cae923 100644 --- a/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs +++ b/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs @@ -37,7 +37,9 @@ public static IEnumerable ArchiveData new object[] { "TestData.vhdx",3 }, new object[] { "TestData.wim",3 }, new object[] { "EmptyFile.txt", 1 }, - new object[] { "TestDataArchivesNested.Zip", 54 } + new object[] { "TestDataArchivesNested.Zip", 54 }, + new object[] { "UdfTest.iso", 3 }, + new object[] { "UdfTestWithMultiSystem.iso", 3 } }; } } @@ -170,7 +172,11 @@ public void ExtractArchive(string fileName, int expectedNumFiles) var extractor = new Extractor(); var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", fileName); var results = extractor.Extract(path, GetExtractorOptions()).ToList(); - Assert.AreEqual(expectedNumFiles, results.Count()); + foreach (var result in results) + { + Assert.AreNotEqual(FileEntryStatus.FailedArchive, result.EntryStatus); + } + Assert.AreEqual(expectedNumFiles, results.Count); } [TestMethod] diff --git a/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs b/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs index 9965aac..361ae18 100644 --- a/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs +++ b/RecursiveExtractor.Tests/ExtractorTests/MiniMagicTests.cs @@ -19,7 +19,7 @@ public class MiniMagicTests : BaseExtractorTestClass [DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", ArchiveFileType.DEB)] [DataRow("TestData.a", ArchiveFileType.AR)] [DataRow("TestData.iso", ArchiveFileType.ISO_9660)] - // [DataRow("TestData.vhd", ArchiveFileType.VHD)] + [DataRow("UdfTest.iso", ArchiveFileType.UDF)] [DataRow("TestData.vhdx", ArchiveFileType.VHDX)] [DataRow("TestData.wim", ArchiveFileType.WIM)] [DataRow("Empty.vmdk", ArchiveFileType.VMDK)] diff --git 
a/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj b/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj index e0a36d3..1f460f2 100644 --- a/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj +++ b/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj @@ -267,5 +267,11 @@ PreserveNewest + + PreserveNewest + + + PreserveNewest + diff --git a/RecursiveExtractor.Tests/SanitizePathTests.cs b/RecursiveExtractor.Tests/SanitizePathTests.cs index b304f84..4ac0e0c 100644 --- a/RecursiveExtractor.Tests/SanitizePathTests.cs +++ b/RecursiveExtractor.Tests/SanitizePathTests.cs @@ -1,18 +1,11 @@ // Copyright (c) Microsoft Corporation. Licensed under the MIT License. +using Microsoft.CST.RecursiveExtractor; using Microsoft.VisualStudio.TestTools.UnitTesting; -using NLog; -using NLog.Config; -using NLog.Targets; -using System; -using System.Collections.Generic; using System.IO; -using System.Linq; using System.Runtime.InteropServices; -using System.Text.RegularExpressions; -using System.Threading.Tasks; -namespace Microsoft.CST.RecursiveExtractor.Tests +namespace RecursiveExtractor.Tests { [TestClass] public class SanitizePathTests diff --git a/RecursiveExtractor.Tests/TestData/TestDataArchives/UdfTest.iso b/RecursiveExtractor.Tests/TestData/TestDataArchives/UdfTest.iso new file mode 100644 index 0000000..9fdaf1f Binary files /dev/null and b/RecursiveExtractor.Tests/TestData/TestDataArchives/UdfTest.iso differ diff --git a/RecursiveExtractor.Tests/TestData/TestDataArchives/UdfTestWithMultiSystem.iso b/RecursiveExtractor.Tests/TestData/TestDataArchives/UdfTestWithMultiSystem.iso new file mode 100644 index 0000000..f8d4c3a Binary files /dev/null and b/RecursiveExtractor.Tests/TestData/TestDataArchives/UdfTestWithMultiSystem.iso differ diff --git a/RecursiveExtractor/Extractor.cs b/RecursiveExtractor/Extractor.cs index 20cc5a4..501671a 100644 --- a/RecursiveExtractor/Extractor.cs +++ b/RecursiveExtractor/Extractor.cs @@ -51,6 +51,7 @@ public void 
SetDefaultExtractors() SetExtractor(ArchiveFileType.AR, new GnuArExtractor(this)); SetExtractor(ArchiveFileType.GZIP, new GzipExtractor(this)); SetExtractor(ArchiveFileType.ISO_9660, new IsoExtractor(this)); + SetExtractor(ArchiveFileType.UDF, new UdfExtractor(this)); SetExtractor(ArchiveFileType.RAR, new RarExtractor(this)); SetExtractor(ArchiveFileType.RAR5, new RarExtractor(this)); SetExtractor(ArchiveFileType.P7ZIP, new SevenZipExtractor(this)); @@ -663,4 +664,4 @@ public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions? opt } } } -} \ No newline at end of file +} diff --git a/RecursiveExtractor/Extractors/UdfExtractor.cs b/RecursiveExtractor/Extractors/UdfExtractor.cs new file mode 100644 index 0000000..efc016c --- /dev/null +++ b/RecursiveExtractor/Extractors/UdfExtractor.cs @@ -0,0 +1,157 @@ +using DiscUtils.Udf; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +namespace Microsoft.CST.RecursiveExtractor.Extractors +{ + /// + /// The UDF disc image extractor implementation. + /// + public class UdfExtractor : AsyncExtractorInterface + { + /// + /// The constructor takes the Extractor context for recursion. + /// + /// The Extractor context. + public UdfExtractor(Extractor context) + { + Context = context; + } + private readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger(); + + internal Extractor Context { get; } + + /// + /// Extracts a UDF file + /// + /// + public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) + { + DiscUtils.DiscFileInfo[]? entries = null; + var failed = false; + try + { + using var cd = new UdfReader(fileEntry.Content); + entries = cd.Root.GetFiles("*.*", SearchOption.AllDirectories).ToArray(); + } + catch (Exception e) + { + Logger.Debug("Failed to open UDF {0}. 
({1}:{2})", fileEntry.FullPath, e.GetType(), e.Message); + failed = true; + } + if (failed) + { + if (options.ExtractSelfOnFail) + { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; + yield return fileEntry; + } + } + else if (entries != null) + { + foreach (var file in entries) + { + var fileInfo = file; + governor.CheckResourceGovernor(fileInfo.Length); + Stream? stream = null; + try + { + stream = fileInfo.OpenRead(); + } + catch (Exception e) + { + Logger.Debug("Failed to extract {0} from UDF {1}. ({2}:{3})", fileInfo.FullName, fileEntry.FullPath, e.GetType(), e.Message); + } + if (stream != null) + { + var name = fileInfo.FullName.Replace('/', Path.DirectorySeparatorChar); + var newFileEntry = await FileEntry.FromStreamAsync(name, stream, fileEntry, fileInfo.CreationTime, fileInfo.LastWriteTime, fileInfo.LastAccessTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false); + if (options.Recurse || topLevel) + { + await foreach (var entry in Context.ExtractAsync(newFileEntry, options, governor, false)) + { + yield return entry; + } + } + else + { + yield return newFileEntry; + } + } + } + } + } + + /// + /// Extracts a UDF file + /// + /// + public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) + { + DiscUtils.DiscFileInfo[]? entries = null; + var failed = false; + try + { + using var cd = new UdfReader(fileEntry.Content); + entries = cd.Root.GetFiles("*.*", SearchOption.AllDirectories).ToArray(); + } + catch(Exception e) + { + Logger.Debug("Failed to open UDF {0}. ({1}:{2})", fileEntry.FullPath, e.GetType(), e.Message); + failed = true; + } + if (failed) + { + if (options.ExtractSelfOnFail) + { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; + yield return fileEntry; + } + } + else if (entries != null) + { + foreach (var file in entries) + { + var fileInfo = file; + governor.CheckResourceGovernor(fileInfo.Length); + Stream? 
stream = null; + try + { + stream = fileInfo.OpenRead(); + } + catch (Exception e) + { + Logger.Debug("Failed to extract {0} from UDF {1}. ({2}:{3})", fileInfo.FullName, fileEntry.FullPath, e.GetType(), e.Message); + } + if (stream != null) + { + var name = fileInfo.FullName.Replace('/', Path.DirectorySeparatorChar); + var newFileEntry = new FileEntry(name, stream, fileEntry, createTime: file.CreationTime, modifyTime: file.LastWriteTime, accessTime: file.LastAccessTime, memoryStreamCutoff: options.MemoryStreamCutoff); + if (options.Recurse || topLevel) + { + foreach (var entry in Context.Extract(newFileEntry, options, governor, false)) + { + yield return entry; + } + } + else + { + yield return newFileEntry; + } + } + } + } + else + { + if (options.ExtractSelfOnFail) + { + fileEntry.EntryStatus = FileEntryStatus.FailedArchive; + yield return fileEntry; + } + } + } + } +} diff --git a/RecursiveExtractor/MiniMagic.cs b/RecursiveExtractor/MiniMagic.cs index af9f764..0c1544b 100644 --- a/RecursiveExtractor/MiniMagic.cs +++ b/RecursiveExtractor/MiniMagic.cs @@ -61,6 +61,10 @@ public enum ArchiveFileType /// ISO_9660, /// + /// A UDF disc image. + /// + UDF, + /// /// A VHDX disc image. /// VHDX, @@ -228,6 +232,10 @@ public static ArchiveFileType DetectFileType(Stream fileStream) { return ArchiveFileType.ISO_9660; } + if (buffer[0] == 'B' && buffer[1] == 'E' && buffer[2] == 'A' && buffer[3] == '0' && buffer[4] == '1') + { + return ArchiveFileType.UDF; + } } //https://www.microsoft.com/en-us/download/details.aspx?id=23850 - 'Hard Disk Footer Format' @@ -265,4 +273,4 @@ public static ArchiveFileType DetectFileType(Stream fileStream) /// The ArchiveFileType detected public static ArchiveFileType DetectFileType(FileEntry fileEntry) => DetectFileType(fileEntry?.Content ?? 
new MemoryStream()); } -} \ No newline at end of file +} diff --git a/RecursiveExtractor/RecursiveExtractor.csproj b/RecursiveExtractor/RecursiveExtractor.csproj index 3f01d29..a8b21ec 100644 --- a/RecursiveExtractor/RecursiveExtractor.csproj +++ b/RecursiveExtractor/RecursiveExtractor.csproj @@ -32,6 +32,7 @@ +