Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions RecursiveExtractor.Tests/ExtractorTests/FileMetadataTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.

using Microsoft.CST.RecursiveExtractor;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using Xunit;

namespace RecursiveExtractor.Tests.ExtractorTests;

/// <summary>
/// Tests covering <see cref="FileEntryMetadata"/>: that the tar and ar extractors attach
/// mode/uid/gid metadata to the entries they yield (through both the async and sync
/// extraction paths), and that the permission flags exposed by the metadata are derived
/// from <see cref="FileEntryMetadata.Mode"/>.
/// </summary>
public class FileMetadataTests
{
    /// <summary>
    /// 0644 (octal) = 420 (decimal), i.e. rw-r--r--. Every entry in the tar and ar test
    /// archives used below is expected to carry this mode.
    /// </summary>
    private const long RegularFileMode = 420;

    /// <summary>
    /// Builds the path of an archive inside the TestData/TestDataArchives directory,
    /// resolved relative to the current working directory.
    /// </summary>
    /// <param name="archiveName">File name of the archive, e.g. "TestData.tar".</param>
    /// <returns>The combined path to the archive.</returns>
    private static string GetTestArchivePath(string archiveName) =>
        Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", archiveName);

    /// <summary>
    /// Assertions shared by the async and sync tar tests, so both extraction paths are
    /// held to exactly the same contract.
    /// </summary>
    /// <param name="entry">An entry extracted from TestData.tar.</param>
    private static void AssertTarEntryMetadata(FileEntry entry)
    {
        Assert.NotNull(entry.Metadata);
        var metadata = entry.Metadata!;

        Assert.NotNull(metadata.Mode);
        // Regular files in TestData.tar have mode 0644 (octal) = 420 (decimal)
        Assert.Equal(RegularFileMode, metadata.Mode);
        Assert.False(metadata.IsExecutable);
        Assert.False(metadata.IsSetUid);
        Assert.False(metadata.IsSetGid);
        Assert.NotNull(metadata.Uid);
        Assert.NotNull(metadata.Gid);
    }

    /// <summary>
    /// Assertions shared by the async and sync ar tests, so both extraction paths are
    /// held to exactly the same contract.
    /// </summary>
    /// <param name="entry">An entry extracted from TestData.a.</param>
    private static void AssertArEntryMetadata(FileEntry entry)
    {
        Assert.NotNull(entry.Metadata);
        var metadata = entry.Metadata!;

        Assert.NotNull(metadata.Mode);
        // ar files in TestData.a have mode 0644 (octal) = 420 (decimal)
        Assert.Equal(RegularFileMode, metadata.Mode);
        Assert.False(metadata.IsExecutable);
        Assert.NotNull(metadata.Uid);
        Assert.Equal(0L, metadata.Uid);
        Assert.NotNull(metadata.Gid);
        Assert.Equal(0L, metadata.Gid);
    }

    [Fact]
    public async Task TarEntries_HaveMetadata()
    {
        var extractor = new Extractor();
        var path = GetTestArchivePath("TestData.tar");

        var results = await extractor.ExtractAsync(path, new ExtractorOptions() { Recurse = false }).ToListAsync();

        Assert.NotEmpty(results);
        foreach (var entry in results)
        {
            AssertTarEntryMetadata(entry);
        }
    }

    [Fact]
    public void TarEntries_HaveMetadata_Sync()
    {
        var extractor = new Extractor();
        var path = GetTestArchivePath("TestData.tar");

        var results = extractor.Extract(path, new ExtractorOptions() { Recurse = false }).ToList();

        Assert.NotEmpty(results);
        foreach (var entry in results)
        {
            AssertTarEntryMetadata(entry);
        }
    }

    [Fact]
    public async Task ArEntries_HaveMetadata()
    {
        var extractor = new Extractor();
        var path = GetTestArchivePath("TestData.a");

        var results = await extractor.ExtractAsync(path, new ExtractorOptions() { Recurse = false }).ToListAsync();

        Assert.NotEmpty(results);
        foreach (var entry in results)
        {
            AssertArEntryMetadata(entry);
        }
    }

    [Fact]
    public void ArEntries_HaveMetadata_Sync()
    {
        var extractor = new Extractor();
        var path = GetTestArchivePath("TestData.a");

        var results = extractor.Extract(path, new ExtractorOptions() { Recurse = false }).ToList();

        Assert.NotEmpty(results);
        foreach (var entry in results)
        {
            AssertArEntryMetadata(entry);
        }
    }

    /// <summary>
    /// A freshly constructed metadata object reports null for every field, including the
    /// flags derived from the (unset) mode.
    /// </summary>
    [Fact]
    public void MetadataDefaults_AreNull()
    {
        var metadata = new FileEntryMetadata();

        Assert.Null(metadata.Mode);
        Assert.Null(metadata.Uid);
        Assert.Null(metadata.Gid);
        Assert.Null(metadata.IsExecutable);
        Assert.Null(metadata.IsSetUid);
        Assert.Null(metadata.IsSetGid);
    }

    [Fact]
    public void IsExecutable_DerivedFromMode()
    {
        // 0755 (octal) = 493 (decimal)
        var metadata = new FileEntryMetadata { Mode = 493 };
        Assert.True(metadata.IsExecutable);
        Assert.False(metadata.IsSetUid);
        Assert.False(metadata.IsSetGid);

        // 0644 (octal) = 420 (decimal)
        metadata = new FileEntryMetadata { Mode = RegularFileMode };
        Assert.False(metadata.IsExecutable);
    }

    [Fact]
    public void SetUidSetGid_DerivedFromMode()
    {
        // 04755 (octal) = 2541 (decimal) — setuid + rwxr-xr-x
        var metadata = new FileEntryMetadata { Mode = 2541 };
        Assert.True(metadata.IsSetUid);
        Assert.False(metadata.IsSetGid);
        Assert.True(metadata.IsExecutable);

        // 02755 (octal) = 1517 (decimal) — setgid + rwxr-xr-x
        metadata = new FileEntryMetadata { Mode = 1517 };
        Assert.False(metadata.IsSetUid);
        Assert.True(metadata.IsSetGid);
        Assert.True(metadata.IsExecutable);
    }

    /// <summary>
    /// A <see cref="FileEntry"/> built directly from a stream (not produced by an
    /// extractor) carries no metadata.
    /// </summary>
    [Fact]
    public void FileEntry_MetadataDefaultsToNull()
    {
        using var stream = new MemoryStream(new byte[] { 0 });
        var entry = new FileEntry("test.txt", stream);

        Assert.Null(entry.Metadata);
    }
}
90 changes: 80 additions & 10 deletions RecursiveExtractor/ArFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry, Extract
// The name length is included in the total size reported in the header
CopyStreamBytes(fileEntry.Content, entryStream, size - nameLength);

yield return new FileEntry(Encoding.ASCII.GetString(nameSpan).TrimEnd('/'), entryStream, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff);
yield return new FileEntry(Encoding.ASCII.GetString(nameSpan).TrimEnd('/'), entryStream, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
}
else if (filename.Equals('/'))
Expand Down Expand Up @@ -149,7 +152,10 @@ public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry, Extract

var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, innerSize);
CopyStreamBytes(fileEntry.Content, entryStream, innerSize);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
}
fileEntry.Content.Position = fileEntry.Content.Length - 1;
Expand Down Expand Up @@ -220,7 +226,10 @@ public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry, Extract

var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, innerSize);
CopyStreamBytes(fileEntry.Content, entryStream, innerSize);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
}
fileEntry.Content.Position = fileEntry.Content.Length - 1;
Expand All @@ -241,14 +250,20 @@ public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry, Extract
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, size);
CopyStreamBytes(fileEntry.Content, entryStream, size);

yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true); ;
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
else
{
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, size);
CopyStreamBytes(fileEntry.Content, entryStream, size);

yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
}
else
Expand Down Expand Up @@ -329,7 +344,10 @@ public static async IAsyncEnumerable<FileEntry> GetFileEntriesAsync(FileEntry fi
// The name length is included in the total size reported in the header
await CopyStreamBytesAsync(fileEntry.Content, entryStream, size - nameLength).ConfigureAwait(false);

yield return new FileEntry(Encoding.ASCII.GetString(nameSpan).TrimEnd('/'), entryStream, fileEntry, true);
yield return new FileEntry(Encoding.ASCII.GetString(nameSpan).TrimEnd('/'), entryStream, fileEntry, true)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
}
else if (filename.Equals('/'))
Expand Down Expand Up @@ -394,7 +412,10 @@ public static async IAsyncEnumerable<FileEntry> GetFileEntriesAsync(FileEntry fi
}
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, innerSize);
await CopyStreamBytesAsync(fileEntry.Content, entryStream, innerSize).ConfigureAwait(false);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
}
fileEntry.Content.Position = fileEntry.Content.Length - 1;
Expand Down Expand Up @@ -465,7 +486,10 @@ public static async IAsyncEnumerable<FileEntry> GetFileEntriesAsync(FileEntry fi

var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, innerSize);
await CopyStreamBytesAsync(fileEntry.Content, entryStream, innerSize).ConfigureAwait(false);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
}
fileEntry.Content.Position = fileEntry.Content.Length - 1;
Expand All @@ -485,13 +509,19 @@ public static async IAsyncEnumerable<FileEntry> GetFileEntriesAsync(FileEntry fi
}
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, size);
CopyStreamBytes(fileEntry.Content, entryStream, size);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
else
{
var entryStream = StreamFactory.GenerateAppropriateBackingStream(options, size);
await CopyStreamBytesAsync(fileEntry.Content, entryStream, size).ConfigureAwait(false);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true);
yield return new FileEntry(filename.TrimEnd('/'), entryStream, fileEntry, true)
{
Metadata = ParseArMetadata(headerBuffer)
};
}
}
else
Expand Down Expand Up @@ -570,6 +600,46 @@ internal static async Task<long> CopyStreamBytesAsync(Stream input, Stream outpu

private const int bufferSize = 4096;

/// <summary>
/// Parse file metadata (UID, GID, mode) from an ar file header buffer.
/// </summary>
/// <remarks>
/// Each field is parsed independently; a field that is blank or malformed is left unset
/// rather than failing the whole header. This method does not throw on malformed header
/// content, so a crafted archive cannot abort entry enumeration through metadata parsing.
/// </remarks>
/// <param name="headerBuffer">The 60-byte ar header</param>
/// <returns>
/// A <see cref="FileEntryMetadata"/> with parsed values, or null if no field could be parsed
/// (including when <paramref name="headerBuffer"/> is null or too short to contain the fields).
/// </returns>
internal static FileEntryMetadata? ParseArMetadata(byte[] headerBuffer)
{
    // Fixed-width ASCII fields of the ar header that are read here.
    const int uidOffset = 28;   // ar_uid: bytes 28-33 (6 bytes), decimal
    const int uidLength = 6;
    const int gidOffset = 34;   // ar_gid: bytes 34-39 (6 bytes), decimal
    const int gidLength = 6;
    const int modeOffset = 40;  // ar_mode: bytes 40-47 (8 bytes), octal
    const int modeLength = 8;

    // A missing or truncated header cannot contain the fields; report "no metadata"
    // instead of throwing from the slicing below.
    if (headerBuffer is null || headerBuffer.Length < modeOffset + modeLength)
    {
        return null;
    }

    var metadata = new FileEntryMetadata();
    var hasData = false;

    // The header is a machine-readable format, so parse with the invariant culture
    // rather than whatever culture the current thread happens to have.
    var uidString = Encoding.ASCII.GetString(headerBuffer, uidOffset, uidLength).Trim();
    if (int.TryParse(uidString, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out var uid))
    {
        metadata.Uid = uid;
        hasData = true;
    }

    var gidString = Encoding.ASCII.GetString(headerBuffer, gidOffset, gidLength).Trim();
    if (int.TryParse(gidString, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out var gid))
    {
        metadata.Gid = gid;
        hasData = true;
    }

    var modeString = Encoding.ASCII.GetString(headerBuffer, modeOffset, modeLength).Trim();
    try
    {
        if (!string.IsNullOrEmpty(modeString))
        {
            metadata.Mode = Convert.ToInt64(modeString, 8);
            hasData = true;
        }
    }
    catch (FormatException) { }
    catch (OverflowException) { }
    // Convert.ToInt64(string, 8) throws ArgumentException (not FormatException) when the
    // text carries a leading '-' in a non-decimal base; treat that as an unparseable mode too.
    catch (ArgumentException) { }

    return hasData ? metadata : null;
}

private readonly static NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();
}
}
16 changes: 16 additions & 0 deletions RecursiveExtractor/Extractors/RarExtractor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, Extra
var newFileEntry = await FileEntry.FromStreamAsync(name, entry.OpenEntryStream(), fileEntry, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false);
if (newFileEntry != null)
{
try
{
if (entry.Attrib.HasValue)
{
newFileEntry.Metadata = new FileEntryMetadata { Mode = entry.Attrib.Value };
}
}
catch (Exception e) { Logger.Trace("Failed to read file attributes for {0} in {1} archive {2}: {3}", entry.Key, ArchiveFileType.RAR, fileEntry.FullPath, e.Message); }
if (options.Recurse || topLevel)
{
await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false))
Expand Down Expand Up @@ -158,6 +166,14 @@ public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions opti
}
if (newFileEntry != null)
{
try
{
if (entry.Attrib.HasValue)
{
newFileEntry.Metadata = new FileEntryMetadata { Mode = entry.Attrib.Value };
}
}
catch (Exception e) { Logger.Trace("Failed to read file attributes for {0} in {1} archive {2}: {3}", entry.Key, ArchiveFileType.RAR, fileEntry.FullPath, e.Message); }
if (options.Recurse || topLevel)
{
foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false))
Expand Down
17 changes: 17 additions & 0 deletions RecursiveExtractor/Extractors/SevenZipExtractor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, Extra

if (newFileEntry != null)
{
try
{
if (entry.Attrib.HasValue)
{
newFileEntry.Metadata = new FileEntryMetadata { Mode = entry.Attrib.Value };
}
}
catch (Exception e) { Logger.Trace("Failed to read file attributes for {0} in {1} archive {2}: {3}", entry.Key, ArchiveFileType.P7ZIP, fileEntry.FullPath, e.Message); }
if (options.Recurse || topLevel)
{
await foreach (var innerEntry in Context.ExtractAsync(newFileEntry, options, governor, false))
Expand Down Expand Up @@ -157,6 +165,15 @@ public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions opti
var name = (entry.Key ?? string.Empty).Replace('/', Path.DirectorySeparatorChar);
var newFileEntry = new FileEntry(name, entry.OpenEntryStream(), fileEntry, createTime: entry.CreatedTime, modifyTime: entry.LastModifiedTime, accessTime: entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff);

try
{
if (entry.Attrib.HasValue)
{
newFileEntry.Metadata = new FileEntryMetadata { Mode = entry.Attrib.Value };
}
}
catch (Exception e) { Logger.Trace("Failed to read file attributes for {0} in {1} archive {2}: {3}", entry.Key, ArchiveFileType.P7ZIP, fileEntry.FullPath, e.Message); }

if (options.Recurse || topLevel)
{
foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false))
Expand Down
10 changes: 8 additions & 2 deletions RecursiveExtractor/Extractors/TarExtractor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,10 @@ public async IAsyncEnumerable<FileEntry> ExtractAsync(FileEntry fileEntry, Extra
name = name[2..];
}

var newFileEntry = new FileEntry(name, fs, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff);
var newFileEntry = new FileEntry(name, fs, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff)
{
Metadata = new FileEntryMetadata { Mode = tarEntry.Mode, Uid = tarEntry.UserID, Gid = tarEntry.GroupId }
};

if (options.Recurse || topLevel)
{
Expand Down Expand Up @@ -144,7 +147,10 @@ public IEnumerable<FileEntry> Extract(FileEntry fileEntry, ExtractorOptions opti
{
name = name[2..];
}
var newFileEntry = new FileEntry(name, fs, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff);
var newFileEntry = new FileEntry(name, fs, fileEntry, true, memoryStreamCutoff: options.MemoryStreamCutoff)
{
Metadata = new FileEntryMetadata { Mode = tarEntry.Mode, Uid = tarEntry.UserID, Gid = tarEntry.GroupId }
};

if (options.Recurse || topLevel)
{
Expand Down
Loading