Skip to content

Commit

Permalink
Implement streaming compression for compressed ELF sections.
Browse files Browse the repository at this point in the history
  • Loading branch information
resistor committed Apr 1, 2024
1 parent 5e5b656 commit 6f74a96
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 14 deletions.
11 changes: 11 additions & 0 deletions llvm/include/llvm/Support/Compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
namespace llvm {
template <typename T> class SmallVectorImpl;
class Error;
class raw_ostream;

// None indicates no compression. The other members are a subset of
// compression::Format, which is used for compressed debug sections in some
Expand All @@ -44,6 +45,9 @@ void compress(ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &CompressedBuffer,
int Level = DefaultCompression);

// Compress Input with zlib at the given Level and write the compressed bytes
// to OS incrementally, rather than into a caller-provided buffer as
// `compress` does.
void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
int Level = DefaultCompression);

Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize);

Expand All @@ -65,6 +69,9 @@ void compress(ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &CompressedBuffer,
int Level = DefaultCompression, bool EnableLdm = false);

// Compress Input with zstd at the given Level and write the compressed bytes
// to OS incrementally, rather than into a caller-provided buffer as
// `compress` does. EnableLdm turns on zstd long distance matching.
void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
int Level = DefaultCompression, bool EnableLdm = false);

Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize);

Expand Down Expand Up @@ -116,6 +123,10 @@ const char *getReasonIfUnsupported(Format F);
void compress(Params P, ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &Output);

// Compress Input into a raw_ostream, without buffering the entire compressed
// output. Compression parameters are the same as for `compress`: the format
// in P selects the zlib or zstd implementation, which receives P's level
// (and, for zstd, its long-distance-matching setting).
void compressToStream(Params P, ArrayRef<uint8_t> Input, raw_ostream &OS);

// Decompress Input. The uncompressed size must be available.
Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
uint8_t *Output, size_t UncompressedSize);
Expand Down
22 changes: 8 additions & 14 deletions llvm/lib/MC/ELFObjectWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,7 @@ struct ELFWriter {

uint64_t align(Align Alignment);

bool maybeWriteCompression(uint32_t ChType, uint64_t Size,
SmallVectorImpl<uint8_t> &CompressedContents,
Align Alignment);
bool maybeWriteCompression(uint32_t ChType, uint64_t Size, Align Alignment);

public:
ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS,
Expand Down Expand Up @@ -848,12 +846,9 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
}

// Include the debug info compression header.
bool ELFWriter::maybeWriteCompression(
uint32_t ChType, uint64_t Size,
SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
uint64_t HdrSize =
is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
if (Size <= HdrSize + CompressedContents.size())
bool ELFWriter::maybeWriteCompression(uint32_t ChType, uint64_t Size,
Align Alignment) {
if (Size <= 128)
return false;
// Platform specific header is followed by compressed data.
if (is64Bit()) {
Expand Down Expand Up @@ -904,10 +899,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
ChType = ELF::ELFCOMPRESS_ZSTD;
break;
}
compression::compress(compression::Params(CompressionType), Uncompressed,
Compressed);
if (!maybeWriteCompression(ChType, UncompressedData.size(), Compressed,
Sec.getAlign())) {

if (!maybeWriteCompression(ChType, UncompressedData.size(), Sec.getAlign())) {
W.OS << UncompressedData;
return;
}
Expand All @@ -916,7 +909,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
// Alignment field should reflect the requirements of
// the compressed section header.
Section.setAlignment(is64Bit() ? Align(8) : Align(4));
W.OS << toStringRef(Compressed);
compression::compressToStream(compression::Params(CompressionType),
Uncompressed, W.OS);
}

void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
Expand Down
110 changes: 110 additions & 0 deletions llvm/lib/Support/Compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/Support/Compression.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
Expand Down Expand Up @@ -55,6 +56,18 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
}
}

// Streams the compressed form of Input into OS, forwarding to the zlib or
// zstd backend chosen by P.format. Formats not listed below produce no
// output.
void compression::compressToStream(Params P, ArrayRef<uint8_t> Input,
                                   raw_ostream &OS) {
  switch (P.format) {
  case compression::Format::Zlib:
    // zlib only takes a compression level.
    zlib::compressToStream(Input, OS, P.level);
    return;
  case compression::Format::Zstd:
    // zstd additionally honours the long-distance-matching switch.
    zstd::compressToStream(Input, OS, P.level, P.zstdEnableLdm);
    return;
  }
}

Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
uint8_t *Output, size_t UncompressedSize) {
switch (formatFor(T)) {
Expand Down Expand Up @@ -120,6 +133,49 @@ void zlib::compress(ArrayRef<uint8_t> Input,
CompressedBuffer.truncate(CompressedSize);
}

// Compresses Input with zlib's deflate at the given Level and writes the
// compressed bytes to OS as they are produced, so the complete compressed
// output is never held in memory at once.
//
// Failure to initialize the deflate state is reported through
// report_bad_alloc_error; a Z_STREAM_ERROR or Z_BUF_ERROR returned by deflate
// is reported through report_fatal_error.
void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
                            int Level) {
  // Allocate a fixed size buffer to hold the output.
  constexpr size_t OutBufferSize = 4096;
  auto OutBuffer = std::make_unique<Bytef[]>(OutBufferSize);

  z_stream ZStream;
  ZStream.zalloc = Z_NULL;
  ZStream.zfree = Z_NULL;
  ZStream.opaque = Z_NULL;

  int ZErr = deflateInit(&ZStream, Level);
  if (ZErr != Z_OK)
    report_bad_alloc_error("Failed to create ZStream");

  // Ensure that the z_stream is cleaned up on all exit paths.
  auto DeflateEndOnExit = make_scope_exit([&]() { deflateEnd(&ZStream); });

  // z_stream::avail_in is a uInt, which can be narrower than size_t. Hand the
  // input to zlib in pieces of at most that size so that very large inputs
  // are not silently truncated. deflate advances next_in itself, so only
  // avail_in has to be refilled.
  constexpr size_t MaxInChunk = static_cast<uInt>(-1);
  size_t InRemaining = Input.size();
  ZStream.next_in =
      reinterpret_cast<Bytef *>(const_cast<uint8_t *>(Input.data()));
  ZStream.avail_in = 0;

  // Repeatedly deflate into the output buffer and flush it into the
  // output stream. Repeat until we have drained the entire compression
  // state.
  do {
    if (ZStream.avail_in == 0 && InRemaining != 0) {
      const size_t Chunk = InRemaining < MaxInChunk ? InRemaining : MaxInChunk;
      ZStream.avail_in = static_cast<uInt>(Chunk);
      InRemaining -= Chunk;
    }

    ZStream.next_out = OutBuffer.get();
    ZStream.avail_out = OutBufferSize;

    // Only ask zlib to finish once every input byte has been handed over.
    ZErr = deflate(&ZStream, InRemaining == 0 ? Z_FINISH : Z_NO_FLUSH);
    if (ZErr == Z_STREAM_ERROR || ZErr == Z_BUF_ERROR)
      report_fatal_error(convertZlibCodeToString(ZErr));

    const size_t Produced = OutBufferSize - ZStream.avail_out;

    // Tell MemorySanitizer that zlib output buffer is fully initialized.
    // This avoids a false report when running LLVM with uninstrumented ZLib.
    __msan_unpoison(OutBuffer.get(), Produced);

    if (Produced > 0)
      OS.write(reinterpret_cast<char *>(OutBuffer.get()), Produced);
  } while (ZErr != Z_STREAM_END);
}

Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize) {
int Res = ::uncompress((Bytef *)Output, (uLongf *)&UncompressedSize,
Expand Down Expand Up @@ -148,6 +204,10 @@ void zlib::compress(ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
llvm_unreachable("zlib::compress is unavailable");
}
// Stub used when LLVM is built without zlib; callers must not reach it.
// The default for Level is supplied by the declaration in Compression.h and
// must not be repeated on this out-of-line definition.
void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
                            int Level) {
  llvm_unreachable("zlib::compressToStream is unavailable");
}
Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
size_t &UncompressedSize) {
llvm_unreachable("zlib::decompress is unavailable");
Expand Down Expand Up @@ -201,6 +261,51 @@ void zstd::compress(ArrayRef<uint8_t> Input,
CompressedBuffer.truncate(CompressedSize);
}

// Compresses Input with zstd at the given Level and writes the compressed
// bytes to OS as they are produced, so the complete compressed output is
// never held in memory at once. EnableLdm toggles zstd's long distance
// matching.
//
// Failure to create or configure the compression stream is reported through
// report_bad_alloc_error; an error returned while compressing is reported
// through report_fatal_error.
void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS, int Level,
                            bool EnableLdm) {
  // Allocate a buffer to hold the output, using the size zstd recommends for
  // streaming output.
  const size_t OutBufferSize = ZSTD_CStreamOutSize();
  auto OutBuffer = std::make_unique<char[]>(OutBufferSize);

  ZSTD_CStream *CStream = ZSTD_createCStream();
  if (!CStream)
    report_bad_alloc_error("Failed to create ZSTD_CCtx");

  // Ensure that the ZSTD_CStream is cleaned up on all exit paths.
  auto FreeCStreamOnExit =
      make_scope_exit([=]() { ZSTD_freeCStream(CStream); });

  if (ZSTD_isError(ZSTD_CCtx_setParameter(
          CStream, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
    report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
  }

  if (ZSTD_isError(
          ZSTD_CCtx_setParameter(CStream, ZSTD_c_compressionLevel, Level))) {
    report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
  }

  ZSTD_inBuffer ZInput = {Input.data(), Input.size(), 0};

  // Repeatedly compress into the output buffer and flush it into the
  // output stream. Repeat until we have drained the entire compression
  // state, which the loop detects by ZSTD_compressStream2 returning 0.
  size_t ZRet;
  do {
    ZSTD_outBuffer ZOutput = {OutBuffer.get(), OutBufferSize, 0};
    ZRet = ZSTD_compressStream2(CStream, &ZOutput, &ZInput, ZSTD_e_end);
    if (ZSTD_isError(ZRet))
      report_fatal_error(ZSTD_getErrorName(ZRet));

    // Tell MemorySanitizer that zstd output buffer is fully initialized.
    // This avoids a false report when running LLVM with uninstrumented ZStd.
    __msan_unpoison(OutBuffer.get(), ZOutput.pos);

    if (ZOutput.pos > 0)
      OS.write(OutBuffer.get(), ZOutput.pos);
  } while (ZRet != 0);
}

Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize) {
const size_t Res = ::ZSTD_decompress(
Expand Down Expand Up @@ -231,6 +336,11 @@ void zstd::compress(ArrayRef<uint8_t> Input,
bool EnableLdm) {
llvm_unreachable("zstd::compress is unavailable");
}
// Stub used when LLVM is built without zstd; callers must not reach it.
// The defaults for Level and EnableLdm are supplied by the declaration in
// Compression.h and must not be repeated on this out-of-line definition.
void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
                            int Level, bool EnableLdm) {
  llvm_unreachable("zstd::compressToStream is unavailable");
}
Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize) {
llvm_unreachable("zstd::decompress is unavailable");
Expand Down
18 changes: 18 additions & 0 deletions llvm/unittests/Support/CompressionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ static void testZlibCompression(StringRef Input, int Level) {
SmallVector<uint8_t, 0> Uncompressed;
zlib::compress(arrayRefFromStringRef(Input), Compressed, Level);

// Check that stream compression results are the same as bulk compression.
SmallVector<char, 0> StreamCompressed;
raw_svector_ostream Stream(StreamCompressed);
zlib::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
EXPECT_EQ(StreamCompressed.size(), Compressed.size());
for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
}

// Check that uncompressed buffer is the same as original.
Error E = zlib::decompress(Compressed, Uncompressed, Input.size());
EXPECT_FALSE(std::move(E));
Expand Down Expand Up @@ -73,6 +82,15 @@ static void testZstdCompression(StringRef Input, int Level) {
SmallVector<uint8_t, 0> Uncompressed;
zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);

// Check that stream compression results are the same as bulk compression.
SmallVector<char, 0> StreamCompressed;
raw_svector_ostream Stream(StreamCompressed);
zstd::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
EXPECT_EQ(StreamCompressed.size(), Compressed.size());
for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
}

// Check that uncompressed buffer is the same as original.
Error E = zstd::decompress(Compressed, Uncompressed, Input.size());
EXPECT_FALSE(std::move(E));
Expand Down

0 comments on commit 6f74a96

Please sign in to comment.