Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement streaming compression for compressed ELF sections. #87211

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

resistor
Copy link
Collaborator

@resistor resistor commented Apr 1, 2024

No description provided.

@llvmbot llvmbot added mc Machine (object) code llvm:support labels Apr 1, 2024
@resistor resistor requested a review from dwblaikie April 1, 2024 01:23
@llvmbot
Copy link
Collaborator

llvmbot commented Apr 1, 2024

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-llvm-support

Author: Owen Anderson (resistor)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/87211.diff

4 Files Affected:

  • (modified) llvm/include/llvm/Support/Compression.h (+11)
  • (modified) llvm/lib/MC/ELFObjectWriter.cpp (+8-14)
  • (modified) llvm/lib/Support/Compression.cpp (+110)
  • (modified) llvm/unittests/Support/CompressionTest.cpp (+18)
diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index 2a8da9e96d356f..09457df95250c0 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -19,6 +19,7 @@
 namespace llvm {
 template <typename T> class SmallVectorImpl;
 class Error;
+class raw_ostream;
 
 // None indicates no compression. The other members are a subset of
 // compression::Format, which is used for compressed debug sections in some
@@ -44,6 +45,9 @@ void compress(ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &CompressedBuffer,
               int Level = DefaultCompression);
 
+void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                      int Level = DefaultCompression);
+
 Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                  size_t &UncompressedSize);
 
@@ -65,6 +69,9 @@ void compress(ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &CompressedBuffer,
               int Level = DefaultCompression, bool EnableLdm = false);
 
+void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                      int Level = DefaultCompression, bool EnableLdm = false);
+
 Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                  size_t &UncompressedSize);
 
@@ -116,6 +123,10 @@ const char *getReasonIfUnsupported(Format F);
 void compress(Params P, ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &Output);
 
+// Compress Input into a raw_ostream, without buffering the entire compressed
+// output. Compression parameters are the same as for `compress`.
+void compressToStream(Params P, ArrayRef<uint8_t> Input, raw_ostream &OS);
+
 // Decompress Input. The uncompressed size must be available.
 Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
                  uint8_t *Output, size_t UncompressedSize);
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index f4c6cbc8dd4442..4b345f1cde1f1a 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -145,9 +145,7 @@ struct ELFWriter {
 
   uint64_t align(Align Alignment);
 
-  bool maybeWriteCompression(uint32_t ChType, uint64_t Size,
-                             SmallVectorImpl<uint8_t> &CompressedContents,
-                             Align Alignment);
+  bool maybeWriteCompression(uint32_t ChType, uint64_t Size, Align Alignment);
 
 public:
   ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS,
@@ -848,12 +846,9 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
 }
 
 // Include the debug info compression header.
-bool ELFWriter::maybeWriteCompression(
-    uint32_t ChType, uint64_t Size,
-    SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
-  uint64_t HdrSize =
-      is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
-  if (Size <= HdrSize + CompressedContents.size())
+bool ELFWriter::maybeWriteCompression(uint32_t ChType, uint64_t Size,
+                                      Align Alignment) {
+  if (Size <= 128)
     return false;
   // Platform specific header is followed by compressed data.
   if (is64Bit()) {
@@ -904,10 +899,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
     ChType = ELF::ELFCOMPRESS_ZSTD;
     break;
   }
-  compression::compress(compression::Params(CompressionType), Uncompressed,
-                        Compressed);
-  if (!maybeWriteCompression(ChType, UncompressedData.size(), Compressed,
-                             Sec.getAlign())) {
+
+  if (!maybeWriteCompression(ChType, UncompressedData.size(), Sec.getAlign())) {
     W.OS << UncompressedData;
     return;
   }
@@ -916,7 +909,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
   // Alignment field should reflect the requirements of
   // the compressed section header.
   Section.setAlignment(is64Bit() ? Align(8) : Align(4));
-  W.OS << toStringRef(Compressed);
+  compression::compressToStream(compression::Params(CompressionType),
+                                Uncompressed, W.OS);
 }
 
 void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index badaf68ab59cd0..2c5380bd4e22c2 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Compression.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Config/config.h"
@@ -55,6 +56,18 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
   }
 }
 
+// Route a streaming compression request to the backend chosen by P.format,
+// forwarding the level (and, for zstd, the long-distance-matching flag).
+void compression::compressToStream(Params P, ArrayRef<uint8_t> Input,
+                                   raw_ostream &OS) {
+  switch (P.format) {
+  case compression::Format::Zstd:
+    return zstd::compressToStream(Input, OS, P.level, P.zstdEnableLdm);
+  case compression::Format::Zlib:
+    return zlib::compressToStream(Input, OS, P.level);
+  }
+}
+
 Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
                               uint8_t *Output, size_t UncompressedSize) {
   switch (formatFor(T)) {
@@ -120,6 +133,49 @@ void zlib::compress(ArrayRef<uint8_t> Input,
     CompressedBuffer.truncate(CompressedSize);
 }
 
+// Deflates Input at the given Level and writes the zlib stream to OS in
+// chunks of at most OutBufferSize bytes, so the full output is never buffered.
+void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level) {
+  // Allocate a fixed size buffer to hold the output.
+  constexpr size_t OutBufferSize = 4096;
+  auto OutBuffer = std::make_unique<Bytef[]>(OutBufferSize);
+
+  z_stream ZStream;
+  ZStream.zalloc = Z_NULL;
+  ZStream.zfree = Z_NULL;
+  ZStream.opaque = Z_NULL;
+
+  int ZErr = deflateInit(&ZStream, Level);
+  if (ZErr != Z_OK)
+    report_bad_alloc_error("Failed to create ZStream");
+
+  // Ensure that the z_stream is cleaned up on all exit paths.
+  auto DeflateEndOnExit = make_scope_exit([&]() { deflateEnd(&ZStream); });
+
+  // NOTE(review): avail_in is a uInt; a larger Input.size() is narrowed here.
+  ZStream.next_in =
+      reinterpret_cast<Bytef *>(const_cast<uint8_t *>(Input.data()));
+  ZStream.avail_in = Input.size();
+
+  // Z_FINISH makes each deflate() call emit as much as fits in OutBuffer;
+  // keep draining into OS until zlib reports Z_STREAM_END.
+  while (ZErr != Z_STREAM_END) {
+    ZStream.next_out = OutBuffer.get();
+    ZStream.avail_out = OutBufferSize;
+    ZErr = deflate(&ZStream, Z_FINISH);
+    if (ZErr == Z_STREAM_ERROR || ZErr == Z_BUF_ERROR)
+      report_fatal_error(convertZlibCodeToString(ZErr));
+
+    // Tell MemorySanitizer that zlib output buffer is fully initialized.
+    // This avoids a false report when running LLVM with uninstrumented ZLib.
+    const size_t Produced = OutBufferSize - ZStream.avail_out;
+    __msan_unpoison(OutBuffer.get(), Produced);
+    if (Produced > 0)
+      OS.write(reinterpret_cast<char *>(OutBuffer.get()), Produced);
+  }
+}
+
 Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                        size_t &UncompressedSize) {
   int Res = ::uncompress((Bytef *)Output, (uLongf *)&UncompressedSize,
@@ -148,6 +204,10 @@ void zlib::compress(ArrayRef<uint8_t> Input,
                     SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
   llvm_unreachable("zlib::compress is unavailable");
 }
+void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level) { // Default is on the declaration.
+  llvm_unreachable("zlib::compressToStream is unavailable");
+}
 Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
                        size_t &UncompressedSize) {
   llvm_unreachable("zlib::decompress is unavailable");
@@ -201,6 +261,51 @@ void zstd::compress(ArrayRef<uint8_t> Input,
     CompressedBuffer.truncate(CompressedSize);
 }
 
+// Compresses Input with zstd at the given Level (optionally with long
+// distance matching) and writes the result to OS one buffer at a time.
+void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS, int Level,
+                            bool EnableLdm) {
+  // Allocate a buffer to hold the output.
+  size_t OutBufferSize = ZSTD_CStreamOutSize();
+  auto OutBuffer = std::make_unique<char[]>(OutBufferSize);
+
+  ZSTD_CStream *CStream = ZSTD_createCStream();
+  if (!CStream)
+    report_bad_alloc_error("Failed to create ZSTD_CCtx");
+
+  // Ensure that the ZSTD_CStream is cleaned up on all exit paths.
+  auto FreeCStreamOnExit =
+      make_scope_exit([=]() { ZSTD_freeCStream(CStream); });
+
+  if (ZSTD_isError(ZSTD_CCtx_setParameter(
+          CStream, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
+    report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
+  }
+
+  if (ZSTD_isError(
+          ZSTD_CCtx_setParameter(CStream, ZSTD_c_compressionLevel, Level))) {
+    report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
+  }
+
+  ZSTD_inBuffer ZInput = {Input.data(), Input.size(), 0};
+
+  // ZSTD_e_end asks zstd to finish the stream; a nonzero return means more
+  // output is still pending, so keep draining the buffer into OS until 0.
+  size_t ZRet;
+  do {
+    ZSTD_outBuffer ZOutput = {OutBuffer.get(), OutBufferSize, 0};
+    ZRet = ZSTD_compressStream2(CStream, &ZOutput, &ZInput, ZSTD_e_end);
+    if (ZSTD_isError(ZRet))
+      report_fatal_error(ZSTD_getErrorName(ZRet));
+
+    // Tell MemorySanitizer that zstd output buffer is fully initialized.
+    // This avoids a false report when running LLVM with uninstrumented ZStd.
+    __msan_unpoison(OutBuffer.get(), ZOutput.pos);
+    if (ZOutput.pos > 0)
+      OS.write(OutBuffer.get(), ZOutput.pos);
+  } while (ZRet != 0);
+}
+
 Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                        size_t &UncompressedSize) {
   const size_t Res = ::ZSTD_decompress(
@@ -231,6 +336,11 @@ void zstd::compress(ArrayRef<uint8_t> Input,
                     bool EnableLdm) {
   llvm_unreachable("zstd::compress is unavailable");
 }
+// Stub used when zstd is unavailable. Default arguments live on the
+// declaration in Compression.h and must not be repeated on the definition.
+void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS, int Level,
+                            bool EnableLdm) {
+  llvm_unreachable("zstd::compressToStream is unavailable");
+}
 Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                        size_t &UncompressedSize) {
   llvm_unreachable("zstd::decompress is unavailable");
diff --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp
index 5d326cafbe3a1c..ead1007404a5f3 100644
--- a/llvm/unittests/Support/CompressionTest.cpp
+++ b/llvm/unittests/Support/CompressionTest.cpp
@@ -28,6 +28,15 @@ static void testZlibCompression(StringRef Input, int Level) {
   SmallVector<uint8_t, 0> Uncompressed;
   zlib::compress(arrayRefFromStringRef(Input), Compressed, Level);
 
+  // Check that stream compression results are the same as bulk compression.
+  SmallVector<char, 0> StreamCompressed;
+  raw_svector_ostream Stream(StreamCompressed);
+  zlib::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
+  EXPECT_EQ(StreamCompressed.size(), Compressed.size());
+  for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
+    EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
+  }
+
   // Check that uncompressed buffer is the same as original.
   Error E = zlib::decompress(Compressed, Uncompressed, Input.size());
   EXPECT_FALSE(std::move(E));
@@ -73,6 +82,15 @@ static void testZstdCompression(StringRef Input, int Level) {
   SmallVector<uint8_t, 0> Uncompressed;
   zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);
 
+  // Check that stream compression results are the same as bulk compression.
+  SmallVector<char, 0> StreamCompressed;
+  raw_svector_ostream Stream(StreamCompressed);
+  zstd::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
+  EXPECT_EQ(StreamCompressed.size(), Compressed.size());
+  for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
+    EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
+  }
+
   // Check that uncompressed buffer is the same as original.
   Error E = zstd::decompress(Compressed, Uncompressed, Input.size());
   EXPECT_FALSE(std::move(E));

@resistor resistor requested a review from MaskRay April 6, 2024 14:42
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm:support mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

2 participants