Fix compress/decompress in LLVM Offloading API #150064

david-salinas · 2025-07-22T17:01:27Z

No description provided.

llvmbot · 2025-07-22T17:02:09Z

@llvm/pr-subscribers-llvm-binary-utilities

Author: David Salinas (david-salinas)

Changes

Patch is 31.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150064.diff

3 Files Affected:

(modified) llvm/include/llvm/Object/OffloadBundle.h (+54-25)
(modified) llvm/lib/Object/OffloadBundle.cpp (+319-166)
(modified) llvm/tools/llvm-objdump/OffloadDump.cpp (+18-9)

diff --git a/llvm/include/llvm/Object/OffloadBundle.h b/llvm/include/llvm/Object/OffloadBundle.h
index f4d5a1d878b8d..99f54ea4f28aa 100644
--- a/llvm/include/llvm/Object/OffloadBundle.h
+++ b/llvm/include/llvm/Object/OffloadBundle.h
@@ -32,29 +32,40 @@ namespace llvm {
 
 namespace object {
 
+// CompressedOffloadBundle represents the format for the compressed offload
+// bundles.
+//
+// The format is as follows:
+// - Magic Number (4 bytes) - A constant "CCOB".
+// - Version (2 bytes)
+// - Compression Method (2 bytes) - Uses the values from
+// llvm::compression::Format.
+// - Total file size (4 bytes in V2, 8 bytes in V3).
+// - Uncompressed Size (4 bytes in V1/V2, 8 bytes in V3).
+// - Truncated MD5 Hash (8 bytes).
+// - Compressed Data (variable length).
 class CompressedOffloadBundle {
 private:
-  static inline const size_t MagicSize = 4;
-  static inline const size_t VersionFieldSize = sizeof(uint16_t);
-  static inline const size_t MethodFieldSize = sizeof(uint16_t);
-  static inline const size_t FileSizeFieldSize = sizeof(uint32_t);
-  static inline const size_t UncompressedSizeFieldSize = sizeof(uint32_t);
-  static inline const size_t HashFieldSize = sizeof(uint64_t);
-  static inline const size_t V1HeaderSize =
-      MagicSize + VersionFieldSize + MethodFieldSize +
-      UncompressedSizeFieldSize + HashFieldSize;
-  static inline const size_t V2HeaderSize =
-      MagicSize + VersionFieldSize + FileSizeFieldSize + MethodFieldSize +
-      UncompressedSizeFieldSize + HashFieldSize;
   static inline const llvm::StringRef MagicNumber = "CCOB";
-  static inline const uint16_t Version = 2;
 
 public:
-  LLVM_ABI static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  struct CompressedBundleHeader {
+    unsigned Version;
+    llvm::compression::Format CompressionFormat;
+    std::optional<size_t> FileSize;
+    size_t UncompressedFileSize;
+    uint64_t Hash;
+
+    static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
+  };
+
+  static inline const uint16_t DefaultVersion = 2;
+
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
   compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
-           bool Verbose = false);
-  LLVM_ABI static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-  decompress(llvm::MemoryBufferRef &Input, bool Verbose = false);
+           uint16_t Version, bool Verbose = false);
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
 };
 
 /// Bundle entry in binary clang-offload-bundler format.
@@ -62,12 +73,15 @@ struct OffloadBundleEntry {
   uint64_t Offset = 0u;
   uint64_t Size = 0u;
   uint64_t IDLength = 0u;
-  StringRef ID;
-  OffloadBundleEntry(uint64_t O, uint64_t S, uint64_t I, StringRef T)
-      : Offset(O), Size(S), IDLength(I), ID(T) {}
+  std::string ID;
+  OffloadBundleEntry(uint64_t O, uint64_t S, uint64_t I, std::string T)
+      : Offset(O), Size(S), IDLength(I) {
+    ID.reserve(T.size());
+    ID = T;
+  }
   void dumpInfo(raw_ostream &OS) {
     OS << "Offset = " << Offset << ", Size = " << Size
-       << ", ID Length = " << IDLength << ", ID = " << ID;
+       << ", ID Length = " << IDLength << ", ID = " << ID << "\n";
   }
   void dumpURI(raw_ostream &OS, StringRef FilePath) {
     OS << ID.data() << "\tfile://" << FilePath << "#offset=" << Offset
@@ -82,15 +96,20 @@ class OffloadBundleFatBin {
   StringRef FileName;
   uint64_t NumberOfEntries;
   SmallVector<OffloadBundleEntry> Entries;
+  bool Decompressed;
 
 public:
+  std::unique_ptr<MemoryBuffer> DecompressedBuffer;
+
   SmallVector<OffloadBundleEntry> getEntries() { return Entries; }
   uint64_t getSize() const { return Size; }
   StringRef getFileName() const { return FileName; }
   uint64_t getNumEntries() const { return NumberOfEntries; }
+  bool isDecompressed() const { return Decompressed; }
 
   LLVM_ABI static Expected<std::unique_ptr<OffloadBundleFatBin>>
-  create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName);
+  create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName,
+         bool Decompress = false);
   LLVM_ABI Error extractBundle(const ObjectFile &Source);
 
   LLVM_ABI Error dumpEntryToCodeObject();
@@ -106,9 +125,15 @@ class OffloadBundleFatBin {
       Entry.dumpURI(outs(), FileName);
   }
 
-  OffloadBundleFatBin(MemoryBufferRef Source, StringRef File)
-      : FileName(File), NumberOfEntries(0),
-        Entries(SmallVector<OffloadBundleEntry>()) {}
+  OffloadBundleFatBin(MemoryBufferRef Source, StringRef File,
+                      bool Decompress = false)
+      : FileName(File), Decompressed(Decompress), NumberOfEntries(0),
+        Entries(SmallVector<OffloadBundleEntry>()) {
+    if (Decompress) {
+      DecompressedBuffer =
+          MemoryBuffer::getMemBufferCopy(Source.getBuffer(), File);
+    }
+  }
 };
 
 enum UriTypeT { FILE_URI, MEMORY_URI };
@@ -191,6 +216,10 @@ LLVM_ABI Error extractOffloadBundleFatBinary(
 LLVM_ABI Error extractCodeObject(const ObjectFile &Source, int64_t Offset,
                                  int64_t Size, StringRef OutputFileName);
 
+/// Extract code object memory from the given \p Buffer at \p Offset and of
+/// \p Size, and copy it into \p OutputFileName.
+LLVM_ABI Error extractCodeObject(MemoryBufferRef Buffer, int64_t Offset,
+                                 int64_t Size, StringRef OutputFileName);
 /// Extracts an Offload Bundle Entry given by URI
 LLVM_ABI Error extractOffloadBundleByURI(StringRef URIstr);
 
diff --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp
index 1e1042ce2bc21..57a8244a9b0e5 100644
--- a/llvm/lib/Object/OffloadBundle.cpp
+++ b/llvm/lib/Object/OffloadBundle.cpp
@@ -37,26 +37,63 @@ Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset,
 
   size_t Offset = 0;
   size_t NextbundleStart = 0;
+  StringRef Magic;
+  std::unique_ptr<MemoryBuffer> Buffer;
 
   // There could be multiple offloading bundles stored at this section.
-  while (NextbundleStart != StringRef::npos) {
-    std::unique_ptr<MemoryBuffer> Buffer =
+  while ((NextbundleStart != StringRef::npos) &&
+         (Offset < Contents.getBuffer().size())) {
+    Buffer =
         MemoryBuffer::getMemBuffer(Contents.getBuffer().drop_front(Offset), "",
                                    /*RequiresNullTerminator=*/false);
 
-    // Create the FatBinBindle object. This will also create the Bundle Entry
-    // list info.
-    auto FatBundleOrErr =
-        OffloadBundleFatBin::create(*Buffer, SectionOffset + Offset, FileName);
-    if (!FatBundleOrErr)
-      return FatBundleOrErr.takeError();
-
-    // Add current Bundle to list.
-    Bundles.emplace_back(std::move(**FatBundleOrErr));
+    if (identify_magic((*Buffer).getBuffer()) ==
+        file_magic::offload_bundle_compressed) {
+      Magic = StringRef("CCOB");
+      // decompress this bundle first.
+      NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
+      if (NextbundleStart == StringRef::npos) {
+        NextbundleStart = (*Buffer).getBuffer().size();
+      }
 
-    // Find the next bundle by searching for the magic string
-    StringRef Str = Buffer->getBuffer();
-    NextbundleStart = Str.find(StringRef("__CLANG_OFFLOAD_BUNDLE__"), 24);
+      ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
+          MemoryBuffer::getMemBuffer((*Buffer).getBuffer().take_front(
+                                         NextbundleStart /*- Magic.size()*/),
+                                     FileName, false);
+      if (std::error_code EC = CodeOrErr.getError())
+        return createFileError(FileName, EC);
+
+      Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+          CompressedOffloadBundle::decompress(**CodeOrErr, false);
+      if (!DecompressedBufferOrErr)
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Failed to decompress input: " +
+                llvm::toString(DecompressedBufferOrErr.takeError()));
+
+      auto FatBundleOrErr = OffloadBundleFatBin::create(
+          **DecompressedBufferOrErr, Offset, FileName, true);
+      if (!FatBundleOrErr)
+        return FatBundleOrErr.takeError();
+
+      // Add current Bundle to list.
+      Bundles.emplace_back(std::move(**FatBundleOrErr));
+
+    } else if (identify_magic((*Buffer).getBuffer()) ==
+               file_magic::offload_bundle) {
+      // Create the FatBinBindle object. This will also create the Bundle Entry
+      // list info.
+      auto FatBundleOrErr = OffloadBundleFatBin::create(
+          *Buffer, SectionOffset + Offset, FileName);
+      if (!FatBundleOrErr)
+        return FatBundleOrErr.takeError();
+
+      // Add current Bundle to list.
+      Bundles.emplace_back(std::move(**FatBundleOrErr));
+
+      Magic = StringRef("__CLANG_OFFLOAD_BUNDLE__");
+      NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
+    }
 
     if (NextbundleStart != StringRef::npos)
       Offset += NextbundleStart;
@@ -102,7 +139,8 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer,
       return errorCodeToError(object_error::parse_failed);
 
     auto Entry = std::make_unique<OffloadBundleEntry>(
-        EntryOffset + SectionOffset, EntrySize, EntryIDSize, EntryID);
+        EntryOffset + SectionOffset, EntrySize, EntryIDSize,
+        std::move(EntryID.str()));
 
     Entries.push_back(*Entry);
   }
@@ -112,18 +150,22 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer,
 
 Expected<std::unique_ptr<OffloadBundleFatBin>>
 OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset,
-                            StringRef FileName) {
+                            StringRef FileName, bool Decompress) {
   if (Buf.getBufferSize() < 24)
     return errorCodeToError(object_error::parse_failed);
 
   // Check for magic bytes.
-  if (identify_magic(Buf.getBuffer()) != file_magic::offload_bundle)
+  if ((identify_magic(Buf.getBuffer()) != file_magic::offload_bundle) &&
+      (identify_magic(Buf.getBuffer()) !=
+       file_magic::offload_bundle_compressed))
     return errorCodeToError(object_error::parse_failed);
 
-  OffloadBundleFatBin *TheBundle = new OffloadBundleFatBin(Buf, FileName);
+  OffloadBundleFatBin *TheBundle =
+      new OffloadBundleFatBin(Buf, FileName, Decompress);
 
   // Read the Bundle Entries
-  Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset);
+  Error Err =
+      TheBundle->readEntries(Buf.getBuffer(), Decompress ? 0 : SectionOffset);
   if (Err)
     return errorCodeToError(object_error::parse_failed);
 
@@ -172,28 +214,9 @@ Error object::extractOffloadBundleFatBinary(
                                  "COFF object files not supported.\n");
 
       MemoryBufferRef Contents(*Buffer, Obj.getFileName());
-
-      if (llvm::identify_magic(*Buffer) ==
-          llvm::file_magic::offload_bundle_compressed) {
-        // Decompress the input if necessary.
-        Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
-            CompressedOffloadBundle::decompress(Contents, false);
-
-        if (!DecompressedBufferOrErr)
-          return createStringError(
-              inconvertibleErrorCode(),
-              "Failed to decompress input: " +
-                  llvm::toString(DecompressedBufferOrErr.takeError()));
-
-        MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
-        if (Error Err = extractOffloadBundle(DecompressedInput, SectionOffset,
-                                             Obj.getFileName(), Bundles))
-          return Err;
-      } else {
-        if (Error Err = extractOffloadBundle(Contents, SectionOffset,
-                                             Obj.getFileName(), Bundles))
-          return Err;
-      }
+      if (Error Err = extractOffloadBundle(Contents, SectionOffset,
+                                           Obj.getFileName(), Bundles))
+        return Err;
     }
   }
   return Error::success();
@@ -221,6 +244,22 @@ Error object::extractCodeObject(const ObjectFile &Source, int64_t Offset,
   return Error::success();
 }
 
+Error object::extractCodeObject(const MemoryBufferRef Buffer, int64_t Offset,
+                                int64_t Size, StringRef OutputFileName) {
+  Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+      FileOutputBuffer::create(OutputFileName, Size);
+  if (!BufferOrErr)
+    return BufferOrErr.takeError();
+
+  std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+  std::copy(Buffer.getBufferStart() + Offset,
+            Buffer.getBufferStart() + Offset + Size, Buf->getBufferStart());
+  if (Error E = Buf->commit())
+    return E;
+
+  return Error::success();
+}
+
 // given a file name, offset, and size, extract data into a code object file,
 // into file <SourceFile>-offset<Offset>-size<Size>.co
 Error object::extractOffloadBundleByURI(StringRef URIstr) {
@@ -260,11 +299,233 @@ static std::string formatWithCommas(unsigned long long Value) {
 }
 
 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-CompressedOffloadBundle::decompress(llvm::MemoryBufferRef &Input,
+CompressedOffloadBundle::compress(llvm::compression::Params P,
+                                  const llvm::MemoryBuffer &Input,
+                                  uint16_t Version, bool Verbose) {
+  if (!llvm::compression::zstd::isAvailable() &&
+      !llvm::compression::zlib::isAvailable())
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Compression not supported");
+  llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
+                        OffloadBundlerTimerGroup);
+  if (Verbose)
+    HashTimer.startTimer();
+  llvm::MD5 Hash;
+  llvm::MD5::MD5Result Result;
+  Hash.update(Input.getBuffer());
+  Hash.final(Result);
+  uint64_t TruncatedHash = Result.low();
+  if (Verbose)
+    HashTimer.stopTimer();
+
+  SmallVector<uint8_t, 0> CompressedBuffer;
+  auto BufferUint8 = llvm::ArrayRef<uint8_t>(
+      reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
+      Input.getBuffer().size());
+  llvm::Timer CompressTimer("Compression Timer", "Compression time",
+                            OffloadBundlerTimerGroup);
+  if (Verbose)
+    CompressTimer.startTimer();
+  llvm::compression::compress(P, BufferUint8, CompressedBuffer);
+  if (Verbose)
+    CompressTimer.stopTimer();
+
+  uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
+
+  // Store sizes in 64-bit variables first
+  uint64_t UncompressedSize64 = Input.getBuffer().size();
+  uint64_t TotalFileSize64;
+
+  // Calculate total file size based on version
+  if (Version == 2) {
+    // For V2, ensure the sizes don't exceed 32-bit limit
+    if (UncompressedSize64 > std::numeric_limits<uint32_t>::max())
+      return createStringError(llvm::inconvertibleErrorCode(),
+                               "Uncompressed size exceeds version 2 limit");
+    if ((MagicNumber.size() + sizeof(uint32_t) + sizeof(Version) +
+         sizeof(CompressionMethod) + sizeof(uint32_t) + sizeof(TruncatedHash) +
+         CompressedBuffer.size()) > std::numeric_limits<uint32_t>::max())
+      return createStringError(llvm::inconvertibleErrorCode(),
+                               "Total file size exceeds version 2 limit");
+
+    TotalFileSize64 = MagicNumber.size() + sizeof(uint32_t) + sizeof(Version) +
+                      sizeof(CompressionMethod) + sizeof(uint32_t) +
+                      sizeof(TruncatedHash) + CompressedBuffer.size();
+  } else { // Version 3
+    TotalFileSize64 = MagicNumber.size() + sizeof(uint64_t) + sizeof(Version) +
+                      sizeof(CompressionMethod) + sizeof(uint64_t) +
+                      sizeof(TruncatedHash) + CompressedBuffer.size();
+  }
+
+  SmallVector<char, 0> FinalBuffer;
+  llvm::raw_svector_ostream OS(FinalBuffer);
+  OS << MagicNumber;
+  OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
+  OS.write(reinterpret_cast<const char *>(&CompressionMethod),
+           sizeof(CompressionMethod));
+
+  // Write size fields according to version
+  if (Version == 2) {
+    uint32_t TotalFileSize32 = static_cast<uint32_t>(TotalFileSize64);
+    uint32_t UncompressedSize32 = static_cast<uint32_t>(UncompressedSize64);
+    OS.write(reinterpret_cast<const char *>(&TotalFileSize32),
+             sizeof(TotalFileSize32));
+    OS.write(reinterpret_cast<const char *>(&UncompressedSize32),
+             sizeof(UncompressedSize32));
+  } else { // Version 3
+    OS.write(reinterpret_cast<const char *>(&TotalFileSize64),
+             sizeof(TotalFileSize64));
+    OS.write(reinterpret_cast<const char *>(&UncompressedSize64),
+             sizeof(UncompressedSize64));
+  }
+
+  OS.write(reinterpret_cast<const char *>(&TruncatedHash),
+           sizeof(TruncatedHash));
+  OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
+           CompressedBuffer.size());
+
+  if (Verbose) {
+    auto MethodUsed =
+        P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
+    double CompressionRate =
+        static_cast<double>(UncompressedSize64) / CompressedBuffer.size();
+    double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
+    double CompressionSpeedMBs =
+        (UncompressedSize64 / (1024.0 * 1024.0)) / CompressionTimeSeconds;
+    llvm::errs() << "Compressed bundle format version: " << Version << "\n"
+                 << "Total file size (including headers): "
+                 << formatWithCommas(TotalFileSize64) << " bytes\n"
+                 << "Compression method used: " << MethodUsed << "\n"
+                 << "Compression level: " << P.level << "\n"
+                 << "Binary size before compression: "
+                 << formatWithCommas(UncompressedSize64) << " bytes\n"
+                 << "Binary size after compression: "
+                 << formatWithCommas(CompressedBuffer.size()) << " bytes\n"
+                 << "Compression rate: "
+                 << llvm::format("%.2lf", CompressionRate) << "\n"
+                 << "Compression ratio: "
+                 << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
+                 << "Compression speed: "
+                 << llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
+                 << "Truncated MD5 hash: "
+                 << llvm::format_hex(TruncatedHash, 16) << "\n";
+  }
+
+  return llvm::MemoryBuffer::getMemBufferCopy(
+      llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
+}
+
+// Use packed structs to avoid padding, such that the structs map the serialized
+// format.
+LLVM_PACKED_START
+union RawCompressedBundleHeader {
+  struct CommonFields {
+    uint32_t Magic;
+    uint16_t Version;
+    uint16_t Method;
+  };
+
+  struct V1Header {
+    CommonFields Common;
+    uint32_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  struct V2Header {
+    CommonFields Common;
+    uint32_t FileSize;
+    uint32_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  struct V3Header {
+    CommonFields Common;
+    uint64_t FileSize;
+    uint64_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  CommonFields Common;
+  V1Header V1;
+  V2Header V2;
+  V3Header V3;
+};
+LLVM_PACKED_END
+
+// Helper method to get header size based on version
+static size_t getHeaderSize(uint16_t Version) {
+  switch (Version) {
+  case 1:
+    return sizeof(RawCompressedBundleHeader::V1Header);
+  case 2:
+    return sizeof(RawCompressedBundleHeader::V2Header);
+  case 3:
+    return sizeof(RawCompressedBundleHeader::V3Header);
+  default:
+    llvm_unreachable("Unsupported version");
+  }
+}
+
+Expected<CompressedOffloadBundle::CompressedBundleHeader>
+CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
+  assert(Blob.size() >= sizeof(RawCompressedBundleHeader::CommonFields));
+  assert(llvm::identify_magic(Blob) ==
+         llvm::file_magic::offload_bundle_compressed);
+
+  RawCompressedBundleHeader Header;
+  memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));
+
+  CompressedBundleHeader Normalized;
+  Normalized.Version = Header.Common.Version;
+
+  size_t RequiredSize = getHeaderSize(Normalized.Version);
+
+  if (Blob.size() < RequiredSize)
+    return createStringError(inconvertibleErrorCode(),
+                             "Compressed bundle header size too small");
+
+  switch (Normalized....
[truncated]

jhuber6 · 2025-07-22T19:43:58Z

llvm/include/llvm/Object/OffloadBundle.h

+// bundles.
+//
+// The format is as follows:
+// - Magic Number (4 bytes) - A constant "CCOB".


This is probably more easily explained as a struct.

This is actually pulled from "clang/include/clang/Driver/OffloadBundler.h". The intent here is to migrate the offloading API in clang/lib/Driver (used by the clang-offload-bundler) into this LLVM API. In this case, I need to copy this code from the Clang API so that the llvm-* tools can decompress the offload bundles that the Clang Driver API created. For now, this code will be duplicated, which is not ideal, until the clang-offload-bundler can use this LLVM API to handle compression and decompression.

jhuber6 · 2025-07-22T19:45:22Z

llvm/include/llvm/Object/OffloadBundle.h

+    ID.reserve(T.size());
+    ID = T;


This is confusing, wouldn't the copy constructor already allocate this? Was the issue StringRef going out of scope? That's more easily solved with T.str() in the initializer list.

Yes, this is a memory issue with the string going out of scope. We're reading the string data from a MemoryBuffer returned from the "decompress()" function in the API, but we pass the results of reading the offload section up the call stack to an llvm-* tool (in this case llvm-objdump), so a StringRef into that buffer would dangle once the buffer is destroyed.

I admit this isn't pretty/elegant. A solution I considered was to just encapsulate all of the needed functionality for "llvm-objdump --offloading" directly into the API. But I felt that wasn't ideal API design either.

jhuber6 · 2025-07-22T19:46:00Z

llvm/include/llvm/Object/OffloadBundle.h

+    if (Decompress) {
+      DecompressedBuffer =
+          MemoryBuffer::getMemBufferCopy(Source.getBuffer(), File);
+    }


Suggested change

if (Decompress) {

DecompressedBuffer =

MemoryBuffer::getMemBufferCopy(Source.getBuffer(), File);

}

if (Decompress)

DecompressedBuffer =

MemoryBuffer::getMemBufferCopy(Source.getBuffer(), File);

jhuber6 · 2025-07-22T19:47:17Z

llvm/lib/Object/OffloadBundle.cpp

+  if (Error E = Buf->commit())
+    return E;
+
+  return Error::success();


Suggested change

if (Error E = Buf->commit())

return E;

return Error::success();

return Buf->commit();

jhuber6 · 2025-07-22T19:47:43Z

llvm/lib/Object/OffloadBundle.cpp

+                                  uint16_t Version, bool Verbose) {
+  if (!llvm::compression::zstd::isAvailable() &&
+      !llvm::compression::zlib::isAvailable())
+    return createStringError(llvm::inconvertibleErrorCode(),


Suggested change

return createStringError(llvm::inconvertibleErrorCode(),

return createStringError(

Pretty sure there's a createStringError overload that supplies the error code for you.

This is copied code from the clang-offload-bundler API, but I agree and can make this change.

For the fixes from copy/paste, can we create a separate PR to go back and fix it in the original APIs?

jhuber6 · 2025-07-22T19:48:09Z

llvm/lib/Object/OffloadBundle.cpp

+
+  uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
+
+  // Store sizes in 64-bit variables first


Comments should end in proper punctuation.

llvm/lib/Object/OffloadBundle.cpp

llvm/include/llvm/Object/OffloadBundle.h

jh7370 · 2025-07-23T06:59:25Z

llvm/lib/Object/OffloadBundle.cpp

+      if (!DecompressedBufferOrErr)
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Failed to decompress input: " +


Nit: LLVM style says lower-case letters for first letter in errors. Applies throughout this change.

llvm/lib/Object/OffloadBundle.cpp

jh7370 · 2025-07-23T07:17:40Z

llvm/lib/Object/OffloadBundle.cpp

+    double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
+    double CompressionSpeedMBs =
+        (UncompressedSize64 / (1024.0 * 1024.0)) / CompressionTimeSeconds;
+    llvm::errs() << "Compressed bundle format version: " << Version << "\n"


Why are you printing directly to llvm::errs in the depths of a library?

Existing code from clang-offload-bundler API. It's printing to llvm::errs because "Verbose" has been enabled.

jh7370 · 2025-07-23T07:18:21Z

llvm/lib/Object/OffloadBundle.cpp

+};
+LLVM_PACKED_END
+
+// Helper method to get header size based on version


Suggested change

// Helper method to get header size based on version

// Helper method to get header size based on version.

And in many other places.

llvm/lib/Object/OffloadBundle.cpp

llvm/tools/llvm-objdump/OffloadDump.cpp

github-actions · 2025-08-12T16:44:45Z

✅ With the latest revision this PR passed the C/C++ code formatter.

jh7370

The clang-format checker is failing with the current version of the PR. Please fix!

Regarding the use of errs() directly in a library, I think it would be better to pass the stream for verbose output into the library, rather than use errs() directly. The reasoning for this is related to what I talked about in an LLVM conference talk ages ago regarding errors in libraries: the client tool may need control over where output goes. For example, it may want to send the verbose output to a log file, but keep the actual error messages on stderr, making redirection on the command-line inappropriate.

jh7370 · 2025-08-13T07:41:13Z

llvm/lib/Object/OffloadBundle.cpp

+  if (!compression::zstd::isAvailable() &&
+      !compression::zlib::isAvailable())
+    return createStringError(
+                             "Compression not supported");


There are still several places using upper-case for error messages in this PR. Please fix throughout.

jh7370 · 2025-08-13T07:42:08Z

llvm/lib/Object/OffloadBundle.cpp

-  uint16_t ThisVersion;
-  memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
-  CurrentOffset += VersionFieldSize;
+  // Write size fields according to version


Suggested change

// Write size fields according to version

// Write size fields according to version.

This kind of issue still needs fixing in several other places in this file, not just here. Please fix all of them.

yxsamliu · 2025-08-14T03:35:53Z

llvm/include/llvm/Object/OffloadBundle.h

+    static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
+  };
+
+  static inline const uint16_t DefaultVersion = 2;


please update the default version to 3 now

jh7370

Regarding the use of errs() directly in a library, I think it would be better to pass the stream for verbose output into the library, rather than use errs() directly. The reasoning for this is related to what I talked about in an LLVM conference talk ages ago regarding errors in libraries: the client tool may need control over where output goes. For example, it may want to send the verbose output to a log file, but keep the actual error messages on stderr, making redirection on the command-line inappropriate.

This out-of-line comment seems to have been ignored?

llvm/lib/Object/OffloadBundle.cpp

jh7370 · 2025-08-20T07:26:27Z

llvm/lib/Object/OffloadBundle.cpp

+    // For V2, ensure the sizes don't exceed 32-bit limit.
+    if (UncompressedSize64 > std::numeric_limits<uint32_t>::max())
+      return createStringError(inconvertibleErrorCode(),
+                               "uncompressed size exceeds version 2 limit.");


No full stops at end of error messages, per style guide. Also, here and below could probably benefit from additional context, e.g. the uncompressed size itself. See https://llvm.org/docs/CodingStandards.html#error-and-warning-messages for more details.

Applies throughout.

OK. This was copied from the Clang API, but there is no reason we can't correct it here.

What about the additional context I suggested?

jh7370 · 2025-09-03T08:32:31Z

Regarding the use of errs() directly in a library, I think it would be better to pass the stream for verbose output into the library, rather than use errs() directly. The reasoning for this is related to what I talked about in an LLVM conference talk ages ago regarding errors in libraries: the client tool may need control over where output goes. For example, it may want to send the verbose output to a log file, but keep the actual error messages on stderr, making redirection on the command-line inappropriate.

This out-of-line comment seems to have been ignored?

Still ignored?

david-salinas · 2025-09-04T20:36:56Z

Regarding the use of errs() directly in a library, I think it would be better to pass the stream for verbose output into the library, rather than use errs() directly. The reasoning for this is related to what I talked about in an LLVM conference talk ages ago regarding errors in libraries: the client tool may need control over where output goes. For example, it may want to send the verbose output to a log file, but keep the actual error messages on stderr, making redirection on the command-line inappropriate.

This out-of-line comment seems to have been ignored?

Still ignored?
Sorry, I missed this. Yes, I agree that is a better way to handle the verbose output. I will resolve this in the next patch.

…ompress/compress. Default the output stream to errs().

llvm/include/llvm/Object/OffloadBundle.h

llvm/lib/Object/OffloadBundle.cpp

llvm/include/llvm/Object/OffloadBundle.h

llvm/lib/Object/OffloadBundle.cpp

jh7370

Is it possible to add tests that cover the new error paths in particular?

Aside from that, I've got no more comments. Somebody with more offloading knowledge should review this a bit more.

SWDEV-52811 - fix compress/decompress in LLVM Offloading API

8fd5968

llvmbot added the llvm:binary-utilities label Jul 22, 2025

david-salinas requested review from jh7370, kzhuravl, lamb-j, yxsamliu and jhuber6 July 22, 2025 19:33

david-salinas changed the title ~~SWDEV-52811 - fix compress/decompress in LLVM Offloading API~~ Fix compress/decompress in LLVM Offloading API Jul 22, 2025

jhuber6 reviewed Jul 22, 2025

View reviewed changes

jh7370 reviewed Jul 23, 2025

View reviewed changes

Clean-up from review comments.

5a70f7d

david-salinas requested review from jh7370 and jhuber6 August 12, 2025 16:41

jh7370 reviewed Aug 13, 2025

View reviewed changes

yxsamliu reviewed Aug 14, 2025

View reviewed changes

Resolve PR comments.

bd67241

david-salinas requested review from jh7370 and yxsamliu August 19, 2025 16:41

jh7370 reviewed Aug 20, 2025

View reviewed changes

david-salinas added 2 commits August 28, 2025 19:54

Resolve PR comments part 2

98f59d4

Merge branch 'main' into correct-decompress-offloading-api

856bb01

david-salinas requested a review from jh7370 August 29, 2025 17:03

Change Offloading API to require a output stream to be passed for dec…

d3c9a79

…ompress/compress. Default the output stream to errs().

jh7370 reviewed Sep 5, 2025

View reviewed changes

llvm/include/llvm/Object/OffloadBundle.h Outdated Show resolved Hide resolved

david-salinas added 2 commits September 10, 2025 16:36

Refactor API change for user specified output stream

de45503

Merge branch 'main' into correct-decompress-offloading-api

a375975

david-salinas requested a review from jh7370 September 10, 2025 16:45

jh7370 reviewed Sep 11, 2025

View reviewed changes

llvm/lib/Object/OffloadBundle.cpp Outdated Show resolved Hide resolved

llvm/lib/Object/OffloadBundle.cpp Outdated Show resolved Hide resolved

llvm/include/llvm/Object/OffloadBundle.h Outdated Show resolved Hide resolved

Resolve build issue and address PR comments.

589e898

david-salinas requested a review from jh7370 September 16, 2025 17:19

jh7370 reviewed Sep 18, 2025

View reviewed changes

llvm/lib/Object/OffloadBundle.cpp Outdated Show resolved Hide resolved

Address PR comment.

7ce94fe

jh7370 reviewed Sep 19, 2025

View reviewed changes

llvm/lib/Object/OffloadBundle.cpp Outdated Show resolved Hide resolved

llvm/lib/Object/OffloadBundle.cpp Outdated Show resolved Hide resolved

llvm/lib/Object/OffloadBundle.cpp Outdated Show resolved Hide resolved

resolve PR comments

442a6bd

david-salinas requested a review from jh7370 September 23, 2025 15:00

jh7370 reviewed Sep 24, 2025

View reviewed changes

	return createStringError(llvm::inconvertibleErrorCode(),
	return createStringError(


		uint16_t CompressionMethod = static_cast<uint16_t>(P.format);

		// Store sizes in 64-bit variables first

	// Helper method to get header size based on version
	// Helper method to get header size based on version.

	// Write size fields according to version
	// Write size fields according to version.

Fix compress/decompress in LLVM Offloading API #150064

Are you sure you want to change the base?

Fix compress/decompress in LLVM Offloading API #150064

Uh oh!

Conversation

david-salinas commented Jul 22, 2025

Uh oh!

llvmbot commented Jul 22, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Aug 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jh7370 left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

jh7370 left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

github-actions bot commented Aug 12, 2025 •

edited

Loading