Skip to content

Conversation

@aokblast
Copy link
Contributor

In an ELF file, an extended header may be present when phnum, shnum,
or shstrndx is larger than the maximum 16-bit value. This extended header
uses section 0 to record these fields in 32 bits.

We implement this feature so that programs relying on ELFFile::program_headers() can get the
correct number of segments. Also, consumers don't have to check
section 0 themselves; instead, they can use getPhNum() as an
alternative.

@aokblast
Copy link
Contributor Author

This changes nothing; it just creates a user branch so we can rebase easily.

In an ELF file, an extended header may be present when phnum, shnum,
or shstrndx is larger than the maximum 16-bit value. This extended header
uses section 0 to record these fields in 32 bits.

We implement this feature so that programs relying on ELFFile::program_headers() can get the
correct number of segments. Also, consumers don't have to check
section 0 themselves; instead, they can use getPhNum() as an
alternative.
@aokblast aokblast force-pushed the users/aokblast/elf/fix_65535err branch from d661311 to cb48096 Compare October 30, 2025 16:38
@aokblast aokblast marked this pull request as ready for review October 30, 2025 16:38
@llvmbot
Copy link
Member

llvmbot commented Oct 30, 2025

@llvm/pr-subscribers-llvm-binary-utilities

Author: None (aokblast)

Changes

In an ELF file, an extended header may be present when phnum, shnum,
or shstrndx is larger than the maximum 16-bit value. This extended header
uses section 0 to record these fields in 32 bits.

We implement this feature so that programs relying on ELFFile::program_headers() can get the
correct number of segments. Also, consumers don't have to check
section 0 themselves; instead, they can use getPhNum() as an
alternative.


Full diff: https://github.com/llvm/llvm-project/pull/165284.diff

2 Files Affected:

  • (modified) llvm/include/llvm/BinaryFormat/ELF.h (+2)
  • (modified) llvm/include/llvm/Object/ELF.h (+87-19)
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6ee6b666c1735..39e9611c7190e 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1125,6 +1125,8 @@ struct Elf64_Shdr {
   Elf64_Xword sh_entsize;
 };
 
+enum { PN_XNUM = 0xffff };
+
 // Special section indices.
 enum {
   SHN_UNDEF = 0,          // Undefined, missing, irrelevant, or meaningless
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 59f63eb6b5bb6..03d5ee21a71b4 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -278,9 +278,46 @@ class ELFFile {
   std::vector<Elf_Shdr> FakeSections;
   SmallString<0> FakeSectionStrings;
 
+  // When the number of program headers is >= PN_XNUM, the actual number is
+  // contained in the sh_info field of the section header at index 0.
+  std::optional<uint32_t> RealPhNum;
+  // When the number of section headers is >= SHN_LORESERVE, the actual number
+  // is contained in the sh_size field of the section header at index 0.
+  std::optional<uint64_t> RealShNum;
+  // When the section index of the section name table is >= SHN_LORESERVE, the
+  // actual number is contained in the sh_link field of the section header at
+  // index 0.
+  std::optional<uint32_t> RealShStrNdx;
+
   ELFFile(StringRef Object);
 
+  Error readShdrZero();
+
 public:
+  Expected<uint32_t> getPhNum() const {
+    if (!RealPhNum) {
+      if (Error E = const_cast<ELFFile<ELFT> *>(this)->readShdrZero())
+        return std::move(E);
+    }
+    return *RealPhNum;
+  }
+
+  Expected<uint64_t> getShNum() const {
+    if (!RealShNum) {
+      if (Error E = const_cast<ELFFile<ELFT> *>(this)->readShdrZero())
+        return std::move(E);
+    }
+    return *RealShNum;
+  }
+
+  Expected<uint32_t> getShStrNdx() const {
+    if (!RealShStrNdx) {
+      if (Error E = const_cast<ELFFile<ELFT> *>(this)->readShdrZero())
+        return std::move(E);
+    }
+    return *RealShStrNdx;
+  }
+
   const Elf_Ehdr &getHeader() const {
     return *reinterpret_cast<const Elf_Ehdr *>(base());
   }
@@ -379,22 +416,26 @@ class ELFFile {
 
   /// Iterate over program header table.
   Expected<Elf_Phdr_Range> program_headers() const {
-    if (getHeader().e_phnum && getHeader().e_phentsize != sizeof(Elf_Phdr))
+    uint32_t NumPh;
+    if (Expected<uint32_t> PhNumOrErr = getPhNum())
+      NumPh = *PhNumOrErr;
+    else
+      return PhNumOrErr.takeError();
+    if (NumPh && getHeader().e_phentsize != sizeof(Elf_Phdr))
       return createError("invalid e_phentsize: " +
                          Twine(getHeader().e_phentsize));
 
-    uint64_t HeadersSize =
-        (uint64_t)getHeader().e_phnum * getHeader().e_phentsize;
+    uint64_t HeadersSize = (uint64_t)NumPh * getHeader().e_phentsize;
     uint64_t PhOff = getHeader().e_phoff;
     if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize())
       return createError("program headers are longer than binary of size " +
                          Twine(getBufSize()) + ": e_phoff = 0x" +
                          Twine::utohexstr(getHeader().e_phoff) +
-                         ", e_phnum = " + Twine(getHeader().e_phnum) +
+                         ", e_phnum = " + Twine(NumPh) +
                          ", e_phentsize = " + Twine(getHeader().e_phentsize));
 
     auto *Begin = reinterpret_cast<const Elf_Phdr *>(base() + PhOff);
-    return ArrayRef(Begin, Begin + getHeader().e_phnum);
+    return ArrayRef(Begin, Begin + NumPh);
   }
 
   /// Get an iterator over notes in a program header.
@@ -772,19 +813,15 @@ template <class ELFT>
 Expected<StringRef>
 ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
                                      WarningHandler WarnHandler) const {
-  uint32_t Index = getHeader().e_shstrndx;
-  if (Index == ELF::SHN_XINDEX) {
-    // If the section name string table section index is greater than
-    // or equal to SHN_LORESERVE, then the actual index of the section name
-    // string table section is contained in the sh_link field of the section
-    // header at index 0.
-    if (Sections.empty())
-      return createError(
-          "e_shstrndx == SHN_XINDEX, but the section header table is empty");
+  Expected<uint32_t> ShStrNdxOrErr = getShStrNdx();
+  if (!ShStrNdxOrErr)
+    return ShStrNdxOrErr.takeError();
 
-    Index = Sections[0].sh_link;
-  }
+  if (*ShStrNdxOrErr == ELF::SHN_XINDEX && Sections.empty())
+    return createError(
+        "e_shstrndx == SHN_XINDEX, but the section header table is empty");
 
+  uint32_t Index = *ShStrNdxOrErr;
   // There is no section name string table. Return FakeSectionStrings which
   // is non-empty if we have created fake sections.
   if (!Index)
@@ -891,6 +928,35 @@ Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
 
 template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
 
+template <class ELFT> Error ELFFile<ELFT>::readShdrZero() {
+  const Elf_Ehdr &Header = getHeader();
+
+  if ((Header.e_phnum == ELF::PN_XNUM || Header.e_shnum == 0 ||
+       Header.e_shstrndx == ELF::SHN_XINDEX) &&
+      Header.e_shoff != 0) {
+    // Pretend we have section 0 or sections() would call getShNum and thus
+    // become an infinite recursion.
+    RealShNum = 1;
+    auto SecOrErr = getSection(0);
+    if (!SecOrErr) {
+      RealShNum = std::nullopt;
+      return SecOrErr.takeError();
+    }
+
+    RealPhNum =
+        Header.e_phnum == ELF::PN_XNUM ? (*SecOrErr)->sh_info : Header.e_phnum;
+    RealShNum = Header.e_shnum == 0 ? (*SecOrErr)->sh_size : Header.e_shnum;
+    RealShStrNdx = Header.e_shstrndx == ELF::SHN_XINDEX ? (*SecOrErr)->sh_link
+                                                        : Header.e_shstrndx;
+  } else {
+    RealPhNum = Header.e_phnum;
+    RealShNum = Header.e_shnum;
+    RealShStrNdx = Header.e_shstrndx;
+  }
+
+  return Error::success();
+}
+
 template <class ELFT>
 Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) {
   if (sizeof(Elf_Ehdr) > Object.size())
@@ -956,9 +1022,11 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
   const Elf_Shdr *First =
       reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
 
-  uintX_t NumSections = getHeader().e_shnum;
-  if (NumSections == 0)
-    NumSections = First->sh_size;
+  uintX_t NumSections = 0;
+  if (Expected<uint64_t> ShNumOrErr = getShNum())
+    NumSections = *ShNumOrErr;
+  else
+    return ShNumOrErr.takeError();
 
   if (NumSections > UINT64_MAX / sizeof(Elf_Shdr))
     return createError("invalid number of sections specified in the NULL "

Copy link
Collaborator

@jh7370 jh7370 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record, if something like this happens again and your original PR has been approved, you don't need to (currently) wait for re-approval of a new, identical PR.

Please however add a link to the original PR in your description, for traceability purposes.

@aokblast aokblast closed this Oct 31, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants