Skip to content

Conversation

aokblast
Copy link
Contributor

@aokblast aokblast commented Oct 9, 2025

Some binaries, such as FreeBSD core dumps, use program headers to store mmap
information. It is possible for such a file to contain more than
PN_XNUM program headers. Therefore, we implement support for PN_XNUM
in readelf and objcopy.

An ELF file may carry an extended header for cases where phnum, shnum,
or shstrndx is too large to fit in its 16-bit field. This extended header
uses section 0 to record these fields as 32-bit values.  We implement this
feature so that programs relying on ELFFile::program_headers() get the
correct number of segments. In addition, consumers no longer have to inspect
section 0 themselves; instead, they can call getPhNum() to obtain the real
program header count.
@llvmbot
Copy link
Member

llvmbot commented Oct 9, 2025

@llvm/pr-subscribers-llvm-binary-utilities

Author: None (aokblast)

Changes

Some binaries, such as FreeBSD core dumps, use program headers to store mmap
information. It is possible for such a file to contain more than
PN_XNUM program headers. Therefore, we implement support for PN_XNUM
in readelf and objcopy.


Full diff: https://github.com/llvm/llvm-project/pull/162648.diff

7 Files Affected:

  • (modified) llvm/include/llvm/BinaryFormat/ELF.h (+2)
  • (modified) llvm/include/llvm/Object/ELF.h (+41-19)
  • (modified) llvm/include/llvm/Object/ELFTypes.h (+5)
  • (modified) llvm/test/tools/llvm-objcopy/ELF/many-sections.test (+2-2)
  • (added) llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz ()
  • (added) llvm/test/tools/llvm-readobj/ELF/many-segments.test (+79)
  • (modified) llvm/tools/llvm-readobj/ELFDumper.cpp (+32-16)
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index e619b186dfe3d..136f8cfbde818 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1123,6 +1123,8 @@ struct Elf64_Shdr {
   Elf64_Xword sh_entsize;
 };
 
+enum { PN_XNUM = 0xffff };
+
 // Special section indices.
 enum {
   SHN_UNDEF = 0,          // Undefined, missing, irrelevant, or meaningless
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 59f63eb6b5bb6..3b96c0e2b9d1f 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -278,9 +278,16 @@ class ELFFile {
   std::vector<Elf_Shdr> FakeSections;
   SmallString<0> FakeSectionStrings;
 
+  Elf_Word RealPhNum;
+  Elf_Word RealShNum;
+  Elf_Word RealShStrNdx;
+
   ELFFile(StringRef Object);
 
 public:
+  Elf_Word getPhNum() const { return RealPhNum; }
+  Elf_Word getShNum() const { return RealShNum; }
+  Elf_Word getShStrNdx() const { return RealShStrNdx; }
   const Elf_Ehdr &getHeader() const {
     return *reinterpret_cast<const Elf_Ehdr *>(base());
   }
@@ -379,22 +386,21 @@ class ELFFile {
 
   /// Iterate over program header table.
   Expected<Elf_Phdr_Range> program_headers() const {
-    if (getHeader().e_phnum && getHeader().e_phentsize != sizeof(Elf_Phdr))
+    if (RealPhNum && getHeader().e_phentsize != sizeof(Elf_Phdr))
       return createError("invalid e_phentsize: " +
                          Twine(getHeader().e_phentsize));
 
-    uint64_t HeadersSize =
-        (uint64_t)getHeader().e_phnum * getHeader().e_phentsize;
+    uint64_t HeadersSize = (uint64_t)RealPhNum * getHeader().e_phentsize;
     uint64_t PhOff = getHeader().e_phoff;
     if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize())
       return createError("program headers are longer than binary of size " +
                          Twine(getBufSize()) + ": e_phoff = 0x" +
                          Twine::utohexstr(getHeader().e_phoff) +
-                         ", e_phnum = " + Twine(getHeader().e_phnum) +
+                         ", e_phnum = " + Twine(RealPhNum) +
                          ", e_phentsize = " + Twine(getHeader().e_phentsize));
 
     auto *Begin = reinterpret_cast<const Elf_Phdr *>(base() + PhOff);
-    return ArrayRef(Begin, Begin + getHeader().e_phnum);
+    return ArrayRef(Begin, Begin + RealPhNum);
   }
 
   /// Get an iterator over notes in a program header.
@@ -772,18 +778,10 @@ template <class ELFT>
 Expected<StringRef>
 ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
                                      WarningHandler WarnHandler) const {
-  uint32_t Index = getHeader().e_shstrndx;
-  if (Index == ELF::SHN_XINDEX) {
-    // If the section name string table section index is greater than
-    // or equal to SHN_LORESERVE, then the actual index of the section name
-    // string table section is contained in the sh_link field of the section
-    // header at index 0.
-    if (Sections.empty())
-      return createError(
-          "e_shstrndx == SHN_XINDEX, but the section header table is empty");
-
-    Index = Sections[0].sh_link;
-  }
+  uint32_t Index = RealShStrNdx;
+  if (Index == ELF::SHN_XINDEX)
+    return createError(
+        "e_shstrndx == SHN_XINDEX, but the section header table is empty");
 
   // There is no section name string table. Return FakeSectionStrings which
   // is non-empty if we have created fake sections.
@@ -889,7 +887,31 @@ Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
   return 0;
 }
 
-template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
+template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {
+  const Elf_Ehdr &Header = getHeader();
+  RealPhNum = Header.e_phnum;
+  RealShNum = Header.e_shnum;
+  RealShStrNdx = Header.e_shstrndx;
+  if (!Header.hasPhdrNumExtension())
+    return;
+
+  // An ELF binary may report `hasExtendedHeader` as true but not actually
+  // include an extended header. For example, a core dump can contain 65,535
+  // segments but no sections at all. We defer reporting an error until section
+  // 0 is accessed. Consumers should handle and emit the error themselves when
+  // they attempt to access it.
+  auto SecOrErr = getSection(0);
+  if (!SecOrErr) {
+    consumeError(SecOrErr.takeError());
+    return;
+  }
+  if (RealPhNum == 0xFFFF)
+    RealPhNum = (*SecOrErr)->sh_info;
+  if (RealShNum == ELF::SHN_UNDEF)
+    RealShNum = (*SecOrErr)->sh_size;
+  if (RealShStrNdx == ELF::SHN_XINDEX)
+    RealShStrNdx = (*SecOrErr)->sh_link;
+}
 
 template <class ELFT>
 Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) {
@@ -956,7 +978,7 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
   const Elf_Shdr *First =
       reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
 
-  uintX_t NumSections = getHeader().e_shnum;
+  uintX_t NumSections = RealShNum;
   if (NumSections == 0)
     NumSections = First->sh_size;
 
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 5a26e2fc31458..b791f7486fe97 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -529,6 +529,11 @@ struct Elf_Ehdr_Impl {
 
   unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
   unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
+  bool hasPhdrNumExtension() const {
+    return (e_phnum == ELF::PN_XNUM || e_shnum == ELF::SHN_UNDEF ||
+            e_shstrndx == ELF::SHN_XINDEX) &&
+           e_shoff != 0;
+  }
 };
 
 template <endianness Endianness>
diff --git a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
index 6622db237026f..4c618acb5c951 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
@@ -6,8 +6,8 @@ RUN: llvm-readobj --file-headers --sections --symbols %t2 | FileCheck %s
 RUN: llvm-readelf --symbols %t2 | FileCheck --check-prefix=SYMS %s
 
 ## The ELF header should have e_shnum == 0 and e_shstrndx == SHN_XINDEX.
-# CHECK:        SectionHeaderCount: 0
-# CHECK-NEXT:   StringTableSectionIndex: 65535
+# CHECK:        SectionHeaderCount: 0 (65540)
+# CHECK-NEXT:   StringTableSectionIndex: 65535 (65539)
 
 ## The first section header should store the real section header count and
 ## shstrndx in its fields.
diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz b/llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz
new file mode 100644
index 0000000000000..0709ed1d6389e
Binary files /dev/null and b/llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz differ
diff --git a/llvm/test/tools/llvm-readobj/ELF/many-segments.test b/llvm/test/tools/llvm-readobj/ELF/many-segments.test
new file mode 100644
index 0000000000000..20c31e97c8aca
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/many-segments.test
@@ -0,0 +1,79 @@
+## Show that llvm-readelf can handle an input file with many segments.
+
+RUN: %python %p/../../llvm-objcopy/Inputs/ungzip.py %p/Inputs/many-segments.o.gz > %t
+RUN: llvm-readobj --file-headers --sections --segments %t2 | FileCheck %s
+RUN: llvm-readelf --segments %t2 | FileCheck --check-prefix=SYMS %s
+
+## The ELF header should have e_phnum == PN_XNUM
+# CHECK:        ProgramHeaderCount: 65535 (66549)
+## The first section header should store the real program header count in its fields.
+# CHECK:      Section {
+# CHECK-NEXT:   Index: 0
+# CHECK-NEXT:   Name:
+# CHECK-NEXT:   Type: SHT_NULL
+# CHECK-NEXT:   Flags [
+# CHECK-NEXT:   ]
+# CHECK-NEXT:   Address:
+# CHECK-NEXT:   Offset:
+# CHECK-NEXT:   Size:
+# CHECK-NEXT:   Link:
+# CHECK-NEXT:   Info: 66549
+
+## Show that the symbols with segments indexes around the reserved range still
+## have the right segment indexes afterwards.
+# 65535th segment
+# CHECK:         Offset: 0x1183B000
+# CHECK-NEXT:	 VirtualAddress: 0x349139F3000
+# CHECK:		 }
+# CHECK-NEXT  ProgramHeader {
+# CHECK-NEXT    Type: PT_LOAD (0x1)
+# CHECK-NEXT    Offset: 0x1183C000
+# CHECK-NEXT    VirtualAddress: 0x349139F4000
+# CHECK-NEXT    PhysicalAddress: 0x0
+# CHECK-NEXT    FileSize: 4096
+# CHECK-NEXT    MemSize: 4096
+# CHECK-NEXT    Flags [ (0x4)
+# CHECK-NEXT      PF_R (0x4)
+# CHECK-NEXT    ]
+# CHECK-NEXT    Alignment: 4096
+# CHECK-NEXT  }
+# CHECK-NEXT  ProgramHeader {
+# CHECK-NEXT    Type: PT_LOAD (0x1)
+# CHECK-NEXT    Offset: 0x1183D000
+# CHECK-NEXT    VirtualAddress: 0x349139F5000
+# CHECK-NEXT    PhysicalAddress: 0x0
+# CHECK-NEXT    FileSize: 4096
+# CHECK-NEXT    MemSize: 4096
+# CHECK-NEXT    Flags [ (0x6)
+# CHECK-NEXT      PF_R (0x4)
+# CHECK-NEXT      PF_W (0x2)
+# CHECK-NEXT    ]
+# CHECK-NEXT    Alignment: 4096
+# CHECK-NEXT  }
+# CHECK-NEXT  ProgramHeader {
+# CHECK-NEXT    Type: PT_LOAD (0x1)
+# CHECK-NEXT    Offset: 0x1183E000
+# CHECK-NEXT    VirtualAddress: 0x349139F6000
+# CHECK-NEXT    PhysicalAddress: 0x0
+# CHECK-NEXT    FileSize: 4096
+# CHECK-NEXT    MemSize: 4096
+# CHECK-NEXT    Flags [ (0x4)
+# CHECK-NEXT      PF_R (0x4)
+# CHECK-NEXT    ]
+# CHECK-NEXT    Alignment: 4096
+# CHECK-NEXT  }
+# CHECK        ProgramHeader {
+# CHECK-NEXT    Type: PT_LOAD (0x1)
+# CHECK-NEXT    Offset: 0x11C31000
+# CHECK-NEXT    VirtualAddress: 0x30D8E7868000
+# CHECK-NEXT    PhysicalAddress: 0x0
+# CHECK-NEXT    FileSize: 8192
+# CHECK-NEXT    MemSize: 8192
+# CHECK-NEXT    Flags [ (0x6)
+# CHECK-NEXT      PF_R (0x4)
+# CHECK-NEXT      PF_W (0x2)
+# CHECK-NEXT    ]
+# CHECK-NEXT    Alignment: 4096
+# CHECK-NEXT  }
+
+# SYMS: There are 66549 program headers, starting at offset 64
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index ab93316907cc6..53d3a439f1e62 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -3572,12 +3572,30 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
   OS.flush();
 }
 
+template <class ELFT>
+static std::string getProgramHeadersNumString(const ELFFile<ELFT> &Obj,
+                                              StringRef FileName) {
+  if (Obj.getHeader().e_phnum != ELF::PN_XNUM)
+    return to_string(Obj.getHeader().e_phnum);
+
+  Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
+  if (!ArrOrErr) {
+    // In this case we can ignore an error, because we have already reported a
+    // warning about the broken section header table earlier.
+    consumeError(ArrOrErr.takeError());
+    return "<?>";
+  }
+
+  if (Obj.getHeader().e_phnum == Obj.getPhNum())
+    return "65535";
+  return "65535 (" + to_string(Obj.getPhNum()) + ")";
+}
+
 template <class ELFT>
 static std::string getSectionHeadersNumString(const ELFFile<ELFT> &Obj,
                                               StringRef FileName) {
-  const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
-  if (ElfHeader.e_shnum != 0)
-    return to_string(ElfHeader.e_shnum);
+  if (Obj.getHeader().e_shnum != 0)
+    return to_string(Obj.getHeader().e_shnum);
 
   Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
   if (!ArrOrErr) {
@@ -3587,17 +3605,16 @@ static std::string getSectionHeadersNumString(const ELFFile<ELFT> &Obj,
     return "<?>";
   }
 
-  if (ArrOrErr->empty())
+  if (Obj.getHeader().e_shnum == Obj.getShNum())
     return "0";
-  return "0 (" + to_string((*ArrOrErr)[0].sh_size) + ")";
+  return "0 (" + to_string(Obj.getShNum()) + ")";
 }
 
 template <class ELFT>
 static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> &Obj,
                                                     StringRef FileName) {
-  const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
-  if (ElfHeader.e_shstrndx != SHN_XINDEX)
-    return to_string(ElfHeader.e_shstrndx);
+  if (Obj.getHeader().e_shstrndx != SHN_XINDEX)
+    return to_string(Obj.getHeader().e_shstrndx);
 
   Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
   if (!ArrOrErr) {
@@ -3607,10 +3624,9 @@ static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> &Obj,
     return "<?>";
   }
 
-  if (ArrOrErr->empty())
+  if (Obj.getHeader().e_shstrndx == Obj.getShStrNdx())
     return "65535 (corrupt: out of range)";
-  return to_string(ElfHeader.e_shstrndx) + " (" +
-         to_string((*ArrOrErr)[0].sh_link) + ")";
+  return "65535 (" + to_string(Obj.getShStrNdx()) + ")";
 }
 
 static const EnumEntry<unsigned> *getObjectFileEnumEntry(unsigned Type) {
@@ -3765,7 +3781,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
   printFields(OS, "Size of this header:", Str);
   Str = to_string(e.e_phentsize) + " (bytes)";
   printFields(OS, "Size of program headers:", Str);
-  Str = to_string(e.e_phnum);
+  Str = getProgramHeadersNumString(this->Obj, this->FileName);
   printFields(OS, "Number of program headers:", Str);
   Str = to_string(e.e_shentsize) + " (bytes)";
   printFields(OS, "Size of section headers:", Str);
@@ -4778,8 +4794,7 @@ void GNUELFDumper<ELFT>::printProgramHeaders(
     return;
 
   if (PrintProgramHeaders) {
-    const Elf_Ehdr &Header = this->Obj.getHeader();
-    if (Header.e_phnum == 0) {
+    if (this->Obj.getPhNum() == 0) {
       OS << "\nThere are no program headers in this file.\n";
     } else {
       printProgramHeaders();
@@ -4798,7 +4813,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printProgramHeaders() {
   OS << "\nElf file type is "
      << enumToString(Header.e_type, ArrayRef(ElfObjectFileType)) << "\n"
      << "Entry point " << format_hex(Header.e_entry, 3) << "\n"
-     << "There are " << Header.e_phnum << " program headers,"
+     << "There are " << this->Obj.getPhNum() << " program headers,"
      << " starting at offset " << Header.e_phoff << "\n\n"
      << "Program Headers:\n";
   if (ELFT::Is64Bits)
@@ -7470,7 +7485,8 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
       W.printFlags("Flags", E.e_flags);
     W.printNumber("HeaderSize", E.e_ehsize);
     W.printNumber("ProgramHeaderEntrySize", E.e_phentsize);
-    W.printNumber("ProgramHeaderCount", E.e_phnum);
+    W.printString("ProgramHeaderCount",
+                  getProgramHeadersNumString(this->Obj, this->FileName));
     W.printNumber("SectionHeaderEntrySize", E.e_shentsize);
     W.printString("SectionHeaderCount",
                   getSectionHeadersNumString(this->Obj, this->FileName));

…ents

Some binaries, such as FreeBSD core dumps, use program headers to store mmap
information. It is possible for such a file to contain more than
PN_XNUM program headers. Therefore, we implement support for PN_XNUM
in readelf and objcopy.
@aokblast aokblast force-pushed the readelf/pxnum_support branch from ca911f0 to 70f9399 Compare October 9, 2025 12:48
@aokblast aokblast changed the title [llvm-readobj, ELF] Support reading bianry has more than PN_XNUM segments [llvm-readobj, ELF] Support reading binaries has more than PN_XNUM segments Oct 9, 2025
@aokblast aokblast changed the title [llvm-readobj, ELF] Support reading binaries has more than PN_XNUM segments [llvm-readobj, ELF] Support for reading binary has more than PN_XNUM segments Oct 9, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants