Skip to content

Commit

Permalink
[llvm-elfabi] Support ELF files that lack a .gnu.hash section
Browse files Browse the repository at this point in the history
Before this change, when reading an ELF file, elfabi determined the number of
entries in .dynsym by reading the .gnu.hash section. This change makes
elfabi read the section headers directly first, which allows elfabi
to work on ELF files that do not have a .gnu.hash section.

Differential Revision: https://reviews.llvm.org/D93362
  • Loading branch information
zeroomega committed Jan 26, 2021
1 parent 4210b87 commit 15313f6
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 57 deletions.
95 changes: 95 additions & 0 deletions llvm/include/llvm/Object/ELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ class ELFFile {
Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section,
Elf_Shdr_Range Sections) const;

Expected<uint64_t> getDynSymtabSize() const;

StringRef getRelocationTypeName(uint32_t Type) const;
void getRelocationTypeName(uint32_t Type,
SmallVectorImpl<char> &Result) const;
Expand Down Expand Up @@ -651,6 +653,99 @@ ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
return getStringTable(Sections[Index], WarnHandler);
}

/// This function finds the number of dynamic symbols using a GNU hash table.
///
/// The largest bucket value is taken as the index of the first symbol in the
/// last chain. The hash values from that point on are scanned until one with
/// its low bit set (the chain terminator) is found.
///
/// @param Table The GNU hash table for .dynsym.
/// @param BufEnd One past the last readable byte of the buffer holding
///        \p Table. No bucket or hash value that is not entirely below this
///        address is read.
/// @return The index of the last symbol reached plus one, or a parse_failed
///         error when the table cannot be walked inside the buffer.
template <class ELFT>
static Expected<uint64_t>
getDynSymtabSizeFromGnuHash(const typename ELFT::GnuHash &Table,
                            const void *BufEnd) {
  using Elf_Word = typename ELFT::Word;
  if (Table.nbuckets == 0)
    return Table.symndx + 1;

  const auto *End = static_cast<const uint8_t *>(BufEnd);
  // True if Count whole Elf_Words starting at Ptr lie below BufEnd. Comparing
  // only the start address is not enough: a truncated buffer may end in the
  // middle of a word.
  auto WordsInBounds = [End](const Elf_Word *Ptr, uint64_t Count) {
    const auto *Bytes = reinterpret_cast<const uint8_t *>(Ptr);
    return Bytes <= End &&
           static_cast<uint64_t>(End - Bytes) / sizeof(Elf_Word) >= Count;
  };

  // nbuckets comes straight from the file, so validate the bucket array
  // before iterating over it.
  const auto Buckets = Table.buckets();
  if (!WordsInBounds(Buckets.begin(), Buckets.size()))
    return createStringError(
        object_error::parse_failed,
        "GNU hash section buckets extend past buffer end");

  uint64_t LastSymIdx = 0;
  // Find the index of the first symbol in the last chain.
  for (Elf_Word Val : Buckets)
    LastSymIdx = std::max(LastSymIdx, static_cast<uint64_t>(Val));

  // A bucket value below symndx (including the case where every bucket is
  // empty) gives no chain entry to start from.
  // NOTE(review): Table.values() is assumed to index relative to symndx, so a
  // smaller count would wrap around -- confirm against the GnuHash definition.
  if (LastSymIdx < Table.symndx)
    return createStringError(
        object_error::parse_failed,
        "no GNU hash bucket refers to a symbol at or after symndx");

  const Elf_Word *It =
      reinterpret_cast<const Elf_Word *>(Table.values(LastSymIdx).end());
  // Locate the end of the chain to find the last symbol index.
  while (WordsInBounds(It, 1) && (*It & 1) == 0) {
    ++LastSymIdx;
    ++It;
  }
  if (!WordsInBounds(It, 1)) {
    return createStringError(
        object_error::parse_failed,
        "no terminator found for GNU hash section before buffer end");
  }
  return LastSymIdx + 1;
}

/// This function determines the number of dynamic symbols. It reads section
/// headers first. If section headers are not available, the number of
/// symbols will be inferred by parsing dynamic hash tables.
///
/// @return The number of .dynsym entries. 0 is returned when section headers
///         exist but none has type SHT_DYNSYM, and when there are no section
///         headers and the dynamic table has neither DT_GNU_HASH nor DT_HASH.
///         An error is returned when the section headers or dynamic entries
///         cannot be read, when the SHT_DYNSYM header has a zero or
///         non-dividing sh_entsize, or when a hash table cannot be mapped or
///         parsed.
template <class ELFT>
Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
  // Read .dynsym section header first if available.
  Expected<Elf_Shdr_Range> SectionsOrError = sections();
  if (!SectionsOrError)
    return SectionsOrError.takeError();
  for (const Elf_Shdr &Sec : *SectionsOrError) {
    if (Sec.sh_type != ELF::SHT_DYNSYM)
      continue;
    // Reject a zero entry size up front: the modulo and division below would
    // otherwise divide by zero on a malformed header.
    if (Sec.sh_entsize == 0)
      return createStringError(object_error::parse_failed,
                               "SHT_DYNSYM section has sh_entsize of 0");
    if (Sec.sh_size % Sec.sh_entsize != 0) {
      return createStringError(object_error::parse_failed,
                               "SHT_DYNSYM section has sh_size (" +
                                   Twine(Sec.sh_size) + ") % sh_entsize (" +
                                   Twine(Sec.sh_entsize) + ") that is not 0");
    }
    return Sec.sh_size / Sec.sh_entsize;
  }

  if (!SectionsOrError->empty()) {
    // Section headers are available but .dynsym header is not found.
    // Return 0 as .dynsym does not exist.
    return 0;
  }

  // Section headers do not exist. Fall back to inferring the
  // upper bound of .dynsym from .gnu.hash and .hash.
  Expected<Elf_Dyn_Range> DynTable = dynamicEntries();
  if (!DynTable)
    return DynTable.takeError();
  llvm::Optional<uint64_t> ElfHash;
  llvm::Optional<uint64_t> ElfGnuHash;
  for (const Elf_Dyn &Entry : *DynTable) {
    switch (Entry.d_tag) {
    case ELF::DT_HASH:
      ElfHash = Entry.d_un.d_ptr;
      break;
    case ELF::DT_GNU_HASH:
      ElfGnuHash = Entry.d_un.d_ptr;
      break;
    }
  }

  // toMappedAddr() hands back only a start address, so make sure a complete
  // fixed-size table header fits in the buffer before any field is read.
  auto InBuffer = [this](const uint8_t *Ptr, uint64_t Size) {
    const uint8_t *End = this->Buf.bytes_end();
    return Ptr <= End && static_cast<uint64_t>(End - Ptr) >= Size;
  };

  // Search GNU hash table to try to find the upper bound of dynsym.
  if (ElfGnuHash) {
    Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfGnuHash);
    if (!TablePtr)
      return TablePtr.takeError();
    if (!InBuffer(TablePtr.get(), sizeof(Elf_GnuHash)))
      return createStringError(
          object_error::parse_failed,
          "DT_GNU_HASH table header goes past the end of the file");
    const Elf_GnuHash *Table =
        reinterpret_cast<const Elf_GnuHash *>(TablePtr.get());
    return getDynSymtabSizeFromGnuHash<ELFT>(*Table, this->Buf.bytes_end());
  }

  // Search SYSV hash table to try to find the upper bound of dynsym.
  if (ElfHash) {
    Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfHash);
    if (!TablePtr)
      return TablePtr.takeError();
    if (!InBuffer(TablePtr.get(), sizeof(Elf_Hash)))
      return createStringError(
          object_error::parse_failed,
          "DT_HASH table header goes past the end of the file");
    const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get());
    return Table->nchain;
  }
  return 0;
}

/// Constructs an ELFFile whose Buf member is initialized from \p Object.
///
/// @param Object The bytes of the ELF object this ELFFile operates on.
template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}

template <class ELFT>
Expand Down
58 changes: 1 addition & 57 deletions llvm/lib/InterfaceStub/ELFObjHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -440,62 +440,6 @@ static Error populateDynamic(DynamicEntries &Dyn,
return Error::success();
}

/// Counts the dynamic symbols described by a GNU hash table.
///
/// The largest bucket value is used as the starting symbol index; hash values
/// are then walked forward until one with its lowest bit set is reached.
///
/// NOTE(review): the walk below has no upper bound, so a table without a
/// terminating entry is read past its end.
///
/// @param Table The GNU hash table for .dynsym.
/// @return The index of the last symbol reached plus one, or symndx + 1 when
///         the table has no buckets.
template <class ELFT>
static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) {
  using Elf_Word = typename ELFT::Word;
  if (Table.nbuckets == 0)
    return Table.symndx + 1;

  // The largest bucket value is the index of the first symbol in the last
  // chain.
  uint64_t FirstInLastChain = 0;
  for (Elf_Word Bucket : Table.buckets()) {
    const uint64_t Candidate = static_cast<uint64_t>(Bucket);
    if (FirstInLastChain < Candidate)
      FirstInLastChain = Candidate;
  }

  // Advance one symbol per hash value until the end-of-chain bit is seen.
  uint64_t SymIdx = FirstInLastChain;
  for (const Elf_Word *Entry = reinterpret_cast<const Elf_Word *>(
           Table.values(FirstInLastChain).end());
       (*Entry & 1) == 0; ++Entry)
    ++SymIdx;
  return SymIdx + 1;
}

/// Determines how many dynamic symbols an ELF file has by consulting its
/// dynamic hash tables, for use when section headers cannot be relied on.
/// The GNU hash table is preferred; the SYSV hash table is used only when no
/// GNU hash table address was recorded.
///
/// @param Dyn Entries with the locations of hash tables.
/// @param ElfFile The ElfFile that the section contents reside in.
/// @return The inferred symbol count, 0 when neither hash table address is
///         set, or the error produced while mapping a table address.
template <class ELFT>
static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn,
                                     const ELFFile<ELFT> &ElfFile) {
  using GnuHashTable = typename ELFT::GnuHash;
  using SysVHashTable = typename ELFT::Hash;

  // GNU hash table: walk it to find the upper bound of dynsym.
  if (Dyn.GnuHash.hasValue()) {
    Expected<const uint8_t *> MappedOrErr = ElfFile.toMappedAddr(*Dyn.GnuHash);
    if (MappedOrErr)
      return getDynSymtabSize<ELFT>(
          *reinterpret_cast<const GnuHashTable *>(MappedOrErr.get()));
    return MappedOrErr.takeError();
  }

  // SYSV hash table: nchain is used as the upper bound of dynsym.
  if (Dyn.ElfHash.hasValue()) {
    Expected<const uint8_t *> MappedOrErr = ElfFile.toMappedAddr(*Dyn.ElfHash);
    if (MappedOrErr)
      return reinterpret_cast<const SysVHashTable *>(MappedOrErr.get())->nchain;
    return MappedOrErr.takeError();
  }

  return 0;
}

/// This function extracts symbol type from a symbol's st_info member and
/// maps it to an ELFSymbolType enum.
/// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported.
Expand Down Expand Up @@ -637,7 +581,7 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) {
}

// Populate Symbols from .dynsym table and dynamic string table.
Expected<uint64_t> SymCount = getNumSyms(DynEnt, ElfFile);
Expected<uint64_t> SymCount = ElfFile.getDynSymtabSize();
if (!SymCount)
return SymCount.takeError();
if (*SymCount > 0) {
Expand Down
115 changes: 115 additions & 0 deletions llvm/test/tools/llvm-elfabi/read-elf-dynsym.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
## Test reading ELF with .dynsym under the following conditions:
## * Section headers are available.
## * Section headers are stripped but there is a DT_GNU_HASH dynamic tag.
## * Section headers are stripped but there is a DT_HASH dynamic tag.

## Test if llvm-elfabi reads DT_SYMTAB size through section headers by putting the wrong terminator in DT_GNU_HASH.
# RUN: yaml2obj %s -o %tfull -DGNUHASHVALUE="[0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00"
# RUN: llvm-elfabi --elf %tfull --emit-tbe=- | FileCheck %s

## Test if llvm-elfabi fails to read DT_SYMTAB size through section headers when the value of sh_entsize is invalid.
# RUN: yaml2obj %s -o %tfull -DGNUHASHVALUE="[0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DENTSIZE="0x19"
# RUN: not llvm-elfabi --elf %tfull --emit-tbe=- 2>&1 | FileCheck %s --check-prefix=BADENTSIZE

## Test if llvm-elfabi reads DT_SYMTAB size through DT_GNU_HASH.
# RUN: yaml2obj %s -o %tw.gnu.hash -DGNUHASHVALUE="[0x8, 0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DNOHEADER="true"
# RUN: llvm-elfabi --elf %tw.gnu.hash --emit-tbe=- | FileCheck %s

## Test if llvm-elfabi fails to read DT_SYMTAB size through DT_GNU_HASH when there is no terminator.
# RUN: yaml2obj %s -o %tw.gnu.hash -DGNUHASHVALUE="[0x8, 0xA]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DNOHEADER="true"
# RUN: not llvm-elfabi --elf %tw.gnu.hash --emit-tbe=- 2>&1 | FileCheck %s --check-prefix=NOTERMINATOR

# CHECK: --- !tapi-tbe
# CHECK-NEXT: TbeVersion: 1.0
# CHECK-NEXT: Arch: AArch64
# CHECK-NEXT: Symbols:
# CHECK-NEXT: bar: { Type: Object, Size: 0, Undefined: true }
# CHECK-NEXT: foo: { Type: Func, Undefined: true }
# CHECK-NEXT: ...

# BADENTSIZE: SHT_DYNSYM section has sh_size (72) % sh_entsize (25) that is not 0

# NOTERMINATOR: error: no terminator found for GNU hash section before buffer end

--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_DYN
Machine: EM_AARCH64
Sections:
- Name: .text
Type: SHT_PROGBITS
- Name: .data
Type: SHT_PROGBITS
- Name: .strtab
Type: SHT_STRTAB
- Name: .shstrtab
Type: SHT_STRTAB
- Name: .dynsym
Type: SHT_DYNSYM
Flags: [ SHF_ALLOC ]
EntSize: [[ENTSIZE=0x18]]
Address: 0x400
AddressAlign: 0x400
- Name: .dynstr
Type: SHT_STRTAB
Flags: [ SHF_ALLOC ]
Address: 0x600
AddressAlign: 0x200
- Name: .dynamic
Type: SHT_DYNAMIC
Flags: [ SHF_ALLOC ]
Address: 0x800
AddressAlign: 0x200
Entries:
- Tag: DT_STRTAB
Value: 0x600
- Tag: DT_STRSZ
Value: 9
- Tag: DT_SYMTAB
Value: 0x400
- Tag: [[TAG1]]
Value: [[VAL1]]
- Tag: DT_NULL
Value: 0
- Name: .hash
Type: SHT_HASH
Flags: [ SHF_ALLOC ]
Address: 0xA00
AddressAlign: 0x200
Bucket: [ 1 ]
Chain: [ 1, 2, 3 ]
- Name: .gnu.hash
Type: SHT_GNU_HASH
Flags: [ SHF_ALLOC ]
Address: 0xC00
AddressAlign: 0x200
Header:
SymNdx: 0x1
Shift2: 0x2
MaskWords: 2
NBuckets: 2
BloomFilter: [0x3, 0x4]
HashBuckets: [0x0, 0x1]
HashValues: [[GNUHASHVALUE]]
DynamicSymbols:
- Name: foo
Type: STT_FUNC
Value: 0x100
Binding: 1
- Name: bar
Type: STT_OBJECT
Value: 0x200
Binding: 1
ProgramHeaders:
- Type: PT_LOAD
VAddr: 0x400
FirstSec: .dynsym
LastSec: .gnu.hash
- Type: PT_DYNAMIC
VAddr: 0x800
FirstSec: .dynamic
LastSec: .dynamic
SectionHeaderTable:
NoHeaders: [[NOHEADER=false]]

0 comments on commit 15313f6

Please sign in to comment.