Skip to content

Commit 985d124

Browse files
committed
Enhance automatic symbol counting
1 parent e805669 commit 985d124

File tree

3 files changed

+136
-59
lines changed

3 files changed

+136
-59
lines changed

api/c/ELF/Binary.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,12 @@ void init_c_binary(Elf_Binary_t* c_binary, Binary* binary) {
4040
c_binary->interpreter = nullptr;
4141
if (binary->has_interpreter()) {
4242
std::string interp = binary->interpreter();
43-
c_binary->interpreter = static_cast<char*>(malloc(interp.size() * sizeof(char)));
43+
c_binary->interpreter = static_cast<char*>(malloc((interp.size() + 1) * sizeof(char)));
4444
std::memcpy(
4545
reinterpret_cast<void*>(const_cast<char*>(c_binary->interpreter)),
4646
reinterpret_cast<const void*>(interp.data()),
4747
interp.size());
48+
reinterpret_cast<char*>(const_cast<char*>(c_binary->interpreter))[interp.size()] = '\0';
4849
}
4950

5051

include/LIEF/ELF/Parser.hpp

+7
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ namespace ELF {
4646
class DLL_PUBLIC Parser : public LIEF::Parser {
4747
public:
4848

49+
static constexpr uint32_t NB_MAX_SYMBOLS = 1000000;
50+
static constexpr uint32_t DELTA_NB_SYMBOLS = 3000;
51+
4952
//! @brief Parse an ELF file and return a LIEF::ELF::Binary object
5053
//!
5154
//! For weird binaries (e.g. sectionless) you can choose which method to use to count dynamic symbols
@@ -207,6 +210,10 @@ class DLL_PUBLIC Parser : public LIEF::Parser {
207210
//! @brief Parse the symbols' SYSV hash
208211
void parse_symbol_sysv_hash(uint64_t offset);
209212

213+
214+
template<typename ELF_T, typename REL_T>
215+
uint32_t max_relocation_index(uint64_t relocations_offset, uint64_t size) const;
216+
210217
std::unique_ptr<VectorStream> stream_;
211218
Binary* binary_;
212219
uint32_t type_;

src/ELF/Parser.tcc

+127-58
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ namespace LIEF {
2323
namespace ELF {
2424
template<typename ELF_T>
2525
void Parser::parse_binary(void) {
26-
using Elf_Addr = typename ELF_T::Elf_Addr;
2726
using Elf_Off = typename ELF_T::Elf_Off;
2827

2928
VLOG(VDEBUG) << "Start parsing";
@@ -481,99 +480,175 @@ uint32_t Parser::get_numberof_dynamic_symbols(DYNSYM_COUNT_METHODS mtd) const {
481480
case DYNSYM_COUNT_METHODS::COUNT_AUTO:
482481
default:
483482
{
484-
uint32_t nb_dynsym = 0;
485-
486-
nb_dynsym = this->get_numberof_dynamic_symbols<ELF_T>(DYNSYM_COUNT_METHODS::COUNT_HASH);
483+
uint32_t nb_dynsym, nb_dynsym_tmp = 0;
487484

488-
if (nb_dynsym > 0) {
489-
return nb_dynsym;
490-
}
485+
nb_dynsym = this->get_numberof_dynamic_symbols<ELF_T>(DYNSYM_COUNT_METHODS::COUNT_RELOCATIONS);
491486

492-
nb_dynsym = this->get_numberof_dynamic_symbols<ELF_T>(DYNSYM_COUNT_METHODS::COUNT_SECTION);
487+
nb_dynsym_tmp = this->get_numberof_dynamic_symbols<ELF_T>(DYNSYM_COUNT_METHODS::COUNT_SECTION);
493488

494-
if (nb_dynsym > 0) {
495-
return nb_dynsym;
489+
if (nb_dynsym_tmp < Parser::NB_MAX_SYMBOLS and
490+
nb_dynsym_tmp > nb_dynsym and
491+
(nb_dynsym_tmp - nb_dynsym) < Parser::DELTA_NB_SYMBOLS) {
492+
nb_dynsym = nb_dynsym_tmp;
496493
}
497494

495+
nb_dynsym_tmp = this->get_numberof_dynamic_symbols<ELF_T>(DYNSYM_COUNT_METHODS::COUNT_HASH);
498496

499-
nb_dynsym = this->get_numberof_dynamic_symbols<ELF_T>(DYNSYM_COUNT_METHODS::COUNT_RELOCATIONS);
500-
501-
if (nb_dynsym > 0) {
502-
return nb_dynsym;
497+
if (nb_dynsym_tmp < Parser::NB_MAX_SYMBOLS and
498+
nb_dynsym_tmp > nb_dynsym and
499+
(nb_dynsym_tmp - nb_dynsym) < Parser::DELTA_NB_SYMBOLS) {
500+
nb_dynsym = nb_dynsym_tmp;
503501
}
504502

505-
return 0;
503+
return nb_dynsym;
506504
}
507505
}
508506
}
509507

510508
template<typename ELF_T>
511509
uint32_t Parser::nb_dynsym_relocations(void) const {
512-
using Elf_Rela = typename ELF_T::Elf_Rela;
513-
using Elf_Rel = typename ELF_T::Elf_Rel;
510+
uint32_t nb_symbols = 0;
514511

515-
using Elf_Addr = typename ELF_T::Elf_Addr;
516-
using Elf_Off = typename ELF_T::Elf_Off;
512+
// Dynamic Relocations
513+
// ===================
517514

518-
auto&& it_pltgot_relocations_size = std::find_if(
515+
// RELA
516+
// ----
517+
auto&& it_dynamic_relocations = std::find_if(
519518
std::begin(this->binary_->dynamic_entries_),
520519
std::end(this->binary_->dynamic_entries_),
521520
[] (const DynamicEntry* entry) {
522-
return entry != nullptr and entry->tag() == DYNAMIC_TAGS::DT_PLTRELSZ;
521+
return entry != nullptr and entry->tag() == DYNAMIC_TAGS::DT_RELA;
523522
});
524523

525-
auto&& it_pltgot_relocations_type = std::find_if(
524+
auto&& it_dynamic_relocations_size = std::find_if(
526525
std::begin(this->binary_->dynamic_entries_),
527526
std::end(this->binary_->dynamic_entries_),
528527
[] (const DynamicEntry* entry) {
529-
return entry != nullptr and entry->tag() == DYNAMIC_TAGS::DT_PLTREL;
528+
return entry != nullptr and entry->tag() == DYNAMIC_TAGS::DT_RELASZ;
530529
});
531530

532-
if (it_pltgot_relocations_size == std::end(this->binary_->dynamic_entries_)) {
533-
return 0;
531+
if (it_dynamic_relocations != std::end(this->binary_->dynamic_entries_) and
532+
it_dynamic_relocations_size != std::end(this->binary_->dynamic_entries_)) {
533+
const uint64_t virtual_address = (*it_dynamic_relocations)->value();
534+
const uint64_t size = (*it_dynamic_relocations_size)->value();
535+
try {
536+
uint64_t offset = this->binary_->virtual_address_to_offset(virtual_address);
537+
nb_symbols = std::max(nb_symbols, this->max_relocation_index<ELF_T, typename ELF_T::Elf_Rela>(offset, size));
538+
} catch (const LIEF::exception&) {
539+
}
534540
}
535541

536-
DYNAMIC_TAGS type;
537-
const Elf_Off size = (*it_pltgot_relocations_size)->value();
538542

539-
if (it_pltgot_relocations_type != std::end(this->binary_->dynamic_entries_)) {
540-
type = static_cast<DYNAMIC_TAGS>((*it_pltgot_relocations_type)->value());
541-
} else {
542-
if (std::is_same<ELF_T, ELF64>::value) {
543-
type = DYNAMIC_TAGS::DT_RELA;
544-
} else {
545-
type = DYNAMIC_TAGS::DT_REL;
543+
// REL
544+
// ---
545+
it_dynamic_relocations = std::find_if(
546+
std::begin(this->binary_->dynamic_entries_),
547+
std::end(this->binary_->dynamic_entries_),
548+
[] (const DynamicEntry* entry) {
549+
return entry != nullptr and entry->tag() == DYNAMIC_TAGS::DT_REL;
550+
});
551+
552+
it_dynamic_relocations_size = std::find_if(
553+
std::begin(this->binary_->dynamic_entries_),
554+
std::end(this->binary_->dynamic_entries_),
555+
[] (const DynamicEntry* entry) {
556+
return entry != nullptr and entry->tag() == DYNAMIC_TAGS::DT_RELSZ;
557+
});
558+
559+
if (it_dynamic_relocations != std::end(this->binary_->dynamic_entries_) and
560+
it_dynamic_relocations_size != std::end(this->binary_->dynamic_entries_)) {
561+
const uint64_t virtual_address = (*it_dynamic_relocations)->value();
562+
const uint64_t size = (*it_dynamic_relocations_size)->value();
563+
try {
564+
const uint64_t offset = this->binary_->virtual_address_to_offset(virtual_address);
565+
nb_symbols = std::max(nb_symbols, this->max_relocation_index<ELF_T, typename ELF_T::Elf_Rel>(offset, size));
566+
} catch (const LIEF::exception&) {
567+
546568
}
569+
547570
}
548571

549-
switch(type) {
550-
case DYNAMIC_TAGS::DT_RELA:
551-
{
552-
return static_cast<uint32_t>(size / sizeof(Elf_Rela));
553-
break;
554-
}
572+
// Parse PLT/GOT Relocations
573+
// ==========================
574+
auto&& it_pltgot_relocations = std::find_if(
575+
std::begin(this->binary_->dynamic_entries_),
576+
std::end(this->binary_->dynamic_entries_),
577+
[] (const DynamicEntry* entry) {
578+
return entry != nullptr and entry->tag() == DYNAMIC_TAGS::DT_JMPREL;
579+
});
555580

556-
case DYNAMIC_TAGS::DT_REL:
557-
{
558-
return static_cast<uint32_t>(size / sizeof(Elf_Rel));
559-
break;
581+
auto&& it_pltgot_relocations_size = std::find_if(
582+
std::begin(this->binary_->dynamic_entries_),
583+
std::end(this->binary_->dynamic_entries_),
584+
[] (const DynamicEntry* entry) {
585+
return entry != nullptr and entry->tag() == DYNAMIC_TAGS::DT_PLTRELSZ;
586+
});
587+
588+
auto&& it_pltgot_relocations_type = std::find_if(
589+
std::begin(this->binary_->dynamic_entries_),
590+
std::end(this->binary_->dynamic_entries_),
591+
[] (const DynamicEntry* entry) {
592+
return entry != nullptr and entry->tag() == DYNAMIC_TAGS::DT_PLTREL;
593+
});
594+
595+
if (it_pltgot_relocations != std::end(this->binary_->dynamic_entries_) and
596+
it_pltgot_relocations_size != std::end(this->binary_->dynamic_entries_)) {
597+
const uint64_t virtual_address = (*it_pltgot_relocations)->value();
598+
const uint64_t size = (*it_pltgot_relocations_size)->value();
599+
DYNAMIC_TAGS type;
600+
if (it_pltgot_relocations_type != std::end(this->binary_->dynamic_entries_)) {
601+
type = static_cast<DYNAMIC_TAGS>((*it_pltgot_relocations_type)->value());
602+
} else {
603+
// No DT_PLTREL entry: guess the relocation type, assuming DT_RELA on ELF64 and DT_REL on ELF32
604+
if (std::is_same<ELF_T, ELF64>::value) {
605+
type = DYNAMIC_TAGS::DT_RELA;
606+
} else {
607+
type = DYNAMIC_TAGS::DT_REL;
560608
}
609+
}
561610

562-
default:
563-
{
564-
return 0;
611+
try {
612+
const uint64_t offset = this->binary_->virtual_address_to_offset(virtual_address);
613+
if (type == DYNAMIC_TAGS::DT_RELA) {
614+
nb_symbols = std::max(nb_symbols, this->max_relocation_index<ELF_T, typename ELF_T::Elf_Rela>(offset, size));
615+
} else {
616+
nb_symbols = std::max(nb_symbols, this->max_relocation_index<ELF_T, typename ELF_T::Elf_Rel>(offset, size));
565617
}
618+
} catch (const LIEF::exception& e) {
619+
LOG(WARNING) << e.what();
620+
621+
}
566622
}
567-
return 0;
623+
624+
return nb_symbols;
568625
}
569626

627+
template<typename ELF_T, typename REL_T>
628+
uint32_t Parser::max_relocation_index(uint64_t relocations_offset, uint64_t size) const {
629+
static_assert(std::is_same<REL_T, typename ELF_T::Elf_Rel>::value or
630+
std::is_same<REL_T, typename ELF_T::Elf_Rela>::value, "REL_T must be Elf_Rel or Elf_Rela");
631+
632+
const uint8_t shift = std::is_same<ELF_T, ELF32>::value ? 8 : 32;
633+
634+
const uint32_t nb_entries = static_cast<uint32_t>(size / sizeof(REL_T));
635+
636+
const REL_T* reloc_entry = reinterpret_cast<const REL_T*>(
637+
this->stream_->read(relocations_offset, nb_entries * sizeof(REL_T)));
638+
uint32_t idx = 0;
639+
for (uint32_t i = 0; i < nb_entries; ++i) {
640+
idx = std::max(idx, static_cast<uint32_t>(reloc_entry->r_info >> shift));
641+
reloc_entry++;
642+
}
643+
return (idx + 1);
644+
} // max_relocation_index
645+
646+
570647

571648
template<typename ELF_T>
572649
uint32_t Parser::nb_dynsym_section(void) const {
573650
using Elf_Sym = typename ELF_T::Elf_Sym;
574-
575-
using Elf_Addr = typename ELF_T::Elf_Addr;
576-
using Elf_Off = typename ELF_T::Elf_Off;
651+
using Elf_Off = typename ELF_T::Elf_Off;
577652

578653
auto&& it_dynamic_section = std::find_if(
579654
std::begin(this->binary_->sections_),
@@ -609,7 +684,6 @@ uint32_t Parser::nb_dynsym_hash(void) const {
609684

610685
template<typename ELF_T>
611686
uint32_t Parser::nb_dynsym_sysv_hash(void) const {
612-
using Elf_Addr = typename ELF_T::Elf_Addr;
613687
using Elf_Off = typename ELF_T::Elf_Off;
614688

615689
const DynamicEntry& dyn_hash = this->binary_->get(DYNAMIC_TAGS::DT_HASH);
@@ -633,7 +707,6 @@ template<typename ELF_T>
633707
uint32_t Parser::nb_dynsym_gnu_hash(void) const {
634708
using uint__ = typename ELF_T::uint;
635709

636-
using Elf_Addr = typename ELF_T::Elf_Addr;
637710
using Elf_Off = typename ELF_T::Elf_Off;
638711

639712
const DynamicEntry& dyn_hash = this->binary_->get(DYNAMIC_TAGS::DT_GNU_HASH);
@@ -722,7 +795,6 @@ template<typename ELF_T>
722795
void Parser::parse_sections(void) {
723796
using Elf_Shdr = typename ELF_T::Elf_Shdr;
724797

725-
using Elf_Addr = typename ELF_T::Elf_Addr;
726798
using Elf_Off = typename ELF_T::Elf_Off;
727799
VLOG(VDEBUG) << "[+] Parsing Section";
728800

@@ -780,7 +852,6 @@ template<typename ELF_T>
780852
void Parser::parse_segments(void) {
781853
using Elf_Phdr = typename ELF_T::Elf_Phdr;
782854

783-
using Elf_Addr = typename ELF_T::Elf_Addr;
784855
using Elf_Off = typename ELF_T::Elf_Off;
785856

786857
VLOG(VDEBUG) << "[+] Parse Segments";
@@ -813,7 +884,7 @@ void Parser::parse_segments(void) {
813884
this->stream_->read(offset_to_content, size));
814885
segment->content({content, content + size});
815886
if (segment->type() == SEGMENT_TYPES::PT_INTERP) {
816-
this->binary_->interpreter_ = this->stream_->read_string(offset_to_content);
887+
this->binary_->interpreter_ = this->stream_->read_string(offset_to_content, segment->physical_size());
817888
}
818889

819890
} catch (const LIEF::read_out_of_bound&) {
@@ -901,7 +972,6 @@ template<typename ELF_T>
901972
void Parser::parse_dynamic_symbols(uint64_t offset) {
902973
using Elf_Sym = typename ELF_T::Elf_Sym;
903974

904-
using Elf_Addr = typename ELF_T::Elf_Addr;
905975
using Elf_Off = typename ELF_T::Elf_Off;
906976
VLOG(VDEBUG) << "[+] Parsing dynamics symbols";
907977

@@ -1272,7 +1342,6 @@ template<typename ELF_T, typename REL_T>
12721342
void Parser::parse_pltgot_relocations(uint64_t offset, uint64_t size) {
12731343
static_assert(std::is_same<REL_T, typename ELF_T::Elf_Rel>::value or
12741344
std::is_same<REL_T, typename ELF_T::Elf_Rela>::value, "REL_T must be Elf_Rel or Elf_Rela");
1275-
using Elf_Addr = typename ELF_T::Elf_Addr;
12761345
using Elf_Off = typename ELF_T::Elf_Off;
12771346

12781347
// Already Parsed

0 commit comments

Comments
 (0)