Skip to content

Commit 9036a24

Browse files
committed
Bug fix in dynamic symbols counting based on GnuHash
API Changes (ELF): * ELF::GnuHash::{check_bloom_filter, check_bucket, check}
1 parent 2f53542 commit 9036a24

File tree

7 files changed

+134
-12
lines changed

7 files changed

+134
-12
lines changed

api/python/ELF/objects/pyGnuHash.cpp

+26
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,32 @@ void init_ELF_GnuHash_class(py::module& m) {
6565
"Hash values",
6666
py::return_value_policy::reference_internal)
6767

68+
.def("check_bloom_filter",
69+
&GnuHash::check_bloom_filter,
70+
"Check if the given hash pass the bloom filter",
71+
"hash"_a)
72+
73+
.def("check_bucket",
74+
&GnuHash::check_bucket,
75+
"Check if the given hash pass the bucket filter",
76+
"hash"_a)
77+
78+
.def("check",
79+
static_cast<bool(GnuHash::*)(const std::string&) const>(&GnuHash::check),
80+
"Check if the symbol *probably* exists. If "
81+
"the returned value is ``false`` you can assume at ``100%`` that "
82+
"the symbol with the given name doesn't exists. If ``true`` you can't "
83+
"do any assumption ",
84+
"symbol_name"_a)
85+
86+
.def("check",
87+
static_cast<bool(GnuHash::*)(uint32_t) const>(&GnuHash::check),
88+
"Check if the symbol associated with the given *probably* exists. If "
89+
"the returned value is ``false`` you can assume at ``100%`` that "
90+
"the symbol doesn't exists. If ``true`` you can't "
91+
"do any assumption",
92+
"hash_value"_a)
93+
6894
.def("__eq__", &GnuHash::operator==)
6995
.def("__ne__", &GnuHash::operator!=)
7096
.def("__hash__",

examples/python/elf_reader.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import traceback
1414

1515
from lief import Logger
16-
Logger.set_level(lief.LOGGING_LEVEL.WARNING)
16+
Logger.set_level(lief.LOGGING_LEVEL.INFO)
1717

1818
from optparse import OptionParser
1919
terminal_rows, terminal_columns = 100, 100

include/LIEF/ELF/GnuHash.hpp

+27
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ class DLL_PUBLIC GnuHash : public Visitable {
3737

3838
public:
3939
GnuHash(void);
40+
GnuHash(uint32_t symbol_idx,
41+
uint32_t shift2,
42+
const std::vector<uint64_t>& bloom_filters,
43+
const std::vector<uint32_t>& buckets,
44+
const std::vector<uint32_t>& hash_values = {});
45+
46+
4047
GnuHash& operator=(const GnuHash& copy);
4148
GnuHash(const GnuHash& copy);
4249
virtual ~GnuHash(void);
@@ -65,6 +72,24 @@ class DLL_PUBLIC GnuHash : public Visitable {
6572
//! @brief Hash values
6673
const std::vector<uint32_t>& hash_values(void) const;
6774

75+
//! @brief Check if the given hash passes the bloom filter
76+
bool check_bloom_filter(uint32_t hash) const;
77+
78+
//! @brief Check if the given hash passes the bucket filter
79+
bool check_bucket(uint32_t hash) const;
80+
81+
//! @brief Check if the symbol *probably* exists. If
82+
//! the returned value is ``false`` you can assume at ``100%`` that
83+
//! the symbol with the given name doesn't exist. If ``true`` you can't
84+
//! make any assumption
85+
bool check(const std::string& symbol_name) const;
86+
87+
//! @brief Check if the symbol associated with the given hash *probably* exists. If
88+
//! the returned value is ``false`` you can assume at ``100%`` that
89+
//! the symbol doesn't exist. If ``true`` you can't
90+
//! make any assumption
91+
bool check(uint32_t hash) const;
92+
6893
bool operator==(const GnuHash& rhs) const;
6994
bool operator!=(const GnuHash& rhs) const;
7095

@@ -79,6 +104,8 @@ class DLL_PUBLIC GnuHash : public Visitable {
79104
std::vector<uint64_t> bloom_filters_;
80105
std::vector<uint32_t> buckets_;
81106
std::vector<uint32_t> hash_values_;
107+
108+
size_t c_;
82109
};
83110

84111

include/LIEF/ELF/utils.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ DLL_PUBLIC unsigned long hash32(const char* name);
3535
DLL_PUBLIC unsigned long hash64(const char* name);
3636
DLL_PUBLIC uint32_t dl_new_hash(const char* name);
3737

38+
3839
}
3940
}
4041

src/ELF/GnuHash.cpp

+51-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "LIEF/visitors/Hash.hpp"
2121

22+
#include "LIEF/ELF/utils.hpp"
2223
#include "LIEF/ELF/GnuHash.hpp"
2324

2425
namespace LIEF {
@@ -32,7 +33,22 @@ GnuHash::GnuHash(void) :
3233
shift2_{0},
3334
bloom_filters_{0},
3435
buckets_{0},
35-
hash_values_{0}
36+
hash_values_{0},
37+
c_{0}
38+
{}
39+
40+
41+
//! @brief Build a GnuHash table from its individual components
//!
//! @param symbol_idx    Value stored as the table's symbol index
//! @param shift2        Shift count used for the second bloom-filter bit
//! @param bloom_filters Bloom filter words (copied)
//! @param buckets       Hash buckets (copied)
//! @param hash_values   Hash values (copied)
//!
//! The bloom word width (``c_``) is left at 0 by this constructor.
GnuHash::GnuHash(uint32_t symbol_idx, uint32_t shift2,
    const std::vector<uint64_t>& bloom_filters,
    const std::vector<uint32_t>& buckets,
    const std::vector<uint32_t>& hash_values) :
  symbol_index_(symbol_idx),
  shift2_(shift2),
  bloom_filters_(bloom_filters),
  buckets_(buckets),
  hash_values_(hash_values),
  c_(0)
{
}
3753

3854

@@ -64,6 +80,40 @@ const std::vector<uint32_t>& GnuHash::hash_values(void) const {
6480
return this->hash_values_;
6581
}
6682

83+
bool GnuHash::check_bloom_filter(uint32_t hash) const {
84+
const size_t C = this->c_;
85+
const uint32_t h1 = hash;
86+
const uint32_t h2 = hash >> this->shift2();
87+
88+
const uint32_t n1 = (h1 / C) % this->maskwords();
89+
90+
const uint32_t b1 = h1 % C;
91+
const uint32_t b2 = h2 % C;
92+
const uint64_t filter = this->bloom_filters()[n1];
93+
return (filter >> b1) & (filter >> b2) & 1;
94+
}
95+
96+
97+
bool GnuHash::check_bucket(uint32_t hash) const {
98+
return this->buckets()[hash % this->nb_buckets()] > 0;
99+
}
100+
101+
bool GnuHash::check(const std::string& symbol_name) const {
102+
uint32_t hash = dl_new_hash(symbol_name.c_str());
103+
return this->check(hash);
104+
}
105+
106+
107+
bool GnuHash::check(uint32_t hash) const {
108+
if (not this->check_bloom_filter(hash)) { // Bloom filter not passed
109+
return false;
110+
}
111+
112+
if (not this->check_bucket(hash)) { // hash buck not passed
113+
return false;
114+
}
115+
return true;
116+
}
67117

68118
bool GnuHash::operator==(const GnuHash& rhs) const {
69119
size_t hash_lhs = Hash::hash(*this);

src/ELF/Parser.tcc

+21-10
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ uint32_t Parser::nb_dynsym_gnu_hash(void) const {
649649
const uint32_t nbuckets = header[0];
650650
const uint32_t symndx = header[1];
651651
const uint32_t maskwords = header[2];
652-
//const uint32_t shift2 = header[3];
652+
const uint32_t shift2 = header[3];
653653

654654
if (maskwords & (maskwords - 1)) {
655655
LOG(WARNING) << "maskwords is not a power of 2";
@@ -692,21 +692,30 @@ uint32_t Parser::nb_dynsym_gnu_hash(void) const {
692692
return 0;
693693
}
694694

695-
nb_symbols = std::max(nb_symbols, symndx);
695+
nb_symbols = symndx;
696696

697-
const uint32_t* hash_values = reinterpret_cast<const uint32_t*>(
698-
this->stream_->read(current_offset, nb_symbols * sizeof(uint32_t)));
697+
GnuHash gnuhash{symndx, shift2, bloom_filters, buckets};
698+
gnuhash.c_ = sizeof(uint__) * 8;
699699

700700

701-
// "It is set to 1 when a symbol is the last symbol in a given hash chain"
702-
while (((*hash_values) & 1) == 0) {
703-
++nb_symbols;
704-
++hash_values;
705-
}
701+
// Register the size of symbols stored in the buckets
702+
std::vector<size_t> nbsym_buckets(nbuckets, 0);
706703

707-
return ++nb_symbols;
704+
for (size_t i = 0; i < nbuckets; ++i) {
705+
uint32_t hash_value = 0;
706+
size_t nsyms = 0;
707+
do {
708+
hash_value = this->stream_->read_integer<uint32_t>(current_offset);
709+
current_offset += sizeof(uint32_t);
708710

711+
nsyms++;
712+
} while ((hash_value & 1) == 0); // "It is set to 1 when a symbol is the last symbol in a given hash bucket"
709713

714+
nbsym_buckets[i] = buckets[i] + nsyms;
715+
}
716+
717+
nb_symbols = std::max<uint32_t>(nb_symbols, *std::max_element(std::begin(nbsym_buckets), std::end(nbsym_buckets)));
718+
return nb_symbols;
710719
}
711720

712721
template<typename ELF_T>
@@ -897,6 +906,7 @@ void Parser::parse_dynamic_symbols(uint64_t offset) {
897906
VLOG(VDEBUG) << "[+] Parsing dynamics symbols";
898907

899908
uint32_t nb_symbols = this->get_numberof_dynamic_symbols<ELF_T>(this->count_mtd_);
909+
VLOG(VDEBUG) << "Number of symbols counted: " << nb_symbols;
900910

901911
const Elf_Off dynamic_symbols_offset = offset;
902912
const Elf_Off string_offset = this->get_dynamic_string_table();
@@ -1494,6 +1504,7 @@ void Parser::parse_symbol_gnu_hash(uint64_t offset) {
14941504

14951505
VLOG(VDEBUG) << "[+] Parser symbol GNU hash";
14961506
GnuHash gnuhash;
1507+
gnuhash.c_ = sizeof(uint__) * 8;
14971508

14981509
uint64_t current_offset = offset;
14991510

tests/elf/elf_test.py

+7
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,13 @@ def test_gnuhash(self):
7676
0x12F7C433, 0xEB01FAB6, 0xECD54543, 0xAD3C9892, 0x72632CCF, 0x12F7A2B3, 0x7C92E3BB, 0x7C96F087]
7777
self.assertEqual(hash_values, hash_values_test)
7878

79+
#for s in list(ls.dynamic_symbols)[gnu_hash.symbol_index:]:
80+
# print(gnu_hash.check(s.name), s.name)
81+
self.assertTrue(all(gnu_hash.check(x.name) for x in list(ls.dynamic_symbols)[gnu_hash.symbol_index:]))
82+
83+
self.assertFalse(gnu_hash.check("foofdsfdsfds"))
84+
self.assertFalse(gnu_hash.check("fazertrvkdfsrezklqpfjeopqdi"))
85+
7986
def test_permutation(self):
8087
samples = [
8188
"ELF/ELF64_x86-64_binary_ls.bin",

0 commit comments

Comments
 (0)