Skip to content
Permalink
Browse files
Bug fix in dynamic symbols counting based on GnuHash
API Changes (ELF):

  * ELF::GnuHash::{check_bloom_filter, check_bucket, check}
  • Loading branch information
romainthomas committed Sep 23, 2017
1 parent 2f53542 commit 9036a24
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 12 deletions.
@@ -65,6 +65,32 @@ void init_ELF_GnuHash_class(py::module& m) {
"Hash values",
py::return_value_policy::reference_internal)

.def("check_bloom_filter",
&GnuHash::check_bloom_filter,
"Check if the given hash pass the bloom filter",
"hash"_a)

.def("check_bucket",
&GnuHash::check_bucket,
"Check if the given hash pass the bucket filter",
"hash"_a)

.def("check",
static_cast<bool(GnuHash::*)(const std::string&) const>(&GnuHash::check),
"Check if the symbol *probably* exists. If "
"the returned value is ``false`` you can assume at ``100%`` that "
"the symbol with the given name doesn't exists. If ``true`` you can't "
"do any assumption ",
"symbol_name"_a)

.def("check",
static_cast<bool(GnuHash::*)(uint32_t) const>(&GnuHash::check),
"Check if the symbol associated with the given *probably* exists. If "
"the returned value is ``false`` you can assume at ``100%`` that "
"the symbol doesn't exists. If ``true`` you can't "
"do any assumption",
"hash_value"_a)

.def("__eq__", &GnuHash::operator==)
.def("__ne__", &GnuHash::operator!=)
.def("__hash__",
@@ -13,7 +13,7 @@
import traceback

from lief import Logger
Logger.set_level(lief.LOGGING_LEVEL.WARNING)
Logger.set_level(lief.LOGGING_LEVEL.INFO)

from optparse import OptionParser
terminal_rows, terminal_columns = 100, 100
@@ -37,6 +37,13 @@ class DLL_PUBLIC GnuHash : public Visitable {

public:
GnuHash(void);
GnuHash(uint32_t symbol_idx,
uint32_t shift2,
const std::vector<uint64_t>& bloom_filters,
const std::vector<uint32_t>& buckets,
const std::vector<uint32_t>& hash_values = {});


GnuHash& operator=(const GnuHash& copy);
GnuHash(const GnuHash& copy);
virtual ~GnuHash(void);
@@ -65,6 +72,24 @@ class DLL_PUBLIC GnuHash : public Visitable {
//! @brief Hash values
const std::vector<uint32_t>& hash_values(void) const;

//! @brief Check if the given hash passes the bloom filter
bool check_bloom_filter(uint32_t hash) const;

//! @brief Check if the given hash passes the bucket filter
bool check_bucket(uint32_t hash) const;

//! @brief Check if the symbol *probably* exists. If
//! the returned value is ``false`` you can assume at ``100%`` that
//! the symbol with the given name doesn't exist. If ``true`` you can't
//! make any assumption
bool check(const std::string& symbol_name) const;

//! @brief Check if the symbol associated with the given hash *probably* exists. If
//! the returned value is ``false`` you can assume at ``100%`` that
//! the symbol doesn't exist. If ``true`` you can't
//! make any assumption
bool check(uint32_t hash) const;

bool operator==(const GnuHash& rhs) const;
bool operator!=(const GnuHash& rhs) const;

@@ -79,6 +104,8 @@ class DLL_PUBLIC GnuHash : public Visitable {
std::vector<uint64_t> bloom_filters_;
std::vector<uint32_t> buckets_;
std::vector<uint32_t> hash_values_;

size_t c_;
};


@@ -35,6 +35,7 @@ DLL_PUBLIC unsigned long hash32(const char* name);
DLL_PUBLIC unsigned long hash64(const char* name);
DLL_PUBLIC uint32_t dl_new_hash(const char* name);


}
}

@@ -19,6 +19,7 @@

#include "LIEF/visitors/Hash.hpp"

#include "LIEF/ELF/utils.hpp"
#include "LIEF/ELF/GnuHash.hpp"

namespace LIEF {
@@ -32,7 +33,22 @@ GnuHash::GnuHash(void) :
shift2_{0},
bloom_filters_{0},
buckets_{0},
hash_values_{0}
hash_values_{0},
c_{0}
{}


// Build a GnuHash from already-decoded table components.
//
// symbol_idx    : value stored as the symbol index of the table
// shift2        : shift count used for the second bloom-filter hash
// bloom_filters : bloom filter words (copied)
// buckets       : hash buckets (copied)
// hash_values   : hash chain values (copied)
//
// c_ is left at 0 by this constructor; the parser assigns it afterwards.
GnuHash::GnuHash(uint32_t symbol_idx,
                 uint32_t shift2,
                 const std::vector<uint64_t>& bloom_filters,
                 const std::vector<uint32_t>& buckets,
                 const std::vector<uint32_t>& hash_values) :
  symbol_index_(symbol_idx),
  shift2_(shift2),
  bloom_filters_(bloom_filters),
  buckets_(buckets),
  hash_values_(hash_values),
  c_(0)
{}


@@ -64,6 +80,40 @@ const std::vector<uint32_t>& GnuHash::hash_values(void) const {
return this->hash_values_;
}

// Check whether `hash` passes the bloom filter of this GNU hash table.
//
// Returns false only when the filter proves that no symbol with this hash
// was registered. Returns true when both filter bits are set, or when the
// table is in a state where the filter cannot be evaluated (inconclusive).
bool GnuHash::check_bloom_filter(uint32_t hash) const {
  const auto& filters = this->bloom_filters();
  const auto nb_words = this->maskwords();

  // An empty filter has no bit set, so nothing can pass it. This also
  // avoids the modulo by zero below.
  if (nb_words == 0 or filters.empty()) {
    return false;
  }

  // c_ is the width (in bits) of a filter word. Both constructors leave it
  // at 0 until it is assigned externally; without it the filter cannot be
  // evaluated, so report "inconclusive" instead of dividing by zero.
  const size_t C = this->c_;
  if (C == 0) {
    return true;
  }

  const uint32_t h1 = hash;

  // Shifting a 32-bit value by 32 or more is undefined behavior, and
  // shift2 comes straight from the parsed file.
  const uint32_t shift = this->shift2();
  const uint32_t h2 = (shift < 32) ? (hash >> shift) : 0;

  const size_t n1 = (h1 / C) % nb_words;

  // maskwords() is expected to match the number of filter words; stay
  // inconclusive rather than read out of bounds if it does not.
  if (n1 >= filters.size()) {
    return true;
  }

  const uint32_t b1 = h1 % C;
  const uint32_t b2 = h2 % C;
  const uint64_t filter = filters[n1];

  // Both bits must be set for the hash to pass.
  return ((filter >> b1) & (filter >> b2) & 1) != 0;
}


// Check whether the bucket selected by `hash` is non-empty.
//
// Returns false when the table has no bucket at all or when the selected
// bucket holds 0 (no chain starts there); true otherwise.
bool GnuHash::check_bucket(uint32_t hash) const {
  const auto nb = this->nb_buckets();

  // Without buckets no symbol can be reached, and `hash % 0` is undefined.
  if (nb == 0) {
    return false;
  }

  const auto& table = this->buckets();
  const size_t idx = hash % nb;

  // Guard against nb_buckets() disagreeing with the stored vector.
  if (idx >= table.size()) {
    return false;
  }

  return table[idx] > 0;
}

// Name-based lookup: hash the symbol name with dl_new_hash() and delegate
// to the hash-based overload.
bool GnuHash::check(const std::string& symbol_name) const {
  return this->check(dl_new_hash(symbol_name.c_str()));
}


// Hash-based lookup: the hash must pass the bloom filter first and then
// the bucket test. The bucket test is only evaluated when the bloom filter
// passes (short-circuit).
bool GnuHash::check(uint32_t hash) const {
  return this->check_bloom_filter(hash) and this->check_bucket(hash);
}

bool GnuHash::operator==(const GnuHash& rhs) const {
size_t hash_lhs = Hash::hash(*this);
@@ -649,7 +649,7 @@ uint32_t Parser::nb_dynsym_gnu_hash(void) const {
const uint32_t nbuckets = header[0];
const uint32_t symndx = header[1];
const uint32_t maskwords = header[2];
//const uint32_t shift2 = header[3];
const uint32_t shift2 = header[3];

if (maskwords & (maskwords - 1)) {
LOG(WARNING) << "maskwords is not a power of 2";
@@ -692,21 +692,30 @@ uint32_t Parser::nb_dynsym_gnu_hash(void) const {
return 0;
}

nb_symbols = std::max(nb_symbols, symndx);
nb_symbols = symndx;

const uint32_t* hash_values = reinterpret_cast<const uint32_t*>(
this->stream_->read(current_offset, nb_symbols * sizeof(uint32_t)));
GnuHash gnuhash{symndx, shift2, bloom_filters, buckets};
gnuhash.c_ = sizeof(uint__) * 8;


// "It is set to 1 when a symbol is the last symbol in a given hash chain"
while (((*hash_values) & 1) == 0) {
++nb_symbols;
++hash_values;
}
// Register the size of symbols stored at the buckets
std::vector<size_t> nbsym_buckets(nbuckets, 0);

return ++nb_symbols;
for (size_t i = 0; i < nbuckets; ++i) {
uint32_t hash_value = 0;
size_t nsyms = 0;
do {
hash_value = this->stream_->read_integer<uint32_t>(current_offset);
current_offset += sizeof(uint32_t);

nsyms++;
} while ((hash_value & 1) == 0); // "It is set to 1 when a symbol is the last symbol in a given hash bucket"

nbsym_buckets[i] = buckets[i] + nsyms;
}

nb_symbols = std::max<uint32_t>(nb_symbols, *std::max_element(std::begin(nbsym_buckets), std::end(nbsym_buckets)));
return nb_symbols;
}

template<typename ELF_T>
@@ -897,6 +906,7 @@ void Parser::parse_dynamic_symbols(uint64_t offset) {
VLOG(VDEBUG) << "[+] Parsing dynamics symbols";

uint32_t nb_symbols = this->get_numberof_dynamic_symbols<ELF_T>(this->count_mtd_);
VLOG(VDEBUG) << "Number of symbols counted: " << nb_symbols;

const Elf_Off dynamic_symbols_offset = offset;
const Elf_Off string_offset = this->get_dynamic_string_table();
@@ -1494,6 +1504,7 @@ void Parser::parse_symbol_gnu_hash(uint64_t offset) {

VLOG(VDEBUG) << "[+] Parser symbol GNU hash";
GnuHash gnuhash;
gnuhash.c_ = sizeof(uint__) * 8;

uint64_t current_offset = offset;

@@ -76,6 +76,13 @@ def test_gnuhash(self):
0x12F7C433, 0xEB01FAB6, 0xECD54543, 0xAD3C9892, 0x72632CCF, 0x12F7A2B3, 0x7C92E3BB, 0x7C96F087]
self.assertEqual(hash_values, hash_values_test)

#for s in list(ls.dynamic_symbols)[gnu_hash.symbol_index:]:
# print(gnu_hash.check(s.name), s.name)
self.assertTrue(all(gnu_hash.check(x.name) for x in list(ls.dynamic_symbols)[gnu_hash.symbol_index:]))

self.assertFalse(gnu_hash.check("foofdsfdsfds"))
self.assertFalse(gnu_hash.check("fazertrvkdfsrezklqpfjeopqdi"))

def test_permutation(self):
samples = [
"ELF/ELF64_x86-64_binary_ls.bin",

0 comments on commit 9036a24

Please sign in to comment.