Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc] add hashtable fuzzing #87949

Merged
merged 12 commits into from
May 2, 2024
18 changes: 18 additions & 0 deletions libc/fuzzing/__support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,21 @@ add_libc_fuzzer(
DEPENDS
libc.src.__support.big_int
)

# Fuzz target for the hashtable support code, built from hashtable_fuzz.cpp
# against the libc.src.__support.HashTable.table target.
add_libc_fuzzer(
hashtable_fuzz
SRCS
hashtable_fuzz.cpp
DEPENDS
libc.src.__support.HashTable.table
)

# Second fuzz target built from the same hashtable_fuzz.cpp source and the same
# dependency, but compiled with __LIBC_EXPLICIT_SIMD_OPT defined.
# NOTE(review): presumably the macro selects the explicit-SIMD code paths of the
# table implementation -- confirm against the HashTable sources.
add_libc_fuzzer(
hashtable_opt_fuzz
SRCS
hashtable_fuzz.cpp
DEPENDS
libc.src.__support.HashTable.table
COMPILE_OPTIONS
-D__LIBC_EXPLICIT_SIMD_OPT
)
182 changes: 182 additions & 0 deletions libc/fuzzing/__support/hashtable_fuzz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
//===-- hashtable_fuzz.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Fuzzing test for llvm-libc hashtable implementations.
///
//===----------------------------------------------------------------------===//
#include "include/llvm-libc-types/ENTRY.h"
SchrodingerZhu marked this conversation as resolved.
Show resolved Hide resolved
#include "src/__support/CPP/string_view.h"
#include "src/__support/HashTable/table.h"

namespace LIBC_NAMESPACE {

// Layout of a fuzzing payload.
//
// Header (read once, in this order):
//   - uint16_t: initial capacity for table A
//   - uint64_t: seed for table A
//   - uint16_t: initial capacity for table B
//   - uint64_t: seed for table B
//
// Body: a sequence of actions, each selected by its leading byte modulo 5:
//   - 0, 1, 2 -> Insert:     the byte is followed by a null-terminated string
//   - 3       -> Find:       the byte is followed by a null-terminated string
//   - 4       -> CrossCheck: the byte stands alone
static constexpr size_t PER_TABLE_HEADER_SIZE =
    sizeof(uint16_t) + sizeof(uint64_t);
// Number of bytes taken by the header: one capacity/seed pair per table.
static constexpr size_t INITIAL_HEADER_SIZE = 2 * PER_TABLE_HEADER_SIZE;

// Declared here and defined outside this file; the custom mutator calls it to
// perform the generic byte-level mutation before repairing the payload layout.
extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been looking into moving to https://github.com/google/fuzztest as the fuzzing framework for our fuzz tests in future. It seems like this might be easier with that framework, if you are interested in trying it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume it uses structured unit tests; so yes, that should make this much easier to approach.

// Custom mutator: applies the generic mutation via LLVMFuzzerMutate and then
// walks the payload so that the returned length ends on an action boundary.
//
// - data:     buffer holding the payload; may be modified in place.
// - size:     current payload length in bytes.
// - max_size: upper bound used for all scanning and for the returned length.
// - seed:     not used by this implementation.
//
// Returns the length of the repaired payload, or 0 when the mutated payload is
// too short to contain the header.
extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size,
                                          size_t max_size, unsigned int seed) {
  size = LLVMFuzzerMutate(data, size, max_size);
  // A payload without the full capacity/seed header is discarded.
  if (size < INITIAL_HEADER_SIZE)
    return 0;

  // Actions start right after the header.
  size_t pos = INITIAL_HEADER_SIZE;
  while (pos < size) {
    const bool is_cross_check = static_cast<uint8_t>(data[pos]) % 5 == 4;
    if (!is_cross_check) {
      // Find / Insert: stop here unless there is room left for the action
      // byte and a terminated string.
      if (pos + 2 >= max_size)
        return pos;
      // Step over the action byte, then scan for the string terminator. The
      // scan is bounded by max_size rather than by the mutated size.
      for (++pos; pos < max_size && data[pos] != 0; ++pos) {
      }
      // Ran off the end without seeing a terminator: force one into the last
      // byte and report the payload as filling the whole buffer.
      if (pos == max_size && data[pos - 1] != 0) {
        data[pos - 1] = 0;
        return max_size;
      }
    }
    // Consume the CrossCheck byte, or the terminator of a Find / Insert key.
    ++pos;
  }
  // Everything up to pos is a sequence of complete actions.
  return pos;
}

// One decoded fuzzing action: what to do, plus the key it applies to.
struct Action {
  // Kind of operation requested by the payload.
  enum class Tag { Find, Insert, CrossCheck };
  Tag tag;
  // Key operand for Find / Insert; CrossCheck actions are built with an empty
  // (default-constructed) view.
  cpp::string_view key;
};

// Cursor over the payload that is currently being replayed.
//
// All readers are bounded by `remaining`: a payload handed to the fuzz target
// is not guaranteed to have been shaped by the custom mutator (e.g. seed
// corpus files or crash reproducers), so a truncated or unterminated payload
// must not cause reads past the end of the input.
static struct {
  // Number of unread bytes starting at `buffer`.
  size_t remaining;
  // Next unread byte of the payload.
  const char *buffer;

  // Reads an integral value of type T from the payload (host byte order) and
  // advances the cursor by sizeof(T). If fewer than sizeof(T) bytes are left,
  // the cursor is drained and 0 is returned instead.
  template <typename T> T next() {
    static_assert(cpp::is_integral<T>::value, "T must be an integral type");
    if (remaining < sizeof(T)) {
      buffer += remaining;
      remaining = 0;
      return static_cast<T>(0);
    }
    // Copy byte-wise into the object representation of the result; this avoids
    // both unaligned loads and reading an inactive union member.
    T result;
    char *dst = reinterpret_cast<char *>(&result);
    for (size_t i = 0; i < sizeof(T); i++)
      dst[i] = buffer[i];
    buffer += sizeof(T);
    remaining -= sizeof(T);
    return result;
  }

  // Reads a null-terminated string and advances the cursor past its
  // terminator. The returned view points into the payload and its data() is
  // always null-terminated. If no terminator exists within the remaining
  // bytes, the cursor is drained and a view of an empty string literal is
  // returned, so callers still get a valid C string.
  cpp::string_view next_string() {
    size_t length = 0;
    while (length < remaining && buffer[length] != '\0')
      ++length;
    if (length == remaining) {
      // Unterminated tail (or nothing left at all): consume it and stop.
      buffer += remaining;
      remaining = 0;
      return cpp::string_view("");
    }
    cpp::string_view result(buffer, length);
    // Skip the string and its terminator.
    buffer += length + 1;
    remaining -= length + 1;
    return result;
  }

  // Decodes the next action: one selector byte, followed by a key for Find
  // and Insert.
  Action next_action() {
    uint8_t byte = next<uint8_t>();
    switch (byte % 5) {
    case 4:
      return {Action::Tag::CrossCheck, {}};
    case 3:
      return {Action::Tag::Find, next_string()};
    default:
      // 0, 1 and 2 all map to Insert.
      return {Action::Tag::Insert, next_string()};
    }
  }
} global_status;

// Owning handle for an internal::HashTable pointer: the destructor hands the
// pointer to internal::HashTable::deallocate, and moving transfers the pointer
// and leaves the source holding nullptr.
class HashTable {
public:
  using iterator = internal::HashTable::iterator;

  // Allocates a fresh table through internal::HashTable::allocate.
  HashTable(uint64_t size, uint64_t seed)
      : table(internal::HashTable::allocate(size, seed)) {}
  // Adopts an existing table pointer.
  HashTable(internal::HashTable *raw) : table(raw) {}
  HashTable(HashTable &&other) : table(other.table) { other.table = nullptr; }
  ~HashTable() { internal::HashTable::deallocate(table); }

  // True when a table pointer is held.
  bool is_valid() const { return table != nullptr; }

  // Forwards the lookup to the wrapped table.
  ENTRY *find(const char *key) { return table->find(key); }
  // Forwards to the static internal::HashTable::insert, which receives the
  // stored pointer itself as its first argument.
  ENTRY *insert(const ENTRY &entry) {
    return internal::HashTable::insert(table, entry);
  }

  iterator begin() const { return table->begin(); }
  iterator end() const { return table->end(); }

private:
  internal::HashTable *table;
};

// Builds one table from the next header record of the payload.
HashTable next_hashtable() {
  // The two reads are kept as separate statements so that the capacity is
  // always consumed before the seed, matching the payload layout.
  const size_t capacity = global_status.next<uint16_t>();
  const uint64_t table_seed = global_status.next<uint64_t>();
  return HashTable(capacity, table_seed);
}

// Fuzz entry point: replays the payload against two tables built from
// independent capacity/seed pairs and traps as soon as they disagree.
//
// - data/size: the raw payload (header followed by a sequence of actions).
//
// Always returns 0; a detected inconsistency ends the process through
// __builtin_trap().
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  global_status.buffer = reinterpret_cast<const char *>(data);
  global_status.remaining = size;
  // Not enough bytes for the two capacity/seed pairs.
  if (global_status.remaining < INITIAL_HEADER_SIZE)
    return 0;

  HashTable table_a = next_hashtable();
  HashTable table_b = next_hashtable();
  // Without both tables there is nothing to compare; bail out instead of
  // dereferencing a null table below.
  if (!table_a.is_valid() || !table_b.is_valid())
    return 0;

  while (global_status.remaining != 0) {
    Action action = global_status.next_action();
    switch (action.tag) {
    case Action::Tag::Find: {
      // Both tables must agree on whether the key is present.
      if (static_cast<bool>(table_a.find(action.key.data())) !=
          static_cast<bool>(table_b.find(action.key.data())))
        __builtin_trap();
      break;
    }
    case Action::Tag::Insert: {
      // The key pointer doubles as the stored data, so equal data pointers
      // mean both tables resolved the key to the same insertion.
      char *ptr = const_cast<char *>(action.key.data());
      ENTRY *a = table_a.insert(ENTRY{ptr, ptr});
      ENTRY *b = table_b.insert(ENTRY{ptr, ptr});
      // NOTE(review): a null result presumably signals an allocation failure
      // while growing -- confirm against internal::HashTable::insert. After
      // such a failure the tables may legitimately differ, so stop this run
      // rather than dereference null.
      if (a == nullptr || b == nullptr)
        return 0;
      if (a->data != b->data)
        __builtin_trap();
      break;
    }
    case Action::Tag::CrossCheck: {
      // Every entry of one table must exist in the other with the same data.
      // A missing entry is reported as a mismatch instead of being
      // dereferenced.
      for (ENTRY a : table_a)
        if (const ENTRY *b = table_b.find(a.key);
            b == nullptr || a.data != b->data)
          __builtin_trap();

      for (ENTRY b : table_b)
        if (const ENTRY *a = table_a.find(b.key);
            a == nullptr || a->data != b.data)
          __builtin_trap();

      break;
    }
    }
  }
  return 0;
}

} // namespace LIBC_NAMESPACE
11 changes: 11 additions & 0 deletions libc/fuzzing/__support/uint_fuzz.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
//===-- uint_fuzz.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Fuzzing test for llvm-libc unsigned integer utilities.
///
//===----------------------------------------------------------------------===//
#include "src/__support/CPP/bit.h"
#include "src/__support/big_int.h"
#include "src/string/memory_utils/inline_memcpy.h"
Expand Down
17 changes: 10 additions & 7 deletions libc/src/__support/HashTable/generic/bitmask_impl.inc
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ LIBC_INLINE constexpr bitmask_t repeat_byte(bitmask_t byte) {
return byte;
}

using BitMask = BitMaskAdaptor<bitmask_t, 0x8ull>;
using BitMask = BitMaskAdaptor<bitmask_t, 0x8ul>;
using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;

struct Group {
LIBC_INLINE_VAR static constexpr bitmask_t MASK = repeat_byte(0x80ul);
bitmask_t data;

// Load a group of control words from an arbitrary address.
Expand Down Expand Up @@ -100,21 +101,23 @@ struct Group {
// - The check for key equality will catch these.
// - This only happens if there is at least 1 true match.
// - The chance of this happening is very low (< 1% chance per byte).
auto cmp = data ^ repeat_byte(byte);
auto result = LIBC_NAMESPACE::Endian::to_little_endian(
(cmp - repeat_byte(0x01)) & ~cmp & repeat_byte(0x80));
static constexpr bitmask_t ONES = repeat_byte(0x01ul);
auto cmp = data ^ repeat_byte(static_cast<bitmask_t>(byte) & 0xFFul);
auto result =
LIBC_NAMESPACE::Endian::to_little_endian((cmp - ONES) & ~cmp & MASK);
return {BitMask{result}};
}

// Find out the lanes equal to EMPTY or DELETE (highest bit set) and
// return the bitmask with corresponding bits set.
LIBC_INLINE BitMask mask_available() const {
return {LIBC_NAMESPACE::Endian::to_little_endian(data) & repeat_byte(0x80)};
bitmask_t le_data = LIBC_NAMESPACE::Endian::to_little_endian(data);
return {le_data & MASK};
}

LIBC_INLINE IteratableBitMask occupied() const {
return {
{static_cast<bitmask_t>(mask_available().word ^ repeat_byte(0x80))}};
bitmask_t available = mask_available().word;
return {BitMask{available ^ MASK}};
}
};
} // namespace internal
Expand Down
Loading