Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc] add hashtable fuzzing #87949

Merged
merged 12 commits into from
May 2, 2024
Merged

Conversation

SchrodingerZhu
Copy link
Contributor

No description provided.

@llvmbot llvmbot added the libc label Apr 7, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Apr 7, 2024

@llvm/pr-subscribers-libc

Author: Schrodinger ZHU Yifan (SchrodingerZhu)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/87949.diff

4 Files Affected:

  • (modified) libc/cmake/modules/LLVMLibCCheckMPFR.cmake (+1-1)
  • (modified) libc/fuzzing/__support/CMakeLists.txt (+9)
  • (added) libc/fuzzing/__support/hashtable_fuzz.cpp (+157)
  • (modified) libc/utils/MPFRWrapper/CMakeLists.txt (+1-7)
diff --git a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake
index bbaeb9f0dc053f..532b0b9bfae392 100644
--- a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake
@@ -2,7 +2,7 @@ set(LLVM_LIBC_MPFR_INSTALL_PATH "" CACHE PATH "Path to where MPFR is installed (
 
 if(LLVM_LIBC_MPFR_INSTALL_PATH)
   set(LIBC_TESTS_CAN_USE_MPFR TRUE)
-elseif(LIBC_TARGET_OS_IS_GPU)
+elseif(LIBC_TARGET_OS_IS_GPU OR LLVM_LIBC_FULL_BUILD)
   set(LIBC_TESTS_CAN_USE_MPFR FALSE)
 else()
   try_compile(
diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt
index 278e914e3fbe95..d115cd8434b15b 100644
--- a/libc/fuzzing/__support/CMakeLists.txt
+++ b/libc/fuzzing/__support/CMakeLists.txt
@@ -5,3 +5,12 @@ add_libc_fuzzer(
   DEPENDS
     libc.src.__support.uint
 )
+
+add_libc_fuzzer(
+  hashtable_fuzz
+  SRCS
+    hashtable_fuzz.cpp
+  DEPENDS
+    libc.src.__support.HashTable.table
+    libc.src.string.memcpy
+)
diff --git a/libc/fuzzing/__support/hashtable_fuzz.cpp b/libc/fuzzing/__support/hashtable_fuzz.cpp
new file mode 100644
index 00000000000000..4b862b03b9d309
--- /dev/null
+++ b/libc/fuzzing/__support/hashtable_fuzz.cpp
@@ -0,0 +1,157 @@
+#include "src/__support/CPP/new.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/HashTable/table.h"
+#include "src/string/memcpy.h"
+#include <search.h>
+#include <stdint.h>
+namespace LIBC_NAMESPACE {
+
+// Operations the fuzzer can replay against the tables; one is decoded from
+// each action byte of the input.
+enum class Action { Find, Insert, CrossCheck };
+// Read cursor into the fuzzer's working copy of the input bytes.
+static uint8_t *global_buffer = nullptr;
+// Count of unread bytes available starting at global_buffer.
+static size_t remaining = 0;
+
+// Pops a single byte off the input stream. Yields nullopt once no unread
+// bytes are left.
+static cpp::optional<uint8_t> next_u8() {
+  if (remaining == 0)
+    return cpp::nullopt;
+  --remaining;
+  return *global_buffer++;
+}
+
+// Consumes the next sizeof(uint64_t) bytes of the input stream and returns
+// them copied byte-for-byte into a uint64_t. Yields nullopt (consuming
+// nothing) when fewer bytes than that remain.
+static cpp::optional<uint64_t> next_uint64() {
+  constexpr size_t WIDTH = sizeof(uint64_t);
+  if (remaining < WIDTH)
+    return cpp::nullopt;
+  uint64_t value;
+  memcpy(&value, global_buffer, WIDTH);
+  remaining -= WIDTH;
+  global_buffer += WIDTH;
+  return value;
+}
+
+// Decodes the next action from one input byte (byte value modulo 3).
+// Yields nullopt when the input stream is exhausted.
+static cpp::optional<Action> next_action() {
+  cpp::optional<uint8_t> selector = next_u8();
+  if (!selector)
+    return cpp::nullopt;
+  const uint8_t choice = *selector % 3;
+  if (choice == 0)
+    return Action::Find;
+  if (choice == 1)
+    return Action::Insert;
+  return Action::CrossCheck;
+}
+
+// Extracts a NUL-terminated key from the input stream.
+//
+// Consumes an 8-byte length word followed by the key bytes. The key is at
+// most (length word % 128) bytes long and ends early at the first NUL byte
+// already present in the input. The byte right after the key is overwritten
+// with '\0' in place and consumed as the terminator.
+//
+// Returns a pointer into the working buffer, or nullopt when the stream has
+// no length word left or lacks room for the key plus its terminator.
+static cpp::optional<char *> next_cstr() {
+  cpp::optional<uint64_t> len = next_uint64();
+  if (!len)
+    return cpp::nullopt;
+  // Take the start pointer only after the length word has been consumed;
+  // otherwise the returned key would alias the length bytes themselves.
+  char *result = reinterpret_cast<char *>(global_buffer);
+  const uint64_t limit = *len % 128;
+  uint64_t length = 0;
+  while (length < limit) {
+    if (length >= remaining)
+      return cpp::nullopt;
+    // Inspect the byte at the current scan position, not just the first one.
+    if (global_buffer[length] == '\0')
+      break;
+    length++;
+  }
+  // One more byte is needed to hold the terminator.
+  if (length >= remaining)
+    return cpp::nullopt;
+  global_buffer[length] = '\0';
+  global_buffer += length + 1;
+  remaining -= length + 1;
+  return result;
+}
+
+// Calls op() and yields the value it holds. If op() produced an empty result,
+// the *enclosing function* returns 0 instead. Built on the statement-expression
+// extension, so it is only usable inside a function whose return type accepts
+// `return 0`.
+#define GET_VAL(op)                                                            \
+  __extension__({                                                              \
+    auto val = op();                                                           \
+    if (!val)                                                                  \
+      return 0;                                                                \
+    *val;                                                                      \
+  })
+
+// Scope guard: stores a callable and invokes it from the destructor, unless
+// the callable has been handed over to another hook by a move.
+template <typename Fn> struct CleanUpHook {
+  cpp::optional<Fn> fn;
+
+  CleanUpHook(Fn callback) : fn(cpp::move(callback)) {}
+  CleanUpHook(const CleanUpHook &) = delete;
+  // Moving transfers the callable and disarms the source hook.
+  CleanUpHook(CleanUpHook &&source) : fn(cpp::move(source.fn)) {
+    source.fn = cpp::nullopt;
+  }
+
+  ~CleanUpHook() {
+    if (!fn)
+      return;
+    (*fn)();
+  }
+};
+
+// Declares a local variable named cleanup_hook<ID> holding a CleanUpHook built
+// from the callable passed as the remaining arguments, so that callable runs
+// when the enclosing scope is left. The callable is taken variadically so that
+// lambdas with commas in their capture list can be passed. The expansion
+// already ends with a semicolon.
+#define register_cleanup(ID, ...)                                              \
+  auto cleanup_hook##ID = __extension__({                                      \
+    auto a = __VA_ARGS__;                                                      \
+    CleanUpHook<decltype(a)>{a};                                               \
+  });
+
+// Aborts the run through __builtin_trap(). The msg argument is accepted but
+// not currently used.
+static void trap_with_message(const char *msg) {
+  __builtin_trap();
+}
+
+// libFuzzer entry point.
+//
+// Copies the input into a mutable working buffer, decodes two size values
+// (each reduced modulo 256) and two 64-bit random values from it, allocates
+// two hash tables from those, and then replays the rest of the input as a
+// stream of Find / Insert / CrossCheck actions applied to both tables. Any
+// observable disagreement between the tables triggers a trap.
+//
+// Always returns 0: running out of input or failing an allocation simply ends
+// the run. Cleanup hooks release the buffer and both tables on every exit
+// path.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  AllocChecker ac;
+  global_buffer = static_cast<uint8_t *>(::operator new(size, ac));
+  // The hook captures the original pointer by value because global_buffer is
+  // advanced while the input is being decoded.
+  register_cleanup(0, [global_buffer = global_buffer, size] {
+    ::operator delete(global_buffer, size);
+  });
+  if (!ac)
+    return 0;
+  memcpy(global_buffer, data, size);
+
+  remaining = size;
+  uint64_t size_a = GET_VAL(next_uint64) % 256;
+  uint64_t size_b = GET_VAL(next_uint64) % 256;
+  uint64_t rand_a = GET_VAL(next_uint64);
+  uint64_t rand_b = GET_VAL(next_uint64);
+  // The table pointers are captured by reference: insert() receives them and
+  // may replace them, and the hook must release whichever table is current.
+  internal::HashTable *table_a = internal::HashTable::allocate(size_a, rand_a);
+  register_cleanup(1, [&table_a] { internal::HashTable::deallocate(table_a); });
+  internal::HashTable *table_b = internal::HashTable::allocate(size_b, rand_b);
+  register_cleanup(2, [&table_b] { internal::HashTable::deallocate(table_b); });
+  if (!table_a || !table_b)
+    return 0;
+  for (;;) {
+    Action action = GET_VAL(next_action);
+    switch (action) {
+    case Action::Find: {
+      const char *key = GET_VAL(next_cstr);
+      if (!key)
+        return 0;
+      // Both tables received the same insertions, so lookups must agree.
+      if (static_cast<bool>(table_a->find(key)) !=
+          static_cast<bool>(table_b->find(key)))
+        trap_with_message(key);
+      break;
+    }
+    case Action::Insert: {
+      char *key = GET_VAL(next_cstr);
+      if (!key)
+        return 0;
+      ENTRY *a = internal::HashTable::insert(table_a, ENTRY{key, key});
+      ENTRY *b = internal::HashTable::insert(table_b, ENTRY{key, key});
+      // NOTE(review): insert() can hand back nullptr (presumably when the
+      // table fails to grow -- confirm against table.h). End the run rather
+      // than crash the harness on a null entry.
+      if (!a || !b)
+        return 0;
+      if (a->data != b->data)
+        __builtin_trap();
+      break;
+    }
+    case Action::CrossCheck: {
+      // Every entry of one table that is also present in the other must
+      // carry the same data pointer, in both directions.
+      for (ENTRY a : *table_a) {
+        if (const ENTRY *b = table_b->find(a.key)) {
+          if (a.data != b->data)
+            __builtin_trap();
+        }
+      }
+      for (ENTRY b : *table_b) {
+        if (const ENTRY *a = table_a->find(b.key)) {
+          if (a->data != b.data)
+            __builtin_trap();
+        }
+      }
+      break;
+    }
+    }
+  }
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/utils/MPFRWrapper/CMakeLists.txt b/libc/utils/MPFRWrapper/CMakeLists.txt
index 2f2b0ac09df9a8..6af6fd77070418 100644
--- a/libc/utils/MPFRWrapper/CMakeLists.txt
+++ b/libc/utils/MPFRWrapper/CMakeLists.txt
@@ -5,12 +5,6 @@ if(LIBC_TESTS_CAN_USE_MPFR)
     mpfr_inc.h
   )
   target_compile_options(libcMPFRWrapper PRIVATE -O3)
-  if (LLVM_LIBC_FULL_BUILD)
-    # It is not easy to make libcMPFRWrapper a standalone library because gmp.h may unconditionally
-    # pull in some STL headers. As a result, targets using this library will need to link against
-    # C++ and unwind libraries. Since we are using MPFR anyway, we directly specifies the GNU toolchain.
-    target_link_libraries(libcMPFRWrapper PUBLIC -lstdc++ -lgcc_s)
-  endif()
   add_dependencies(
     libcMPFRWrapper
     libc.src.__support.CPP.string_view
@@ -24,6 +18,6 @@ if(LIBC_TESTS_CAN_USE_MPFR)
     target_link_directories(libcMPFRWrapper PUBLIC ${LLVM_LIBC_MPFR_INSTALL_PATH}/lib)
   endif()
   target_link_libraries(libcMPFRWrapper PUBLIC LibcFPTestHelpers.unit LibcTest.unit mpfr gmp)
-elseif(NOT LIBC_TARGET_OS_IS_GPU)
+elseif(NOT LIBC_TARGET_OS_IS_GPU AND NOT LLVM_LIBC_FULL_BUILD)
   message(WARNING "Math tests using MPFR will be skipped.")
 endif()

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding fuzzing for hashtable is a good idea, but I'm not sure this is the right design for it. It seems like you're ending up re-implementing C++ language features like cleanups and functions on classes.

libc/fuzzing/__support/hashtable_fuzz.cpp Outdated Show resolved Hide resolved
libc/fuzzing/__support/hashtable_fuzz.cpp Outdated Show resolved Hide resolved
libc/fuzzing/__support/hashtable_fuzz.cpp Outdated Show resolved Hide resolved
@SchrodingerZhu
Copy link
Contributor Author

I have reworked all the logic of this fuzzer and addressed a bug on my side.

@SchrodingerZhu
Copy link
Contributor Author

SchrodingerZhu commented Apr 30, 2024

Please put a hold on this; I am still experiencing some errors.


Update: this should be fixed now.

@SchrodingerZhu
Copy link
Contributor Author

SchrodingerZhu commented May 1, 2024

update:
It seems that I ran into another error when increasing the chance of insertion to 3/5:

==2288660==ERROR: UndefinedBehaviorSanitizer: SEGV on unknown address 0x000000000008 (pc 0x5a48d233fb71 bp 0x7fffdb7c6ef0 sp 0x7fffdb7c6ce0 T2288660)
==2288660==The signal is caused by a READ memory access.
==2288660==Hint: address points to the zero page.
    #0 0x5a48d233fb71 in LLVMFuzzerTestOneInput /home/schrodingerzy/Documents/llvm/llvm-project/libc/fuzzing/__support/hashtable_fuzz.cpp:168:58
    #1 0x5a48d22e8f18 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) (/home/schrodingerzy/Documents/llvm/llvm-project/build/projects/libc/fuzzing/__support/libc.fuzzing.__support.hashtable_opt_fuzz+0x3df18) (BuildId: 57f188612f1e6b12555f10c85afddf26fba91ab5)
    #2 0x5a48d22e9bf0 in fuzzer::Fuzzer::RunOne(unsigned char const*, unsigned long, bool, fuzzer::InputInfo*, bool, bool*) (/home/schrodingerzy/Documents/llvm/llvm-project/build/projects/libc/fuzzing/__support/libc.fuzzing.__support.hashtable_opt_fuzz+0x3ebf0) (BuildId: 57f188612f1e6b12555f10c85afddf26fba91ab5)
    #3 0x5a48d22eac81 in fuzzer::Fuzzer::MutateAndTestOne() (/home/schrodingerzy/Documents/llvm/llvm-project/build/projects/libc/fuzzing/__support/libc.fuzzing.__support.hashtable_opt_fuzz+0x3fc81) (BuildId: 57f188612f1e6b12555f10c85afddf26fba91ab5)
    #4 0x5a48d22ebaa7 in fuzzer::Fuzzer::Loop(std::vector<fuzzer::SizedFile, std::allocator<fuzzer::SizedFile>>&) (/home/schrodingerzy/Documents/llvm/llvm-project/build/projects/libc/fuzzing/__support/libc.fuzzing.__support.hashtable_opt_fuzz+0x40aa7) (BuildId: 57f188612f1e6b12555f10c85afddf26fba91ab5)
    #5 0x5a48d22cc316 in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) (/home/schrodingerzy/Documents/llvm/llvm-project/build/projects/libc/fuzzing/__support/libc.fuzzing.__support.hashtable_opt_fuzz+0x21316) (BuildId: 57f188612f1e6b12555f10c85afddf26fba91ab5)
    #6 0x5a48d22b6597 in main (/home/schrodingerzy/Documents/llvm/llvm-project/build/projects/libc/fuzzing/__support/libc.fuzzing.__support.hashtable_opt_fuzz+0xb597) (BuildId: 57f188612f1e6b12555f10c85afddf26fba91ab5)
    #7 0x7ea689f4400f in __libc_start_call_main /usr/src/debug/glibc/glibc/csu/../sysdeps/nptl/libc_start_call_main.h:58:16
    #8 0x7ea689f440c9 in __libc_start_main /usr/src/debug/glibc/glibc/csu/../csu/libc-start.c:360:3
    #9 0x5a48d22b65f4 in _start (/home/schrodingerzy/Documents/llvm/llvm-project/build/projects/libc/fuzzing/__support/libc.fuzzing.__support.hashtable_opt_fuzz+0xb5f4) (BuildId: 57f188612f1e6b12555f10c85afddf26fba91ab5)

UndefinedBehaviorSanitizer can not provide additional info.
SUMMARY: UndefinedBehaviorSanitizer: SEGV /home/schrodingerzy/Documents/llvm/llvm-project/libc/fuzzing/__support/hashtable_fuzz.cpp:168:58 in LLVMFuzzerTestOneInput
==2288660==ABORTING
MS: 4 CMP-ShuffleBytes-CrossOver-Custom- DE: "\021\000\000\000\000\000\000\000"-; base unit: adc83b19e793491b1c6ea0fd8b46cd9f32e592fc
artifact_prefix='./'; Test unit written to ./crash-515328899757caa24abf222700ab03823fe25732

After rerunning the example, it appears to be a problem with the generator, as it actually crashes in next_string.

// string
static constexpr size_t INITIAL_HEADER_SIZE =
2 * (sizeof(uint16_t) + sizeof(uint64_t));
extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been looking into moving to https://github.com/google/fuzztest as the fuzzing framework for our fuzz tests in the future. It seems like this might be easier with that framework, if you are interested in trying it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume it uses structured unit tests; so yes, it should be much easier to approach.

@SchrodingerZhu
Copy link
Contributor Author

I think this PR is good to go now.

image

The coverage data looks fine to me for the current stage.

Copy link
Contributor

@lntue lntue left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@SchrodingerZhu SchrodingerZhu merged commit 0e5ff62 into llvm:main May 2, 2024
4 checks passed
@SchrodingerZhu SchrodingerZhu deleted the libc/hash-fuzzing branch May 2, 2024 19:36
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants