From 81e3e7e5d455f85e070a27763c578df493716ae9 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Tue, 28 Nov 2023 21:02:25 -0500 Subject: [PATCH] [libc] [search] implement hcreate(_r)/hsearch(_r)/hdestroy(_r) (#73469) This patch implements `hcreate(_r)/hsearch(_r)/hdestroy(_r)` as specified in https://man7.org/linux/man-pages/man3/hsearch.3.html. Notice that `neon/asimd` extension is not yet added in this patch. - The implementation is largely simplified from rust's [`hashbrown`](https://github.com/rust-lang/hashbrown/blob/master/src/raw/mod.rs) as we only consider fix-sized insertion-only hashtables. Technical details are provided in code comments. - This patch also contains a portable string hash function, which is derived from [`aHash`](https://github.com/tkaitchuck/aHash)'s fallback routine. Not using any SIMD acceleration, it has a good enough quality (passing all SMHasher tests) and is not too bad in speed. - Some general functionalities are added, such as `memory_size`, `offset_to`(alignment), `next_power_of_two`, `is_power_of_two`. `ctz/clz` are extended to support shorter integers. --- libc/cmake/modules/LLVMLibCFlagRules.cmake | 7 + libc/cmake/modules/LLVMLibCObjectRules.cmake | 14 ++ libc/config/linux/aarch64/entrypoints.txt | 8 + libc/config/linux/aarch64/headers.txt | 1 + libc/config/linux/api.td | 4 + libc/config/linux/arm/entrypoints.txt | 8 + libc/config/linux/arm/headers.txt | 1 + libc/config/linux/riscv/entrypoints.txt | 8 + libc/config/linux/riscv/headers.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 8 + libc/config/linux/x86_64/headers.txt | 1 + libc/include/CMakeLists.txt | 12 + libc/include/llvm-libc-types/ACTION.h | 14 ++ libc/include/llvm-libc-types/CMakeLists.txt | 3 + libc/include/llvm-libc-types/ENTRY.h | 17 ++ .../llvm-libc-types/struct_hsearch_data.h | 17 ++ libc/include/search.h.def | 18 ++ libc/spec/gnu_ext.td | 40 ++- libc/spec/posix.td | 35 ++- libc/spec/spec.td | 5 + libc/src/CMakeLists.txt | 1 + libc/src/__support/CMakeLists.txt | 25 ++ libc/src/__support/HashTable/CMakeLists.txt | 52 ++++ libc/src/__support/HashTable/bitmask.h | 94 +++++++ .../HashTable/generic/bitmask_impl.inc | 102 ++++++++ libc/src/__support/HashTable/randomness.h | 62 +++++ .../__support/HashTable/sse2/bitmask_impl.inc | 40 +++ libc/src/__support/HashTable/table.h | 235 ++++++++++++++++++ libc/src/__support/bit.h | 40 +++ libc/src/__support/hash.h | 164 ++++++++++++ libc/src/__support/memory_size.h | 72 ++++++ libc/src/search/CMakeLists.txt | 79 ++++++ libc/src/search/hcreate.cpp | 28 +++ libc/src/search/hcreate.h | 18 ++ libc/src/search/hcreate_r.cpp | 32 +++ libc/src/search/hcreate_r.h | 18 ++ libc/src/search/hdestroy.cpp | 21 ++ libc/src/search/hdestroy.h | 18 ++ libc/src/search/hdestroy_r.cpp | 25 ++ libc/src/search/hdestroy_r.h | 18 ++ libc/src/search/hsearch.cpp | 36 +++ libc/src/search/hsearch.h | 18 ++ libc/src/search/hsearch/CMakeLists.txt | 7 + libc/src/search/hsearch/global.cpp | 13 + libc/src/search/hsearch/global.h | 13 + libc/src/search/hsearch_r.cpp | 42 ++++ libc/src/search/hsearch_r.h | 19 ++ libc/test/src/CMakeLists.txt | 1 + libc/test/src/__support/CMakeLists.txt | 27 ++ .../src/__support/HashTable/CMakeLists.txt | 33 +++ .../src/__support/HashTable/bitmask_test.cpp | 69 +++++ .../src/__support/HashTable/group_test.cpp | 90 +++++++ .../src/__support/HashTable/table_test.cpp | 77 ++++++ libc/test/src/__support/bit_test.cpp | 49 ++++ libc/test/src/__support/hash_test.cpp | 140 +++++++++++ libc/test/src/__support/memory_size_test.cpp | 85 +++++++ libc/test/src/search/CMakeLists.txt | 16 ++ libc/test/src/search/hsearch_test.cpp | 124 +++++++++ 58 files changed, 2223 insertions(+), 2 deletions(-) create mode 100644 libc/include/llvm-libc-types/ACTION.h create mode 100644 libc/include/llvm-libc-types/ENTRY.h create mode 100644 libc/include/llvm-libc-types/struct_hsearch_data.h create mode 100644 libc/include/search.h.def create mode 100644 libc/src/__support/HashTable/CMakeLists.txt create mode 100644 libc/src/__support/HashTable/bitmask.h create mode 100644 libc/src/__support/HashTable/generic/bitmask_impl.inc create mode 100644 libc/src/__support/HashTable/randomness.h create mode 100644 libc/src/__support/HashTable/sse2/bitmask_impl.inc create mode 100644 libc/src/__support/HashTable/table.h create mode 100644 libc/src/__support/hash.h create mode 100644 libc/src/__support/memory_size.h create mode 100644 libc/src/search/CMakeLists.txt create mode 100644 libc/src/search/hcreate.cpp create mode 100644 libc/src/search/hcreate.h create mode 100644 libc/src/search/hcreate_r.cpp create mode 100644 libc/src/search/hcreate_r.h create mode 100644 libc/src/search/hdestroy.cpp create mode 100644 libc/src/search/hdestroy.h create mode 100644 libc/src/search/hdestroy_r.cpp create mode 100644 libc/src/search/hdestroy_r.h create mode 100644 libc/src/search/hsearch.cpp create mode 100644 libc/src/search/hsearch.h create mode 100644 libc/src/search/hsearch/CMakeLists.txt create mode 100644 libc/src/search/hsearch/global.cpp create mode 100644 libc/src/search/hsearch/global.h create mode 100644 libc/src/search/hsearch_r.cpp create mode 100644 libc/src/search/hsearch_r.h create mode 100644 libc/test/src/__support/HashTable/CMakeLists.txt create mode 100644 libc/test/src/__support/HashTable/bitmask_test.cpp create mode 100644 libc/test/src/__support/HashTable/group_test.cpp create mode 100644 libc/test/src/__support/HashTable/table_test.cpp create mode 100644 libc/test/src/__support/hash_test.cpp create mode 100644 libc/test/src/__support/memory_size_test.cpp create mode 100644 libc/test/src/search/CMakeLists.txt create mode 100644 libc/test/src/search/hsearch_test.cpp diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake index a1d3dc4b567aa..37ffe708fb754 100644 --- a/libc/cmake/modules/LLVMLibCFlagRules.cmake +++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake @@ -132,6 +132,8 @@ endfunction(get_fq_dep_list_without_flag) # Special flags set(FMA_OPT_FLAG "FMA_OPT") set(ROUND_OPT_FLAG "ROUND_OPT") +# SSE2 is the baseline for x86_64, so we add a negative flag to disable it if needed. +set(DISABLE_SSE2_OPT_FLAG "DISABLE_SSE2_OPT") # Skip FMA_OPT flag for targets that don't support fma. if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR @@ -143,3 +145,8 @@ endif() if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2"))) set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE) endif() + +# Skip DISABLE_SSE2_OPT flag for targets that don't support SSE2. +if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2"))) + set(SKIP_FLAG_EXPANSION_DISABLE_SSE2_OPT TRUE) +endif() diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake index d5df27a2dcb43..6d94ce97f0b68 100644 --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -18,6 +18,14 @@ function(_get_common_compile_options output_var flags) set(ADD_SSE4_2_FLAG TRUE) endif() + list(FIND flags ${DISABLE_SSE2_OPT_FLAG} no_sse2) + if(${no_sse2} LESS 0) + list(FIND flags "${DISABLE_SSE2_OPT_FLAG}__ONLY" no_sse2) + endif() + if((${no_sse2} GREATER -1) AND (LIBC_CPU_FEATURES MATCHES "SSE2")) + set(DISABLE_SSE2_FLAG TRUE) + endif() + set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN}) if(LLVM_COMPILER_IS_GCC_COMPATIBLE) list(APPEND compile_options "-fpie") @@ -58,12 +66,18 @@ function(_get_common_compile_options output_var flags) if(ADD_SSE4_2_FLAG) list(APPEND compile_options "-msse4.2") endif() + if(DISABLE_SSE2_FLAG) + list(APPEND compile_options "-mno-sse2") + endif() elseif(MSVC) list(APPEND compile_options "/EHs-c-") list(APPEND compile_options "/GR-") if(ADD_FMA_FLAG) list(APPEND compile_options "/arch:AVX2") endif() + if(DISABLE_SSE2_FLAG) + list(APPEND compile_options "/arch:SSE") + endif() endif() if (LIBC_TARGET_ARCHITECTURE_IS_GPU) list(APPEND compile_options "-nogpulib") diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 284feb7b99096..ecefa5884adb3 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -130,6 +130,14 @@ set(TARGET_LIBC_ENTRYPOINTS #libc.src.stdio.scanf #libc.src.stdio.fscanf + # search.h entrypoints + libc.src.search.hcreate + libc.src.search.hcreate_r + libc.src.search.hsearch + libc.src.search.hsearch_r + libc.src.search.hdestroy + libc.src.search.hdestroy_r + # sys/mman.h entrypoints libc.src.sys.mman.madvise libc.src.sys.mman.mmap diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt index c47e05c924fd9..cfca5959b5ffa 100644 --- a/libc/config/linux/aarch64/headers.txt +++ b/libc/config/linux/aarch64/headers.txt @@ -12,6 +12,7 @@ set(TARGET_PUBLIC_HEADERS libc.include.stdlib libc.include.string libc.include.strings + libc.include.search libc.include.sys_mman libc.include.sys_socket libc.include.sys_syscall diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 377763b97cfd9..726e58f376eaa 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -248,3 +248,7 @@ def TermiosAPI : PublicAPI<"termios.h"> { def SetJmpAPI : PublicAPI<"setjmp.h"> { let Types = ["jmp_buf"]; } + +def SearchAPI : PublicAPI<"search.h"> { + let Types = ["ACTION", "ENTRY", "struct hsearch_data"]; +} diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index 27c0b8e5b3a3a..ee701c04b2e2a 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -89,6 +89,14 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdlib.strtoul libc.src.stdlib.strtoull + # search.h entrypoints + libc.src.search.hcreate + libc.src.search.hcreate_r + libc.src.search.hsearch + libc.src.search.hsearch_r + libc.src.search.hdestroy + libc.src.search.hdestroy_r + # sys/mman.h entrypoints libc.src.sys.mman.mmap libc.src.sys.mman.munmap diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt index fe7c88e922e07..bd08d8f8fa437 100644 --- a/libc/config/linux/arm/headers.txt +++ b/libc/config/linux/arm/headers.txt @@ -7,4 +7,5 @@ set(TARGET_PUBLIC_HEADERS libc.include.stdlib libc.include.string libc.include.strings + libc.include.search ) diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index a5f0c91e32d08..1ccb40108bd85 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -136,6 +136,14 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.scanf libc.src.stdio.fscanf + # search.h entrypoints + libc.src.search.hcreate + libc.src.search.hcreate_r + libc.src.search.hsearch + libc.src.search.hsearch_r + libc.src.search.hdestroy + libc.src.search.hdestroy_r + # sys/mman.h entrypoints libc.src.sys.mman.madvise libc.src.sys.mman.mmap diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt index 24247ee5819f9..3e2b1630f1695 100644 --- a/libc/config/linux/riscv/headers.txt +++ b/libc/config/linux/riscv/headers.txt @@ -17,6 +17,7 @@ set(TARGET_PUBLIC_HEADERS libc.include.stdlib libc.include.string libc.include.strings + libc.include.search libc.include.termios libc.include.threads libc.include.time diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 63aa7473115a0..43266e0e5b66e 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -497,6 +497,14 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.spawn.posix_spawn_file_actions_destroy libc.src.spawn.posix_spawn_file_actions_init + # search.h entrypoints + libc.src.search.hcreate + libc.src.search.hcreate_r + libc.src.search.hsearch + libc.src.search.hsearch_r + libc.src.search.hdestroy + libc.src.search.hdestroy_r + # threads.h entrypoints libc.src.threads.call_once libc.src.threads.cnd_broadcast diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt index 24247ee5819f9..3e2b1630f1695 100644 --- a/libc/config/linux/x86_64/headers.txt +++ b/libc/config/linux/x86_64/headers.txt @@ -17,6 +17,7 @@ set(TARGET_PUBLIC_HEADERS libc.include.stdlib libc.include.string libc.include.strings + libc.include.search libc.include.termios libc.include.threads libc.include.time diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 9d170603ffa45..429c0f1f12866 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -133,6 +133,18 @@ add_gen_header( .llvm-libc-types.size_t ) +add_gen_header( + search + DEF_FILE search.h.def + GEN_HDR search.h + DEPENDS + .llvm_libc_common_h + .llvm-libc-types.ACTION + .llvm-libc-types.ENTRY + .llvm-libc-types.struct_hsearch_data + .llvm-libc-types.size_t +) + add_gen_header( time DEF_FILE time.h.def diff --git a/libc/include/llvm-libc-types/ACTION.h b/libc/include/llvm-libc-types/ACTION.h new file mode 100644 index 0000000000000..7181a59b177d6 --- /dev/null +++ b/libc/include/llvm-libc-types/ACTION.h @@ -0,0 +1,14 @@ +//===-- Definition of ACTION type -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_TYPES_ACTION_H__ +#define __LLVM_LIBC_TYPES_ACTION_H__ + +typedef enum { FIND, ENTER } ACTION; + +#endif // __LLVM_LIBC_TYPES_ACTION_H__ diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index 3c0cc7bbc71da..225ad780c4d01 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -91,3 +91,6 @@ add_header(wint_t HDR wint_t.h) add_header(sa_family_t HDR sa_family_t.h) add_header(struct_sockaddr HDR struct_sockaddr.h) add_header(rpc_opcodes_t HDR rpc_opcodes_t.h) +add_header(ACTION HDR ACTION.h) +add_header(ENTRY HDR ENTRY.h) +add_header(struct_hsearch_data HDR struct_hsearch_data.h) diff --git a/libc/include/llvm-libc-types/ENTRY.h b/libc/include/llvm-libc-types/ENTRY.h new file mode 100644 index 0000000000000..0ccb5938207ac --- /dev/null +++ b/libc/include/llvm-libc-types/ENTRY.h @@ -0,0 +1,17 @@ +//===-- Definition of ENTRY type ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_TYPES_ENTRY_H__ +#define __LLVM_LIBC_TYPES_ENTRY_H__ + +typedef struct { + char *key; + void *data; +} ENTRY; + +#endif // __LLVM_LIBC_TYPES_ENTRY_H__ diff --git a/libc/include/llvm-libc-types/struct_hsearch_data.h b/libc/include/llvm-libc-types/struct_hsearch_data.h new file mode 100644 index 0000000000000..7e2a7232fce53 --- /dev/null +++ b/libc/include/llvm-libc-types/struct_hsearch_data.h @@ -0,0 +1,17 @@ +//===-- Definition of type struct hsearch_data ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__ +#define __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__ + +struct hsearch_data { + void *__opaque; + unsigned int __unused[2]; +}; + +#endif // __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__ diff --git a/libc/include/search.h.def b/libc/include/search.h.def new file mode 100644 index 0000000000000..3435c1f8ad048 --- /dev/null +++ b/libc/include/search.h.def @@ -0,0 +1,18 @@ +//===-- POSIX header search.h ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SEARCH_H +#define LLVM_LIBC_SEARCH_H + +#include <__llvm-libc-common.h> +#define __need_size_t +#include + +%%public_api() + +#endif // LLVM_LIBC_SEARCH_H diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td index dfb12419d1400..cb0407c84d4e2 100644 --- a/libc/spec/gnu_ext.td +++ b/libc/spec/gnu_ext.td @@ -3,6 +3,8 @@ def CpuSetPtr : PtrType; def ConstCpuSetPtr : ConstType; def QSortRCompareT : NamedType<"__qsortrcompare_t">; +def StructHsearchData : NamedType<"struct hsearch_data">; +def StructHsearchDataPtr : PtrType; def GnuExtensions : StandardSpec<"GNUExtensions"> { NamedType CookieIOFunctionsT = NamedType<"cookie_io_functions_t">; @@ -54,7 +56,6 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> { >, ] >; - HeaderSpec String = HeaderSpec< "string.h", [], // Macros @@ -89,6 +90,42 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> { ] >; + HeaderSpec Search = HeaderSpec< + "search.h", + [], // Macros + [ + StructHsearchData + ], + [], // Enumerations + [ + FunctionSpec< + "hcreate_r", + RetValSpec, + [ + ArgSpec, + ArgSpec + ] + >, + FunctionSpec< + "hdestroy_r", + RetValSpec, + [ + ArgSpec + ] + >, + FunctionSpec< + "hsearch_r", + RetValSpec, + [ + ArgSpec, + ArgSpec, + ArgSpec, + ArgSpec + ] + >, + ] + >; + HeaderSpec FEnv = HeaderSpec< "fenv.h", [], // Macros @@ -243,6 +280,7 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> { StdIO, StdLib, String, + Search, UniStd, ]; } diff --git a/libc/spec/posix.td b/libc/spec/posix.td index a367cf2a6935c..c7acf6d25a2d8 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -1269,6 +1269,38 @@ def POSIX : StandardSpec<"POSIX"> { ] >; + HeaderSpec Search = HeaderSpec< + "search.h", + [], // Macros + [ + ActionType, + EntryType + ], // Types + [], // Enumerations + [ + FunctionSpec< + "hcreate", + RetValSpec, + [ + ArgSpec + ] + >, + FunctionSpec< + "hdestroy", + RetValSpec, + [] // Args + >, + FunctionSpec< + "hsearch", + RetValSpec, + [ + ArgSpec, + ArgSpec + ] + >, + ] + >; + HeaderSpec Termios = HeaderSpec< "termios.h", [ @@ -1414,6 +1446,7 @@ def POSIX : StandardSpec<"POSIX"> { Time, Termios, UniStd, - String + String, + Search, ]; } diff --git a/libc/spec/spec.td b/libc/spec/spec.td index b0d5511a4f087..9b689b5eb502a 100644 --- a/libc/spec/spec.td +++ b/libc/spec/spec.td @@ -140,6 +140,11 @@ def SuSecondsT : NamedType<"suseconds_t">; //added because __assert_fail needs it. def UnsignedType : NamedType<"unsigned">; +def ActionType : NamedType<"ACTION">; +def EntryType : NamedType<"ENTRY">; +def EntryTypePtr : PtrType; +def EntryTypePtrPtr : PtrType; + class Macro { string Name = name; } diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt index 88838eecc53c9..3ab62a4f667d2 100644 --- a/libc/src/CMakeLists.txt +++ b/libc/src/CMakeLists.txt @@ -35,3 +35,4 @@ add_subdirectory(signal) add_subdirectory(spawn) add_subdirectory(threads) add_subdirectory(time) +add_subdirectory(search) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index b939fae3be791..a76b22960f5a5 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -32,6 +32,7 @@ add_header_library( bit.h DEPENDS libc.src.__support.macros.attributes + libc.src.__support.CPP.type_traits ) add_header_library( @@ -230,6 +231,28 @@ add_header_library( libc.src.__support.OSUtil.osutil ) +add_header_library( + hash + HDRS + hash.h + DEPENDS + .bit + .uint128 + libc.src.__support.macros.attributes +) + +add_header_library( + memory_size + HDRS + memory_size.h + DEPENDS + libc.src.__support.CPP.type_traits + libc.src.__support.CPP.limits + libc.src.__support.macros.optimization + libc.src.__support.macros.attributes + libc.src.__support.macros.config +) + add_subdirectory(FPUtil) add_subdirectory(OSUtil) add_subdirectory(StringUtil) @@ -241,3 +264,5 @@ add_subdirectory(RPC) add_subdirectory(threads) add_subdirectory(File) + +add_subdirectory(HashTable) diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt new file mode 100644 index 0000000000000..238d460dacd42 --- /dev/null +++ b/libc/src/__support/HashTable/CMakeLists.txt @@ -0,0 +1,52 @@ +# TODO: `DISABLE_SSE2_OPT` does not quite work yet. +# We will investigate a better way of feature flag control. +add_header_library( + bitmask + HDRS + bitmask.h + FLAGS + DISABLE_SSE2_OPT + DEPENDS + libc.src.__support.bit + libc.src.__support.macros.properties.cpu_features +) + +list(FIND TARGET_ENTRYPOINT_NAME_LIST getrandom getrandom_index) +if (NOT ${getrandom_index} EQUAL -1) + message(STATUS "Using getrandom for hashtable randomness") + set(randomness_compile_flags -DLIBC_HASHTABLE_USE_GETRANDOM) + set(randomness_extra_depends + libc.src.sys.random.getrandom libc.src.errno.errno) +endif() + + +add_header_library( + table + HDRS + table.h + DEPENDS + .bitmask + libc.src.__support.memory_size + libc.src.__support.bit + libc.src.__support.CPP.type_traits + libc.src.__support.CPP.new + libc.src.__support.macros.attributes + libc.src.__support.macros.optimization + libc.src.__support.hash + libc.src.string.memset + libc.src.string.strcmp + libc.src.string.strlen + libc.include.llvm-libc-types.ENTRY +) + +add_header_library( + randomness + HDRS + randomness.h + COMPILE_OPTIONS + ${randomness_compile_flags} + DEPENDS + libc.src.__support.hash + libc.src.__support.common + ${randomness_extra_depends} +) diff --git a/libc/src/__support/HashTable/bitmask.h b/libc/src/__support/HashTable/bitmask.h new file mode 100644 index 0000000000000..761125feb951d --- /dev/null +++ b/libc/src/__support/HashTable/bitmask.h @@ -0,0 +1,94 @@ +//===-- HashTable BitMasks --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H +#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H + +#include "src/__support/bit.h" +#include "src/__support/macros/properties/cpu_features.h" +#include // size_t +#include // uint8_t, uint64_t + +namespace LIBC_NAMESPACE { +namespace internal { + +// Implementations of the bitmask. +// The backend word type may vary depending on different microarchitectures. +// For example, with X86 SSE2, the bitmask is just the 16bit unsigned integer +// corresponding to lanes in a SIMD register. +// +// Notice that this implementation is simplified from traditional swisstable: +// since we do not support deletion, we only need to care about if the highest +// bit is set or not: +// ============================= +// | Slot Status | Bitmask | +// ============================= +// | Available | 0b1xxx'xxxx | +// | Occupied | 0b0xxx'xxxx | +// ============================= +template struct BitMaskAdaptor { + // A masked constant whose bits are all set. + LIBC_INLINE_VAR constexpr static T MASK = WORD_MASK; + // A stride in the bitmask may use multiple bits. + LIBC_INLINE_VAR constexpr static size_t STRIDE = WORD_STRIDE; + + T word; + + // Check if any bit is set inside the word. + LIBC_INLINE constexpr bool any_bit_set() const { return word != 0; } + + // Count trailing zeros with respect to stride. (Assume the bitmask is none + // zero.) + LIBC_INLINE constexpr size_t lowest_set_bit_nonzero() const { + return unsafe_ctz(word) / WORD_STRIDE; + } +}; + +// Not all bitmasks are iterable --- only those who has only MSB set in each +// lane. Hence, we make the types nomially different to distinguish them. +template struct IteratableBitMaskAdaptor : public BitMask { + // Use the bitmask as an iterator. Update the state and return current lowest + // set bit. To make the bitmask iterable, each stride must contain 0 or exact + // 1 set bit. + LIBC_INLINE void remove_lowest_bit() { + // Remove the last set bit inside the word: + // word = 011110100 (original value) + // word - 1 = 011110011 (invert all bits up to the last set bit) + // word & (word - 1) = 011110000 (value with the last bit cleared) + this->word = this->word & (this->word - 1); + } + using value_type = size_t; + using iterator = BitMask; + using const_iterator = BitMask; + LIBC_INLINE size_t operator*() const { + return this->lowest_set_bit_nonzero(); + } + LIBC_INLINE IteratableBitMaskAdaptor &operator++() { + this->remove_lowest_bit(); + return *this; + } + LIBC_INLINE IteratableBitMaskAdaptor begin() { return *this; } + LIBC_INLINE IteratableBitMaskAdaptor end() { return {0}; } + LIBC_INLINE bool operator==(const IteratableBitMaskAdaptor &other) { + return this->word == other.word; + } + LIBC_INLINE bool operator!=(const IteratableBitMaskAdaptor &other) { + return this->word != other.word; + } +}; + +} // namespace internal +} // namespace LIBC_NAMESPACE + +#if defined(LIBC_TARGET_CPU_HAS_SSE2) +#include "sse2/bitmask_impl.inc" +#else +#include "generic/bitmask_impl.inc" +#endif + +#endif // LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H diff --git a/libc/src/__support/HashTable/generic/bitmask_impl.inc b/libc/src/__support/HashTable/generic/bitmask_impl.inc new file mode 100644 index 0000000000000..13e08382adf62 --- /dev/null +++ b/libc/src/__support/HashTable/generic/bitmask_impl.inc @@ -0,0 +1,102 @@ +//===-- HashTable BitMasks Generic Implementation ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/endian.h" + +namespace LIBC_NAMESPACE { +namespace internal { +// Helper function to spread a byte across the whole word. +// Accumutively, the procedure looks like: +// byte = 0x00000000000000ff +// byte | (byte << 8) = 0x000000000000ffff +// byte | (byte << 16) = 0x00000000ffffffff +// byte | (byte << 32) = 0xffffffffffffffff +LIBC_INLINE constexpr uintptr_t repeat_byte(uintptr_t byte) { + size_t shift_amount = 8; + while (shift_amount < sizeof(uintptr_t) * 8) { + byte |= byte << shift_amount; + shift_amount <<= 1; + } + return byte; +} + +using BitMask = BitMaskAdaptor; +using IteratableBitMask = IteratableBitMaskAdaptor; + +struct Group { + uintptr_t data; + + // Load a group of control words from an arbitary address. + LIBC_INLINE static Group load(const void *__restrict addr) { + union { + uintptr_t value; + char bytes[sizeof(uintptr_t)]; + } data; + for (size_t i = 0; i < sizeof(uintptr_t); ++i) + data.bytes[i] = static_cast(addr)[i]; + return {data.value}; + } + + // Find out the lanes equal to the given byte and return the bitmask + // with corresponding bits set. + LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const { + // Given byte = 0x10, suppose the data is: + // + // data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ] + // + // First, we compare the byte using XOR operation: + // + // [ 0x10 | 0x10 | 0x10 | 0x10 | ... ] (0) + // ^ [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ] (1) + // = [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2) + // + // Notice that the equal positions will now be 0x00, so if we substract 0x01 + // respective to every byte, it will need to carry the substraction to upper + // bits (assume no carry from the hidden parts) + // [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2) + // - [ 0x01 | 0x01 | 0x01 | 0x01 | ... ] (3) + // = [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4) + // + // But there may be some bytes whose highest bit is already set after the + // xor operation. To rule out these positions, we AND them with the NOT + // of the XOR result: + // + // [ 0xFF | 0xFF | 0xEF | 0x1E | ... ] (5, NOT (2)) + // & [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4) + // = [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6) + // + // To make the bitmask iteratable, only one bit can be set in each stride. + // So we AND each byte with 0x80 and keep only the highest bit: + // + // [ 0xFE | 0xFF | 0x0F | 0x10 | ... ] (6) + // & [ 0x80 | 0x80 | 0x80 | 0x80 | ... ] (7) + // = [ 0x80 | 0x80 | 0x00 | 0x00 | ... ] (8) + // + // However, there are possitbilites for false positives. For example, if the + // data is [ 0x10 | 0x11 | 0x10 | 0xF1 | ... ]. This only happens when there + // is a key only differs from the searched by the lowest bit. The claims + // are: + // + // - This never happens for `EMPTY` and `DELETED`, only full entries. + // - The check for key equality will catch these. + // - This only happens if there is at least 1 true match. + // - The chance of this happening is very low (< 1% chance per byte). + auto cmp = data ^ repeat_byte(byte); + auto result = LIBC_NAMESPACE::Endian::to_little_endian( + (cmp - repeat_byte(0x01)) & ~cmp & repeat_byte(0x80)); + return {result}; + } + + // Find out the lanes equal to EMPTY or DELETE (highest bit set) and + // return the bitmask with corresponding bits set. + LIBC_INLINE BitMask mask_available() const { + return {LIBC_NAMESPACE::Endian::to_little_endian(data) & repeat_byte(0x80)}; + } +}; +} // namespace internal +} // namespace LIBC_NAMESPACE diff --git a/libc/src/__support/HashTable/randomness.h b/libc/src/__support/HashTable/randomness.h new file mode 100644 index 0000000000000..bcc91190e9d45 --- /dev/null +++ b/libc/src/__support/HashTable/randomness.h @@ -0,0 +1,62 @@ +//===-- HashTable Randomness ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_HASHTABLE_RANDOMNESS_H +#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_RANDOMNESS_H + +#include "src/__support/common.h" +#include "src/__support/hash.h" +#include "src/__support/macros/attributes.h" +#if defined(LIBC_HASHTABLE_USE_GETRANDOM) +#include "src/errno/libc_errno.h" +#include "src/sys/random/getrandom.h" +#endif + +namespace LIBC_NAMESPACE { +namespace internal { +namespace randomness { +// We need an initial state for the hash function. More entropy are to be added +// at the first use and each round of reseeding. The following random numbers +// are generated from https://www.random.org/cgi-bin/randbyte?nbytes=64&format=h +LIBC_INLINE_VAR thread_local HashState state = { + 0x38049a7ea6f5a79b, 0x45cb02147c3f718a, 0x53eb431c12770718, + 0x5b55742bd20a2fcb}; +LIBC_INLINE_VAR thread_local uint64_t counter = 0; +LIBC_INLINE_VAR constexpr uint64_t RESEED_PERIOD = 1024; +LIBC_INLINE uint64_t next_random_seed() { + if (counter % RESEED_PERIOD == 0) { + uint64_t entropy[2]; + entropy[0] = reinterpret_cast(&entropy); + entropy[1] = reinterpret_cast(&state); +#if defined(LIBC_HASHTABLE_USE_GETRANDOM) + int errno_backup = libc_errno; + ssize_t count = sizeof(entropy); + uint8_t *buffer = reinterpret_cast(entropy); + while (count > 0) { + ssize_t len = getrandom(buffer, count, 0); + if (len == -1) { + if (libc_errno == ENOSYS) + break; + continue; + } + count -= len; + buffer += len; + } + libc_errno = errno_backup; +#endif + state.update(&entropy, sizeof(entropy)); + } + state.update(&counter, sizeof(counter)); + counter++; + return state.finish(); +} + +} // namespace randomness +} // namespace internal +} // namespace LIBC_NAMESPACE +#endif // LLVM_LIBC_SRC___SUPPORT_HASHTABLE_RANDOMNESS_H diff --git a/libc/src/__support/HashTable/sse2/bitmask_impl.inc b/libc/src/__support/HashTable/sse2/bitmask_impl.inc new file mode 100644 index 0000000000000..6308f2fed6661 --- /dev/null +++ b/libc/src/__support/HashTable/sse2/bitmask_impl.inc @@ -0,0 +1,40 @@ +//===-- HashTable BitMasks SSE2 Implementation ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +namespace LIBC_NAMESPACE { +namespace internal { +// With SSE2, every bitmask is iteratable as +// we use single bit to encode the data. + +using BitMask = BitMaskAdaptor; +using IteratableBitMask = IteratableBitMaskAdaptor; + +struct Group { + __m128i data; + + // Load a group of control words from an arbitary address. + LIBC_INLINE static Group load(const void *__restrict addr) { + return {_mm_loadu_si128(static_cast(addr))}; + } + + // Find out the lanes equal to the given byte and return the bitmask + // with corresponding bits set. + LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const { + auto cmp = _mm_cmpeq_epi8(data, _mm_set1_epi8(byte)); + auto bitmask = static_cast(_mm_movemask_epi8(cmp)); + return {bitmask}; + } + + LIBC_INLINE BitMask mask_available() const { + auto bitmask = static_cast(_mm_movemask_epi8(data)); + return {bitmask}; + } +}; +} // namespace internal +} // namespace LIBC_NAMESPACE diff --git a/libc/src/__support/HashTable/table.h b/libc/src/__support/HashTable/table.h new file mode 100644 index 0000000000000..ec0ec78869ad5 --- /dev/null +++ b/libc/src/__support/HashTable/table.h @@ -0,0 +1,235 @@ +//===-- Fix-sized Monotonic HashTable ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_HASHTABLE_table_H +#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_table_H + +#include "include/llvm-libc-types/ENTRY.h" +#include "src/__support/CPP/new.h" +#include "src/__support/CPP/type_traits.h" +#include "src/__support/HashTable/bitmask.h" +#include "src/__support/bit.h" +#include "src/__support/hash.h" +#include "src/__support/macros/attributes.h" +#include "src/__support/macros/optimization.h" +#include "src/__support/memory_size.h" +#include "src/string/memset.h" +#include "src/string/strcmp.h" +#include "src/string/strlen.h" +#include +#include + +namespace LIBC_NAMESPACE { +namespace internal { + +LIBC_INLINE uint8_t secondary_hash(uint64_t hash) { + // top 7 bits of the hash. + return static_cast(hash >> 57); +} + +// Probe sequence based on triangular numbers, which is guaranteed (since our +// table size is a power of two) to visit every group of elements exactly once. +// +// A triangular probe has us jump by 1 more group every time. So first we +// jump by 1 group (meaning we just continue our linear scan), then 2 groups +// (skipping over 1 group), then 3 groups (skipping over 2 groups), and so on. +// +// If we set sizeof(Group) to be one unit: +// T[k] = sum {1 + 2 + ... + k} = k * (k + 1) / 2 +// It is provable that T[k] mod 2^m generates a permutation of +// 0, 1, 2, 3, ..., 2^m - 2, 2^m - 1 +// Detailed proof is available at: +// https://fgiesen.wordpress.com/2015/02/22/triangular-numbers-mod-2n/ +struct ProbeSequence { + size_t position; + size_t stride; + size_t entries_mask; + + LIBC_INLINE size_t next() { + position += stride; + position &= entries_mask; + stride += sizeof(Group); + return position; + } +}; + +// The number of entries is at least group width: we do not +// need to do the fixup when we set the control bytes. +// The number of entries is at least 8: we don't have to worry +// about special sizes when check the fullness of the table. +LIBC_INLINE size_t capacity_to_entries(size_t cap) { + if (8 >= sizeof(Group) && cap < 8) + return 8; + if (16 >= sizeof(Group) && cap < 15) + return 16; + if (cap < sizeof(Group)) + cap = sizeof(Group); + // overflow is always checked in allocate() + return next_power_of_two(cap * 8 / 7); +} + +// The heap memory layout for N buckets HashTable is as follows: +// +// ======================= +// | N * Entry | +// ======================= <- align boundary +// | Header | +// ======================= +// | (N + 1) * Byte | +// ======================= +// +// The trailing group part is to make sure we can always load +// a whole group of control bytes. + +struct HashTable { + HashState state; + size_t entries_mask; // number of buckets - 1 + size_t available_slots; // less than capacity +private: + // How many entries are there in the table. + LIBC_INLINE size_t num_of_entries() const { return entries_mask + 1; } + + LIBC_INLINE bool is_full() const { return available_slots == 0; } + + LIBC_INLINE size_t offset_from_entries() const { + size_t entries_size = num_of_entries() * sizeof(ENTRY); + return entries_size + offset_to(entries_size, table_alignment()); + } + + LIBC_INLINE constexpr static size_t table_alignment() { + return alignof(HashTable) > alignof(ENTRY) ? alignof(HashTable) + : alignof(ENTRY); + } + + LIBC_INLINE constexpr static size_t offset_to_groups() { + return sizeof(HashTable); + } + + LIBC_INLINE ENTRY &entry(size_t i) { + return reinterpret_cast(this)[-i - 1]; + } + + LIBC_INLINE uint8_t &control(size_t i) { + uint8_t *ptr = reinterpret_cast(this) + offset_to_groups(); + return ptr[i]; + } + + // We duplicate a group of control bytes to the end. Thus, it is possible that + // we need to set two control bytes at the same time. + LIBC_INLINE void set_ctrl(size_t index, uint8_t value) { + size_t index2 = ((index - sizeof(Group)) & entries_mask) + sizeof(Group); + control(index) = value; + control(index2) = value; + } + +public: + LIBC_INLINE static void deallocate(HashTable *table) { + if (table) { + void *ptr = + reinterpret_cast(table) - table->offset_from_entries(); + operator delete(ptr, std::align_val_t{table_alignment()}); + } + } + LIBC_INLINE static HashTable *allocate(size_t capacity, uint64_t randomness) { + // check if capacity_to_entries overflows MAX_MEM_SIZE + if (capacity > size_t{1} << (8 * sizeof(size_t) - 1 - 3)) + return nullptr; + SafeMemSize entries{capacity_to_entries(capacity)}; + SafeMemSize entries_size = entries * SafeMemSize{sizeof(ENTRY)}; + SafeMemSize align_boundary = entries_size.align_up(table_alignment()); + SafeMemSize ctrl_sizes = entries + SafeMemSize{sizeof(Group)}; + SafeMemSize header_size{offset_to_groups()}; + SafeMemSize total_size = + (align_boundary + header_size + ctrl_sizes).align_up(table_alignment()); + if (!total_size.valid()) + return nullptr; + AllocChecker ac; + + void *mem = operator new(total_size, std::align_val_t{table_alignment()}, + ac); + + HashTable *table = reinterpret_cast( + static_cast(mem) + align_boundary); + if (ac) { + table->entries_mask = entries - 1u; + table->available_slots = entries / 8 * 7; + table->state = HashState{randomness}; + memset(&table->control(0), 0x80, ctrl_sizes); + memset(mem, 0, table->offset_from_entries()); + } + return table; + } + +private: + LIBC_INLINE size_t find(const char *key, uint64_t primary) { + uint8_t secondary = secondary_hash(primary); + ProbeSequence sequence{static_cast(primary), 0, entries_mask}; + while (true) { + size_t pos = sequence.next(); + Group ctrls = Group::load(&control(pos)); + IteratableBitMask masks = ctrls.match_byte(secondary); + for (size_t i : masks) { + size_t index = (pos + i) & entries_mask; + ENTRY &entry = this->entry(index); + if (LIBC_LIKELY(entry.key != nullptr && strcmp(entry.key, key) == 0)) + return index; + } + BitMask available = ctrls.mask_available(); + // Since there is no deletion, the first time we find an available slot + // it is also ready to be used as an insertion point. Therefore, we also + // return the first available slot we find. If such entry is empty, the + // key will be nullptr. + if (LIBC_LIKELY(available.any_bit_set())) { + size_t index = + (pos + available.lowest_set_bit_nonzero()) & entries_mask; + return index; + } + } + } + +private: + LIBC_INLINE ENTRY *insert(ENTRY item, uint64_t primary) { + auto index = find(item.key, primary); + auto slot = &this->entry(index); + // SVr4 and POSIX.1-2001 specify that action is significant only for + // unsuccessful searches, so that an ENTER should not do anything + // for a successful search. + if (slot->key != nullptr) + return slot; + + if (!is_full()) { + set_ctrl(index, secondary_hash(primary)); + slot->key = item.key; + slot->data = item.data; + available_slots--; + return slot; + } + return nullptr; + } + +public: + LIBC_INLINE ENTRY *find(const char *key) { + LIBC_NAMESPACE::internal::HashState hasher = state; + hasher.update(key, strlen(key)); + uint64_t primary = hasher.finish(); + ENTRY &entry = this->entry(find(key, primary)); + if (entry.key == nullptr) + return nullptr; + return &entry; + } + LIBC_INLINE ENTRY *insert(ENTRY item) { + LIBC_NAMESPACE::internal::HashState hasher = state; + hasher.update(item.key, strlen(item.key)); + uint64_t primary = hasher.finish(); + return insert(item, primary); + } +}; +} // namespace internal +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC___SUPPORT_HASHTABLE_table_H diff --git a/libc/src/__support/bit.h b/libc/src/__support/bit.h index d0a15c89b7b45..ab2e07744a866 100644 --- a/libc/src/__support/bit.h +++ b/libc/src/__support/bit.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_BIT_H #define LLVM_LIBC_SRC___SUPPORT_BIT_H +#include "src/__support/CPP/type_traits.h" // make_unsigned #include "src/__support/macros/attributes.h" // LIBC_INLINE namespace LIBC_NAMESPACE { @@ -28,6 +29,14 @@ template LIBC_INLINE int constexpr correct_zero(T val, int bits) { } template LIBC_INLINE constexpr int clz(T val); +template <> LIBC_INLINE int clz(unsigned char val) { + return __builtin_clz(static_cast(val)) - + 8 * (sizeof(unsigned int) - sizeof(unsigned char)); +} +template <> LIBC_INLINE int clz(unsigned short val) { + return __builtin_clz(static_cast(val)) - + 8 * (sizeof(unsigned int) - sizeof(unsigned short)); +} template <> LIBC_INLINE int clz(unsigned int val) { return __builtin_clz(val); } @@ -42,6 +51,12 @@ clz(unsigned long long int val) { } template LIBC_INLINE constexpr int ctz(T val); +template <> LIBC_INLINE int ctz(unsigned char val) { + return __builtin_ctz(static_cast(val)); +} +template <> LIBC_INLINE int ctz(unsigned short val) { + return __builtin_ctz(static_cast(val)); +} template <> LIBC_INLINE int ctz(unsigned int val) { return __builtin_ctz(val); } @@ -72,6 +87,31 @@ template LIBC_INLINE constexpr int unsafe_clz(T val) { return __internal::clz(val); } +template LIBC_INLINE constexpr T next_power_of_two(T val) { + if (val == 0) + return 1; + T idx = safe_clz(val - 1); + return static_cast(1) << ((8ull * sizeof(T)) - idx); +} + +template LIBC_INLINE constexpr bool is_power_of_two(T val) { + return val != 0 && (val & (val - 1)) == 0; +} + +template LIBC_INLINE constexpr T offset_to(T val, T align) { + return (-val) & (align - 1); +} + +template LIBC_INLINE constexpr T rotate_left(T val, T amount) { + // Implementation taken from "Safe, Efficient, and Portable Rotate in C/C++" + // https://blog.regehr.org/archives/1063 + // Using the safe version as the rotation pattern is now recognized by both + // GCC and Clang. + using U = cpp::make_unsigned_t; + U v = static_cast(val); + U a = static_cast(amount); + return (v << a) | (v >> ((-a) & (sizeof(U) * 8 - 1))); +} } // namespace LIBC_NAMESPACE #endif // LLVM_LIBC_SRC___SUPPORT_BIT_H diff --git a/libc/src/__support/hash.h b/libc/src/__support/hash.h new file mode 100644 index 0000000000000..ad12cf79e8d2c --- /dev/null +++ b/libc/src/__support/hash.h @@ -0,0 +1,164 @@ +//===-- Portable string hash function ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_HASH_H +#define LLVM_LIBC_SRC___SUPPORT_HASH_H + +#include "src/__support/UInt128.h" // UInt128 +#include "src/__support/bit.h" // rotate_left +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include // For uint64_t + +namespace LIBC_NAMESPACE { +namespace internal { + +// Folded multiplication. +// This function multiplies two 64-bit integers and xor the high and +// low 64-bit parts of the result. +LIBC_INLINE uint64_t folded_multiply(uint64_t x, uint64_t y) { + UInt128 p = static_cast(x) * static_cast(y); + uint64_t low = static_cast(p); + uint64_t high = static_cast(p >> 64); + return low ^ high; +} + +// Read as little endian. +// Shift-and-or implementation does not give a satisfactory code on aarch64. +// Therefore, we use a union to read the value. +template LIBC_INLINE T read_little_endian(const void *ptr) { + const uint8_t *bytes = static_cast(ptr); + union { + T value; + uint8_t buffer[sizeof(T)]; + } data; +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + // Compiler should able to optimize this as a load followed by a byte swap. + // On aarch64 (-mbig-endian), this compiles to the following for int: + // ldr w0, [x0] + // rev w0, w0 + // ret + for (size_t i = 0; i < sizeof(T); ++i) { + data.buffer[i] = bytes[sizeof(T) - i - 1]; + } +#else + for (size_t i = 0; i < sizeof(T); ++i) { + data.buffer[i] = bytes[i]; + } +#endif + return data.value; +} + +// Specialized read functions for small values. size must be <= 8. +LIBC_INLINE void read_small_values(const void *ptr, size_t size, uint64_t &low, + uint64_t &high) { + const uint8_t *bytes = static_cast(ptr); + if (size >= 2) { + if (size >= 4) { + low = static_cast(read_little_endian(&bytes[0])); + high = + static_cast(read_little_endian(&bytes[size - 4])); + } else { + low = static_cast(read_little_endian(&bytes[0])); + high = static_cast(bytes[size - 1]); + } + } else { + if (size > 0) { + low = static_cast(bytes[0]); + high = static_cast(bytes[0]); + } else { + low = 0; + high = 0; + } + } +} + +// This constant comes from Kunth's prng (it empirically works well). +LIBC_INLINE_VAR constexpr uint64_t MULTIPLE = 6364136223846793005; +// Rotation amount for mixing. +LIBC_INLINE_VAR constexpr uint64_t ROTATE = 23; + +// Randomly generated values. For now, we use the same values as in aHash as +// they are widely tested. +// https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L38 +LIBC_INLINE_VAR constexpr uint64_t RANDOMNESS[2][4] = { + {0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0, + 0x082efa98ec4e6c89}, + {0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd, + 0x3f84d5b5b5470917}, +}; + +// This is a portable string hasher. It is not cryptographically secure. +// The quality of the hash is good enough to pass all tests in SMHasher. +// The implementation is derived from the generic routine of aHash. +class HashState { + uint64_t buffer; + uint64_t pad; + uint64_t extra_keys[2]; + LIBC_INLINE void update(uint64_t low, uint64_t high) { + uint64_t combined = + folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]); + buffer = (buffer + pad) ^ combined; + buffer = rotate_left(buffer, ROTATE); + } + LIBC_INLINE static uint64_t mix(uint64_t seed) { + HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2], + RANDOMNESS[0][3]}; + mixer.update(seed, 0); + return mixer.finish(); + } + +public: + LIBC_INLINE constexpr HashState(uint64_t a, uint64_t b, uint64_t c, + uint64_t d) + : buffer(a), pad(b), extra_keys{c, d} {} + LIBC_INLINE HashState(uint64_t seed) { + // Mix one more round of the seed to make it stronger. + uint64_t mixed = mix(seed); + buffer = RANDOMNESS[1][0] ^ mixed; + pad = RANDOMNESS[1][1] ^ mixed; + extra_keys[0] = RANDOMNESS[1][2] ^ mixed; + extra_keys[1] = RANDOMNESS[1][3] ^ mixed; + } + LIBC_INLINE void update(const void *ptr, size_t size) { + uint8_t const *bytes = static_cast(ptr); + buffer = (buffer + size) * MULTIPLE; + uint64_t low, high; + if (size > 8) { + if (size > 16) { + // update tail + low = read_little_endian(&bytes[size - 16]); + high = read_little_endian(&bytes[size - 8]); + update(low, high); + while (size > 16) { + low = read_little_endian(&bytes[0]); + high = read_little_endian(&bytes[8]); + update(low, high); + bytes += 16; + size -= 16; + } + } else { + low = read_little_endian(&bytes[0]); + high = read_little_endian(&bytes[size - 8]); + update(low, high); + } + } else { + read_small_values(ptr, size, low, high); + update(low, high); + } + } + LIBC_INLINE uint64_t finish() { + uint64_t rot = buffer & 63; + uint64_t folded = folded_multiply(buffer, pad); + return rotate_left(folded, rot); + } +}; + +} // namespace internal +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC___SUPPORT_HASH_H diff --git a/libc/src/__support/memory_size.h b/libc/src/__support/memory_size.h new file mode 100644 index 0000000000000..df179a6604714 --- /dev/null +++ b/libc/src/__support/memory_size.h @@ -0,0 +1,72 @@ +//===-- Memory Size ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/__support/CPP/type_traits.h" +#include "src/__support/bit.h" +#include "src/__support/macros/attributes.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE { +namespace internal { +template LIBC_INLINE bool mul_overflow(T a, T b, T *res) { +#if LIBC_HAS_BUILTIN(__builtin_mul_overflow) + return __builtin_mul_overflow(a, b, res); +#else + T max = cpp::numeric_limits::max(); + T min = cpp::numeric_limits::min(); + bool overflow = (b > 0 && (a > max / b || a < min / b)) || + (b < 0 && (a < max / b || a > min / b)); + if (!overflow) + *res = a * b; + return overflow; +#endif +} +// Limit memory size to the max of ssize_t +class SafeMemSize { +private: + using type = cpp::make_signed_t; + type value; + LIBC_INLINE explicit SafeMemSize(type value) : value(value) {} + +public: + LIBC_INLINE_VAR static constexpr size_t MAX_MEM_SIZE = + static_cast(cpp::numeric_limits::max()); + LIBC_INLINE explicit SafeMemSize(size_t value) + : value(value <= MAX_MEM_SIZE ? static_cast(value) : -1) {} + LIBC_INLINE operator size_t() { return static_cast(value); } + LIBC_INLINE bool valid() { return value >= 0; } + LIBC_INLINE SafeMemSize operator+(const SafeMemSize &other) { + type result; + if (LIBC_UNLIKELY((value | other.value) < 0)) + result = -1; + result = value + other.value; + return SafeMemSize{result}; + } + LIBC_INLINE SafeMemSize operator*(const SafeMemSize &other) { + type result; + if (LIBC_UNLIKELY((value | other.value) < 0)) + result = -1; + if (LIBC_UNLIKELY(mul_overflow(value, other.value, &result))) + result = -1; + return SafeMemSize{result}; + } + LIBC_INLINE SafeMemSize align_up(size_t alignment) { + if (!is_power_of_two(alignment) || alignment > MAX_MEM_SIZE || !valid()) + return SafeMemSize{type{-1}}; + + type offset = LIBC_NAMESPACE::offset_to(value, alignment); + + if (LIBC_UNLIKELY(offset > static_cast(MAX_MEM_SIZE) - value)) + return SafeMemSize{type{-1}}; + + return SafeMemSize{value + offset}; + } +}; +} // namespace internal +} // namespace LIBC_NAMESPACE diff --git a/libc/src/search/CMakeLists.txt b/libc/src/search/CMakeLists.txt new file mode 100644 index 0000000000000..4ae5274a3ba98 --- /dev/null +++ b/libc/src/search/CMakeLists.txt @@ -0,0 +1,79 @@ +add_subdirectory(hsearch) + +add_entrypoint_object( + hcreate + SRCS + hcreate.cpp + HDRS + hcreate.h + DEPENDS + libc.src.search.hsearch.global + libc.src.__support.HashTable.table + libc.src.__support.HashTable.randomness + libc.src.errno.errno + libc.include.search +) + +add_entrypoint_object( + hcreate_r + SRCS + hcreate_r.cpp + HDRS + hcreate_r.h + DEPENDS + libc.src.__support.HashTable.table + libc.src.__support.HashTable.randomness + libc.src.errno.errno + libc.include.search +) + +add_entrypoint_object( + hsearch + SRCS + hsearch.cpp + HDRS + hsearch.h + DEPENDS + libc.src.search.hsearch.global + libc.src.__support.HashTable.table + libc.src.__support.libc_assert + libc.src.errno.errno + libc.include.search +) + +add_entrypoint_object( + hsearch_r + SRCS + hsearch_r.cpp + HDRS + hsearch_r.h + DEPENDS + libc.src.__support.HashTable.table + libc.src.errno.errno + libc.include.search +) + +add_entrypoint_object( + hdestroy + SRCS + hdestroy.cpp + HDRS + hdestroy.h + DEPENDS + libc.src.search.hsearch.global + libc.src.__support.HashTable.table + libc.src.__support.libc_assert + libc.include.search +) + +add_entrypoint_object( + hdestroy_r + SRCS + hdestroy_r.cpp + HDRS + hdestroy_r.h + DEPENDS + libc.src.errno.errno + libc.src.__support.HashTable.table + libc.include.search +) diff --git a/libc/src/search/hcreate.cpp b/libc/src/search/hcreate.cpp new file mode 100644 index 0000000000000..9c05e317a2d05 --- /dev/null +++ b/libc/src/search/hcreate.cpp @@ -0,0 +1,28 @@ +//===-- Implementation of hcreate -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/search/hcreate.h" +#include "src/__support/HashTable/randomness.h" +#include "src/__support/HashTable/table.h" +#include "src/errno/libc_errno.h" +#include "src/search/hsearch/global.h" + +namespace LIBC_NAMESPACE { +LLVM_LIBC_FUNCTION(int, hcreate, (size_t capacity)) { + uint64_t randomness = internal::randomness::next_random_seed(); + internal::HashTable *table = + internal::HashTable::allocate(capacity, randomness); + if (table == nullptr) { + libc_errno = ENOMEM; + return 0; + } + internal::global_hash_table = table; + return 1; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/search/hcreate.h b/libc/src/search/hcreate.h new file mode 100644 index 0000000000000..2ac37fb030c26 --- /dev/null +++ b/libc/src/search/hcreate.h @@ -0,0 +1,18 @@ +//===-- Implementation header for hcreate -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SEARCH_HCREATE_H +#define LLVM_LIBC_SRC_SEARCH_HCREATE_H + +#include + +namespace LIBC_NAMESPACE { +int hcreate(size_t capacity); +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_SEARCH_HCREATE_H diff --git a/libc/src/search/hcreate_r.cpp b/libc/src/search/hcreate_r.cpp new file mode 100644 index 0000000000000..612a45cd0c688 --- /dev/null +++ b/libc/src/search/hcreate_r.cpp @@ -0,0 +1,32 @@ +//===-- Implementation of hcreate_r -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/search/hcreate_r.h" +#include "src/__support/HashTable/randomness.h" +#include "src/__support/HashTable/table.h" +#include "src/errno/libc_errno.h" + +namespace LIBC_NAMESPACE { +LLVM_LIBC_FUNCTION(int, hcreate_r, + (size_t capacity, struct hsearch_data *htab)) { + if (htab == nullptr) { + libc_errno = EINVAL; + return 0; + } + uint64_t randomness = internal::randomness::next_random_seed(); + internal::HashTable *table = + internal::HashTable::allocate(capacity, randomness); + if (table == nullptr) { + libc_errno = ENOMEM; + return 0; + } + htab->__opaque = table; + return 1; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/search/hcreate_r.h b/libc/src/search/hcreate_r.h new file mode 100644 index 0000000000000..e81895ef815c9 --- /dev/null +++ b/libc/src/search/hcreate_r.h @@ -0,0 +1,18 @@ +//===-- Implementation header for hcreate_r ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SEARCH_HCREATE_R_H +#define LLVM_LIBC_SRC_SEARCH_HCREATE_R_H + +#include + +namespace LIBC_NAMESPACE { +int hcreate_r(size_t capacity, struct hsearch_data *htab); +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_SEARCH_HCREATE_R_H diff --git a/libc/src/search/hdestroy.cpp b/libc/src/search/hdestroy.cpp new file mode 100644 index 0000000000000..1af64f195e326 --- /dev/null +++ b/libc/src/search/hdestroy.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of hdestroy ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/search/hdestroy.h" +#include "src/__support/HashTable/table.h" +#include "src/__support/libc_assert.h" +#include "src/search/hsearch/global.h" + +namespace LIBC_NAMESPACE { +LLVM_LIBC_FUNCTION(void, hdestroy, (void)) { + LIBC_ASSERT(internal::global_hash_table != nullptr); + internal::HashTable::deallocate(internal::global_hash_table); + internal::global_hash_table = nullptr; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/search/hdestroy.h b/libc/src/search/hdestroy.h new file mode 100644 index 0000000000000..b81e309a6bc80 --- /dev/null +++ b/libc/src/search/hdestroy.h @@ -0,0 +1,18 @@ +//===-- Implementation header for hdestroy -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SEARCH_HDESTROY_H +#define LLVM_LIBC_SRC_SEARCH_HDESTROY_H + +#include + +namespace LIBC_NAMESPACE { +void hdestroy(void); +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_SEARCH_HDESTROY_H diff --git a/libc/src/search/hdestroy_r.cpp b/libc/src/search/hdestroy_r.cpp new file mode 100644 index 0000000000000..e2fda93931f78 --- /dev/null +++ b/libc/src/search/hdestroy_r.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of hdestroy_r ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/search/hdestroy_r.h" +#include "src/__support/HashTable/table.h" +#include "src/errno/libc_errno.h" + +namespace LIBC_NAMESPACE { +LLVM_LIBC_FUNCTION(void, hdestroy_r, (struct hsearch_data * htab)) { + if (htab == nullptr) { + libc_errno = EINVAL; + return; + } + internal::HashTable *table = + static_cast(htab->__opaque); + internal::HashTable::deallocate(table); + htab->__opaque = nullptr; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/search/hdestroy_r.h b/libc/src/search/hdestroy_r.h new file mode 100644 index 0000000000000..503af41794448 --- /dev/null +++ b/libc/src/search/hdestroy_r.h @@ -0,0 +1,18 @@ +//===-- Implementation header for hdestroy_r ---------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SEARCH_HDESTROY_R_H +#define LLVM_LIBC_SRC_SEARCH_HDESTROY_R_H + +#include + +namespace LIBC_NAMESPACE { +void hdestroy_r(struct hsearch_data *htab); +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_SEARCH_HDESTROY_R_H diff --git a/libc/src/search/hsearch.cpp b/libc/src/search/hsearch.cpp new file mode 100644 index 0000000000000..3a0d09aae835b --- /dev/null +++ b/libc/src/search/hsearch.cpp @@ -0,0 +1,36 @@ +//===-- Implementation of hsearch -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/search/hsearch.h" +#include "src/__support/HashTable/table.h" +#include "src/__support/libc_assert.h" +#include "src/errno/libc_errno.h" +#include "src/search/hsearch/global.h" + +namespace LIBC_NAMESPACE { +LLVM_LIBC_FUNCTION(ENTRY *, hsearch, (ENTRY item, ACTION action)) { + ENTRY *result; + LIBC_ASSERT(internal::global_hash_table != nullptr); + switch (action) { + case FIND: + result = internal::global_hash_table->find(item.key); + if (result == nullptr) { + libc_errno = ESRCH; + } + break; + case ENTER: + result = internal::global_hash_table->insert(item); + if (result == nullptr) { + libc_errno = ENOMEM; + } + break; + } + return result; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/search/hsearch.h b/libc/src/search/hsearch.h new file mode 100644 index 0000000000000..32dc073a49b83 --- /dev/null +++ b/libc/src/search/hsearch.h @@ -0,0 +1,18 @@ +//===-- Implementation header for hsearch -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SEARCH_HSEARCH_H +#define LLVM_LIBC_SRC_SEARCH_HSEARCH_H + +#include // ENTRY, ACTION + +namespace LIBC_NAMESPACE { +ENTRY *hsearch(ENTRY item, ACTION action); +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_SEARCH_HSEARCH_H diff --git a/libc/src/search/hsearch/CMakeLists.txt b/libc/src/search/hsearch/CMakeLists.txt new file mode 100644 index 0000000000000..17289f03d0628 --- /dev/null +++ b/libc/src/search/hsearch/CMakeLists.txt @@ -0,0 +1,7 @@ +add_object_library( + global + SRCS + global.cpp + HDRS + global.h +) diff --git a/libc/src/search/hsearch/global.cpp b/libc/src/search/hsearch/global.cpp new file mode 100644 index 0000000000000..b6782ada50de4 --- /dev/null +++ b/libc/src/search/hsearch/global.cpp @@ -0,0 +1,13 @@ +//===-- Global hashtable implementation -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +namespace LIBC_NAMESPACE { +namespace internal { +struct HashTable *global_hash_table = nullptr; +} +} // namespace LIBC_NAMESPACE diff --git a/libc/src/search/hsearch/global.h b/libc/src/search/hsearch/global.h new file mode 100644 index 0000000000000..292008cb0c807 --- /dev/null +++ b/libc/src/search/hsearch/global.h @@ -0,0 +1,13 @@ +//===-- Global hashtable header -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +namespace LIBC_NAMESPACE { +namespace internal { +extern struct HashTable *global_hash_table; +} +} // namespace LIBC_NAMESPACE diff --git a/libc/src/search/hsearch_r.cpp b/libc/src/search/hsearch_r.cpp new file mode 100644 index 0000000000000..958fba7c00d0d --- /dev/null +++ b/libc/src/search/hsearch_r.cpp @@ -0,0 +1,42 @@ +//===-- Implementation of hsearch_r -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/search/hsearch_r.h" +#include "src/__support/HashTable/table.h" +#include "src/errno/libc_errno.h" + +namespace LIBC_NAMESPACE { +LLVM_LIBC_FUNCTION(int, hsearch_r, + (ENTRY item, ACTION action, ENTRY **retval, + struct hsearch_data *htab)) { + if (htab == nullptr) { + libc_errno = EINVAL; + return 0; + } + internal::HashTable *table = + static_cast(htab->__opaque); + switch (action) { + case FIND: + *retval = table->find(item.key); + if (*retval == nullptr) { + libc_errno = ESRCH; + return 0; + } + break; + case ENTER: + *retval = table->insert(item); + if (*retval == nullptr) { + libc_errno = ENOMEM; + return 0; + } + break; + } + return 1; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/search/hsearch_r.h b/libc/src/search/hsearch_r.h new file mode 100644 index 0000000000000..d36094c2eba58 --- /dev/null +++ b/libc/src/search/hsearch_r.h @@ -0,0 +1,19 @@ +//===-- Implementation header for hsearch_r ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SEARCH_HSEARCH_R_H +#define LLVM_LIBC_SRC_SEARCH_HSEARCH_R_H + +#include // ENTRY, ACTION + +namespace LIBC_NAMESPACE { +int hsearch_r(ENTRY item, ACTION action, ENTRY **retval, + struct hsearch_data *htab); +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_SEARCH_HSEARCH_R_H diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index 52452cd1037db..c45b94f364397 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -46,6 +46,7 @@ add_subdirectory(stdlib) add_subdirectory(inttypes) add_subdirectory(stdio) add_subdirectory(wchar) +add_subdirectory(search) if(${LIBC_TARGET_OS} STREQUAL "linux") add_subdirectory(fcntl) diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 2920535fbaa56..2b9fa93bb548e 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -126,6 +126,32 @@ add_libc_test( libc.src.__support.char_vector ) +add_libc_test( + hash_test + SUITE + libc-support-tests + SRCS + hash_test.cpp + DEPENDS + libc.src.__support.hash + libc.src.__support.CPP.new + libc.src.stdlib.rand + libc.src.stdlib.srand + libc.src.string.memset + UNIT_TEST_ONLY + # Aligned Allocation is not supported in hermetic builds. +) + +add_libc_test( + memory_size_test + SUITE + libc-support-tests + SRCS + memory_size_test.cpp + DEPENDS + libc.src.__support.memory_size +) + add_executable( libc_str_to_float_comparison_test str_to_float_comparison_test.cpp @@ -155,3 +181,4 @@ add_subdirectory(File) add_subdirectory(RPC) add_subdirectory(OSUtil) add_subdirectory(FPUtil) +add_subdirectory(HashTable) diff --git a/libc/test/src/__support/HashTable/CMakeLists.txt b/libc/test/src/__support/HashTable/CMakeLists.txt new file mode 100644 index 0000000000000..ee8dde107c3fe --- /dev/null +++ b/libc/test/src/__support/HashTable/CMakeLists.txt @@ -0,0 +1,33 @@ +add_libc_test( + bitmask_test + SUITE + libc-support-tests + SRCS + bitmask_test.cpp + DEPENDS + libc.src.__support.HashTable.bitmask +) + +add_libc_test( + table_test + SUITE + libc-support-tests + SRCS + table_test.cpp + DEPENDS + libc.src.__support.HashTable.randomness + libc.src.__support.HashTable.table + libc.src.__support.common + UNIT_TEST_ONLY +) + +add_libc_test( + group_test + SUITE + libc-support-tests + SRCS + group_test.cpp + DEPENDS + libc.src.__support.HashTable.bitmask + libc.src.stdlib.rand +) diff --git a/libc/test/src/__support/HashTable/bitmask_test.cpp b/libc/test/src/__support/HashTable/bitmask_test.cpp new file mode 100644 index 0000000000000..c816c5d106388 --- /dev/null +++ b/libc/test/src/__support/HashTable/bitmask_test.cpp @@ -0,0 +1,69 @@ +//===-- Unittests for bitmask ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/HashTable/bitmask.h" +#include "test/UnitTest/Test.h" +namespace LIBC_NAMESPACE { +namespace internal { + +using ShortBitMask = BitMaskAdaptor; +using LargeBitMask = BitMaskAdaptor; + +TEST(LlvmLibcHashTableBitMaskTest, SingleBitStrideLowestSetBit) { + uint16_t data = 0xffff; + for (size_t i = 0; i < 16; ++i) { + if (ShortBitMask{data}.any_bit_set()) { + ASSERT_EQ(ShortBitMask{data}.lowest_set_bit_nonzero(), i); + data <<= 1; + } + } +} + +TEST(LlvmLibcHashTableBitMaskTest, MultiBitStrideLowestSetBit) { + uint64_t data = 0xffff'ffff'ffff'ffff; + for (size_t i = 0; i < 8; ++i) { + for (size_t j = 0; j < 8; ++j) { + if (LargeBitMask{data}.any_bit_set()) { + ASSERT_EQ(LargeBitMask{data}.lowest_set_bit_nonzero(), i); + data <<= 1; + } + } + } +} + +TEST(LlvmLibcHashTableBitMaskTest, SingleBitStrideIteration) { + using Iter = IteratableBitMaskAdaptor; + uint16_t data = 0xffff; + for (size_t i = 0; i < 16; ++i) { + Iter iter = {data}; + size_t j = i; + for (auto x : iter) { + ASSERT_EQ(x, j); + j++; + } + ASSERT_EQ(j, size_t{16}); + data <<= 1; + } +} + +TEST(LlvmLibcHashTableBitMaskTest, MultiBitStrideIteration) { + using Iter = IteratableBitMaskAdaptor; + uint64_t data = Iter::MASK; + for (size_t i = 0; i < 8; ++i) { + Iter iter = {data}; + size_t j = i; + for (auto x : iter) { + ASSERT_EQ(x, j); + j++; + } + ASSERT_EQ(j, size_t{8}); + data <<= Iter::STRIDE; + } +} +} // namespace internal +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/HashTable/group_test.cpp b/libc/test/src/__support/HashTable/group_test.cpp new file mode 100644 index 0000000000000..907908335863a --- /dev/null +++ b/libc/test/src/__support/HashTable/group_test.cpp @@ -0,0 +1,90 @@ +//===-- Unittests for control group ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/HashTable/bitmask.h" + +#include "src/stdlib/rand.h" +#include "test/UnitTest/Test.h" +#include + +namespace LIBC_NAMESPACE { +namespace internal { + +struct ByteArray { + alignas(Group) uint8_t data[sizeof(Group) + 1]{}; +}; + +TEST(LlvmLibcHashTableBitMaskTest, Match) { + // Any pair of targets have bit differences not only at the lowest bit. + // No False positive. + uint8_t targets[4] = {0x00, 0x11, 0xFF, 0x0F}; + size_t count[4] = {0, 0, 0, 0}; + size_t appearance[4][sizeof(Group)]; + ByteArray array{}; + + union { + uintptr_t random; + int data[sizeof(uintptr_t) / sizeof(int)]; + }; + + for (int &i : data) + i = rand(); + + for (size_t i = 0; i < sizeof(Group); ++i) { + size_t choice = random % 4; + random /= 4; + array.data[i] = targets[choice]; + appearance[choice][count[choice]++] = i; + } + + for (size_t t = 0; t < sizeof(targets); ++t) { + auto bitmask = Group::load(array.data).match_byte(targets[t]); + for (size_t i = 0; i < count[t]; ++i) { + size_t iterated = 0; + for (size_t position : bitmask) { + ASSERT_EQ(appearance[t][iterated], position); + iterated++; + } + ASSERT_EQ(count[t], iterated); + } + } +} + +TEST(LlvmLibcHashTableBitMaskTest, MaskAvailable) { + uint8_t values[3] = {0x00, 0x0F, 0x80}; + + for (size_t i = 0; i < sizeof(Group); ++i) { + ByteArray array{}; + + union { + uintptr_t random; + int data[sizeof(uintptr_t) / sizeof(int)]; + }; + + for (int &j : data) + j = rand(); + + ASSERT_FALSE(Group::load(array.data).mask_available().any_bit_set()); + + array.data[i] = 0x80; + for (size_t j = 0; j < sizeof(Group); ++j) { + if (i == j) + continue; + size_t sample_space = 2 + (j > i); + size_t choice = random % sample_space; + random /= sizeof(values); + array.data[j] = values[choice]; + } + + auto mask = Group::load(array.data).mask_available(); + ASSERT_TRUE(mask.any_bit_set()); + ASSERT_EQ(mask.lowest_set_bit_nonzero(), i); + } +} +} // namespace internal +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/HashTable/table_test.cpp b/libc/test/src/__support/HashTable/table_test.cpp new file mode 100644 index 0000000000000..f0aa82f2d5c76 --- /dev/null +++ b/libc/test/src/__support/HashTable/table_test.cpp @@ -0,0 +1,77 @@ +//===-- Unittests for table -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/HashTable/randomness.h" +#include "src/__support/HashTable/table.h" +#include "test/UnitTest/Test.h" + +namespace LIBC_NAMESPACE { +namespace internal { +TEST(LlvmLibcTableTest, AllocationAndDeallocation) { + size_t caps[] = {0, 1, 2, 3, 4, 7, 11, 37, 1024, 5261, 19999}; + const char *keys[] = {"", "a", "ab", "abc", + "abcd", "abcde", "abcdef", "abcdefg", + "abcdefgh", "abcdefghi", "abcdefghij"}; + for (size_t i : caps) { + HashTable *table = HashTable::allocate(i, 1); + ASSERT_NE(table, static_cast(nullptr)); + for (const char *key : keys) { + ASSERT_EQ(table->find(key), static_cast(nullptr)); + } + HashTable::deallocate(table); + } + ASSERT_EQ(HashTable::allocate(-1, 0), static_cast(nullptr)); + HashTable::deallocate(nullptr); +} + +TEST(LlvmLibcTableTest, Insertion) { + union key { + uint64_t value; + char bytes[8]; + } keys[256]; + for (size_t k = 0; k < 256; ++k) { + keys[k].value = LIBC_NAMESPACE::Endian::to_little_endian(k); + } + constexpr size_t CAP = next_power_of_two((sizeof(Group) + 1) * 8 / 7) / 8 * 7; + static_assert(CAP + 1 < 256, "CAP is too large for this test."); + HashTable *table = + HashTable::allocate(sizeof(Group) + 1, randomness::next_random_seed()); + ASSERT_NE(table, static_cast(nullptr)); + + // insert to full capacity. + for (size_t i = 0; i < CAP; ++i) { + ASSERT_NE(table->insert({keys[i].bytes, keys[i].bytes}), + static_cast(nullptr)); + } + + // one more insert should fail. + ASSERT_EQ(table->insert({keys[CAP + 1].bytes, keys[CAP + 1].bytes}), + static_cast(nullptr)); + + for (size_t i = 0; i < CAP; ++i) { + ASSERT_EQ(strcmp(table->find(keys[i].bytes)->key, keys[i].bytes), 0); + } + for (size_t i = CAP; i < 256; ++i) { + ASSERT_EQ(table->find(keys[i].bytes), static_cast(nullptr)); + } + + // do not replace old value + for (size_t i = 0; i < CAP; ++i) { + ASSERT_NE(table->insert({keys[i].bytes, reinterpret_cast(i)}), + static_cast(nullptr)); + } + for (size_t i = 0; i < CAP; ++i) { + ASSERT_EQ(table->find(keys[i].bytes)->data, + reinterpret_cast(keys[i].bytes)); + } + + HashTable::deallocate(table); +} + +} // namespace internal +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/bit_test.cpp b/libc/test/src/__support/bit_test.cpp index 09d9c2f0a4ed8..e585735394e2c 100644 --- a/libc/test/src/__support/bit_test.cpp +++ b/libc/test/src/__support/bit_test.cpp @@ -15,4 +15,53 @@ TEST(LlvmLibcBlockBitTest, TODO) { // TODO Implement me. } +TEST(LlvmLibcBlockBitTest, OffsetTo) { + ASSERT_EQ(offset_to(0, 512), 0); + ASSERT_EQ(offset_to(1, 512), 511); + ASSERT_EQ(offset_to(2, 512), 510); + ASSERT_EQ(offset_to(13, 1), 0); + ASSERT_EQ(offset_to(13, 4), 3); + for (unsigned int i = 0; i < 31; ++i) { + ASSERT_EQ((offset_to(i, 1u << i) + i) % (1u << i), 0u); + } +} + +TEST(LlvmLibcBlockBitTest, RotateLeft) { + { + unsigned current = 1; + for (unsigned i = 0; i < 8 * sizeof(unsigned); ++i) { + ASSERT_EQ(1u << i, current); + ASSERT_EQ(current, rotate_left(1u, i)); + current = rotate_left(current, 1u); + } + ASSERT_EQ(current, 1u); + } + { + int current = 1; + for (int i = 0; i < 8 * static_cast(sizeof(int)); ++i) { + ASSERT_EQ(1 << i, current); + ASSERT_EQ(current, rotate_left(1, i)); + current = rotate_left(current, 1); + } + ASSERT_EQ(current, 1); + } +} + +TEST(LlvmLibcBlockBitTest, NextPowerOfTwo) { + ASSERT_EQ(1u, next_power_of_two(0u)); + for (unsigned int i = 0; i < 31; ++i) { + ASSERT_EQ(1u << (i + 1), next_power_of_two((1u << i) + 1)); + ASSERT_EQ(1u << i, next_power_of_two(1u << i)); + } +} + +TEST(LlvmLibcBlockBitTest, IsPowerOfTwo) { + ASSERT_FALSE(is_power_of_two(0u)); + ASSERT_TRUE(is_power_of_two(1u)); + for (unsigned int i = 1; i < 31; ++i) { + ASSERT_TRUE(is_power_of_two(1u << i)); + ASSERT_FALSE(is_power_of_two((1u << i) + 1)); + } +} + } // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/hash_test.cpp b/libc/test/src/__support/hash_test.cpp new file mode 100644 index 0000000000000..612efd544c66f --- /dev/null +++ b/libc/test/src/__support/hash_test.cpp @@ -0,0 +1,140 @@ +//===-- Unittests for hash ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/new.h" +#include "src/__support/hash.h" +#include "src/stdlib/rand.h" +#include "src/stdlib/srand.h" +#include "src/string/memset.h" +#include "test/UnitTest/Test.h" + +template struct AlignedMemory { + T *data; + size_t offset; + std::align_val_t alignment; + AlignedMemory(size_t size, size_t alignment, size_t offset) + : offset(offset), alignment{alignment} { + size_t sz = size * sizeof(T); + size_t aligned = sz + ((-sz) & (alignment - 1)) + alignment; + LIBC_NAMESPACE::AllocChecker ac; + data = static_cast(operator new(aligned, this->alignment, ac)); + data += offset % alignment; + } + ~AlignedMemory() { operator delete(data - offset, alignment); } +}; + +size_t sizes[] = {0, 1, 23, 59, 1024, 5261}; +char values[] = {0, 1, 23, 59, 102, -1}; + +// Hash value should not change with different alignments. +TEST(LlvmLibcHashTest, SanityCheck) { + for (size_t sz : sizes) { + for (uint8_t val : values) { + uint64_t hash; + { + AlignedMemory mem(sz, 64, 0); + LIBC_NAMESPACE::memset(mem.data, val, sz); + LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef}; + state.update(mem.data, sz); + hash = state.finish(); + } + for (size_t offset = 1; offset < 64; ++offset) { + AlignedMemory mem(sz, 64, offset); + LIBC_NAMESPACE::memset(mem.data, val, sz); + LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef}; + state.update(mem.data, sz); + ASSERT_EQ(hash, state.finish()); + } + } + } +} + +static inline size_t popcnt(uint64_t x) { + size_t count = 0; + while (x) { + count += x & 1; + x >>= 1; + } + return count; +} + +// Mutate a single bit in a rather large input. The hash should change +// significantly. At least one fifth of the bits should not match. +TEST(LlvmLibcHashTest, Avalanche) { + for (size_t sz : sizes) { + for (uint8_t val : values) { + uint64_t hash; + AlignedMemory mem(sz, 64, 0); + LIBC_NAMESPACE::memset(mem.data, val, sz); + { + LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890}; + state.update(mem.data, sz); + hash = state.finish(); + } + for (size_t i = 0; i < sz; ++i) { + for (size_t j = 0; j < 8; ++j) { + uint8_t mask = 1 << j; + mem.data[i] ^= mask; + { + LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890}; + state.update(mem.data, sz); + uint64_t new_hash = state.finish(); + ASSERT_GE(popcnt(hash ^ new_hash), size_t{13}); + } + mem.data[i] ^= mask; + } + } + } + } +} + +// Hash a random sequence of input. The LSB should be uniform enough such that +// values spread across the entire range. +TEST(LlvmLibcHashTest, UniformLSB) { + LIBC_NAMESPACE::srand(0xffffffff); + for (size_t sz : sizes) { + AlignedMemory counters(sz, sizeof(size_t), 0); + LIBC_NAMESPACE::memset(counters.data, 0, sz * sizeof(size_t)); + for (size_t i = 0; i < 200 * sz; ++i) { + int randomness[8] = {LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(), + LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(), + LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(), + LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand()}; + { + LIBC_NAMESPACE::internal::HashState state{0x1a2b3c4d5e6f7a8b}; + state.update(randomness, sizeof(randomness)); + uint64_t hash = state.finish(); + counters.data[hash % sz]++; + } + } + for (size_t i = 0; i < sz; ++i) { + ASSERT_GE(counters.data[i], size_t{140}); + ASSERT_LE(counters.data[i], size_t{260}); + } + } +} + +// Hash a low entropy sequence. The MSB should be uniform enough such that +// there is no significant bias even if the value range is small. +// Top 7 bits is examined because it will be used as a secondary key in +// the hash table. +TEST(LlvmLibcHashTest, UniformMSB) { + size_t sz = 1 << 7; + AlignedMemory counters(sz, sizeof(size_t), 0); + LIBC_NAMESPACE::memset(counters.data, 0, sz * sizeof(size_t)); + for (size_t i = 0; i < 200 * sz; ++i) { + LIBC_NAMESPACE::internal::HashState state{0xa1b2c3d4e5f6a7b8}; + state.update(&i, sizeof(i)); + uint64_t hash = state.finish(); + counters.data[hash >> 57]++; + } + for (size_t i = 0; i < sz; ++i) { + ASSERT_GE(counters.data[i], size_t{140}); + ASSERT_LE(counters.data[i], size_t{260}); + } +} diff --git a/libc/test/src/__support/memory_size_test.cpp b/libc/test/src/__support/memory_size_test.cpp new file mode 100644 index 0000000000000..98b6a613e62fb --- /dev/null +++ b/libc/test/src/__support/memory_size_test.cpp @@ -0,0 +1,85 @@ +//===-- Unittests for MemorySize ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/memory_size.h" +#include "test/UnitTest/Test.h" + +namespace LIBC_NAMESPACE { +namespace internal { +static inline constexpr size_t SAFE_MEM_SIZE_TEST_LIMIT = + static_cast(cpp::numeric_limits>::max()); + +TEST(LlvmLibcMemSizeTest, Constuction) { + ASSERT_FALSE(SafeMemSize{static_cast(-1)}.valid()); + ASSERT_FALSE(SafeMemSize{static_cast(-2)}.valid()); + ASSERT_FALSE(SafeMemSize{static_cast(-1024 + 33)}.valid()); + ASSERT_FALSE(SafeMemSize{static_cast(-1024 + 66)}.valid()); + ASSERT_FALSE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT + 1}.valid()); + ASSERT_FALSE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT + 13}.valid()); + + ASSERT_TRUE(SafeMemSize{static_cast(1)}.valid()); + ASSERT_TRUE(SafeMemSize{static_cast(1024 + 13)}.valid()); + ASSERT_TRUE(SafeMemSize{static_cast(2048 - 13)}.valid()); + ASSERT_TRUE(SafeMemSize{static_cast(4096 + 1)}.valid()); + ASSERT_TRUE(SafeMemSize{static_cast(8192 - 1)}.valid()); + ASSERT_TRUE(SafeMemSize{static_cast(16384 + 15)}.valid()); + ASSERT_TRUE(SafeMemSize{static_cast(32768 * 3)}.valid()); + ASSERT_TRUE(SafeMemSize{static_cast(65536 * 13)}.valid()); + ASSERT_TRUE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT}.valid()); + ASSERT_TRUE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT - 1}.valid()); + ASSERT_TRUE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT - 13}.valid()); +} + +TEST(LlvmLibcMemSizeTest, Addition) { + auto max = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT}; + auto half = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT / 2}; + auto third = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT / 3}; + + ASSERT_TRUE(half.valid()); + ASSERT_TRUE(third.valid()); + ASSERT_TRUE((half + half).valid()); + ASSERT_TRUE((third + third + third).valid()); + ASSERT_TRUE((half + third).valid()); + + ASSERT_FALSE((max + SafeMemSize{static_cast(1)}).valid()); + ASSERT_FALSE((third + third + third + third).valid()); + ASSERT_FALSE((half + half + half).valid()); +} + +TEST(LlvmLibcMemSizeTest, Multiplication) { + auto max = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT}; + auto half = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT / 2}; + auto third = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT / 3}; + + ASSERT_TRUE((max * SafeMemSize{static_cast(1)}).valid()); + ASSERT_TRUE((max * SafeMemSize{static_cast(0)}).valid()); + + ASSERT_FALSE((max * SafeMemSize{static_cast(2)}).valid()); + ASSERT_FALSE((half * half).valid()); + ASSERT_FALSE((half * SafeMemSize{static_cast(3)}).valid()); + ASSERT_FALSE((third * SafeMemSize{static_cast(4)}).valid()); +} + +TEST(LlvmLibcMemSizeTest, AlignUp) { + size_t sizes[] = { + 0, 1, 8, 13, 60, 97, 128, 1024, 5124, 5120, + }; + for (size_t i = 2; i <= 16; ++i) { + size_t alignment = 1 << i; + for (size_t size : sizes) { + auto safe_size = SafeMemSize{size}; + auto safe_aligned_size = safe_size.align_up(alignment); + ASSERT_TRUE(safe_aligned_size.valid()); + ASSERT_EQ(static_cast(safe_aligned_size) % alignment, size_t{0}); + } + } + auto max = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT}; + ASSERT_FALSE(max.align_up(8).valid()); +} +} // namespace internal +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/search/CMakeLists.txt b/libc/test/src/search/CMakeLists.txt new file mode 100644 index 0000000000000..d624f14430949 --- /dev/null +++ b/libc/test/src/search/CMakeLists.txt @@ -0,0 +1,16 @@ +add_custom_target(libc_search_unittests) +add_libc_unittest( + hsearch_test + SUITE + libc_search_unittests + SRCS + hsearch_test.cpp + DEPENDS + libc.src.search.hsearch_r + libc.src.search.hcreate_r + libc.src.search.hdestroy_r + libc.src.search.hsearch + libc.src.search.hcreate + libc.src.search.hdestroy + libc.src.errno.errno +) diff --git a/libc/test/src/search/hsearch_test.cpp b/libc/test/src/search/hsearch_test.cpp new file mode 100644 index 0000000000000..bc9dea748758a --- /dev/null +++ b/libc/test/src/search/hsearch_test.cpp @@ -0,0 +1,124 @@ +//===-- Unittests for hsearch ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/HashTable/table.h" +#include "src/__support/bit.h" +#include "src/search/hcreate.h" +#include "src/search/hcreate_r.h" +#include "src/search/hdestroy.h" +#include "src/search/hdestroy_r.h" +#include "src/search/hsearch.h" +#include "test/UnitTest/ErrnoSetterMatcher.h" +#include "test/UnitTest/Test.h" +#include + +TEST(LlvmLibcHsearchTest, CreateTooLarge) { + using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; + struct hsearch_data hdata; + ASSERT_THAT(LIBC_NAMESPACE::hcreate(-1), Fails(ENOMEM, 0)); + ASSERT_THAT(LIBC_NAMESPACE::hcreate_r(-1, &hdata), Fails(ENOMEM, 0)); +} + +TEST(LlvmLibcHSearchTest, CreateInvalid) { + using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; + ASSERT_THAT(LIBC_NAMESPACE::hcreate_r(16, nullptr), Fails(EINVAL, 0)); +} + +TEST(LlvmLibcHSearchTest, CreateValid) { + struct hsearch_data hdata; + ASSERT_GT(LIBC_NAMESPACE::hcreate_r(1, &hdata), 0); + LIBC_NAMESPACE::hdestroy_r(&hdata); + + ASSERT_GT(LIBC_NAMESPACE::hcreate(1), 0); + LIBC_NAMESPACE::hdestroy(); +} + +char search_data[] = "1234567890abcdefghijklmnopqrstuvwxyz" + "1234567890abcdefghijklmnopqrstuvwxyz" + "1234567890abcdefghijklmnopqrstuvwxyz" + "1234567890abcdefghijklmnopqrstuvwxyz" + "1234567890abcdefghijklmnopqrstuvwxyz"; +char search_data2[] = + "@@@@@@@@@@@@@@!!!!!!!!!!!!!!!!!###########$$$$$$$$$$^^^^^^&&&&&&&&"; + +constexpr size_t GROUP_SIZE = sizeof(LIBC_NAMESPACE::internal::Group); +constexpr size_t CAP = + LIBC_NAMESPACE::next_power_of_two((GROUP_SIZE + 1) * 8 / 7) / 8 * 7; +static_assert(CAP < sizeof(search_data), "CAP too large"); + +TEST(LlvmLibcHSearchTest, InsertTooMany) { + using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; + ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0); + + for (size_t i = 0; i < CAP; ++i) { + ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, ENTER)->key, + &search_data[i]); + } + ASSERT_THAT(static_cast( + LIBC_NAMESPACE::hsearch({search_data2, nullptr}, ENTER)), + Fails(ENOMEM, static_cast(nullptr))); + LIBC_NAMESPACE::hdestroy(); +} + +TEST(LlvmLibcHSearchTest, NotFound) { + using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; + ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0); + ASSERT_THAT(static_cast( + LIBC_NAMESPACE::hsearch({search_data2, nullptr}, FIND)), + Fails(ESRCH, static_cast(nullptr))); + for (size_t i = 0; i < CAP; ++i) { + ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, ENTER)->key, + &search_data[i]); + } + ASSERT_THAT(static_cast( + LIBC_NAMESPACE::hsearch({search_data2, nullptr}, FIND)), + Fails(ESRCH, static_cast(nullptr))); + LIBC_NAMESPACE::hdestroy(); +} + +TEST(LlvmLibcHSearchTest, Found) { + using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; + ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0); + for (size_t i = 0; i < CAP; ++i) { + ASSERT_EQ(LIBC_NAMESPACE::hsearch( + {&search_data[i], reinterpret_cast(i)}, ENTER) + ->key, + &search_data[i]); + } + for (size_t i = 0; i < CAP; ++i) { + ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, FIND)->data, + reinterpret_cast(i)); + } + LIBC_NAMESPACE::hdestroy(); +} + +TEST(LlvmLibcHSearchTest, OnlyInsertWhenNotFound) { + using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; + ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0); + for (size_t i = 0; i < CAP / 7 * 5; ++i) { + ASSERT_EQ(LIBC_NAMESPACE::hsearch( + {&search_data[i], reinterpret_cast(i)}, ENTER) + ->key, + &search_data[i]); + } + for (size_t i = 0; i < CAP; ++i) { + ASSERT_EQ(LIBC_NAMESPACE::hsearch( + {&search_data[i], reinterpret_cast(1000 + i)}, ENTER) + ->key, + &search_data[i]); + } + for (size_t i = 0; i < CAP / 7 * 5; ++i) { + ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, FIND)->data, + reinterpret_cast(i)); + } + for (size_t i = CAP / 7 * 5; i < CAP; ++i) { + ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, FIND)->data, + reinterpret_cast(1000 + i)); + } + LIBC_NAMESPACE::hdestroy(); +}