From f0be855e7c535a02219b77f98dc3d2f14f834f4c Mon Sep 17 00:00:00 2001 From: Joe Jevnik Date: Mon, 9 Mar 2020 21:42:42 -0400 Subject: [PATCH] ENH: don't vendor the hash implementation --- include/libpy/hash.h | 52 ++++---------------------------------------- tests/test_hash.cc | 6 ----- 2 files changed, 4 insertions(+), 54 deletions(-) diff --git a/include/libpy/hash.h b/include/libpy/hash.h index b98ab7f7..a24a9d7c 100644 --- a/include/libpy/hash.h +++ b/include/libpy/hash.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace py { @@ -35,59 +36,14 @@ auto hash_tuple(const std::tuple& t) { return std::apply)>(hash_many, t); } -namespace detail { -inline std::size_t unaligned_load(const char* p) { - std::size_t result; - __builtin_memcpy(&result, p, sizeof(result)); - return result; -} - -// Loads n bytes, where 1 <= n < 8. -inline std::size_t load_bytes(const char* p, int n) { - std::size_t result = 0; - --n; - do { - result = (result << 8) + static_cast(p[n]); - } while (--n >= 0); - return result; -} - -inline std::size_t shift_mix(std::size_t v) { - return v ^ (v >> 47); -} - -constexpr std::size_t hash_seed = static_cast(0xc70f6907UL); -constexpr std::size_t hash_mul = (static_cast(0xc6a4a793UL) << 32UL) + - static_cast(0x5bd1e995UL); -} // namespace detail - -/** Hash a buffer of characters using the same algorithm a libstdc++ - `std::hash`. +/** Hash a buffer of characters using the same algorithm as + `std::hash` @param buf The buffer to hash. @param len The length of the buffer. @return The hash of the string. */ inline std::size_t hash_buffer(const char* buf, std::size_t len) { - // Remove the bytes not divisible by the sizeof(size_t). This - // allows the main loop to process the data as 64-bit integers. - const int len_aligned = len & ~0x7; - const char* const end = buf + len_aligned; - std::size_t hash = detail::hash_seed ^ (len * detail::hash_mul); - for (const char* p = buf; p != end; p += 8) { - const std::size_t data = detail::shift_mix(detail::unaligned_load(p) * - detail::hash_mul) * - detail::hash_mul; - hash ^= data; - hash *= detail::hash_mul; - } - if ((len & 0x7) != 0) { - const std::size_t data = detail::load_bytes(end, len & 0x7); - hash ^= data; - hash *= detail::hash_mul; - } - hash = detail::shift_mix(hash) * detail::hash_mul; - hash = detail::shift_mix(hash); - return hash; + return std::hash{}(std::string_view{buf, len}); } } // namespace py diff --git a/tests/test_hash.cc b/tests/test_hash.cc index c8f4e96f..5c586a89 100644 --- a/tests/test_hash.cc +++ b/tests/test_hash.cc @@ -18,17 +18,11 @@ std::string random_string(RandomEngine& g) { } TEST(hash, buffer) { -#if !defined(_LIBCPP_VERSION) - // XXX: This asserts that `py::hash_buffer` does the same thing as - // `std::hash`; however, it is a copy of the libstdc++ - // algorithm. libc++ uses a different hash algorithm which doesn't produce - // the same results. std::mt19937 g(1868655980); for (std::size_t n = 0; n < 1000; ++n) { std::string s = random_string(g); EXPECT_EQ(std::hash{}(s), py::hash_buffer(s.data(), s.size())); } -#endif } } // namespace test_hash