Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 4 additions & 48 deletions include/libpy/hash.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <array>
#include <string_view>
#include <tuple>

namespace py {
Expand Down Expand Up @@ -35,59 +36,14 @@ auto hash_tuple(const std::tuple<Ts...>& t) {
return std::apply<decltype(hash_many<Ts...>)>(hash_many, t);
}

namespace detail {
inline std::size_t unaligned_load(const char* p) {
std::size_t result;
__builtin_memcpy(&result, p, sizeof(result));
return result;
}

// Loads n bytes, where 1 <= n < 8.
inline std::size_t load_bytes(const char* p, int n) {
std::size_t result = 0;
--n;
do {
result = (result << 8) + static_cast<unsigned char>(p[n]);
} while (--n >= 0);
return result;
}

inline std::size_t shift_mix(std::size_t v) {
return v ^ (v >> 47);
}

constexpr std::size_t hash_seed = static_cast<std::size_t>(0xc70f6907UL);
constexpr std::size_t hash_mul = (static_cast<std::size_t>(0xc6a4a793UL) << 32UL) +
static_cast<std::size_t>(0x5bd1e995UL);
} // namespace detail

/** Hash a buffer of characters using the same algorithm a libstdc++
`std::hash<std::string>`.
/** Hash a buffer of characters using the same algorithm as
`std::hash<std::string_view>`

@param buf The buffer to hash.
@param len The length of the buffer.
@return The hash of the string.
*/
inline std::size_t hash_buffer(const char* buf, std::size_t len) {
// Remove the bytes not divisible by the sizeof(size_t). This
// allows the main loop to process the data as 64-bit integers.
const int len_aligned = len & ~0x7;
const char* const end = buf + len_aligned;
std::size_t hash = detail::hash_seed ^ (len * detail::hash_mul);
for (const char* p = buf; p != end; p += 8) {
const std::size_t data = detail::shift_mix(detail::unaligned_load(p) *
detail::hash_mul) *
detail::hash_mul;
hash ^= data;
hash *= detail::hash_mul;
}
if ((len & 0x7) != 0) {
const std::size_t data = detail::load_bytes(end, len & 0x7);
hash ^= data;
hash *= detail::hash_mul;
}
hash = detail::shift_mix(hash) * detail::hash_mul;
hash = detail::shift_mix(hash);
return hash;
return std::hash<std::string_view>{}(std::string_view{buf, len});
}
} // namespace py
6 changes: 0 additions & 6 deletions tests/test_hash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,11 @@ std::string random_string(RandomEngine& g) {
}

TEST(hash, buffer) {
#if !defined(_LIBCPP_VERSION)
// XXX: This asserts that `py::hash_buffer` does the same thing as
// `std::hash<std::string>`; however, it is a copy of the libstdc++
// algorithm. libc++ uses a different hash algorithm which doesn't produce
// the same results.
std::mt19937 g(1868655980);

for (std::size_t n = 0; n < 1000; ++n) {
std::string s = random_string(g);
EXPECT_EQ(std::hash<std::string>{}(s), py::hash_buffer(s.data(), s.size()));
}
#endif
}
} // namespace test_hash