Skip to content

Commit

Permalink
[libc++] Don't call key_eq in unordered_map/set rehashing routine
Browse files Browse the repository at this point in the history
As of now containers key_eq might get called when rehashing happens, which is redundant for unique keys containers.

Reviewed By: #libc, philnik, Mordante

Differential Revision: https://reviews.llvm.org/D128021
  • Loading branch information
itrofimow authored and mordante committed Jul 10, 2022
1 parent 393e12b commit 3085e42
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 81 deletions.
14 changes: 14 additions & 0 deletions libcxx/benchmarks/ContainerBenchmarks.h
Expand Up @@ -135,6 +135,20 @@ static void BM_FindRehash(benchmark::State& st, Container c, GenInputs gen) {
}
}

template <class Container, class GenInputs>
static void BM_Rehash(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
c.max_load_factor(3.0);
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(c);
const auto bucket_count = c.bucket_count();
while (st.KeepRunning()) {
c.rehash(bucket_count + 1);
c.rehash(bucket_count);
benchmark::ClobberMemory();
}
}

} // end namespace ContainerBenchmarks

#endif // BENCHMARK_CONTAINER_BENCHMARKS_H
35 changes: 35 additions & 0 deletions libcxx/benchmarks/unordered_set_operations.bench.cpp
Expand Up @@ -104,6 +104,27 @@ struct UInt64Hash2 {
}
};

// The sole purpose of this comparator is to be used in BM_Rehash, where
// we need something slow enough to be easily noticable in benchmark results.
// The default implementation of operator== for strings seems to be a little
// too fast for that specific benchmark to reliably show a noticeable
// improvement, but unoptimized bytewise comparison fits just right.
// Early return is there just for convenience, since we only compare strings
// of equal length in BM_Rehash.
struct SlowStringEq {
SlowStringEq() = default;
inline TEST_ALWAYS_INLINE
bool operator()(const std::string& lhs, const std::string& rhs) const {
if (lhs.size() != rhs.size()) return false;

bool eq = true;
for (size_t i = 0; i < lhs.size(); ++i) {
eq &= lhs[i] == rhs[i];
}
return eq;
}
};

//----------------------------------------------------------------------------//
// BM_Hash
// ---------------------------------------------------------------------------//
Expand Down Expand Up @@ -266,6 +287,20 @@ BENCHMARK_CAPTURE(BM_FindRehash,
std::unordered_set<std::string>{},
getRandomStringInputs)->Arg(TestNumInputs);

//----------------------------------------------------------------------------//
// BM_Rehash
// ---------------------------------------------------------------------------//

BENCHMARK_CAPTURE(BM_Rehash,
unordered_set_string_arg,
std::unordered_set<std::string, std::hash<std::string>, SlowStringEq>{},
getRandomStringInputs)->Arg(TestNumInputs);

BENCHMARK_CAPTURE(BM_Rehash,
unordered_set_int_arg,
std::unordered_set<int>{},
getRandomIntegerInputs<int>)->Arg(TestNumInputs);

///////////////////////////////////////////////////////////////////////////////
BENCHMARK_CAPTURE(BM_InsertDuplicate,
unordered_set_int,
Expand Down
47 changes: 30 additions & 17 deletions libcxx/include/__hash_table
Expand Up @@ -1146,9 +1146,16 @@ public:
#endif

void clear() _NOEXCEPT;
void rehash(size_type __n);
_LIBCPP_INLINE_VISIBILITY void reserve(size_type __n)
{rehash(static_cast<size_type>(ceil(__n / max_load_factor())));}
_LIBCPP_INLINE_VISIBILITY void __rehash_unique(size_type __n) { __rehash<true>(__n); }
_LIBCPP_INLINE_VISIBILITY void __rehash_multi(size_type __n) { __rehash<false>(__n); }
_LIBCPP_INLINE_VISIBILITY void __reserve_unique(size_type __n)
{
__rehash_unique(static_cast<size_type>(ceil(__n / max_load_factor())));
}
_LIBCPP_INLINE_VISIBILITY void __reserve_multi(size_type __n)
{
__rehash_multi(static_cast<size_type>(ceil(__n / max_load_factor())));
}

_LIBCPP_INLINE_VISIBILITY
size_type bucket_count() const _NOEXCEPT
Expand Down Expand Up @@ -1285,7 +1292,8 @@ public:
#endif // _LIBCPP_ENABLE_DEBUG_MODE

private:
void __rehash(size_type __n);
template <bool _UniqueKeys> void __rehash(size_type __n);
template <bool _UniqueKeys> void __do_rehash(size_type __n);

template <class ..._Args>
__node_holder __construct_node(_Args&& ...__args);
Expand Down Expand Up @@ -1790,7 +1798,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare(
}
if (size()+1 > __bc * max_load_factor() || __bc == 0)
{
rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
__rehash_unique(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
size_type(ceil(float(size() + 1) / max_load_factor()))));
}
return nullptr;
Expand Down Expand Up @@ -1862,7 +1870,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(
size_type __bc = bucket_count();
if (size()+1 > __bc * max_load_factor() || __bc == 0)
{
rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
__rehash_multi(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
size_type(ceil(float(size() + 1) / max_load_factor()))));
__bc = bucket_count();
}
Expand Down Expand Up @@ -1956,7 +1964,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(
size_type __bc = bucket_count();
if (size()+1 > __bc * max_load_factor() || __bc == 0)
{
rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
__rehash_multi(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
size_type(ceil(float(size() + 1) / max_load_factor()))));
__bc = bucket_count();
}
Expand Down Expand Up @@ -2004,7 +2012,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const&
__node_holder __h = __construct_node_hash(__hash, _VSTD::forward<_Args>(__args)...);
if (size()+1 > __bc * max_load_factor() || __bc == 0)
{
rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
__rehash_unique(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
size_type(ceil(float(size() + 1) / max_load_factor()))));
__bc = bucket_count();
__chash = __constrain_hash(__hash, __bc);
Expand Down Expand Up @@ -2207,8 +2215,9 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_merge_multi(
#endif // _LIBCPP_STD_VER > 14

template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <bool _UniqueKeys>
void
__hash_table<_Tp, _Hash, _Equal, _Alloc>::rehash(size_type __n)
__hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n)
_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
{
if (__n == 1)
Expand All @@ -2217,7 +2226,7 @@ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
__n = __next_prime(__n);
size_type __bc = bucket_count();
if (__n > __bc)
__rehash(__n);
__do_rehash<_UniqueKeys>(__n);
else if (__n < __bc)
{
__n = _VSTD::max<size_type>
Expand All @@ -2227,13 +2236,14 @@ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
__next_prime(size_t(ceil(float(size()) / max_load_factor())))
);
if (__n < __bc)
__rehash(__n);
__do_rehash<_UniqueKeys>(__n);
}
}

template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <bool _UniqueKeys>
void
__hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc)
__hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc)
{
std::__debug_db_invalidate_all(this);
__pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc();
Expand Down Expand Up @@ -2268,11 +2278,14 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc)
else
{
__next_pointer __np = __cp;
for (; __np->__next_ != nullptr &&
key_eq()(__cp->__upcast()->__value_,
__np->__next_->__upcast()->__value_);
__np = __np->__next_)
;
if _LIBCPP_CONSTEXPR_AFTER_CXX14 (!_UniqueKeys)
{
for (; __np->__next_ != nullptr &&
key_eq()(__cp->__upcast()->__value_,
__np->__next_->__upcast()->__value_);
__np = __np->__next_)
;
}
__pp->__next_ = __np->__next_;
__np->__next_ = __bucket_list_[__chash]->__next_;
__bucket_list_[__chash]->__next_ = __cp;
Expand Down
24 changes: 12 additions & 12 deletions libcxx/include/ext/hash_map
Expand Up @@ -605,7 +605,7 @@ public:
{return __table_.bucket_size(__n);}

_LIBCPP_INLINE_VISIBILITY
void resize(size_type __n) {__table_.rehash(__n);}
void resize(size_type __n) {__table_.__rehash_unique(__n);}

private:
__node_holder __construct_node(const key_type& __k);
Expand All @@ -616,7 +616,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(
size_type __n, const hasher& __hf, const key_equal& __eql)
: __table_(__hf, __eql)
{
__table_.rehash(__n);
__table_.__rehash_unique(__n);
}

template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
Expand All @@ -625,7 +625,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(
const allocator_type& __a)
: __table_(__hf, __eql, __a)
{
__table_.rehash(__n);
__table_.__rehash_unique(__n);
}

template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
Expand All @@ -643,7 +643,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(
const hasher& __hf, const key_equal& __eql)
: __table_(__hf, __eql)
{
__table_.rehash(__n);
__table_.__rehash_unique(__n);
insert(__first, __last);
}

Expand All @@ -654,7 +654,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(
const hasher& __hf, const key_equal& __eql, const allocator_type& __a)
: __table_(__hf, __eql, __a)
{
__table_.rehash(__n);
__table_.__rehash_unique(__n);
insert(__first, __last);
}

Expand All @@ -663,7 +663,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(
const hash_map& __u)
: __table_(__u.__table_)
{
__table_.rehash(__u.bucket_count());
__table_.__rehash_unique(__u.bucket_count());
insert(__u.begin(), __u.end());
}

Expand Down Expand Up @@ -874,15 +874,15 @@ public:
{return __table_.bucket_size(__n);}

_LIBCPP_INLINE_VISIBILITY
void resize(size_type __n) {__table_.rehash(__n);}
void resize(size_type __n) {__table_.__rehash_multi(__n);}
};

template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap(
size_type __n, const hasher& __hf, const key_equal& __eql)
: __table_(__hf, __eql)
{
__table_.rehash(__n);
__table_.__rehash_multi(__n);
}

template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
Expand All @@ -891,7 +891,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap(
const allocator_type& __a)
: __table_(__hf, __eql, __a)
{
__table_.rehash(__n);
__table_.__rehash_multi(__n);
}

template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
Expand All @@ -909,7 +909,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap(
const hasher& __hf, const key_equal& __eql)
: __table_(__hf, __eql)
{
__table_.rehash(__n);
__table_.__rehash_multi(__n);
insert(__first, __last);
}

Expand All @@ -920,7 +920,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap(
const hasher& __hf, const key_equal& __eql, const allocator_type& __a)
: __table_(__hf, __eql, __a)
{
__table_.rehash(__n);
__table_.__rehash_multi(__n);
insert(__first, __last);
}

Expand All @@ -929,7 +929,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap(
const hash_multimap& __u)
: __table_(__u.__table_)
{
__table_.rehash(__u.bucket_count());
__table_.__rehash_multi(__u.bucket_count());
insert(__u.begin(), __u.end());
}

Expand Down

0 comments on commit 3085e42

Please sign in to comment.