From 118e9f1aa375393393805156501efe52931c39d7 Mon Sep 17 00:00:00 2001 From: Eddie Date: Sun, 17 Dec 2023 00:21:49 -0800 Subject: [PATCH 1/5] Needed since vscode has many glitches if building outside sources --- .gitignore | 5 +++++ inc/zoo/{traits => tr}/is_container.h | 0 2 files changed, 5 insertions(+) create mode 100644 .gitignore rename inc/zoo/{traits => tr}/is_container.h (100%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..64d2f48d --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +# Vscode does not like to build outside of the source tree +# (multiple glitches) + +.vscode +test/.vscode diff --git a/inc/zoo/traits/is_container.h b/inc/zoo/tr/is_container.h similarity index 100% rename from inc/zoo/traits/is_container.h rename to inc/zoo/tr/is_container.h From 19e959857fb9a1a7c953e9dd2d800b3f28a49847 Mon Sep 17 00:00:00 2001 From: Eddie Date: Sun, 17 Dec 2023 13:48:12 -0800 Subject: [PATCH 2/5] Fixes MSVC build bugs --- compiler_bugs/msvc/sfinae.cpp | 38 + inc/zoo/AlignedStorage.h | 12 +- inc/zoo/algorithm/cfs.h | 20 +- inc/zoo/algorithm/quicksort.h | 260 ++--- inc/zoo/map/RobinHood.h | 1272 ++++++++++++------------- inc/zoo/map/RobinHoodAlt.h | 388 ++++---- inc/zoo/meta/log.h | 31 +- inc/zoo/meta/popcount.h | 5 + inc/zoo/swar/SWAR.h | 39 +- inc/zoo/{tr => traits}/is_container.h | 0 inc/zoo/util/container_insertion.h | 86 +- inc/zoo/util/range_equivalence.h | 92 +- test/AlignedStorage.cpp | 5 +- test/CMakeLists.txt | 222 +++-- test/FunctionPolicy.cpp | 1 + test/inc/zoo/var.h | 2 +- test/inc/zoo/variant.h | 4 +- test/swar/BasicOperations.cpp | 12 +- 18 files changed, 1304 insertions(+), 1185 deletions(-) create mode 100644 compiler_bugs/msvc/sfinae.cpp rename inc/zoo/{tr => traits}/is_container.h (100%) diff --git a/compiler_bugs/msvc/sfinae.cpp b/compiler_bugs/msvc/sfinae.cpp new file mode 100644 index 00000000..5d981b19 --- /dev/null +++ b/compiler_bugs/msvc/sfinae.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + +template 
+constexpr auto Constructible_v = std::is_constructible_v; + +template +struct ATemplate { + Q space_; + + template + constexpr static auto FitsInSpace_v = sizeof(T) <= sizeof(Q); + + template + std::enable_if_t< + FitsInSpace_v + && + #ifdef TRIGGER_MSVC_SFINAE_BUG + Constructible_v + #else + std::is_constructible_v + #endif + , + T * + > + sfinaeFunction(Args &&...args) { + T *rv = new(static_cast(&space_)) T(std::forward(args)...); + return rv; + } + +}; + +auto triggerError(ATemplate &m) { + return m.sfinaeFunction(nullptr); +} + +int main(int, const char *[]) { return 0; } diff --git a/inc/zoo/AlignedStorage.h b/inc/zoo/AlignedStorage.h index 4689aeca..7b3a2f31 100644 --- a/inc/zoo/AlignedStorage.h +++ b/inc/zoo/AlignedStorage.h @@ -39,7 +39,13 @@ struct Constructible: {}; template -constexpr auto Constructible_v = Constructible::value; +constexpr +#ifndef _MSC_VER + auto +#else + bool +#endif +Constructible_v = Constructible::value; template void destroy(T &t) noexcept { t.~T(); } @@ -122,13 +128,13 @@ struct AlignedStorage { template #define PP_ZOO_BUILD_EXPRESSION \ - impl::build(*as(), std::forward(args)...) - auto build(Args &&...args) noexcept(noexcept(PP_ZOO_BUILD_EXPRESSION)) -> + impl::build(*this->as(), std::forward(args)...) 
std::enable_if_t< SuitableType() && impl::Constructible_v, T * > + build(Args &&...args) noexcept(noexcept(PP_ZOO_BUILD_EXPRESSION)) { PP_ZOO_BUILD_EXPRESSION; #undef PP_ZOO_BUILD_EXPRESSION diff --git a/inc/zoo/algorithm/cfs.h b/inc/zoo/algorithm/cfs.h index 505250ea..9fd92583 100644 --- a/inc/zoo/algorithm/cfs.h +++ b/inc/zoo/algorithm/cfs.h @@ -1,29 +1,24 @@ #ifndef ZOO_CFS_CACHE_FRIENDLY_SEARCH #define ZOO_CFS_CACHE_FRIENDLY_SEARCH -#include +#include "zoo/algorithm/less.h" +#include "zoo/meta/log.h" #ifndef SIMPLIFY_INCLUDES // because of std::declval needed to default comparator #include // because of std::decay needed to decay deferenced iterator #include + +#include #endif namespace zoo { -constexpr unsigned long long log2Floor(unsigned long long arg) { - return 63 - __builtin_clzll(arg); -} - -constexpr unsigned long long log2Ceiling(unsigned long long arg) { - return 63 - __builtin_clzll(2*arg - 1); -} - template void transformToCFS(Output output, Input base, Input end) { auto s = end - base; - auto logP = log2Floor(s + 1); // n + auto logP = meta::logFloor(s + 1); // n auto power2 = 1ul << logP; auto fullSubtreeSize = power2 - 1; // Full tree has (2^n) - 1 elements @@ -52,7 +47,8 @@ void transformToCFS(Output output, Input base, Input end) { } // now just write the excess leaves - for(auto ndx = 0ul, top = 2*excess; ndx < top; ndx += 2) { + auto top = 2*excess; + for(auto ndx = 0ll; ndx < top; ndx += 2) { *output++ = *(base + ndx); } } @@ -167,7 +163,7 @@ struct ValidResult { template auto validHeap( - I base, int current, int max, Comparator c = Comparator{} + I base, long current, long max, Comparator c = Comparator{} ) -> ValidResult { for(;;) { auto higherSubtree = current*2 + 2; diff --git a/inc/zoo/algorithm/quicksort.h b/inc/zoo/algorithm/quicksort.h index a53669c9..234851b0 100644 --- a/inc/zoo/algorithm/quicksort.h +++ b/inc/zoo/algorithm/quicksort.h @@ -1,130 +1,130 @@ -#ifndef ZOO_QUICKSORT -#define ZOO_QUICKSORT - -#include // for moveRotate 
- -#include - -#include // for temporary storage -#include - -namespace zoo { - -template -struct ImplicitPivotResult { - FI pivot_; - long bias_; -}; - -/// \tparam FI is a forward iterator -/// \pre b != e -template -auto implicitPivotPartition(FI b, FI e, Comparison cmp) -> - ImplicitPivotResult -{ - auto bias = 0; - auto pivot = b++; - /*if(e == b) { return pivot; } - if(cmp(*b, *pivot)) { - auto third = next(b); - if(third == e) { - moveRotation(*pivot, *b); - return pivot; - } - }*/ - for(; b != e; ++b) { - // invariant: ..., L0, P == *pivot, G0, G1, ... Gn, *b - // where Lx means lower-than-pivot and Gx higher-equal-to-pivot - if(!cmp(*b, *pivot)) { - ++bias; - continue; - } - --bias; - // ..., L1, P == *pivot, G0, G1, ..., Gn, L0 == *b, X0, ... - // The pivot is greater than the element: - // insert *b into the lower partition: - // 1. *b goes into the pivot position - // 2. the pivot increases by one - // 3. the element at pivot + 1, the new pivot, must be greater - // than or equal to any Lx, the value of *pivot satisfies this - // property. - // These requirements can be satisfied by rotating the elements - // at positions (pivot, b, pivot + 1) - // ..., L1, L0, P == *pivot, G1, ..., Gn, G0 == *b, X0, ... 
- auto oldPivot = pivot++; - if(b == pivot) { - moveRotation(*oldPivot, *pivot); - } else { - moveRotation(*oldPivot, *b, *pivot); - } - // tmp = *b, *b = *1, *1 = *0, *0 = tmp - /*moveRotation(*oldPivot, *b); - moveRotation(*b, *pivot);*/ - } - return {pivot, bias}; -} - -template -void quicksort(I begin, I end, Comparison cmp = Comparison{}) { - if(begin == end) { return; } - - constexpr static const auto FrameCount = 64; - struct Frame { - I b, e; - }; - std::array stack; - auto index = 0; - - for(;;) { - auto result = implicitPivotPartition(begin, end, cmp); - auto pivot = result.pivot_; - auto bias = result.bias_; - auto higherBegin = next(pivot); - if(higherBegin == end) { // no higher-recursion needed - if(begin != pivot) { - end = pivot; // then just do lower recursion - continue; // without leaving a frame - } - // there is no lower-recursion either - if(!index) { return; } - auto &frame = stack[--index]; - begin = frame.b; - end = frame.e; - continue; - } - // higher recursion needed - if(begin == pivot) { // no lower recursion needed - begin = higherBegin; // becomes the higher recursion - continue; - } - // both lower and higher recursions needed, make frame for the larger - // partition: - // The smaller partition is less than or equal to half the elements: - // size(smaller) <= size/2 => depth of recursion <= log2(N) - if(0 < bias) { // lower partition is smaller - stack[index] = { higherBegin, end }; - end = pivot; - } else { // higher partition is smaller - stack[index] = { begin, pivot }; - begin = higherBegin; - } - if(FrameCount <= ++index) { - throw std::runtime_error("quicksort stack exhausted"); - } - } -} - -template -bool is_sorted(FI begin, FI end, Comparison cmp = Comparison{}) { - if(begin == end) { return true; } - auto old = begin++; - while(begin != end) { - if(not cmp(*old, *begin)) { return false; } - old = begin++; - } - return true; -} - -} - -#endif +#ifndef ZOO_QUICKSORT +#define ZOO_QUICKSORT + +#include // for moveRotate + 
+#include + +#include // for temporary storage +#include + +namespace zoo { + +template +struct ImplicitPivotResult { + FI pivot_; + long bias_; +}; + +/// \tparam FI is a forward iterator +/// \pre b != e +template +auto implicitPivotPartition(FI b, FI e, Comparison cmp) -> + ImplicitPivotResult +{ + auto bias = 0; + auto pivot = b++; + /*if(e == b) { return pivot; } + if(cmp(*b, *pivot)) { + auto third = next(b); + if(third == e) { + moveRotation(*pivot, *b); + return pivot; + } + }*/ + for(; b != e; ++b) { + // invariant: ..., L0, P == *pivot, G0, G1, ... Gn, *b + // where Lx means lower-than-pivot and Gx higher-equal-to-pivot + if(!cmp(*b, *pivot)) { + ++bias; + continue; + } + --bias; + // ..., L1, P == *pivot, G0, G1, ..., Gn, L0 == *b, X0, ... + // The pivot is greater than the element: + // insert *b into the lower partition: + // 1. *b goes into the pivot position + // 2. the pivot increases by one + // 3. the element at pivot + 1, the new pivot, must be greater + // than or equal to any Lx, the value of *pivot satisfies this + // property. + // These requirements can be satisfied by rotating the elements + // at positions (pivot, b, pivot + 1) + // ..., L1, L0, P == *pivot, G1, ..., Gn, G0 == *b, X0, ... 
+ auto oldPivot = pivot++; + if(b == pivot) { + moveRotation(*oldPivot, *pivot); + } else { + moveRotation(*oldPivot, *b, *pivot); + } + // tmp = *b, *b = *1, *1 = *0, *0 = tmp + /*moveRotation(*oldPivot, *b); + moveRotation(*b, *pivot);*/ + } + return {pivot, bias}; +} + +template +void quicksort(I begin, I end, Comparison cmp = Comparison{}) { + if(begin == end) { return; } + + constexpr static const auto FrameCount = 64; + struct Frame { + I b, e; + }; + std::array stack; + auto index = 0; + + for(;;) { + auto result = implicitPivotPartition(begin, end, cmp); + auto pivot = result.pivot_; + auto bias = result.bias_; + auto higherBegin = next(pivot); + if(higherBegin == end) { // no higher-recursion needed + if(begin != pivot) { + end = pivot; // then just do lower recursion + continue; // without leaving a frame + } + // there is no lower-recursion either + if(!index) { return; } + auto &frame = stack[--index]; + begin = frame.b; + end = frame.e; + continue; + } + // higher recursion needed + if(begin == pivot) { // no lower recursion needed + begin = higherBegin; // becomes the higher recursion + continue; + } + // both lower and higher recursions needed, make frame for the larger + // partition: + // The smaller partition is less than or equal to half the elements: + // size(smaller) <= size/2 => depth of recursion <= log2(N) + if(0 < bias) { // lower partition is smaller + stack[index] = { higherBegin, end }; + end = pivot; + } else { // higher partition is smaller + stack[index] = { begin, pivot }; + begin = higherBegin; + } + if(FrameCount <= ++index) { + throw std::runtime_error("quicksort stack exhausted"); + } + } +} + +template +bool is_sorted(FI begin, FI end, Comparison cmp = Comparison{}) { + if(begin == end) { return true; } + auto old = begin++; + while(begin != end) { + if(not cmp(*old, *begin)) { return false; } + old = begin++; + } + return true; +} + +} + +#endif diff --git a/inc/zoo/map/RobinHood.h b/inc/zoo/map/RobinHood.h index 
a58235a1..f25c805e 100644 --- a/inc/zoo/map/RobinHood.h +++ b/inc/zoo/map/RobinHood.h @@ -1,636 +1,636 @@ -#ifndef ZOO_ROBINHOOD_H -#define ZOO_ROBINHOOD_H - -#include "zoo/map/RobinHoodUtil.h" -#include "zoo/AlignedStorage.h" - -#ifndef ZOO_CONFIG_DEEP_ASSERTIONS - #define ZOO_CONFIG_DEEP_ASSERTIONS 0 -#endif - -#include -#include -#include -#include - -#if ZOO_CONFIG_DEEP_ASSERTIONS - #include -#endif - -namespace zoo { -namespace rh { - -struct RobinHoodException: std::runtime_error { - using std::runtime_error::runtime_error; -}; - -struct MaximumProbeSequenceLengthExceeded: RobinHoodException { - using RobinHoodException::RobinHoodException; -}; -struct RelocationStackExhausted: RobinHoodException { - using RobinHoodException::RobinHoodException; -}; - -template -struct RH_Backend { - using Metadata = impl::Metadata; - - constexpr static inline auto Width = Metadata::NBits; - Metadata *md_; - - /*! \brief SWAR check for a potential match - The invariant in Robin Hood is that the element being looked for, the "needle", is "richer" - than the elements already present, the "haystack". - "Richer" means that the PSL is smaller. - A PSL of 0 can only happen in the haystack, to indicate the slot is empty, this is "richest". - The first time the needle has a PSL greater than the haystacks' means the matching will fail, - because the hypothetical prior insertion would have "stolen" that slot. - If there is an equal, it would start a sequence of potential matches. To determine an actual match: - 1. A cheap SWAR check of hoisted hashes - 2. If there are still potential matches (now also the hoisted hashes), fall back to non-SWAR, - or iterative and expensive "deep equality" test for each potential match, outside of this function - - The above makes it very important to detect the first case in which the PSL is greater equal to the needle. - We call this the "deadline". 
- Because we assume the LITTLE ENDIAN byte ordering, the first element would be the least significant - non-false Boolean SWAR. - - Note about performance: - Every "early exit" faces a big justification hurdle, the proportion of cases - they intercept to be large enough that the branch prediction penalty of the entropy introduced is - overcompensated. - */ - - /// Boolean SWAR true in the first element/lane of the needle strictly poorer than its corresponding - /// haystack - constexpr static auto - firstInvariantBreakage(Metadata needle, Metadata haystack) { - auto nPSL = needle.PSLs(); - auto hPSL = haystack.PSLs(); - auto theyKeepInvariant = - greaterEqual_MSB_off(hPSL, nPSL); - // BTW, the reason to have encoded the PSLs in the least - // significant bits is to be able to call the cheaper version - // _MSB_off here - - auto theyBreakInvariant = not theyKeepInvariant; - // because we make the assumption of LITTLE ENDIAN byte ordering, - // we're interested in the elements up to the first haystack-richer - auto firstBreakage = swar::isolateLSB(theyBreakInvariant.value()); - return firstBreakage; - } - - // This should be more generic: if PSLs breach a broadcast PSL, saturate - // This should be more generic: if a SWAR breaches a SWAR condition, saturate. - constexpr static auto - needlePSLSaturation(Metadata nPSL) { - // create a saturator for max PSL. If any needle saturates, all later PSLs will be set to saturated. - constexpr auto saturatedPSL = broadcast(Metadata(Metadata::MaxPSL)); - //auto nPSL = needle.PSLs(); - auto saturation = greaterEqual_MSB_off(nPSL, saturatedPSL); - auto invertSatMask = ((swar::isolateLSB(saturation.value()) - 1) ); - auto satMask = (~(swar::isolateLSB(saturation.value()) - 1) ); - //if (not bool(saturation)) return std::tuple{nPSL, false}; - // Least sig lane is saturated, all more sig must be made saturated. 
- auto needlePSLsToSaturate = Metadata{satMask & saturatedPSL.value()}; - // addition might have overflown nPSL before entering function - return std::tuple{Metadata{nPSL.PSLs() | needlePSLsToSaturate}, bool(saturation)}; // saturated at any point, last swar to check. - } - - constexpr static impl::MatchResult - potentialMatches( - Metadata needle, Metadata haystack - ) noexcept { - // We need to determine if there are potential matches to consider - auto sames = equals(needle, haystack); - auto deadline = firstInvariantBreakage(needle, haystack); - // In a valid haystack, the PSLs can grow at most by 1 per entry. - // If a PSL is richer than the needle in any place, because the - // needle, by construction, always grows at least by 1 per entry, - // then the PSL won't be equal again. - // There is no need to filter potential matches using the deadline - // as previous versions of the code did. - return { - deadline, - sames - }; - } - - /*! \brief converts the given starting PSL and reduced hash code into a SWAR-ready needle - - The given needle would have a PSL as the starting (PSL + 1) in the first slot, the "+ 1" is because - the count starts at 1, in this way, a haystack PSL of 0 is always "richer" - */ - constexpr static auto makeNeedle(U startingPSL, U hoistedHash) { - constexpr auto Ones = meta::BitmaskMaker::value; - constexpr auto Progression = Ones * Ones; - auto core = startingPSL | (hoistedHash << PSL_Bits); - auto broadcasted = broadcast(Metadata(core)); - auto startingPSLmadePotentialPSLs = Metadata(Progression) + broadcasted; - return startingPSLmadePotentialPSLs; - } - - template - inline constexpr - std::tuple - findMisaligned_assumesSkarupkeTail( - U hoistedHash, int homeIndex, const KeyComparer &kc - ) const noexcept __attribute__((always_inline)); -}; - -template -template -inline constexpr -std::tuple::Metadata> -RH_Backend::findMisaligned_assumesSkarupkeTail( - U hoistedHash, int homeIndex, const KeyComparer &kc - ) const noexcept { - auto 
misalignment = homeIndex % Metadata::NSlots; - auto baseIndex = homeIndex / Metadata::NSlots; - auto base = this->md_ + baseIndex; - - constexpr auto Ones = meta::BitmaskMaker::value; - constexpr auto Progression = Metadata{Ones * Ones}; - constexpr auto AllNSlots = - Metadata{meta::BitmaskMaker::value}; - MisalignedGenerator_Dynamic p(base, int(Metadata::NBits * misalignment)); - auto index = homeIndex; - auto needle = makeNeedle(0, hoistedHash); - - for(;;) { - auto hay = *p; - auto result = potentialMatches(needle, hay); - auto positives = result.potentialMatches; - while(positives.value()) { - auto matchSubIndex = positives.lsbIndex(); - auto matchIndex = index + matchSubIndex; - // Possible specialist optimization to kick off all possible - // matches to an array (like chaining evict) and check them - // later. - if(kc(matchIndex)) { - return std::tuple(matchIndex, U(0), Metadata(0)); - } - positives = Metadata{swar::clearLSB(positives.value())}; - } - auto deadline = result.deadline; - if(deadline) { - // The deadline is relative to the misalignment. - // To build an absolute deadline, there are two cases: - // the bit falls in the first SWAR or the second SWAR. - // The same applies for needle. - // in general, for example a misaglignment of 6: - // { . | . | . | . | . | . | . | .}{ . | . | . | . | . | . | . | . } - // { a | b | c | d | e | f | g | h } - // shift left (to higher bits) by the misalignment - // { 0 | 0 | 0 | 0 | 0 | 0 | a | b } - // shift right (to lower bits) by NSlots - misalignment: - // { c | d | e | f | g | h | 0 | 0 } - // One might hope undefined behavior might be reasonable (zero - // result, unchanged result), but ARM proves that undefined - // behavior is indeed undefined, so we do our right shift as a - // double: shift by n-1, then shift by 1. 
- auto mdd = Metadata{deadline}; - auto toAbsolute = [](auto v, auto ma) { - auto shiftedLeft = v.shiftLanesLeft(ma); - auto shiftedRight = - v.shiftLanesRight(Metadata::NSlots - ma - 1).shiftLanesRight(1); - return Metadata{shiftedLeft | shiftedRight}; - }; - auto position = index + Metadata{deadline}.lsbIndex(); - return - std::tuple( - position, - toAbsolute(mdd, misalignment).value(), - toAbsolute(needle, misalignment) - ); - } - // Skarupke's tail allows us to not have to worry about the end - // of the metadata - ++p; - index += Metadata::NSlots; - needle = needle + AllNSlots; - } - } - -template -struct KeyValuePairWrapper { - using type = std::pair; - AlignedStorageFor pair_; - - template - void build(Initializers &&...izers) - noexcept(noexcept(pair_.template build(std::forward(izers)...))) - { - pair_.template build(std::forward(izers)...); - } - - template - KeyValuePairWrapper &operator=(RHS &&rhs) - noexcept(noexcept(std::declval() = std::forward(rhs))) - { - *pair_.template as() = std::forward(rhs); - return *this; - } - - void destroy() noexcept { pair_.template destroy(); } - - auto valuePtr() const noexcept { - auto deconstified = const_cast(this); - return deconstified->pair_.template as(); - } - - auto &value() noexcept { return *valuePtr(); } - const auto &value() const noexcept { return const_cast(this)->value(); } -}; - -template< - typename K, - typename MV, - size_t RequestedSize_, - int PSL_Bits, int HashBits, - typename Hash = std::hash, - typename KE = std::equal_to, - typename U = std::uint64_t, - typename Scatter = FibonacciScatter, - typename RangeReduce = LemireReduce, - typename HashReduce = TopHashReducer -> -struct RH_Frontend_WithSkarupkeTail { - using Backend = RH_Backend; - using MD = typename Backend::Metadata; - - constexpr static inline auto RequestedSize = RequestedSize_; - constexpr static inline auto LongestEncodablePSL = (1 << PSL_Bits); - constexpr static inline auto WithTail = - RequestedSize + - LongestEncodablePSL // 
the Skarupke tail - ; - constexpr static inline auto SWARCount = - ( - WithTail + - MD::NSlots - 1 // to calculate the ceiling rounding - ) / MD::NSlots - ; - constexpr static inline auto SlotCount = SWARCount * MD::NSlots; - constexpr static inline auto HighestSafePSL = - LongestEncodablePSL - MD::NSlots - 1; - - using MetadataCollection = std::array; - using value_type = std::pair; - - MetadataCollection md_; - /// \todo Scatter key and value in a flavor - std::array, SlotCount> values_; - size_t elementCount_; - - RH_Frontend_WithSkarupkeTail() noexcept: elementCount_(0) { - for(auto &mde: md_) { mde = MD{0}; } - } - - template - void traverse(Callable &&c) const { - for(size_t swarIndex = 0; swarIndex < SWARCount; ++swarIndex) { - auto PSLs = md_[swarIndex].PSLs(); - auto occupied = booleans(PSLs); - while(occupied) { - auto intraIndex = occupied.lsbIndex(); - c(swarIndex, intraIndex); - occupied = occupied.clearLSB(); - } - } - } - - ~RH_Frontend_WithSkarupkeTail() { - traverse([thy=this](std::size_t sI, std::size_t intra) { - thy->values_[intra + sI * MD::NSlots].destroy(); - }); - } - - RH_Frontend_WithSkarupkeTail(const RH_Frontend_WithSkarupkeTail &model): - RH_Frontend_WithSkarupkeTail() - { - model.traverse([thy=this,other=&model](std::size_t sI, std::size_t intra) { - auto index = intra + sI * MD::NSlots; - thy->values_[index].build(other->values_[index].value()); - thy->md_[sI] = thy->md_[sI].blitElement(other->md_[sI], intra); - ++thy->elementCount_; - }); - } - - RH_Frontend_WithSkarupkeTail(RH_Frontend_WithSkarupkeTail &&donor) noexcept: - md_(donor.md_), elementCount_(donor.elementCount_) - { - traverse([thy=this, other=&donor](std::size_t sI, std::size_t intra) { - auto index = intra + sI * MD::NSlots; - thy->values_[index].build(std::move(other->values_[index].value())); - }); - } - - - auto findParameters(const K &k) const noexcept { - auto [hoisted, homeIndex] = - findBasicParameters< - K, RequestedSize, HashBits, U, - Hash, Scatter, 
RangeReduce, HashReduce - >(k); - return - std::tuple{ - hoisted, - homeIndex, - [thy = this, &k](size_t ndx) noexcept { - return KE{}(thy->values_[ndx].value().first, k); - } - }; - } - - template - auto insert(ValuteTypeCompatible &&val) { - auto &k = val.first; - auto &mv = val.second; - auto [hoistedT, homeIndexT, kc] = findParameters(k); - auto hoisted = hoistedT; - auto homeIndex = homeIndexT; - auto thy = const_cast(this); - Backend be{thy->md_.data()}; - auto [iT, deadlineT, needleT] = - be.findMisaligned_assumesSkarupkeTail(hoisted, homeIndex, kc); - auto index = iT; - if(HighestSafePSL < index - homeIndex) { - throw MaximumProbeSequenceLengthExceeded("Scanning for eviction, from finding"); - } - auto deadline = deadlineT; - if(!deadline) { return std::pair{iterator(values_.data() + index), false}; } - auto needle = needleT; - auto rv = - insertionEvictionChain( - index, deadline, needle, - std::forward(val) - ); - ++elementCount_; - return rv; - } - - // Do the chain of relocations - // From this point onward, the hashes don't matter except for the - // updates to the metadata, the relocations - template - auto insertionEvictionChain( - std::size_t index, - U deadline, - MD needle, - VTC &&val - ) { - auto &k = val.first; - auto &mv = val.second; - auto swarIndex = index / MD::Lanes; - auto intraIndex = index % MD::Lanes; - auto mdp = this->md_.data() + swarIndex; - - // Because we have not decided about strong versus basic exception - // safety guarantee, for the time being we will just put a very large - // number here. 
- constexpr auto MaxRelocations = 150000; - std::array relocations; - std::array newElements; - auto relocationsCount = 0; - auto elementToInsert = needle.at(intraIndex); - - // The very last element in the metadata will always have a psl of 0 - // this serves as a sentinel for insertions, the only place to make - // sure the table has not been exhausted is an eviction chain that - // ends in the sentinel - // Also, the encoding for the PSL may be exhausted - for(;;) { - // Loop invariant: - // deadline, index, swarIndex, intraIndex, elementToInsert correct - // mdp points to the haystack that gave the deadline - auto md = *mdp; - auto evictedPSL = md.PSLs().at(intraIndex); - if(0 == evictedPSL) { // end of eviction chain! - if(SlotCount - 1 <= index) { - throw MaximumProbeSequenceLengthExceeded("full table"); - } - if(0 == relocationsCount) { // direct build of a new value - values_[index].build( - std::piecewise_construct, - std::tuple(std::forward(val).first), - std::tuple(std::forward(val).second) - ); - *mdp = mdp->blitElement(intraIndex, elementToInsert); - return std::pair{iterator(values_.data() + index), true}; - } - // the last element is special because it is a - // move-construction, not a move-assignment - --relocationsCount; - auto fromIndex = relocations[relocationsCount]; - values_[index].build( - std::move(values_[fromIndex].value()) - ); - md_[swarIndex] = - md_[swarIndex].blitElement( - intraIndex, elementToInsert - ); - elementToInsert = newElements[relocationsCount]; - index = fromIndex; - swarIndex = index / MD::NSlots; - intraIndex = index % MD::NSlots; - // do the pair relocations - while(relocationsCount--) { - fromIndex = relocations[relocationsCount]; - values_[index].value() = - std::move(values_[fromIndex].value()); - md_[swarIndex] = - md_[swarIndex].blitElement(intraIndex, elementToInsert); - elementToInsert = newElements[relocationsCount]; - index = fromIndex; - swarIndex = index / MD::NSlots; - intraIndex = index % MD::NSlots; - } - 
values_[index].value() = std::forward(val); - md_[swarIndex] = - md_[swarIndex].blitElement(intraIndex, elementToInsert); - return std::pair{iterator(values_.data() + index), true}; - } - if(HighestSafePSL < evictedPSL) { - throw MaximumProbeSequenceLengthExceeded("Encoding insertion"); - } - - // evict the "deadline" element: - // first, insert the element in its place (it "stole") - // find the place for the evicted: when Robin Hood breaks again. - - // for this search, we need to make a search needle with only - // the PSL being evicted. - - // "push" the index of the element that will be evicted - relocations[relocationsCount] = index; - // we have a place for the element being inserted, at this index - newElements[relocationsCount++] = elementToInsert; - if(MaxRelocations <= relocationsCount) { - throw RelocationStackExhausted("Relocation Stack"); - } - - // now the insertion will be for the old metadata entry - elementToInsert = md.hashes().at(intraIndex); - - // now, where should the evicted element go to? - // assemble a new needle - - // Constants relevant for the rest - constexpr auto Ones = meta::BitmaskMaker::value; - // | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | - constexpr auto ProgressionFromOne = MD(Ones * Ones); - // | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | - constexpr auto ProgressionFromZero = - MD(ProgressionFromOne - MD(Ones)); - // | 0 | 1 | 2 | 3 | ... | 7 | - constexpr auto BroadcastSWAR_ElementCount = - MD(meta::BitmaskMaker::value); - // | 8 | 8 | 8 | 8 | ... | 8 | - constexpr auto SWARIterationAddendumBase = - ProgressionFromZero + BroadcastSWAR_ElementCount; - // | 8 | 9 | ... | 15 | - - auto broadcastedEvictedPSL = broadcast(MD(evictedPSL)); - auto evictedPSLWithProgressionFromZero = - broadcastedEvictedPSL + ProgressionFromZero; - // | ePSL+0 | ePSL+1 | ePSL+2 | ePSL+3 | ... 
| ePSL+7 | - auto needlePSLs = - evictedPSLWithProgressionFromZero.shiftLanesLeft(intraIndex); - // zeroes make the new needle - // "richer" in all elements lower than the deadline - // because of the progression starts with 0 - // the "deadline" element will have equal PSL, not - // "poorer". - // assuming the deadline happened in the index 2: - // needlePSLs = | 0 | 0 | ePSL | ePSL+1 | ... | ePSL+5 | - // find the place for the new needle, without checking the keys. - auto haystackPSLs = md.PSLs(); - // haystack < needle => !(haystack >= needle) - auto breaksRobinHood = - not greaterEqual_MSB_off(haystackPSLs, needlePSLs); - if(!bool(breaksRobinHood)) { - // no place for the evicted element found in this swar. - // increment the PSLs in the needle to check the next haystack - - // for the next swar, we will want (continuing the assumption - // of the deadline happening at index 2) - // old needle: - // | 0 | 0 | ePSL | ePSL+1 | ... | ePSL+5 | - // desired new needle PSLs: - // | ePSL+6 | ePSL+7 | ePSL+8 | ePSL+9 | ... | ePSL+13 | - // from evictedPSLWithProgressionFromZero, - // shift "right" NLanes - intraIndex (keep the last two lanes): - // | ePSL+6 | ePSL+7 | 0 | ... | 0 | - auto lowerPart = - evictedPSLWithProgressionFromZero. - shiftLanesRight(MD::Lanes - intraIndex - 1). - shiftLanesRight(1); - // the other part, of +8 onwards, is BroadcastElementCount, - // shifted: - // | 8 | 8 | 8 | 8 | ... | 8 | - // shifted two lanes: - // | 0 | 0 | 8 | 8 | ... | 8 | - // - auto topAdd = - BroadcastSWAR_ElementCount.shiftLanesLeft(intraIndex); - needlePSLs = needlePSLs + lowerPart + topAdd; - for(;;) { // hunt for the next deadline - ++swarIndex; - // should the maintenance of `index` be replaced - // with pointer arithmetic on mdp? 
- index += MD::NSlots; - ++mdp; - haystackPSLs = mdp->PSLs(); - breaksRobinHood = - not greaterEqual_MSB_off(haystackPSLs, needlePSLs); - if(breaksRobinHood) { break; } - evictedPSL += MD::NSlots; - if(HighestSafePSL < evictedPSL) { - throw MaximumProbeSequenceLengthExceeded("Scanning for eviction, insertion"); - } - needlePSLs = needlePSLs + BroadcastSWAR_ElementCount; - } - } - deadline = swar::isolateLSB(breaksRobinHood.value()); - intraIndex = breaksRobinHood.lsbIndex(); - index = swarIndex * MD::NSlots + intraIndex; - elementToInsert = elementToInsert | needlePSLs.at(intraIndex); - } - } - - struct const_iterator { - const KeyValuePairWrapper *p_; - - // note: ++ not yet implemented, we can't iterate ;-) - - const value_type &operator*() noexcept { return p_->value(); } - const value_type *operator->() noexcept { return &p_->value(); } - - bool operator==(const const_iterator &other) const noexcept { - return p_ == other.p_; - } - - bool operator!=(const const_iterator &other) const noexcept { - return p_ == other.p_; - } - - const_iterator(const KeyValuePairWrapper *p): p_(p) {} - const_iterator(const const_iterator &) = default; - }; - - struct iterator: const_iterator { - value_type *ncp() { return const_cast(&this->p_->value()); } - value_type &operator*() noexcept { return *ncp(); } - value_type *operator->() noexcept { return ncp(); } - using const_iterator::const_iterator; - }; - - const_iterator begin() const noexcept { return this->values_.data(); } - const_iterator end() const noexcept { - return this->values_.data() + this->values_.size(); - } - - inline iterator find(const K &k) noexcept __attribute__((always_inline)); - - const_iterator find(const K &k) const noexcept { - const_cast(this)->find(k); - } - - auto displacement(const_iterator from, const_iterator to) { - return to.p_ - from.p_; - } -}; - -template< - typename K, - typename MV, - size_t RequestedSize_, - int PSL_Bits, int HashBits, - typename Hash, - typename KE, - typename U, - 
typename Scatter, - typename RangeReduce, - typename HashReduce -> -auto -RH_Frontend_WithSkarupkeTail< - K, MV, RequestedSize_, PSL_Bits, HashBits, Hash, KE, U, Scatter, - RangeReduce, HashReduce ->::find(const K &k) noexcept -> iterator -{ - auto [hoisted, homeIndex, keyChecker] = findParameters(k); - Backend be{this->md_.data()}; - auto [index, deadline, dontcare] = - be.findMisaligned_assumesSkarupkeTail( - hoisted, homeIndex, keyChecker - ); - return deadline ? values_.end() : values_.data() + index; - } - -} // rh - -} // swar, zoo - -#endif +#ifndef ZOO_ROBINHOOD_H +#define ZOO_ROBINHOOD_H + +#include "zoo/map/RobinHoodUtil.h" +#include "zoo/AlignedStorage.h" + +#ifndef ZOO_CONFIG_DEEP_ASSERTIONS + #define ZOO_CONFIG_DEEP_ASSERTIONS 0 +#endif + +#include +#include +#include +#include + +#if ZOO_CONFIG_DEEP_ASSERTIONS + #include +#endif + +namespace zoo { +namespace rh { + +struct RobinHoodException: std::runtime_error { + using std::runtime_error::runtime_error; +}; + +struct MaximumProbeSequenceLengthExceeded: RobinHoodException { + using RobinHoodException::RobinHoodException; +}; +struct RelocationStackExhausted: RobinHoodException { + using RobinHoodException::RobinHoodException; +}; + +template +struct RH_Backend { + using Metadata = impl::Metadata; + + constexpr static inline auto Width = Metadata::NBits; + Metadata *md_; + + /*! \brief SWAR check for a potential match + The invariant in Robin Hood is that the element being looked for, the "needle", is "richer" + than the elements already present, the "haystack". + "Richer" means that the PSL is smaller. + A PSL of 0 can only happen in the haystack, to indicate the slot is empty, this is "richest". + The first time the needle has a PSL greater than the haystacks' means the matching will fail, + because the hypothetical prior insertion would have "stolen" that slot. + If there is an equal, it would start a sequence of potential matches. To determine an actual match: + 1. 
A cheap SWAR check of hoisted hashes + 2. If there are still potential matches (now also the hoisted hashes), fall back to non-SWAR, + or iterative and expensive "deep equality" test for each potential match, outside of this function + + The above makes it very important to detect the first case in which the PSL is greater equal to the needle. + We call this the "deadline". + Because we assume the LITTLE ENDIAN byte ordering, the first element would be the least significant + non-false Boolean SWAR. + + Note about performance: + Every "early exit" faces a big justification hurdle, the proportion of cases + they intercept to be large enough that the branch prediction penalty of the entropy introduced is + overcompensated. + */ + + /// Boolean SWAR true in the first element/lane of the needle strictly poorer than its corresponding + /// haystack + constexpr static auto + firstInvariantBreakage(Metadata needle, Metadata haystack) { + auto nPSL = needle.PSLs(); + auto hPSL = haystack.PSLs(); + auto theyKeepInvariant = + greaterEqual_MSB_off(hPSL, nPSL); + // BTW, the reason to have encoded the PSLs in the least + // significant bits is to be able to call the cheaper version + // _MSB_off here + + auto theyBreakInvariant = not theyKeepInvariant; + // because we make the assumption of LITTLE ENDIAN byte ordering, + // we're interested in the elements up to the first haystack-richer + auto firstBreakage = swar::isolateLSB(theyBreakInvariant.value()); + return firstBreakage; + } + + // This should be more generic: if PSLs breach a broadcast PSL, saturate + // This should be more generic: if a SWAR breaches a SWAR condition, saturate. + constexpr static auto + needlePSLSaturation(Metadata nPSL) { + // create a saturator for max PSL. If any needle saturates, all later PSLs will be set to saturated. 
+ constexpr auto saturatedPSL = broadcast(Metadata(Metadata::MaxPSL)); + //auto nPSL = needle.PSLs(); + auto saturation = greaterEqual_MSB_off(nPSL, saturatedPSL); + auto invertSatMask = ((swar::isolateLSB(saturation.value()) - 1) ); + auto satMask = (~(swar::isolateLSB(saturation.value()) - 1) ); + //if (not bool(saturation)) return std::tuple{nPSL, false}; + // Least sig lane is saturated, all more sig must be made saturated. + auto needlePSLsToSaturate = Metadata{satMask & saturatedPSL.value()}; + // addition might have overflown nPSL before entering function + return std::tuple{Metadata{nPSL.PSLs() | needlePSLsToSaturate}, bool(saturation)}; // saturated at any point, last swar to check. + } + + constexpr static impl::MatchResult + potentialMatches( + Metadata needle, Metadata haystack + ) noexcept { + // We need to determine if there are potential matches to consider + auto sames = equals(needle, haystack); + auto deadline = firstInvariantBreakage(needle, haystack); + // In a valid haystack, the PSLs can grow at most by 1 per entry. + // If a PSL is richer than the needle in any place, because the + // needle, by construction, always grows at least by 1 per entry, + // then the PSL won't be equal again. + // There is no need to filter potential matches using the deadline + // as previous versions of the code did. + return { + deadline, + sames + }; + } + + /*! 
\brief converts the given starting PSL and reduced hash code into a SWAR-ready needle + + The given needle would have a PSL as the starting (PSL + 1) in the first slot, the "+ 1" is because + the count starts at 1, in this way, a haystack PSL of 0 is always "richer" + */ + constexpr static auto makeNeedle(U startingPSL, U hoistedHash) { + constexpr auto Ones = meta::BitmaskMaker::value; + constexpr auto Progression = Ones * Ones; + auto core = startingPSL | (hoistedHash << PSL_Bits); + auto broadcasted = broadcast(Metadata(core)); + auto startingPSLmadePotentialPSLs = Metadata(Progression) + broadcasted; + return startingPSLmadePotentialPSLs; + } + + template + inline constexpr + std::tuple + findMisaligned_assumesSkarupkeTail( + U hoistedHash, int homeIndex, const KeyComparer &kc + ) const noexcept __attribute__((always_inline)); +}; + +template +template +inline constexpr +std::tuple::Metadata> +RH_Backend::findMisaligned_assumesSkarupkeTail( + U hoistedHash, int homeIndex, const KeyComparer &kc + ) const noexcept { + auto misalignment = homeIndex % Metadata::NSlots; + auto baseIndex = homeIndex / Metadata::NSlots; + auto base = this->md_ + baseIndex; + + constexpr auto Ones = meta::BitmaskMaker::value; + constexpr auto Progression = Metadata{Ones * Ones}; + constexpr auto AllNSlots = + Metadata{meta::BitmaskMaker::value}; + MisalignedGenerator_Dynamic p(base, int(Metadata::NBits * misalignment)); + auto index = homeIndex; + auto needle = makeNeedle(0, hoistedHash); + + for(;;) { + auto hay = *p; + auto result = potentialMatches(needle, hay); + auto positives = result.potentialMatches; + while(positives.value()) { + auto matchSubIndex = positives.lsbIndex(); + auto matchIndex = index + matchSubIndex; + // Possible specialist optimization to kick off all possible + // matches to an array (like chaining evict) and check them + // later. 
+ if(kc(matchIndex)) { + return std::tuple(matchIndex, U(0), Metadata(0)); + } + positives = Metadata{swar::clearLSB(positives.value())}; + } + auto deadline = result.deadline; + if(deadline) { + // The deadline is relative to the misalignment. + // To build an absolute deadline, there are two cases: + // the bit falls in the first SWAR or the second SWAR. + // The same applies for needle. + // in general, for example a misaglignment of 6: + // { . | . | . | . | . | . | . | .}{ . | . | . | . | . | . | . | . } + // { a | b | c | d | e | f | g | h } + // shift left (to higher bits) by the misalignment + // { 0 | 0 | 0 | 0 | 0 | 0 | a | b } + // shift right (to lower bits) by NSlots - misalignment: + // { c | d | e | f | g | h | 0 | 0 } + // One might hope undefined behavior might be reasonable (zero + // result, unchanged result), but ARM proves that undefined + // behavior is indeed undefined, so we do our right shift as a + // double: shift by n-1, then shift by 1. + auto mdd = Metadata{deadline}; + auto toAbsolute = [](auto v, auto ma) { + auto shiftedLeft = v.shiftLanesLeft(ma); + auto shiftedRight = + v.shiftLanesRight(Metadata::NSlots - ma - 1).shiftLanesRight(1); + return Metadata{shiftedLeft | shiftedRight}; + }; + auto position = index + Metadata{deadline}.lsbIndex(); + return + std::tuple( + position, + toAbsolute(mdd, misalignment).value(), + toAbsolute(needle, misalignment) + ); + } + // Skarupke's tail allows us to not have to worry about the end + // of the metadata + ++p; + index += Metadata::NSlots; + needle = needle + AllNSlots; + } + } + +template +struct KeyValuePairWrapper { + using type = std::pair; + AlignedStorageFor pair_; + + template + void build(Initializers &&...izers) + noexcept(noexcept(pair_.template build(std::forward(izers)...))) + { + pair_.template build(std::forward(izers)...); + } + + template + KeyValuePairWrapper &operator=(RHS &&rhs) + noexcept(noexcept(std::declval() = std::forward(rhs))) + { + *pair_.template as() = 
std::forward(rhs); + return *this; + } + + void destroy() noexcept { pair_.template destroy(); } + + auto valuePtr() const noexcept { + auto deconstified = const_cast(this); + return deconstified->pair_.template as(); + } + + auto &value() noexcept { return *valuePtr(); } + const auto &value() const noexcept { return const_cast(this)->value(); } +}; + +template< + typename K, + typename MV, + size_t RequestedSize_, + int PSL_Bits, int HashBits, + typename Hash = std::hash, + typename KE = std::equal_to, + typename U = std::uint64_t, + typename Scatter = FibonacciScatter, + typename RangeReduce = LemireReduce, + typename HashReduce = TopHashReducer +> +struct RH_Frontend_WithSkarupkeTail { + using Backend = RH_Backend; + using MD = typename Backend::Metadata; + + constexpr static inline auto RequestedSize = RequestedSize_; + constexpr static inline auto LongestEncodablePSL = (1 << PSL_Bits); + constexpr static inline auto WithTail = + RequestedSize + + LongestEncodablePSL // the Skarupke tail + ; + constexpr static inline auto SWARCount = + ( + WithTail + + MD::NSlots - 1 // to calculate the ceiling rounding + ) / MD::NSlots + ; + constexpr static inline auto SlotCount = SWARCount * MD::NSlots; + constexpr static inline auto HighestSafePSL = + LongestEncodablePSL - MD::NSlots - 1; + + using MetadataCollection = std::array; + using value_type = std::pair; + + MetadataCollection md_; + /// \todo Scatter key and value in a flavor + std::array, SlotCount> values_; + size_t elementCount_; + + RH_Frontend_WithSkarupkeTail() noexcept: elementCount_(0) { + for(auto &mde: md_) { mde = MD{0}; } + } + + template + void traverse(Callable &&c) const { + for(size_t swarIndex = 0; swarIndex < SWARCount; ++swarIndex) { + auto PSLs = md_[swarIndex].PSLs(); + auto occupied = booleans(PSLs); + while(occupied) { + auto intraIndex = occupied.lsbIndex(); + c(swarIndex, intraIndex); + occupied = occupied.clearLSB(); + } + } + } + + ~RH_Frontend_WithSkarupkeTail() { + 
traverse([thy=this](std::size_t sI, std::size_t intra) { + thy->values_[intra + sI * MD::NSlots].destroy(); + }); + } + + RH_Frontend_WithSkarupkeTail(const RH_Frontend_WithSkarupkeTail &model): + RH_Frontend_WithSkarupkeTail() + { + model.traverse([thy=this,other=&model](std::size_t sI, std::size_t intra) { + auto index = intra + sI * MD::NSlots; + thy->values_[index].build(other->values_[index].value()); + thy->md_[sI] = thy->md_[sI].blitElement(other->md_[sI], intra); + ++thy->elementCount_; + }); + } + + RH_Frontend_WithSkarupkeTail(RH_Frontend_WithSkarupkeTail &&donor) noexcept: + md_(donor.md_), elementCount_(donor.elementCount_) + { + traverse([thy=this, other=&donor](std::size_t sI, std::size_t intra) { + auto index = intra + sI * MD::NSlots; + thy->values_[index].build(std::move(other->values_[index].value())); + }); + } + + + auto findParameters(const K &k) const noexcept { + auto [hoisted, homeIndex] = + findBasicParameters< + K, RequestedSize, HashBits, U, + Hash, Scatter, RangeReduce, HashReduce + >(k); + return + std::tuple{ + hoisted, + homeIndex, + [thy = this, &k](size_t ndx) noexcept { + return KE{}(thy->values_[ndx].value().first, k); + } + }; + } + + template + auto insert(ValuteTypeCompatible &&val) { + auto &k = val.first; + auto &mv = val.second; + auto [hoistedT, homeIndexT, kc] = findParameters(k); + auto hoisted = hoistedT; + auto homeIndex = homeIndexT; + auto thy = const_cast(this); + Backend be{thy->md_.data()}; + auto [iT, deadlineT, needleT] = + be.findMisaligned_assumesSkarupkeTail(hoisted, homeIndex, kc); + auto index = iT; + if(HighestSafePSL < index - homeIndex) { + throw MaximumProbeSequenceLengthExceeded("Scanning for eviction, from finding"); + } + auto deadline = deadlineT; + if(!deadline) { return std::pair{iterator(values_.data() + index), false}; } + auto needle = needleT; + auto rv = + insertionEvictionChain( + index, deadline, needle, + std::forward(val) + ); + ++elementCount_; + return rv; + } + + // Do the chain of 
relocations + // From this point onward, the hashes don't matter except for the + // updates to the metadata, the relocations + template + auto insertionEvictionChain( + std::size_t index, + U deadline, + MD needle, + VTC &&val + ) { + auto &k = val.first; + auto &mv = val.second; + auto swarIndex = index / MD::Lanes; + auto intraIndex = index % MD::Lanes; + auto mdp = this->md_.data() + swarIndex; + + // Because we have not decided about strong versus basic exception + // safety guarantee, for the time being we will just put a very large + // number here. + constexpr auto MaxRelocations = 150000; + std::array relocations; + std::array newElements; + auto relocationsCount = 0; + auto elementToInsert = needle.at(intraIndex); + + // The very last element in the metadata will always have a psl of 0 + // this serves as a sentinel for insertions, the only place to make + // sure the table has not been exhausted is an eviction chain that + // ends in the sentinel + // Also, the encoding for the PSL may be exhausted + for(;;) { + // Loop invariant: + // deadline, index, swarIndex, intraIndex, elementToInsert correct + // mdp points to the haystack that gave the deadline + auto md = *mdp; + auto evictedPSL = md.PSLs().at(intraIndex); + if(0 == evictedPSL) { // end of eviction chain! 
+ if(SlotCount - 1 <= index) { + throw MaximumProbeSequenceLengthExceeded("full table"); + } + if(0 == relocationsCount) { // direct build of a new value + values_[index].build( + std::piecewise_construct, + std::tuple(std::forward(val).first), + std::tuple(std::forward(val).second) + ); + *mdp = mdp->blitElement(intraIndex, elementToInsert); + return std::pair{iterator(values_.data() + index), true}; + } + // the last element is special because it is a + // move-construction, not a move-assignment + --relocationsCount; + auto fromIndex = relocations[relocationsCount]; + values_[index].build( + std::move(values_[fromIndex].value()) + ); + md_[swarIndex] = + md_[swarIndex].blitElement( + intraIndex, elementToInsert + ); + elementToInsert = newElements[relocationsCount]; + index = fromIndex; + swarIndex = index / MD::NSlots; + intraIndex = index % MD::NSlots; + // do the pair relocations + while(relocationsCount--) { + fromIndex = relocations[relocationsCount]; + values_[index].value() = + std::move(values_[fromIndex].value()); + md_[swarIndex] = + md_[swarIndex].blitElement(intraIndex, elementToInsert); + elementToInsert = newElements[relocationsCount]; + index = fromIndex; + swarIndex = index / MD::NSlots; + intraIndex = index % MD::NSlots; + } + values_[index].value() = std::forward(val); + md_[swarIndex] = + md_[swarIndex].blitElement(intraIndex, elementToInsert); + return std::pair{iterator(values_.data() + index), true}; + } + if(HighestSafePSL < evictedPSL) { + throw MaximumProbeSequenceLengthExceeded("Encoding insertion"); + } + + // evict the "deadline" element: + // first, insert the element in its place (it "stole") + // find the place for the evicted: when Robin Hood breaks again. + + // for this search, we need to make a search needle with only + // the PSL being evicted. 
+ + // "push" the index of the element that will be evicted + relocations[relocationsCount] = index; + // we have a place for the element being inserted, at this index + newElements[relocationsCount++] = elementToInsert; + if(MaxRelocations <= relocationsCount) { + throw RelocationStackExhausted("Relocation Stack"); + } + + // now the insertion will be for the old metadata entry + elementToInsert = md.hashes().at(intraIndex); + + // now, where should the evicted element go to? + // assemble a new needle + + // Constants relevant for the rest + constexpr auto Ones = meta::BitmaskMaker::value; + // | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | + constexpr auto ProgressionFromOne = MD(Ones * Ones); + // | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | + constexpr auto ProgressionFromZero = + MD(ProgressionFromOne - MD(Ones)); + // | 0 | 1 | 2 | 3 | ... | 7 | + constexpr auto BroadcastSWAR_ElementCount = + MD(meta::BitmaskMaker::value); + // | 8 | 8 | 8 | 8 | ... | 8 | + constexpr auto SWARIterationAddendumBase = + ProgressionFromZero + BroadcastSWAR_ElementCount; + // | 8 | 9 | ... | 15 | + + auto broadcastedEvictedPSL = broadcast(MD(evictedPSL)); + auto evictedPSLWithProgressionFromZero = + broadcastedEvictedPSL + ProgressionFromZero; + // | ePSL+0 | ePSL+1 | ePSL+2 | ePSL+3 | ... | ePSL+7 | + auto needlePSLs = + evictedPSLWithProgressionFromZero.shiftLanesLeft(intraIndex); + // zeroes make the new needle + // "richer" in all elements lower than the deadline + // because of the progression starts with 0 + // the "deadline" element will have equal PSL, not + // "poorer". + // assuming the deadline happened in the index 2: + // needlePSLs = | 0 | 0 | ePSL | ePSL+1 | ... | ePSL+5 | + // find the place for the new needle, without checking the keys. + auto haystackPSLs = md.PSLs(); + // haystack < needle => !(haystack >= needle) + auto breaksRobinHood = + not greaterEqual_MSB_off(haystackPSLs, needlePSLs); + if(!bool(breaksRobinHood)) { + // no place for the evicted element found in this swar. 
+ // increment the PSLs in the needle to check the next haystack + + // for the next swar, we will want (continuing the assumption + // of the deadline happening at index 2) + // old needle: + // | 0 | 0 | ePSL | ePSL+1 | ... | ePSL+5 | + // desired new needle PSLs: + // | ePSL+6 | ePSL+7 | ePSL+8 | ePSL+9 | ... | ePSL+13 | + // from evictedPSLWithProgressionFromZero, + // shift "right" NLanes - intraIndex (keep the last two lanes): + // | ePSL+6 | ePSL+7 | 0 | ... | 0 | + auto lowerPart = + evictedPSLWithProgressionFromZero. + shiftLanesRight(MD::Lanes - intraIndex - 1). + shiftLanesRight(1); + // the other part, of +8 onwards, is BroadcastElementCount, + // shifted: + // | 8 | 8 | 8 | 8 | ... | 8 | + // shifted two lanes: + // | 0 | 0 | 8 | 8 | ... | 8 | + // + auto topAdd = + BroadcastSWAR_ElementCount.shiftLanesLeft(intraIndex); + needlePSLs = needlePSLs + lowerPart + topAdd; + for(;;) { // hunt for the next deadline + ++swarIndex; + // should the maintenance of `index` be replaced + // with pointer arithmetic on mdp? 
+ index += MD::NSlots; + ++mdp; + haystackPSLs = mdp->PSLs(); + breaksRobinHood = + not greaterEqual_MSB_off(haystackPSLs, needlePSLs); + if(breaksRobinHood) { break; } + evictedPSL += MD::NSlots; + if(HighestSafePSL < evictedPSL) { + throw MaximumProbeSequenceLengthExceeded("Scanning for eviction, insertion"); + } + needlePSLs = needlePSLs + BroadcastSWAR_ElementCount; + } + } + deadline = swar::isolateLSB(breaksRobinHood.value()); + intraIndex = breaksRobinHood.lsbIndex(); + index = swarIndex * MD::NSlots + intraIndex; + elementToInsert = elementToInsert | needlePSLs.at(intraIndex); + } + } + + struct const_iterator { + const KeyValuePairWrapper *p_; + + // note: ++ not yet implemented, we can't iterate ;-) + + const value_type &operator*() noexcept { return p_->value(); } + const value_type *operator->() noexcept { return &p_->value(); } + + bool operator==(const const_iterator &other) const noexcept { + return p_ == other.p_; + } + + bool operator!=(const const_iterator &other) const noexcept { + return p_ == other.p_; + } + + const_iterator(const KeyValuePairWrapper *p): p_(p) {} + const_iterator(const const_iterator &) = default; + }; + + struct iterator: const_iterator { + value_type *ncp() { return const_cast(&this->p_->value()); } + value_type &operator*() noexcept { return *ncp(); } + value_type *operator->() noexcept { return ncp(); } + using const_iterator::const_iterator; + }; + + const_iterator begin() const noexcept { return this->values_.data(); } + const_iterator end() const noexcept { + return this->values_.data() + this->values_.size(); + } + + inline iterator find(const K &k) noexcept __attribute__((always_inline)); + + const_iterator find(const K &k) const noexcept { + const_cast(this)->find(k); + } + + auto displacement(const_iterator from, const_iterator to) { + return to.p_ - from.p_; + } +}; + +template< + typename K, + typename MV, + size_t RequestedSize_, + int PSL_Bits, int HashBits, + typename Hash, + typename KE, + typename U, + 
typename Scatter, + typename RangeReduce, + typename HashReduce +> +auto +RH_Frontend_WithSkarupkeTail< + K, MV, RequestedSize_, PSL_Bits, HashBits, Hash, KE, U, Scatter, + RangeReduce, HashReduce +>::find(const K &k) noexcept -> iterator +{ + auto [hoisted, homeIndex, keyChecker] = findParameters(k); + Backend be{this->md_.data()}; + auto [index, deadline, dontcare] = + be.findMisaligned_assumesSkarupkeTail( + hoisted, homeIndex, keyChecker + ); + return deadline ? values_.end() : values_.data() + index; + } + +} // rh + +} // swar, zoo + +#endif diff --git a/inc/zoo/map/RobinHoodAlt.h b/inc/zoo/map/RobinHoodAlt.h index 5f8fa484..df869e4b 100644 --- a/inc/zoo/map/RobinHoodAlt.h +++ b/inc/zoo/map/RobinHoodAlt.h @@ -1,194 +1,194 @@ - -#pragma once - -#include "zoo/swar/SWAR.h" - -#include -#include - -namespace zoo { - -namespace rh { - -using u64 = uint64_t; -using u32 = uint32_t; -using u16 = uint16_t; -using u8 = uint8_t; - -template struct SlotOperations { - using SSL = swar::SWARWithSubLanes; - using SM = swar::SWAR; - using BoolSM = swar::BooleanSWAR; - static constexpr inline auto SlotOnes = - meta::BitmaskMaker::value; - // for 64 bit size, 8 bit meta, 0x0807'0605'0403'0201ull - static constexpr inline auto PSLProgression = SlotOnes * SlotOnes; - static constexpr T allOnes = - meta::BitmaskMaker::value; - - static constexpr auto needlePSL(T currentPSL) { - return broadcast(SM{currentPSL}) + SM{PSLProgression}; - } - - // At the position the needle psl exceeds the haystack psl, a match becomes - // impossible. Only elements _before_ the exceeding element can match. - // pslNeedle must be PSLProgression + startingPSLValue - static constexpr auto deadline(SM pslHaystack, SM pslNeedle) { - // We must ensure the MSBs of the psl blocks are off. Since we store - // PSLs in a swar with sublanes in the least bits, this guarantee - // holds. 
- auto satisfied = greaterEqual_MSB_off(pslHaystack, pslNeedle); - auto broken = not satisfied; - return swar::isolateLSB(broken.value()); - } - - // Has no intrinsic binding to the metadata, just easier to write with - // using decls. - static constexpr auto attemptMatch( - SM haystack, SM needleHashes, SM needlePSL) { - const auto haystackPSL = SM{SSL::LeastMask.value() & haystack.value()}; - const auto d = deadline(haystackPSL, needlePSL); // breaks abstraction - const auto needle = needleHashes | needlePSL; - - const auto matches = equals(haystack, needle); - const auto searchEnds = d ? 1 : 0; - // Returned value is MSB boolean array with 'finality' bit on at - // position 0. Breaks if PSLs are width 1 (but so does everything else) - return SM{searchEnds | ((d - 1) & matches.value())}; - } -}; - -/// Slot metadata provides the affordances of -/// 'attempt to match this needle of hashes and PSLs' -/// Contains sizeof(T)/(NBitsHash,NBitsPSL) hashes and PSLs -template struct SlotMetadata { - using SSL = swar::SWARWithSubLanes; - using SM = swar::SWAR; - using BoolSM = swar::BooleanSWAR; - // PSL are stored in least sig bits, Hashes are stored in most sig bits. - // This allows us to do fast greaterequal checks for PSLs (with the hashes - // blitted out) and fast equality checks for hash bits (as equality checks - // do not need carry bits. 
- SSL data_; - - constexpr auto PSLs() const noexcept{ - return data_.least(); - } - - constexpr auto Hashes() const noexcept { - return data_.most(); - } - - constexpr auto attemptMatch(SM needleHashes, SM needlePSL) { - return SlotOperations::attemptMatch( - data_, needleHashes, needlePSL); - } -}; - -/// BlockProvider provides affordances of -/// 'give me the slot metadata that contains position p' -/// 'set slot metadata at index that contains position p' -/// 'set key at position p' -/// 'check this concrete key against position p' -template struct SlotSplitKeys { - using SSL = swar::SWARWithSubLanes; - using SM = swar::SWAR; - using BoolSM = swar::BooleanSWAR; - - std::array keys_; - std::array metadata_; - - Key keyAt(int pos) const { - return keys_[pos]; - } - void setKey(int pos, Key k) { - keys_[pos] = k; - } - constexpr int posToSlot(int pos) const { return pos/SM::Lanes; } - void setSlotAt(int idx, T v) { - metadata_[idx] = v; - } - - // Track to avoid second divide? - SSL slotAt(int pos) const { return metadata_[posToSlot(pos)]; } - bool keyCheck(int major, int minor, Key k) const { return k == keyAt(major * SM::Lanes + minor); } - bool keyCheck(int pos, Key k) const { return k == keyAt(pos); } -}; - -/// RobinHood tables provide affordances of -/// 'lookup this key' -/// 'insert this key' -/// 'delete this key' -/// Split locale of key value when both are present. 
-/// Position is major + minor - -template -struct Hasher { - Key operator()(Key k) { return k; } -}; - -template> -struct RH { - using SSL = swar::SWARWithSubLanes; - using SM = swar::SWAR; - using BoolSM = swar::BooleanSWAR; - Meta m_; - Hash h_; - - bool exists(Key k) { - return findSlot(k).second; - } - - struct MajorMinorInserted { - int major; - int minor; - bool inserted; - }; - - /// pos, hash - std::pair twoNumbers(Key k) { - auto hash = h_(k); - const u32 pos = mapToSlotLemireReduction(fibonacciIndexModulo(hash)); - const T thinhash = badMixer (hash); - return {pos, thinhash}; - } - - std::pair twoNumbersBad(Key k) { - return {1, 1}; - } - - /// Find slot can mean 'psl too short/no slot', 'found and in slot', 'not found but (richer|empty) slot' - /// Currently bug: 'psl too short/no slot' not handled correctly. - /// Returns major/minor to attempt to avoid divisions. - template - MajorMinorInserted findSlot(Key k, KeyCheck kc) { - const auto twoNum = twoNumbersBad(k); - const auto pos = twoNum.first; - const auto hash = twoNum.second; - const auto major = m_.posToSlot(pos); - const auto haystack = m_.slotAt(pos); - // PSL is off by one territory - constexpr auto exactlyOne = SM{1}; - auto minor = 0; - while(true) { - const auto matchResult = SlotOperations::matchAttempt(haystack, hash, 0); - auto finished = exactlyOne & matchResult; - auto matches = exactlyOne & ~matchResult; - while (matches) { - auto minor = matches.lsbIndex(); // Lane offset - if (m_.keyCheck(major + minor, k)) { - return {major, minor, true}; // 'found and in slot' - } - matches = SM{matches.clearLSB()}; - } - // minor points at slot that the key currently fits in. 
- if (finished) { - return {major, minor, false}; // 'not found, richer or empty slot' - } - } - } -}; - - -} // namespace rh -} // namespace zoo + +#pragma once + +#include "zoo/swar/SWAR.h" + +#include +#include + +namespace zoo { + +namespace rh { + +using u64 = uint64_t; +using u32 = uint32_t; +using u16 = uint16_t; +using u8 = uint8_t; + +template struct SlotOperations { + using SSL = swar::SWARWithSubLanes; + using SM = swar::SWAR; + using BoolSM = swar::BooleanSWAR; + static constexpr inline auto SlotOnes = + meta::BitmaskMaker::value; + // for 64 bit size, 8 bit meta, 0x0807'0605'0403'0201ull + static constexpr inline auto PSLProgression = SlotOnes * SlotOnes; + static constexpr T allOnes = + meta::BitmaskMaker::value; + + static constexpr auto needlePSL(T currentPSL) { + return broadcast(SM{currentPSL}) + SM{PSLProgression}; + } + + // At the position the needle psl exceeds the haystack psl, a match becomes + // impossible. Only elements _before_ the exceeding element can match. + // pslNeedle must be PSLProgression + startingPSLValue + static constexpr auto deadline(SM pslHaystack, SM pslNeedle) { + // We must ensure the MSBs of the psl blocks are off. Since we store + // PSLs in a swar with sublanes in the least bits, this guarantee + // holds. + auto satisfied = greaterEqual_MSB_off(pslHaystack, pslNeedle); + auto broken = not satisfied; + return swar::isolateLSB(broken.value()); + } + + // Has no intrinsic binding to the metadata, just easier to write with + // using decls. + static constexpr auto attemptMatch( + SM haystack, SM needleHashes, SM needlePSL) { + const auto haystackPSL = SM{SSL::LeastMask.value() & haystack.value()}; + const auto d = deadline(haystackPSL, needlePSL); // breaks abstraction + const auto needle = needleHashes | needlePSL; + + const auto matches = equals(haystack, needle); + const auto searchEnds = d ? 1 : 0; + // Returned value is MSB boolean array with 'finality' bit on at + // position 0. 
Breaks if PSLs are width 1 (but so does everything else) + return SM{searchEnds | ((d - 1) & matches.value())}; + } +}; + +/// Slot metadata provides the affordances of +/// 'attempt to match this needle of hashes and PSLs' +/// Contains sizeof(T)/(NBitsHash,NBitsPSL) hashes and PSLs +template struct SlotMetadata { + using SSL = swar::SWARWithSubLanes; + using SM = swar::SWAR; + using BoolSM = swar::BooleanSWAR; + // PSL are stored in least sig bits, Hashes are stored in most sig bits. + // This allows us to do fast greaterequal checks for PSLs (with the hashes + // blitted out) and fast equality checks for hash bits (as equality checks + // do not need carry bits. + SSL data_; + + constexpr auto PSLs() const noexcept{ + return data_.least(); + } + + constexpr auto Hashes() const noexcept { + return data_.most(); + } + + constexpr auto attemptMatch(SM needleHashes, SM needlePSL) { + return SlotOperations::attemptMatch( + data_, needleHashes, needlePSL); + } +}; + +/// BlockProvider provides affordances of +/// 'give me the slot metadata that contains position p' +/// 'set slot metadata at index that contains position p' +/// 'set key at position p' +/// 'check this concrete key against position p' +template struct SlotSplitKeys { + using SSL = swar::SWARWithSubLanes; + using SM = swar::SWAR; + using BoolSM = swar::BooleanSWAR; + + std::array keys_; + std::array metadata_; + + Key keyAt(int pos) const { + return keys_[pos]; + } + void setKey(int pos, Key k) { + keys_[pos] = k; + } + constexpr int posToSlot(int pos) const { return pos/SM::Lanes; } + void setSlotAt(int idx, T v) { + metadata_[idx] = v; + } + + // Track to avoid second divide? 
+ SSL slotAt(int pos) const { return metadata_[posToSlot(pos)]; } + bool keyCheck(int major, int minor, Key k) const { return k == keyAt(major * SM::Lanes + minor); } + bool keyCheck(int pos, Key k) const { return k == keyAt(pos); } +}; + +/// RobinHood tables provide affordances of +/// 'lookup this key' +/// 'insert this key' +/// 'delete this key' +/// Split locale of key value when both are present. +/// Position is major + minor + +template +struct Hasher { + Key operator()(Key k) { return k; } +}; + +template> +struct RH { + using SSL = swar::SWARWithSubLanes; + using SM = swar::SWAR; + using BoolSM = swar::BooleanSWAR; + Meta m_; + Hash h_; + + bool exists(Key k) { + return findSlot(k).second; + } + + struct MajorMinorInserted { + int major; + int minor; + bool inserted; + }; + + /// pos, hash + std::pair twoNumbers(Key k) { + auto hash = h_(k); + const u32 pos = mapToSlotLemireReduction(fibonacciIndexModulo(hash)); + const T thinhash = badMixer (hash); + return {pos, thinhash}; + } + + std::pair twoNumbersBad(Key k) { + return {1, 1}; + } + + /// Find slot can mean 'psl too short/no slot', 'found and in slot', 'not found but (richer|empty) slot' + /// Currently bug: 'psl too short/no slot' not handled correctly. + /// Returns major/minor to attempt to avoid divisions. 
+ template + MajorMinorInserted findSlot(Key k, KeyCheck kc) { + const auto twoNum = twoNumbersBad(k); + const auto pos = twoNum.first; + const auto hash = twoNum.second; + const auto major = m_.posToSlot(pos); + const auto haystack = m_.slotAt(pos); + // PSL is off by one territory + constexpr auto exactlyOne = SM{1}; + auto minor = 0; + while(true) { + const auto matchResult = SlotOperations::matchAttempt(haystack, hash, 0); + auto finished = exactlyOne & matchResult; + auto matches = exactlyOne & ~matchResult; + while (matches) { + auto minor = matches.lsbIndex(); // Lane offset + if (m_.keyCheck(major + minor, k)) { + return {major, minor, true}; // 'found and in slot' + } + matches = SM{matches.clearLSB()}; + } + // minor points at slot that the key currently fits in. + if (finished) { + return {major, minor, false}; // 'not found, richer or empty slot' + } + } + } +}; + + +} // namespace rh +} // namespace zoo diff --git a/inc/zoo/meta/log.h b/inc/zoo/meta/log.h index d02a555c..84011561 100644 --- a/inc/zoo/meta/log.h +++ b/inc/zoo/meta/log.h @@ -5,28 +5,37 @@ namespace zoo { namespace meta { -constexpr int logFloor(uint64_t arg) { - return 63 - __builtin_clzll(arg); -} - -constexpr int logCeiling(uint64_t arg) { - auto floorLog = logFloor(arg); - return floorLog + ((arg ^ (1ull << floorLog)) ? 1 : 0); -} - /// The algorithm is, from the perspective of the most significant bit set, to copy it /// downward to all positions. /// First copy it once, meaning a group of two copies of the two most significant bit /// Then copy it again, making a group of four copies, then 8 copies... 
template -constexpr int logCeiling_WithoutIntrinsic(T value) { +constexpr int logFloor_WithoutIntrinsic(T value) { constexpr auto NBitsTotal = sizeof(T) * 8; for(auto groupSize = 1; groupSize < NBitsTotal; groupSize <<= 1) { value |= (value >> groupSize); } return PopcountLogic, T>::execute(value) - 1; } - + +#ifdef _MSC_VER +constexpr int logFloor(uint64_t arg) { + return logFloor_WithoutIntrinsic(arg); +} +#else +constexpr int logFloor(uint64_t arg) { + return 63 - __builtin_clzll(arg); +} +#endif + +constexpr int logCeiling(uint64_t arg) { + auto floorLog = logFloor(arg); + return + floorLog + + // turn off the most significant bit and convert to 1 or 0 + ((arg ^ (1ull << floorLog)) ? 1 : 0); +} + }} #endif diff --git a/inc/zoo/meta/popcount.h b/inc/zoo/meta/popcount.h index 5b57b364..3b00056b 100644 --- a/inc/zoo/meta/popcount.h +++ b/inc/zoo/meta/popcount.h @@ -69,6 +69,10 @@ constexpr T PopcountLogic::execute(T input) { Recursion::execute((input >> HalvedGroupSize) & CombiningMask); } +#ifdef _MSC_VER +template +using PopcountIntrinsic = PopcountLogic; +#else template struct PopcountIntrinsic { constexpr static T execute(T input) { @@ -84,6 +88,7 @@ struct PopcountIntrinsic { return rv; } }; +#endif }} diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index f982adba..d6872632 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -27,13 +27,14 @@ constexpr uint64_t popcount(uint64_t a) noexcept { /// Index into the bits of the type T that contains the MSB. template constexpr std::make_unsigned_t msbIndex(T v) noexcept { - return 8*sizeof(T) - 1 - __builtin_clzll(v); + return meta::logFloor(v); } /// Index into the bits of the type T that contains the LSB. template constexpr std::make_unsigned_t lsbIndex(T v) noexcept { - return __builtin_ctzll(v) + 1; + // ~v & (v - 1) turns on all trailing zeroes, zeroes the rest + return meta::logFloor(1 + (~v & (v - 1))); } /// Core abstraction around SIMD Within A Register (SWAR). 
Specifies 'lanes' @@ -44,12 +45,14 @@ constexpr std::make_unsigned_t lsbIndex(T v) noexcept { template struct SWAR { using type = T; - constexpr static inline auto NBits = NBits_; - constexpr static inline auto Lanes = sizeof(T) * 8 / NBits; - constexpr static inline auto NSlots = Lanes; - constexpr static T BitMod = sizeof(T)*8 % NBits; - constexpr static T ValidBitsCount = sizeof(T)*8 - BitMod; - constexpr static T AllOnes = (BitMod == 0) ? ~(T(0)) : ((T(1) << ValidBitsCount) -1); + constexpr static inline std::make_unsigned_t + NBits = NBits_, + BitWidth = sizeof(T) * 8, + Lanes = BitWidth / NBits, + NSlots = Lanes, + PaddingBitsCount = BitWidth % NBits, + SignificantBitsCount = BitWidth - PaddingBitsCount, + AllOnes = ~std::make_unsigned_t{0} >> PaddingBitsCount; SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} @@ -92,13 +95,21 @@ struct SWAR { /// The SWAR lane index that contains the MSB. It is not the bit index of the MSB. /// IE: 4 bit wide 32 bit SWAR: 0x0040'0000 will return 5, not 22 (0 indexed). 
- constexpr int top() const noexcept { return msbIndex(m_v) / NBits; } - constexpr int lsbIndex() const noexcept { return __builtin_ctzll(m_v) / NBits; } + constexpr auto top() const noexcept { return msbIndex(m_v) / NBits; } + constexpr auto lsbIndex() const noexcept { return swar::lsbIndex(m_v) / NBits; } constexpr SWAR setBit(int index, int bit) const noexcept { return SWAR(m_v | (T(1) << (index * NBits + bit))); } + constexpr SWAR shiftLanesLeft(int laneCount) const noexcept { + return SWAR(value() << (NBits * laneCount)); + } + + constexpr SWAR shiftLanesRight(int laneCount) const noexcept { + return SWAR(value() >> (NBits * laneCount)); + } + constexpr auto blitElement(int index, T value) const noexcept { auto elementMask = ((T(1) << NBits) - 1) << (index * NBits); return SWAR((m_v & ~elementMask) | (value << (index * NBits))); @@ -110,14 +121,6 @@ struct SWAR { return (*this & ~IsolationMask) | (other & IsolationMask); } - constexpr SWAR shiftLanesLeft(int laneCount) const noexcept { - return SWAR(value() << (NBits * laneCount)); - } - - constexpr SWAR shiftLanesRight(int laneCount) const noexcept { - return SWAR(value() >> (NBits * laneCount)); - } - T m_v; }; diff --git a/inc/zoo/tr/is_container.h b/inc/zoo/traits/is_container.h similarity index 100% rename from inc/zoo/tr/is_container.h rename to inc/zoo/traits/is_container.h diff --git a/inc/zoo/util/container_insertion.h b/inc/zoo/util/container_insertion.h index d816f7eb..f1f6603a 100644 --- a/inc/zoo/util/container_insertion.h +++ b/inc/zoo/util/container_insertion.h @@ -1,43 +1,43 @@ -#ifndef ZOO_CONTAINER_INSERTION -#define ZOO_CONTAINER_INSERTION - -#include -#include - -namespace zoo { - -template -struct is_insertable_impl: std::false_type {}; -template -struct is_insertable_impl< - T, - std::void_t() << std::declval() - )> ->: std::true_type {}; - -} - -namespace std { - -template -auto operator<<(std::ostream &out, const C &a) --> std::enable_if_t< - not(zoo::is_insertable_impl::value) && 
zoo::is_container_v, - std::ostream & -> { - out << '('; - auto current{cbegin(a)}, sentry{cend(a)}; - if(current != sentry) { - for(;;) { - out << *current++; - if(sentry == current) { break; } - out << ", "; - } - } - return out << ')'; -} - -} - -#endif +#ifndef ZOO_CONTAINER_INSERTION +#define ZOO_CONTAINER_INSERTION + +#include +#include + +namespace zoo { + +template +struct is_insertable_impl: std::false_type {}; +template +struct is_insertable_impl< + T, + std::void_t() << std::declval() + )> +>: std::true_type {}; + +} + +namespace std { + +template +auto operator<<(std::ostream &out, const C &a) +-> std::enable_if_t< + not(zoo::is_insertable_impl::value) && zoo::is_container_v, + std::ostream & +> { + out << '('; + auto current{cbegin(a)}, sentry{cend(a)}; + if(current != sentry) { + for(;;) { + out << *current++; + if(sentry == current) { break; } + out << ", "; + } + } + return out << ')'; +} + +} + +#endif diff --git a/inc/zoo/util/range_equivalence.h b/inc/zoo/util/range_equivalence.h index 97fc14c5..6c514d79 100644 --- a/inc/zoo/util/range_equivalence.h +++ b/inc/zoo/util/range_equivalence.h @@ -1,46 +1,46 @@ -#ifndef ZOO_RANGE_EQUIVALENCE -#define ZOO_RANGE_EQUIVALENCE - -#include - -namespace zoo { - -template -auto operator==(const C1 &l, const C2 &r) --> std::enable_if_t< - zoo::is_container_v and - zoo::is_container_v, - bool -> -{ - auto lb{cbegin(l)}, le{cend(l)}; - auto rb{cbegin(r)}, re{cend(r)}; - for(;;++lb, ++rb){ - if(lb == le) { return rb == re; } // termination at the same time - if(rb == re) { return false; } // r has fewer elements - if(not(*lb == *rb)) { return false; } - } - return true; -} - -template -auto weaklySame(const C1 &l, const C2 &r) --> std::enable_if_t< - zoo::is_container_v and - zoo::is_container_v, - bool -> -{ - auto lb{cbegin(l)}, le{cend(l)}; - auto rb{cbegin(r)}, re{cend(r)}; - for(;;++lb, ++rb){ - if(lb == le) { return rb == re; } // termination at the same time - if(rb == re) { return false; } // r has fewer 
elements - if(*lb < *rb || *rb < *lb) { return false; } - } - return true; -} - -} - -#endif +#ifndef ZOO_RANGE_EQUIVALENCE +#define ZOO_RANGE_EQUIVALENCE + +#include + +namespace zoo { + +template +auto operator==(const C1 &l, const C2 &r) +-> std::enable_if_t< + zoo::is_container_v and + zoo::is_container_v, + bool +> +{ + auto lb{cbegin(l)}, le{cend(l)}; + auto rb{cbegin(r)}, re{cend(r)}; + for(;;++lb, ++rb){ + if(lb == le) { return rb == re; } // termination at the same time + if(rb == re) { return false; } // r has fewer elements + if(not(*lb == *rb)) { return false; } + } + return true; +} + +template +auto weaklySame(const C1 &l, const C2 &r) +-> std::enable_if_t< + zoo::is_container_v and + zoo::is_container_v, + bool +> +{ + auto lb{cbegin(l)}, le{cend(l)}; + auto rb{cbegin(r)}, re{cend(r)}; + for(;;++lb, ++rb){ + if(lb == le) { return rb == re; } // termination at the same time + if(rb == re) { return false; } // r has fewer elements + if(*lb < *rb || *rb < *lb) { return false; } + } + return true; +} + +} + +#endif diff --git a/test/AlignedStorage.cpp b/test/AlignedStorage.cpp index 11667f82..765f1b62 100644 --- a/test/AlignedStorage.cpp +++ b/test/AlignedStorage.cpp @@ -35,14 +35,14 @@ static_assert(alignof(A1::space_) == 1, "specific alignment not honored"); template std::false_type MayCallBuild(...); template -auto MayCallBuild(int, As &&...as) -> +auto MayCallBuild(As &&...as) -> decltype( std::declval().template build(std::forward(as)...), std::true_type{} ); template constexpr auto MayCallBuild_(As &&...as) { - return decltype(MayCallBuild(0, std::forward(as)...))::value; + return decltype(MayCallBuild(std::forward(as)...))::value; } static_assert( @@ -160,6 +160,7 @@ struct Typical { Typical() = default; Typical(const Typical &) = default; Typical(Typical &&) = default; + Typical(long s): state_(s) {} long state_; }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 244de302..0f1e47d3 100644 --- a/test/CMakeLists.txt +++ 
b/test/CMakeLists.txt @@ -1,12 +1,6 @@ cmake_minimum_required (VERSION 3.8) set(CMAKE_BUILD_TYPE Debug) -set(CMAKE_CXX_STANDARD 17) - -set(CMAKE_CXX_FLAGS_UBSAN "-fsanitize=undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls -O1 -g") -# set(CMAKE_EXE_LINKER_FLAGS_UBSAN -fsanitize=undefined) -# set(CMAKE_SHARED_LINKER_FLAGS_UBSAN -fsanitize=undefined) -set(CMAKE_CXX_FLAGS_ASAN "-fsanitize=address -fno-omit-frame-pointer") project(ZooTest VERSION 1.0 LANGUAGES CXX) @@ -16,80 +10,146 @@ configure_file ( "${PROJECT_BINARY_DIR}/ZooTestConfig.h" ) -add_subdirectory(third_party EXCLUDE_FROM_ALL) -enable_testing() - -include_directories( - "${PROJECT_BINARY_DIR}" - ./inc - ../inc - ${TEST_THIRD_PARTY_INCLUDE_PATH} -) +if(MSVC) + # MSVC specific configuration + # Avoids multiple problems + + # Due to multiple SFINAE bugs, forced upgrade to C++ 20 to use "concepts" + set(CMAKE_CXX_STANDARD 20) + + # Set the policy to use the new behavior + if(POLICY CMP0067) + cmake_policy(SET CMP0067 NEW) + message(STATUS "Set policy") + endif() + + include_directories( + "${PROJECT_BINARY_DIR}" + ./inc + ../inc + ./third_party/Catch2/single_include + ) + + set( + ZOO_TEST_SOURCES + catch_main.cpp + any.cpp AlignedStorage.cpp AnyCallable.cpp AnyCallSignature.cpp + AnyExtended.cpp GenericPolicy.cpp FunctionPolicy.cpp + swar/BasicOperations.cpp + # map/BasicMap.cpp + # map/RobinHood.test.cpp + # map/RobinHood.hybrid.test.cpp + algorithm/cfs.cpp + algorithm/quicksort.cpp + egyptian.cpp var.cpp variant.cpp CopyMoveAbilities.cpp + ) + + # Simple executable for MSVC + add_executable(ZooTest ${ZOO_TEST_SOURCES}) + target_compile_definitions(ZooTest PUBLIC AVOID_MSVC_BUG_SFINAE_INVALID_EXPLICIT_TEMPLATE_ARGUMENT) + + try_compile( + MSVC_BUG_BUILD_RESULT + ${CMAKE_BINARY_DIR}/temporary + SOURCES + ${CMAKE_SOURCE_DIR}/../compiler_bugs/msvc/sfinae.cpp + CMAKE_FLAGS "-DCMAKE_CXX_STANDARD=17" + COMPILE_DEFINITIONS + -DTRIGGER_MSVC_SFINAE_BUG + OUTPUT_VARIABLE RESULT + ) + 
if(MSVC_BUG_BUILD_RESULT) + MESSAGE(FATAL_ERROR "Compilation of MSVC bug file succeeded, was the compiler bug fixed? ${RESULT}") + else() + MESSAGE(STATUS "File with MSVC bug build failed as expected ${RESULT}") + endif() + try_compile( + MSVC_OK_BUILD_RESULT + ${CMAKE_BINARY_DIR}/temporary + SOURCES + ${CMAKE_SOURCE_DIR}/../compiler_bugs/msvc/sfinae.cpp + CMAKE_FLAGS -DCMAKE_CXX_STANDARD=20 + OUTPUT_VARIABLE MSVC_OK_OUTPUT + ) + if(MSVC_OK_BUILD_RESULT) + MESSAGE(STATUS "CMAKE try_compile succeeded as expected: ${MSVC_OK_OUTPUT}") + else() + MESSAGE(FATAL_ERROR "CMAKE try_compile of non-problematic file did not succeed: ${MSVC_OK_OUTPUT}") + endif() + +else() + # Non-MSVC specific configuration (original content) + set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_FLAGS_UBSAN "-fsanitize=undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls -O1 -g") + set(CMAKE_CXX_FLAGS_ASAN "-fsanitize=address -fno-omit-frame-pointer") + + add_subdirectory(third_party EXCLUDE_FROM_ALL) + enable_testing() + + include_directories( + "${PROJECT_BINARY_DIR}" + ./inc + ../inc + ${TEST_THIRD_PARTY_INCLUDE_PATH} + ) + + if("UBSAN" STREQUAL "${CMAKE_BUILD_TYPE}") + set(ADDITIONAL_SOURCES "ubsan.cpp") + endif() + + message(STATUS "Additional sources:" ${ADDITIONAL_SOURCES}) + + set(CURRENT_EXECUTABLE "zooTest${CMAKE_BUILD_TYPE}") + + set(CATCH2_MAIN_SOURCE catch_main.cpp) + set( + TYPE_ERASURE_SOURCES + any.cpp AlignedStorage.cpp AnyCallable.cpp AnyCallSignature.cpp + AnyExtended.cpp GenericPolicy.cpp FunctionPolicy.cpp + ) + set( + SWAR_SOURCES + swar/BasicOperations.cpp + ) + set( + MAP_SOURCES + map/BasicMap.cpp map/RobinHood.test.cpp map/RobinHood.hybrid.test.cpp + ) + set(ALGORITHM_SOURCES algorithm/cfs.cpp algorithm/quicksort.cpp) + set(MISCELLANEA_SOURCES egyptian.cpp var.cpp variant.cpp CopyMoveAbilities.cpp) + set( + ZOO_TEST_SOURCES + ${CATCH2_MAIN_SOURCE} ${TYPE_ERASURE_SOURCES} ${ALGORITHM_SOURCES} + ${SWAR_SOURCES} + ${MISCELLANEA_SOURCES} + ) + + add_library(Catch2Main 
OBJECT ${CATCH2_MAIN_SOURCE}) + add_library(AlgorithmTest OBJECT ${ALGORITHM_SOURCES}) + add_library(TypeErasureTest OBJECT ${TYPE_ERASURE_SOURCES}) + add_library(SWARTest OBJECT ${SWAR_SOURCES}) + add_library(MapTest OBJECT ${MAP_SOURCES}) + add_library(Uncategorized OBJECT ${MISCELLANEA_SOURCES}) + + add_executable( + ${CURRENT_EXECUTABLE} ${ADDITIONAL_SOURCES} + ) + target_link_libraries(${CURRENT_EXECUTABLE} Catch2Main AlgorithmTest TypeErasureTest SWARTest Uncategorized) + + add_executable(algorithm2 $) + target_link_libraries(algorithm2 AlgorithmTest) + add_executable(type_erasure $) + target_link_libraries(type_erasure TypeErasureTest) + add_executable(swar $) + target_link_libraries(swar SWARTest) + add_executable(mapt $) + target_link_libraries(mapt MapTest) + + # CMake build: library tests + set(TEST_APP_NAME "${CURRENT_EXECUTABLE}Test") + add_executable(${TEST_APP_NAME} ${ZOO_TEST_SOURCES}) + include_directories(${TEST_THIRD_PARTY_INCLUDE_PATH}) + enable_testing() + ParseAndAddCatchTests(${TEST_APP_NAME}) -if("UBSAN" STREQUAL "${CMAKE_BUILD_TYPE}") - set(ADDITIONAL_SOURCES "ubsan.cpp") endif() - -message(STATUS "Additional sources:" ${ADDITIONAL_SOURCES}) - -set(CURRENT_EXECUTABLE "zooTest${CMAKE_BUILD_TYPE}") - -set(CATCH2_MAIN_SOURCE catch_main.cpp) -set( - TYPE_ERASURE_SOURCES - any.cpp AlignedStorage.cpp AnyCallable.cpp AnyCallSignature.cpp - AnyExtended.cpp GenericPolicy.cpp FunctionPolicy.cpp -) -set( - SWAR_SOURCES - swar/BasicOperations.cpp -) -set( - MAP_SOURCES - map/BasicMap.cpp map/RobinHood.test.cpp map/RobinHood.hybrid.test.cpp -) -set(ALGORITHM_SOURCES algorithm/cfs.cpp algorithm/quicksort.cpp) -set(MISCELLANEA_SOURCES egyptian.cpp var.cpp variant.cpp CopyMoveAbilities.cpp) -set( - ZOO_TEST_SOURCES - ${CATCH2_MAIN_SOURCE} ${TYPE_ERASURE_SOURCES} ${ALGORITHM_SOURCES} - ${SWAR_SOURCES} - ${MISCELLANEA_SOURCES} -) - -add_library(Catch2Main OBJECT ${CATCH2_MAIN_SOURCE}) -add_library(AlgorithmTest OBJECT ${ALGORITHM_SOURCES}) 
-add_library(TypeErasureTest OBJECT ${TYPE_ERASURE_SOURCES}) -add_library(SWARTest OBJECT ${SWAR_SOURCES}) -add_library(MapTest OBJECT ${MAP_SOURCES}) -add_library(Uncategorized OBJECT ${MISCELLANEA_SOURCES}) - -add_executable( - ${CURRENT_EXECUTABLE} ${ADDITIONAL_SOURCES} -) -target_link_libraries(${CURRENT_EXECUTABLE} Catch2Main AlgorithmTest TypeErasureTest SWARTest Uncategorized) - -add_executable(algorithm2 $) -target_link_libraries(algorithm2 AlgorithmTest) -add_executable(type_erasure $) -target_link_libraries(type_erasure TypeErasureTest) -add_executable(swar $) -target_link_libraries(swar SWARTest) -add_executable(mapt $) -target_link_libraries(mapt MapTest) - -# CMake build : library tests - -#configure variables -set(TEST_APP_NAME "${CURRENT_EXECUTABLE}Test") - -add_executable(${TEST_APP_NAME} ${ZOO_TEST_SOURCES}) - -#set includes -include_directories(${TEST_THIRD_PARTY_INCLUDE_PATH}) - -# Turn on CMake testing capabilities -enable_testing() - -#parse catch tests -ParseAndAddCatchTests(${TEST_APP_NAME}) diff --git a/test/FunctionPolicy.cpp b/test/FunctionPolicy.cpp index 7438cdf1..74fbd67c 100644 --- a/test/FunctionPolicy.cpp +++ b/test/FunctionPolicy.cpp @@ -173,6 +173,7 @@ TEST_CASE( F fun; REQUIRE(!fun); auto anythingTriviallyDestructible = [](){}; + static_assert(std::is_trivially_destructible_v); fun = anythingTriviallyDestructible; CHECK(fun); REQUIRE(!fun.isDefault()); diff --git a/test/inc/zoo/var.h b/test/inc/zoo/var.h index f817031c..3a4bd8cf 100644 --- a/test/inc/zoo/var.h +++ b/test/inc/zoo/var.h @@ -171,7 +171,7 @@ struct Var: visit( [&](auto &&m) { using Source = meta::remove_cr_t; - meta::move_in_place(as(), std::move(m)); + meta::move_in_place(this->as(), std::move(m)); }, v ); diff --git a/test/inc/zoo/variant.h b/test/inc/zoo/variant.h index ed30f9cd..893f73ab 100644 --- a/test/inc/zoo/variant.h +++ b/test/inc/zoo/variant.h @@ -90,7 +90,7 @@ struct Variant { visit( [&](const auto &c) { using Source = meta::remove_cr_t; - 
meta::copy_in_place(as(), c); + meta::copy_in_place(this->as(), c); }, v ); @@ -100,7 +100,7 @@ struct Variant { visit( [&](auto &&m) { using Source = meta::remove_cr_t; - meta::move_in_place(as(), std::move(m)); + meta::move_in_place(this->as(), std::move(m)); }, v ); diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 9c6e8d42..b4480e54 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -159,12 +159,12 @@ static_assert(0x0E0E'0E0E == u32(broadcast<8>(SWAR<8, u32>(0x0000'000E)))); static_assert(0x6B6B'6B6B == u32(broadcast<8>(SWAR<8, u32>(0x0000'006B)))); static_assert(0x0808'0808'0808'0808ull == u64(broadcast<8>(SWAR<8, u64>(0x0000'0000'0000'0008ull)))); -static_assert(2 == lsbIndex(1<<1)); -static_assert(4 == lsbIndex(1<<3)); -static_assert(6 == lsbIndex(1<<5)); -static_assert(9 == lsbIndex(1<<8)); -static_assert(18 == lsbIndex(1<<17)); -static_assert(31 == lsbIndex(1<<30)); +static_assert(1 == lsbIndex(1<<1)); +static_assert(3 == lsbIndex(1<<3)); +static_assert(5 == lsbIndex(1<<5)); +static_assert(8 == lsbIndex(1<<8)); +static_assert(17 == lsbIndex(1<<17)); +static_assert(30 == lsbIndex(1<<30)); /* These tests were not catching errors known to have been present From c25704d242b09bae98244573da2d6ae8cfccdf87 Mon Sep 17 00:00:00 2001 From: Eddie Date: Wed, 20 Dec 2023 02:33:07 -0800 Subject: [PATCH 3/5] Minor corrections, corrects end of line (dos2unix) --- compiler_bugs/msvc/sfinae.cpp | 78 +- inc/zoo/AlignedStorage.h | 18 +- inc/zoo/algorithm/cfs.h | 5 +- inc/zoo/algorithm/quicksort.h | 261 +++--- inc/zoo/map/RobinHood.h | 1272 ++++++++++++++-------------- inc/zoo/map/RobinHoodAlt.h | 388 ++++----- inc/zoo/meta/log.h | 2 + inc/zoo/swar/SWAR.h | 2 + inc/zoo/traits/is_container.h | 5 + inc/zoo/util/container_insertion.h | 86 +- inc/zoo/util/range_equivalence.h | 92 +- test/CMakeLists.txt | 2 +- 12 files changed, 1114 insertions(+), 1097 deletions(-) diff --git a/compiler_bugs/msvc/sfinae.cpp 
b/compiler_bugs/msvc/sfinae.cpp index 5d981b19..41fa3d63 100644 --- a/compiler_bugs/msvc/sfinae.cpp +++ b/compiler_bugs/msvc/sfinae.cpp @@ -1,38 +1,40 @@ -#include -#include -#include - -template -constexpr auto Constructible_v = std::is_constructible_v; - -template -struct ATemplate { - Q space_; - - template - constexpr static auto FitsInSpace_v = sizeof(T) <= sizeof(Q); - - template - std::enable_if_t< - FitsInSpace_v - && - #ifdef TRIGGER_MSVC_SFINAE_BUG - Constructible_v - #else - std::is_constructible_v - #endif - , - T * - > - sfinaeFunction(Args &&...args) { - T *rv = new(static_cast(&space_)) T(std::forward(args)...); - return rv; - } - -}; - -auto triggerError(ATemplate &m) { - return m.sfinaeFunction(nullptr); -} - -int main(int, const char *[]) { return 0; } +#include +#include +#include + +template +constexpr auto Constructible_v = std::is_constructible_v; + +template +struct ATemplate { + alignas(alignof(Q)) char space_[sizeof(Q)]; + + template + constexpr static auto FitsInSpace_v = sizeof(T) <= sizeof(Q); + + template + std::enable_if_t< + FitsInSpace_v + && + #ifndef TRIGGER_MSVC_SFINAE_BUG + bool( + #endif + Constructible_v + #ifndef TRIGGER_MSVC_SFINAE_BUG + ) + #endif + , + T * + > + sfinaeFunction(Args &&...args) { + T *rv = new(static_cast(space_)) T(std::forward(args)...); + return rv; + } + +}; + +auto triggerError(ATemplate &m) { + return m.sfinaeFunction(nullptr); +} + +int main(int, const char *[]) { return 0; } diff --git a/inc/zoo/AlignedStorage.h b/inc/zoo/AlignedStorage.h index 7b3a2f31..ee1a0250 100644 --- a/inc/zoo/AlignedStorage.h +++ b/inc/zoo/AlignedStorage.h @@ -39,13 +39,7 @@ struct Constructible: {}; template -constexpr -#ifndef _MSC_VER - auto -#else - bool -#endif -Constructible_v = Constructible::value; +constexpr auto Constructible_v = Constructible::value; template void destroy(T &t) noexcept { t.~T(); } @@ -131,7 +125,15 @@ struct AlignedStorage { impl::build(*this->as(), std::forward(args)...) 
std::enable_if_t< SuitableType() && - impl::Constructible_v, + #ifdef _MSC_VER + bool( + #endif + impl::Constructible_v + #ifdef _MSC_VER + ) + #endif + , + T * > build(Args &&...args) noexcept(noexcept(PP_ZOO_BUILD_EXPRESSION)) diff --git a/inc/zoo/algorithm/cfs.h b/inc/zoo/algorithm/cfs.h index 9fd92583..87ae04aa 100644 --- a/inc/zoo/algorithm/cfs.h +++ b/inc/zoo/algorithm/cfs.h @@ -10,7 +10,10 @@ // because of std::decay needed to decay deferenced iterator #include -#include +#ifdef _MSC_VER +#include +#endif + #endif namespace zoo { diff --git a/inc/zoo/algorithm/quicksort.h b/inc/zoo/algorithm/quicksort.h index 234851b0..5e438812 100644 --- a/inc/zoo/algorithm/quicksort.h +++ b/inc/zoo/algorithm/quicksort.h @@ -1,130 +1,131 @@ -#ifndef ZOO_QUICKSORT -#define ZOO_QUICKSORT - -#include // for moveRotate - -#include - -#include // for temporary storage -#include - -namespace zoo { - -template -struct ImplicitPivotResult { - FI pivot_; - long bias_; -}; - -/// \tparam FI is a forward iterator -/// \pre b != e -template -auto implicitPivotPartition(FI b, FI e, Comparison cmp) -> - ImplicitPivotResult -{ - auto bias = 0; - auto pivot = b++; - /*if(e == b) { return pivot; } - if(cmp(*b, *pivot)) { - auto third = next(b); - if(third == e) { - moveRotation(*pivot, *b); - return pivot; - } - }*/ - for(; b != e; ++b) { - // invariant: ..., L0, P == *pivot, G0, G1, ... Gn, *b - // where Lx means lower-than-pivot and Gx higher-equal-to-pivot - if(!cmp(*b, *pivot)) { - ++bias; - continue; - } - --bias; - // ..., L1, P == *pivot, G0, G1, ..., Gn, L0 == *b, X0, ... - // The pivot is greater than the element: - // insert *b into the lower partition: - // 1. *b goes into the pivot position - // 2. the pivot increases by one - // 3. the element at pivot + 1, the new pivot, must be greater - // than or equal to any Lx, the value of *pivot satisfies this - // property. 
- // These requirements can be satisfied by rotating the elements - // at positions (pivot, b, pivot + 1) - // ..., L1, L0, P == *pivot, G1, ..., Gn, G0 == *b, X0, ... - auto oldPivot = pivot++; - if(b == pivot) { - moveRotation(*oldPivot, *pivot); - } else { - moveRotation(*oldPivot, *b, *pivot); - } - // tmp = *b, *b = *1, *1 = *0, *0 = tmp - /*moveRotation(*oldPivot, *b); - moveRotation(*b, *pivot);*/ - } - return {pivot, bias}; -} - -template -void quicksort(I begin, I end, Comparison cmp = Comparison{}) { - if(begin == end) { return; } - - constexpr static const auto FrameCount = 64; - struct Frame { - I b, e; - }; - std::array stack; - auto index = 0; - - for(;;) { - auto result = implicitPivotPartition(begin, end, cmp); - auto pivot = result.pivot_; - auto bias = result.bias_; - auto higherBegin = next(pivot); - if(higherBegin == end) { // no higher-recursion needed - if(begin != pivot) { - end = pivot; // then just do lower recursion - continue; // without leaving a frame - } - // there is no lower-recursion either - if(!index) { return; } - auto &frame = stack[--index]; - begin = frame.b; - end = frame.e; - continue; - } - // higher recursion needed - if(begin == pivot) { // no lower recursion needed - begin = higherBegin; // becomes the higher recursion - continue; - } - // both lower and higher recursions needed, make frame for the larger - // partition: - // The smaller partition is less than or equal to half the elements: - // size(smaller) <= size/2 => depth of recursion <= log2(N) - if(0 < bias) { // lower partition is smaller - stack[index] = { higherBegin, end }; - end = pivot; - } else { // higher partition is smaller - stack[index] = { begin, pivot }; - begin = higherBegin; - } - if(FrameCount <= ++index) { - throw std::runtime_error("quicksort stack exhausted"); - } - } -} - -template -bool is_sorted(FI begin, FI end, Comparison cmp = Comparison{}) { - if(begin == end) { return true; } - auto old = begin++; - while(begin != end) { - if(not 
cmp(*old, *begin)) { return false; } - old = begin++; - } - return true; -} - -} - -#endif +#ifndef ZOO_QUICKSORT +#define ZOO_QUICKSORT + +#include // for moveRotate + +#include + +#include // for temporary storage +#include +#include + +namespace zoo { + +template +struct ImplicitPivotResult { + FI pivot_; + long bias_; +}; + +/// \tparam FI is a forward iterator +/// \pre b != e +template +auto implicitPivotPartition(FI b, FI e, Comparison cmp) -> + ImplicitPivotResult +{ + auto bias = 0; + auto pivot = b++; + /*if(e == b) { return pivot; } + if(cmp(*b, *pivot)) { + auto third = next(b); + if(third == e) { + moveRotation(*pivot, *b); + return pivot; + } + }*/ + for(; b != e; ++b) { + // invariant: ..., L0, P == *pivot, G0, G1, ... Gn, *b + // where Lx means lower-than-pivot and Gx higher-equal-to-pivot + if(!cmp(*b, *pivot)) { + ++bias; + continue; + } + --bias; + // ..., L1, P == *pivot, G0, G1, ..., Gn, L0 == *b, X0, ... + // The pivot is greater than the element: + // insert *b into the lower partition: + // 1. *b goes into the pivot position + // 2. the pivot increases by one + // 3. the element at pivot + 1, the new pivot, must be greater + // than or equal to any Lx, the value of *pivot satisfies this + // property. + // These requirements can be satisfied by rotating the elements + // at positions (pivot, b, pivot + 1) + // ..., L1, L0, P == *pivot, G1, ..., Gn, G0 == *b, X0, ... 
+ auto oldPivot = pivot++; + if(b == pivot) { + moveRotation(*oldPivot, *pivot); + } else { + moveRotation(*oldPivot, *b, *pivot); + } + // tmp = *b, *b = *1, *1 = *0, *0 = tmp + /*moveRotation(*oldPivot, *b); + moveRotation(*b, *pivot);*/ + } + return {pivot, bias}; +} + +template +void quicksort(I begin, I end, Comparison cmp = Comparison{}) { + if(begin == end) { return; } + + constexpr static const auto FrameCount = 64; + struct Frame { + I b, e; + }; + std::array stack; + auto index = 0; + + for(;;) { + auto result = implicitPivotPartition(begin, end, cmp); + auto pivot = result.pivot_; + auto bias = result.bias_; + auto higherBegin = next(pivot); + if(higherBegin == end) { // no higher-recursion needed + if(begin != pivot) { + end = pivot; // then just do lower recursion + continue; // without leaving a frame + } + // there is no lower-recursion either + if(!index) { return; } + auto &frame = stack[--index]; + begin = frame.b; + end = frame.e; + continue; + } + // higher recursion needed + if(begin == pivot) { // no lower recursion needed + begin = higherBegin; // becomes the higher recursion + continue; + } + // both lower and higher recursions needed, make frame for the larger + // partition: + // The smaller partition is less than or equal to half the elements: + // size(smaller) <= size/2 => depth of recursion <= log2(N) + if(0 < bias) { // lower partition is smaller + stack[index] = { higherBegin, end }; + end = pivot; + } else { // higher partition is smaller + stack[index] = { begin, pivot }; + begin = higherBegin; + } + if(FrameCount <= ++index) { + throw std::runtime_error("quicksort stack exhausted"); + } + } +} + +template +bool is_sorted(FI begin, FI end, Comparison cmp = Comparison{}) { + if(begin == end) { return true; } + auto old = begin++; + while(begin != end) { + if(not cmp(*old, *begin)) { return false; } + old = begin++; + } + return true; +} + +} + +#endif diff --git a/inc/zoo/map/RobinHood.h b/inc/zoo/map/RobinHood.h index 
f25c805e..a58235a1 100644 --- a/inc/zoo/map/RobinHood.h +++ b/inc/zoo/map/RobinHood.h @@ -1,636 +1,636 @@ -#ifndef ZOO_ROBINHOOD_H -#define ZOO_ROBINHOOD_H - -#include "zoo/map/RobinHoodUtil.h" -#include "zoo/AlignedStorage.h" - -#ifndef ZOO_CONFIG_DEEP_ASSERTIONS - #define ZOO_CONFIG_DEEP_ASSERTIONS 0 -#endif - -#include -#include -#include -#include - -#if ZOO_CONFIG_DEEP_ASSERTIONS - #include -#endif - -namespace zoo { -namespace rh { - -struct RobinHoodException: std::runtime_error { - using std::runtime_error::runtime_error; -}; - -struct MaximumProbeSequenceLengthExceeded: RobinHoodException { - using RobinHoodException::RobinHoodException; -}; -struct RelocationStackExhausted: RobinHoodException { - using RobinHoodException::RobinHoodException; -}; - -template -struct RH_Backend { - using Metadata = impl::Metadata; - - constexpr static inline auto Width = Metadata::NBits; - Metadata *md_; - - /*! \brief SWAR check for a potential match - The invariant in Robin Hood is that the element being looked for, the "needle", is "richer" - than the elements already present, the "haystack". - "Richer" means that the PSL is smaller. - A PSL of 0 can only happen in the haystack, to indicate the slot is empty, this is "richest". - The first time the needle has a PSL greater than the haystacks' means the matching will fail, - because the hypothetical prior insertion would have "stolen" that slot. - If there is an equal, it would start a sequence of potential matches. To determine an actual match: - 1. A cheap SWAR check of hoisted hashes - 2. If there are still potential matches (now also the hoisted hashes), fall back to non-SWAR, - or iterative and expensive "deep equality" test for each potential match, outside of this function - - The above makes it very important to detect the first case in which the PSL is greater equal to the needle. - We call this the "deadline". 
- Because we assume the LITTLE ENDIAN byte ordering, the first element would be the least significant - non-false Boolean SWAR. - - Note about performance: - Every "early exit" faces a big justification hurdle, the proportion of cases - they intercept to be large enough that the branch prediction penalty of the entropy introduced is - overcompensated. - */ - - /// Boolean SWAR true in the first element/lane of the needle strictly poorer than its corresponding - /// haystack - constexpr static auto - firstInvariantBreakage(Metadata needle, Metadata haystack) { - auto nPSL = needle.PSLs(); - auto hPSL = haystack.PSLs(); - auto theyKeepInvariant = - greaterEqual_MSB_off(hPSL, nPSL); - // BTW, the reason to have encoded the PSLs in the least - // significant bits is to be able to call the cheaper version - // _MSB_off here - - auto theyBreakInvariant = not theyKeepInvariant; - // because we make the assumption of LITTLE ENDIAN byte ordering, - // we're interested in the elements up to the first haystack-richer - auto firstBreakage = swar::isolateLSB(theyBreakInvariant.value()); - return firstBreakage; - } - - // This should be more generic: if PSLs breach a broadcast PSL, saturate - // This should be more generic: if a SWAR breaches a SWAR condition, saturate. - constexpr static auto - needlePSLSaturation(Metadata nPSL) { - // create a saturator for max PSL. If any needle saturates, all later PSLs will be set to saturated. - constexpr auto saturatedPSL = broadcast(Metadata(Metadata::MaxPSL)); - //auto nPSL = needle.PSLs(); - auto saturation = greaterEqual_MSB_off(nPSL, saturatedPSL); - auto invertSatMask = ((swar::isolateLSB(saturation.value()) - 1) ); - auto satMask = (~(swar::isolateLSB(saturation.value()) - 1) ); - //if (not bool(saturation)) return std::tuple{nPSL, false}; - // Least sig lane is saturated, all more sig must be made saturated. 
- auto needlePSLsToSaturate = Metadata{satMask & saturatedPSL.value()}; - // addition might have overflown nPSL before entering function - return std::tuple{Metadata{nPSL.PSLs() | needlePSLsToSaturate}, bool(saturation)}; // saturated at any point, last swar to check. - } - - constexpr static impl::MatchResult - potentialMatches( - Metadata needle, Metadata haystack - ) noexcept { - // We need to determine if there are potential matches to consider - auto sames = equals(needle, haystack); - auto deadline = firstInvariantBreakage(needle, haystack); - // In a valid haystack, the PSLs can grow at most by 1 per entry. - // If a PSL is richer than the needle in any place, because the - // needle, by construction, always grows at least by 1 per entry, - // then the PSL won't be equal again. - // There is no need to filter potential matches using the deadline - // as previous versions of the code did. - return { - deadline, - sames - }; - } - - /*! \brief converts the given starting PSL and reduced hash code into a SWAR-ready needle - - The given needle would have a PSL as the starting (PSL + 1) in the first slot, the "+ 1" is because - the count starts at 1, in this way, a haystack PSL of 0 is always "richer" - */ - constexpr static auto makeNeedle(U startingPSL, U hoistedHash) { - constexpr auto Ones = meta::BitmaskMaker::value; - constexpr auto Progression = Ones * Ones; - auto core = startingPSL | (hoistedHash << PSL_Bits); - auto broadcasted = broadcast(Metadata(core)); - auto startingPSLmadePotentialPSLs = Metadata(Progression) + broadcasted; - return startingPSLmadePotentialPSLs; - } - - template - inline constexpr - std::tuple - findMisaligned_assumesSkarupkeTail( - U hoistedHash, int homeIndex, const KeyComparer &kc - ) const noexcept __attribute__((always_inline)); -}; - -template -template -inline constexpr -std::tuple::Metadata> -RH_Backend::findMisaligned_assumesSkarupkeTail( - U hoistedHash, int homeIndex, const KeyComparer &kc - ) const noexcept { - auto 
misalignment = homeIndex % Metadata::NSlots; - auto baseIndex = homeIndex / Metadata::NSlots; - auto base = this->md_ + baseIndex; - - constexpr auto Ones = meta::BitmaskMaker::value; - constexpr auto Progression = Metadata{Ones * Ones}; - constexpr auto AllNSlots = - Metadata{meta::BitmaskMaker::value}; - MisalignedGenerator_Dynamic p(base, int(Metadata::NBits * misalignment)); - auto index = homeIndex; - auto needle = makeNeedle(0, hoistedHash); - - for(;;) { - auto hay = *p; - auto result = potentialMatches(needle, hay); - auto positives = result.potentialMatches; - while(positives.value()) { - auto matchSubIndex = positives.lsbIndex(); - auto matchIndex = index + matchSubIndex; - // Possible specialist optimization to kick off all possible - // matches to an array (like chaining evict) and check them - // later. - if(kc(matchIndex)) { - return std::tuple(matchIndex, U(0), Metadata(0)); - } - positives = Metadata{swar::clearLSB(positives.value())}; - } - auto deadline = result.deadline; - if(deadline) { - // The deadline is relative to the misalignment. - // To build an absolute deadline, there are two cases: - // the bit falls in the first SWAR or the second SWAR. - // The same applies for needle. - // in general, for example a misaglignment of 6: - // { . | . | . | . | . | . | . | .}{ . | . | . | . | . | . | . | . } - // { a | b | c | d | e | f | g | h } - // shift left (to higher bits) by the misalignment - // { 0 | 0 | 0 | 0 | 0 | 0 | a | b } - // shift right (to lower bits) by NSlots - misalignment: - // { c | d | e | f | g | h | 0 | 0 } - // One might hope undefined behavior might be reasonable (zero - // result, unchanged result), but ARM proves that undefined - // behavior is indeed undefined, so we do our right shift as a - // double: shift by n-1, then shift by 1. 
- auto mdd = Metadata{deadline}; - auto toAbsolute = [](auto v, auto ma) { - auto shiftedLeft = v.shiftLanesLeft(ma); - auto shiftedRight = - v.shiftLanesRight(Metadata::NSlots - ma - 1).shiftLanesRight(1); - return Metadata{shiftedLeft | shiftedRight}; - }; - auto position = index + Metadata{deadline}.lsbIndex(); - return - std::tuple( - position, - toAbsolute(mdd, misalignment).value(), - toAbsolute(needle, misalignment) - ); - } - // Skarupke's tail allows us to not have to worry about the end - // of the metadata - ++p; - index += Metadata::NSlots; - needle = needle + AllNSlots; - } - } - -template -struct KeyValuePairWrapper { - using type = std::pair; - AlignedStorageFor pair_; - - template - void build(Initializers &&...izers) - noexcept(noexcept(pair_.template build(std::forward(izers)...))) - { - pair_.template build(std::forward(izers)...); - } - - template - KeyValuePairWrapper &operator=(RHS &&rhs) - noexcept(noexcept(std::declval() = std::forward(rhs))) - { - *pair_.template as() = std::forward(rhs); - return *this; - } - - void destroy() noexcept { pair_.template destroy(); } - - auto valuePtr() const noexcept { - auto deconstified = const_cast(this); - return deconstified->pair_.template as(); - } - - auto &value() noexcept { return *valuePtr(); } - const auto &value() const noexcept { return const_cast(this)->value(); } -}; - -template< - typename K, - typename MV, - size_t RequestedSize_, - int PSL_Bits, int HashBits, - typename Hash = std::hash, - typename KE = std::equal_to, - typename U = std::uint64_t, - typename Scatter = FibonacciScatter, - typename RangeReduce = LemireReduce, - typename HashReduce = TopHashReducer -> -struct RH_Frontend_WithSkarupkeTail { - using Backend = RH_Backend; - using MD = typename Backend::Metadata; - - constexpr static inline auto RequestedSize = RequestedSize_; - constexpr static inline auto LongestEncodablePSL = (1 << PSL_Bits); - constexpr static inline auto WithTail = - RequestedSize + - LongestEncodablePSL // 
the Skarupke tail - ; - constexpr static inline auto SWARCount = - ( - WithTail + - MD::NSlots - 1 // to calculate the ceiling rounding - ) / MD::NSlots - ; - constexpr static inline auto SlotCount = SWARCount * MD::NSlots; - constexpr static inline auto HighestSafePSL = - LongestEncodablePSL - MD::NSlots - 1; - - using MetadataCollection = std::array; - using value_type = std::pair; - - MetadataCollection md_; - /// \todo Scatter key and value in a flavor - std::array, SlotCount> values_; - size_t elementCount_; - - RH_Frontend_WithSkarupkeTail() noexcept: elementCount_(0) { - for(auto &mde: md_) { mde = MD{0}; } - } - - template - void traverse(Callable &&c) const { - for(size_t swarIndex = 0; swarIndex < SWARCount; ++swarIndex) { - auto PSLs = md_[swarIndex].PSLs(); - auto occupied = booleans(PSLs); - while(occupied) { - auto intraIndex = occupied.lsbIndex(); - c(swarIndex, intraIndex); - occupied = occupied.clearLSB(); - } - } - } - - ~RH_Frontend_WithSkarupkeTail() { - traverse([thy=this](std::size_t sI, std::size_t intra) { - thy->values_[intra + sI * MD::NSlots].destroy(); - }); - } - - RH_Frontend_WithSkarupkeTail(const RH_Frontend_WithSkarupkeTail &model): - RH_Frontend_WithSkarupkeTail() - { - model.traverse([thy=this,other=&model](std::size_t sI, std::size_t intra) { - auto index = intra + sI * MD::NSlots; - thy->values_[index].build(other->values_[index].value()); - thy->md_[sI] = thy->md_[sI].blitElement(other->md_[sI], intra); - ++thy->elementCount_; - }); - } - - RH_Frontend_WithSkarupkeTail(RH_Frontend_WithSkarupkeTail &&donor) noexcept: - md_(donor.md_), elementCount_(donor.elementCount_) - { - traverse([thy=this, other=&donor](std::size_t sI, std::size_t intra) { - auto index = intra + sI * MD::NSlots; - thy->values_[index].build(std::move(other->values_[index].value())); - }); - } - - - auto findParameters(const K &k) const noexcept { - auto [hoisted, homeIndex] = - findBasicParameters< - K, RequestedSize, HashBits, U, - Hash, Scatter, 
RangeReduce, HashReduce - >(k); - return - std::tuple{ - hoisted, - homeIndex, - [thy = this, &k](size_t ndx) noexcept { - return KE{}(thy->values_[ndx].value().first, k); - } - }; - } - - template - auto insert(ValuteTypeCompatible &&val) { - auto &k = val.first; - auto &mv = val.second; - auto [hoistedT, homeIndexT, kc] = findParameters(k); - auto hoisted = hoistedT; - auto homeIndex = homeIndexT; - auto thy = const_cast(this); - Backend be{thy->md_.data()}; - auto [iT, deadlineT, needleT] = - be.findMisaligned_assumesSkarupkeTail(hoisted, homeIndex, kc); - auto index = iT; - if(HighestSafePSL < index - homeIndex) { - throw MaximumProbeSequenceLengthExceeded("Scanning for eviction, from finding"); - } - auto deadline = deadlineT; - if(!deadline) { return std::pair{iterator(values_.data() + index), false}; } - auto needle = needleT; - auto rv = - insertionEvictionChain( - index, deadline, needle, - std::forward(val) - ); - ++elementCount_; - return rv; - } - - // Do the chain of relocations - // From this point onward, the hashes don't matter except for the - // updates to the metadata, the relocations - template - auto insertionEvictionChain( - std::size_t index, - U deadline, - MD needle, - VTC &&val - ) { - auto &k = val.first; - auto &mv = val.second; - auto swarIndex = index / MD::Lanes; - auto intraIndex = index % MD::Lanes; - auto mdp = this->md_.data() + swarIndex; - - // Because we have not decided about strong versus basic exception - // safety guarantee, for the time being we will just put a very large - // number here. 
- constexpr auto MaxRelocations = 150000; - std::array relocations; - std::array newElements; - auto relocationsCount = 0; - auto elementToInsert = needle.at(intraIndex); - - // The very last element in the metadata will always have a psl of 0 - // this serves as a sentinel for insertions, the only place to make - // sure the table has not been exhausted is an eviction chain that - // ends in the sentinel - // Also, the encoding for the PSL may be exhausted - for(;;) { - // Loop invariant: - // deadline, index, swarIndex, intraIndex, elementToInsert correct - // mdp points to the haystack that gave the deadline - auto md = *mdp; - auto evictedPSL = md.PSLs().at(intraIndex); - if(0 == evictedPSL) { // end of eviction chain! - if(SlotCount - 1 <= index) { - throw MaximumProbeSequenceLengthExceeded("full table"); - } - if(0 == relocationsCount) { // direct build of a new value - values_[index].build( - std::piecewise_construct, - std::tuple(std::forward(val).first), - std::tuple(std::forward(val).second) - ); - *mdp = mdp->blitElement(intraIndex, elementToInsert); - return std::pair{iterator(values_.data() + index), true}; - } - // the last element is special because it is a - // move-construction, not a move-assignment - --relocationsCount; - auto fromIndex = relocations[relocationsCount]; - values_[index].build( - std::move(values_[fromIndex].value()) - ); - md_[swarIndex] = - md_[swarIndex].blitElement( - intraIndex, elementToInsert - ); - elementToInsert = newElements[relocationsCount]; - index = fromIndex; - swarIndex = index / MD::NSlots; - intraIndex = index % MD::NSlots; - // do the pair relocations - while(relocationsCount--) { - fromIndex = relocations[relocationsCount]; - values_[index].value() = - std::move(values_[fromIndex].value()); - md_[swarIndex] = - md_[swarIndex].blitElement(intraIndex, elementToInsert); - elementToInsert = newElements[relocationsCount]; - index = fromIndex; - swarIndex = index / MD::NSlots; - intraIndex = index % MD::NSlots; - } - 
values_[index].value() = std::forward(val); - md_[swarIndex] = - md_[swarIndex].blitElement(intraIndex, elementToInsert); - return std::pair{iterator(values_.data() + index), true}; - } - if(HighestSafePSL < evictedPSL) { - throw MaximumProbeSequenceLengthExceeded("Encoding insertion"); - } - - // evict the "deadline" element: - // first, insert the element in its place (it "stole") - // find the place for the evicted: when Robin Hood breaks again. - - // for this search, we need to make a search needle with only - // the PSL being evicted. - - // "push" the index of the element that will be evicted - relocations[relocationsCount] = index; - // we have a place for the element being inserted, at this index - newElements[relocationsCount++] = elementToInsert; - if(MaxRelocations <= relocationsCount) { - throw RelocationStackExhausted("Relocation Stack"); - } - - // now the insertion will be for the old metadata entry - elementToInsert = md.hashes().at(intraIndex); - - // now, where should the evicted element go to? - // assemble a new needle - - // Constants relevant for the rest - constexpr auto Ones = meta::BitmaskMaker::value; - // | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | - constexpr auto ProgressionFromOne = MD(Ones * Ones); - // | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | - constexpr auto ProgressionFromZero = - MD(ProgressionFromOne - MD(Ones)); - // | 0 | 1 | 2 | 3 | ... | 7 | - constexpr auto BroadcastSWAR_ElementCount = - MD(meta::BitmaskMaker::value); - // | 8 | 8 | 8 | 8 | ... | 8 | - constexpr auto SWARIterationAddendumBase = - ProgressionFromZero + BroadcastSWAR_ElementCount; - // | 8 | 9 | ... | 15 | - - auto broadcastedEvictedPSL = broadcast(MD(evictedPSL)); - auto evictedPSLWithProgressionFromZero = - broadcastedEvictedPSL + ProgressionFromZero; - // | ePSL+0 | ePSL+1 | ePSL+2 | ePSL+3 | ... 
| ePSL+7 | - auto needlePSLs = - evictedPSLWithProgressionFromZero.shiftLanesLeft(intraIndex); - // zeroes make the new needle - // "richer" in all elements lower than the deadline - // because of the progression starts with 0 - // the "deadline" element will have equal PSL, not - // "poorer". - // assuming the deadline happened in the index 2: - // needlePSLs = | 0 | 0 | ePSL | ePSL+1 | ... | ePSL+5 | - // find the place for the new needle, without checking the keys. - auto haystackPSLs = md.PSLs(); - // haystack < needle => !(haystack >= needle) - auto breaksRobinHood = - not greaterEqual_MSB_off(haystackPSLs, needlePSLs); - if(!bool(breaksRobinHood)) { - // no place for the evicted element found in this swar. - // increment the PSLs in the needle to check the next haystack - - // for the next swar, we will want (continuing the assumption - // of the deadline happening at index 2) - // old needle: - // | 0 | 0 | ePSL | ePSL+1 | ... | ePSL+5 | - // desired new needle PSLs: - // | ePSL+6 | ePSL+7 | ePSL+8 | ePSL+9 | ... | ePSL+13 | - // from evictedPSLWithProgressionFromZero, - // shift "right" NLanes - intraIndex (keep the last two lanes): - // | ePSL+6 | ePSL+7 | 0 | ... | 0 | - auto lowerPart = - evictedPSLWithProgressionFromZero. - shiftLanesRight(MD::Lanes - intraIndex - 1). - shiftLanesRight(1); - // the other part, of +8 onwards, is BroadcastElementCount, - // shifted: - // | 8 | 8 | 8 | 8 | ... | 8 | - // shifted two lanes: - // | 0 | 0 | 8 | 8 | ... | 8 | - // - auto topAdd = - BroadcastSWAR_ElementCount.shiftLanesLeft(intraIndex); - needlePSLs = needlePSLs + lowerPart + topAdd; - for(;;) { // hunt for the next deadline - ++swarIndex; - // should the maintenance of `index` be replaced - // with pointer arithmetic on mdp? 
- index += MD::NSlots; - ++mdp; - haystackPSLs = mdp->PSLs(); - breaksRobinHood = - not greaterEqual_MSB_off(haystackPSLs, needlePSLs); - if(breaksRobinHood) { break; } - evictedPSL += MD::NSlots; - if(HighestSafePSL < evictedPSL) { - throw MaximumProbeSequenceLengthExceeded("Scanning for eviction, insertion"); - } - needlePSLs = needlePSLs + BroadcastSWAR_ElementCount; - } - } - deadline = swar::isolateLSB(breaksRobinHood.value()); - intraIndex = breaksRobinHood.lsbIndex(); - index = swarIndex * MD::NSlots + intraIndex; - elementToInsert = elementToInsert | needlePSLs.at(intraIndex); - } - } - - struct const_iterator { - const KeyValuePairWrapper *p_; - - // note: ++ not yet implemented, we can't iterate ;-) - - const value_type &operator*() noexcept { return p_->value(); } - const value_type *operator->() noexcept { return &p_->value(); } - - bool operator==(const const_iterator &other) const noexcept { - return p_ == other.p_; - } - - bool operator!=(const const_iterator &other) const noexcept { - return p_ == other.p_; - } - - const_iterator(const KeyValuePairWrapper *p): p_(p) {} - const_iterator(const const_iterator &) = default; - }; - - struct iterator: const_iterator { - value_type *ncp() { return const_cast(&this->p_->value()); } - value_type &operator*() noexcept { return *ncp(); } - value_type *operator->() noexcept { return ncp(); } - using const_iterator::const_iterator; - }; - - const_iterator begin() const noexcept { return this->values_.data(); } - const_iterator end() const noexcept { - return this->values_.data() + this->values_.size(); - } - - inline iterator find(const K &k) noexcept __attribute__((always_inline)); - - const_iterator find(const K &k) const noexcept { - const_cast(this)->find(k); - } - - auto displacement(const_iterator from, const_iterator to) { - return to.p_ - from.p_; - } -}; - -template< - typename K, - typename MV, - size_t RequestedSize_, - int PSL_Bits, int HashBits, - typename Hash, - typename KE, - typename U, - 
typename Scatter, - typename RangeReduce, - typename HashReduce -> -auto -RH_Frontend_WithSkarupkeTail< - K, MV, RequestedSize_, PSL_Bits, HashBits, Hash, KE, U, Scatter, - RangeReduce, HashReduce ->::find(const K &k) noexcept -> iterator -{ - auto [hoisted, homeIndex, keyChecker] = findParameters(k); - Backend be{this->md_.data()}; - auto [index, deadline, dontcare] = - be.findMisaligned_assumesSkarupkeTail( - hoisted, homeIndex, keyChecker - ); - return deadline ? values_.end() : values_.data() + index; - } - -} // rh - -} // swar, zoo - -#endif +#ifndef ZOO_ROBINHOOD_H +#define ZOO_ROBINHOOD_H + +#include "zoo/map/RobinHoodUtil.h" +#include "zoo/AlignedStorage.h" + +#ifndef ZOO_CONFIG_DEEP_ASSERTIONS + #define ZOO_CONFIG_DEEP_ASSERTIONS 0 +#endif + +#include +#include +#include +#include + +#if ZOO_CONFIG_DEEP_ASSERTIONS + #include +#endif + +namespace zoo { +namespace rh { + +struct RobinHoodException: std::runtime_error { + using std::runtime_error::runtime_error; +}; + +struct MaximumProbeSequenceLengthExceeded: RobinHoodException { + using RobinHoodException::RobinHoodException; +}; +struct RelocationStackExhausted: RobinHoodException { + using RobinHoodException::RobinHoodException; +}; + +template +struct RH_Backend { + using Metadata = impl::Metadata; + + constexpr static inline auto Width = Metadata::NBits; + Metadata *md_; + + /*! \brief SWAR check for a potential match + The invariant in Robin Hood is that the element being looked for, the "needle", is "richer" + than the elements already present, the "haystack". + "Richer" means that the PSL is smaller. + A PSL of 0 can only happen in the haystack, to indicate the slot is empty, this is "richest". + The first time the needle has a PSL greater than the haystacks' means the matching will fail, + because the hypothetical prior insertion would have "stolen" that slot. + If there is an equal, it would start a sequence of potential matches. To determine an actual match: + 1. 
A cheap SWAR check of hoisted hashes + 2. If there are still potential matches (now also the hoisted hashes), fall back to non-SWAR, + or iterative and expensive "deep equality" test for each potential match, outside of this function + + The above makes it very important to detect the first case in which the PSL is greater equal to the needle. + We call this the "deadline". + Because we assume the LITTLE ENDIAN byte ordering, the first element would be the least significant + non-false Boolean SWAR. + + Note about performance: + Every "early exit" faces a big justification hurdle, the proportion of cases + they intercept to be large enough that the branch prediction penalty of the entropy introduced is + overcompensated. + */ + + /// Boolean SWAR true in the first element/lane of the needle strictly poorer than its corresponding + /// haystack + constexpr static auto + firstInvariantBreakage(Metadata needle, Metadata haystack) { + auto nPSL = needle.PSLs(); + auto hPSL = haystack.PSLs(); + auto theyKeepInvariant = + greaterEqual_MSB_off(hPSL, nPSL); + // BTW, the reason to have encoded the PSLs in the least + // significant bits is to be able to call the cheaper version + // _MSB_off here + + auto theyBreakInvariant = not theyKeepInvariant; + // because we make the assumption of LITTLE ENDIAN byte ordering, + // we're interested in the elements up to the first haystack-richer + auto firstBreakage = swar::isolateLSB(theyBreakInvariant.value()); + return firstBreakage; + } + + // This should be more generic: if PSLs breach a broadcast PSL, saturate + // This should be more generic: if a SWAR breaches a SWAR condition, saturate. + constexpr static auto + needlePSLSaturation(Metadata nPSL) { + // create a saturator for max PSL. If any needle saturates, all later PSLs will be set to saturated. 
+ constexpr auto saturatedPSL = broadcast(Metadata(Metadata::MaxPSL)); + //auto nPSL = needle.PSLs(); + auto saturation = greaterEqual_MSB_off(nPSL, saturatedPSL); + auto invertSatMask = ((swar::isolateLSB(saturation.value()) - 1) ); + auto satMask = (~(swar::isolateLSB(saturation.value()) - 1) ); + //if (not bool(saturation)) return std::tuple{nPSL, false}; + // Least sig lane is saturated, all more sig must be made saturated. + auto needlePSLsToSaturate = Metadata{satMask & saturatedPSL.value()}; + // addition might have overflown nPSL before entering function + return std::tuple{Metadata{nPSL.PSLs() | needlePSLsToSaturate}, bool(saturation)}; // saturated at any point, last swar to check. + } + + constexpr static impl::MatchResult + potentialMatches( + Metadata needle, Metadata haystack + ) noexcept { + // We need to determine if there are potential matches to consider + auto sames = equals(needle, haystack); + auto deadline = firstInvariantBreakage(needle, haystack); + // In a valid haystack, the PSLs can grow at most by 1 per entry. + // If a PSL is richer than the needle in any place, because the + // needle, by construction, always grows at least by 1 per entry, + // then the PSL won't be equal again. + // There is no need to filter potential matches using the deadline + // as previous versions of the code did. + return { + deadline, + sames + }; + } + + /*! 
\brief converts the given starting PSL and reduced hash code into a SWAR-ready needle + + The given needle would have a PSL as the starting (PSL + 1) in the first slot, the "+ 1" is because + the count starts at 1, in this way, a haystack PSL of 0 is always "richer" + */ + constexpr static auto makeNeedle(U startingPSL, U hoistedHash) { + constexpr auto Ones = meta::BitmaskMaker::value; + constexpr auto Progression = Ones * Ones; + auto core = startingPSL | (hoistedHash << PSL_Bits); + auto broadcasted = broadcast(Metadata(core)); + auto startingPSLmadePotentialPSLs = Metadata(Progression) + broadcasted; + return startingPSLmadePotentialPSLs; + } + + template + inline constexpr + std::tuple + findMisaligned_assumesSkarupkeTail( + U hoistedHash, int homeIndex, const KeyComparer &kc + ) const noexcept __attribute__((always_inline)); +}; + +template +template +inline constexpr +std::tuple::Metadata> +RH_Backend::findMisaligned_assumesSkarupkeTail( + U hoistedHash, int homeIndex, const KeyComparer &kc + ) const noexcept { + auto misalignment = homeIndex % Metadata::NSlots; + auto baseIndex = homeIndex / Metadata::NSlots; + auto base = this->md_ + baseIndex; + + constexpr auto Ones = meta::BitmaskMaker::value; + constexpr auto Progression = Metadata{Ones * Ones}; + constexpr auto AllNSlots = + Metadata{meta::BitmaskMaker::value}; + MisalignedGenerator_Dynamic p(base, int(Metadata::NBits * misalignment)); + auto index = homeIndex; + auto needle = makeNeedle(0, hoistedHash); + + for(;;) { + auto hay = *p; + auto result = potentialMatches(needle, hay); + auto positives = result.potentialMatches; + while(positives.value()) { + auto matchSubIndex = positives.lsbIndex(); + auto matchIndex = index + matchSubIndex; + // Possible specialist optimization to kick off all possible + // matches to an array (like chaining evict) and check them + // later. 
+ if(kc(matchIndex)) { + return std::tuple(matchIndex, U(0), Metadata(0)); + } + positives = Metadata{swar::clearLSB(positives.value())}; + } + auto deadline = result.deadline; + if(deadline) { + // The deadline is relative to the misalignment. + // To build an absolute deadline, there are two cases: + // the bit falls in the first SWAR or the second SWAR. + // The same applies for needle. + // in general, for example a misaglignment of 6: + // { . | . | . | . | . | . | . | .}{ . | . | . | . | . | . | . | . } + // { a | b | c | d | e | f | g | h } + // shift left (to higher bits) by the misalignment + // { 0 | 0 | 0 | 0 | 0 | 0 | a | b } + // shift right (to lower bits) by NSlots - misalignment: + // { c | d | e | f | g | h | 0 | 0 } + // One might hope undefined behavior might be reasonable (zero + // result, unchanged result), but ARM proves that undefined + // behavior is indeed undefined, so we do our right shift as a + // double: shift by n-1, then shift by 1. + auto mdd = Metadata{deadline}; + auto toAbsolute = [](auto v, auto ma) { + auto shiftedLeft = v.shiftLanesLeft(ma); + auto shiftedRight = + v.shiftLanesRight(Metadata::NSlots - ma - 1).shiftLanesRight(1); + return Metadata{shiftedLeft | shiftedRight}; + }; + auto position = index + Metadata{deadline}.lsbIndex(); + return + std::tuple( + position, + toAbsolute(mdd, misalignment).value(), + toAbsolute(needle, misalignment) + ); + } + // Skarupke's tail allows us to not have to worry about the end + // of the metadata + ++p; + index += Metadata::NSlots; + needle = needle + AllNSlots; + } + } + +template +struct KeyValuePairWrapper { + using type = std::pair; + AlignedStorageFor pair_; + + template + void build(Initializers &&...izers) + noexcept(noexcept(pair_.template build(std::forward(izers)...))) + { + pair_.template build(std::forward(izers)...); + } + + template + KeyValuePairWrapper &operator=(RHS &&rhs) + noexcept(noexcept(std::declval() = std::forward(rhs))) + { + *pair_.template as() = 
std::forward(rhs); + return *this; + } + + void destroy() noexcept { pair_.template destroy(); } + + auto valuePtr() const noexcept { + auto deconstified = const_cast(this); + return deconstified->pair_.template as(); + } + + auto &value() noexcept { return *valuePtr(); } + const auto &value() const noexcept { return const_cast(this)->value(); } +}; + +template< + typename K, + typename MV, + size_t RequestedSize_, + int PSL_Bits, int HashBits, + typename Hash = std::hash, + typename KE = std::equal_to, + typename U = std::uint64_t, + typename Scatter = FibonacciScatter, + typename RangeReduce = LemireReduce, + typename HashReduce = TopHashReducer +> +struct RH_Frontend_WithSkarupkeTail { + using Backend = RH_Backend; + using MD = typename Backend::Metadata; + + constexpr static inline auto RequestedSize = RequestedSize_; + constexpr static inline auto LongestEncodablePSL = (1 << PSL_Bits); + constexpr static inline auto WithTail = + RequestedSize + + LongestEncodablePSL // the Skarupke tail + ; + constexpr static inline auto SWARCount = + ( + WithTail + + MD::NSlots - 1 // to calculate the ceiling rounding + ) / MD::NSlots + ; + constexpr static inline auto SlotCount = SWARCount * MD::NSlots; + constexpr static inline auto HighestSafePSL = + LongestEncodablePSL - MD::NSlots - 1; + + using MetadataCollection = std::array; + using value_type = std::pair; + + MetadataCollection md_; + /// \todo Scatter key and value in a flavor + std::array, SlotCount> values_; + size_t elementCount_; + + RH_Frontend_WithSkarupkeTail() noexcept: elementCount_(0) { + for(auto &mde: md_) { mde = MD{0}; } + } + + template + void traverse(Callable &&c) const { + for(size_t swarIndex = 0; swarIndex < SWARCount; ++swarIndex) { + auto PSLs = md_[swarIndex].PSLs(); + auto occupied = booleans(PSLs); + while(occupied) { + auto intraIndex = occupied.lsbIndex(); + c(swarIndex, intraIndex); + occupied = occupied.clearLSB(); + } + } + } + + ~RH_Frontend_WithSkarupkeTail() { + 
traverse([thy=this](std::size_t sI, std::size_t intra) { + thy->values_[intra + sI * MD::NSlots].destroy(); + }); + } + + RH_Frontend_WithSkarupkeTail(const RH_Frontend_WithSkarupkeTail &model): + RH_Frontend_WithSkarupkeTail() + { + model.traverse([thy=this,other=&model](std::size_t sI, std::size_t intra) { + auto index = intra + sI * MD::NSlots; + thy->values_[index].build(other->values_[index].value()); + thy->md_[sI] = thy->md_[sI].blitElement(other->md_[sI], intra); + ++thy->elementCount_; + }); + } + + RH_Frontend_WithSkarupkeTail(RH_Frontend_WithSkarupkeTail &&donor) noexcept: + md_(donor.md_), elementCount_(donor.elementCount_) + { + traverse([thy=this, other=&donor](std::size_t sI, std::size_t intra) { + auto index = intra + sI * MD::NSlots; + thy->values_[index].build(std::move(other->values_[index].value())); + }); + } + + + auto findParameters(const K &k) const noexcept { + auto [hoisted, homeIndex] = + findBasicParameters< + K, RequestedSize, HashBits, U, + Hash, Scatter, RangeReduce, HashReduce + >(k); + return + std::tuple{ + hoisted, + homeIndex, + [thy = this, &k](size_t ndx) noexcept { + return KE{}(thy->values_[ndx].value().first, k); + } + }; + } + + template + auto insert(ValuteTypeCompatible &&val) { + auto &k = val.first; + auto &mv = val.second; + auto [hoistedT, homeIndexT, kc] = findParameters(k); + auto hoisted = hoistedT; + auto homeIndex = homeIndexT; + auto thy = const_cast(this); + Backend be{thy->md_.data()}; + auto [iT, deadlineT, needleT] = + be.findMisaligned_assumesSkarupkeTail(hoisted, homeIndex, kc); + auto index = iT; + if(HighestSafePSL < index - homeIndex) { + throw MaximumProbeSequenceLengthExceeded("Scanning for eviction, from finding"); + } + auto deadline = deadlineT; + if(!deadline) { return std::pair{iterator(values_.data() + index), false}; } + auto needle = needleT; + auto rv = + insertionEvictionChain( + index, deadline, needle, + std::forward(val) + ); + ++elementCount_; + return rv; + } + + // Do the chain of 
relocations + // From this point onward, the hashes don't matter except for the + // updates to the metadata, the relocations + template + auto insertionEvictionChain( + std::size_t index, + U deadline, + MD needle, + VTC &&val + ) { + auto &k = val.first; + auto &mv = val.second; + auto swarIndex = index / MD::Lanes; + auto intraIndex = index % MD::Lanes; + auto mdp = this->md_.data() + swarIndex; + + // Because we have not decided about strong versus basic exception + // safety guarantee, for the time being we will just put a very large + // number here. + constexpr auto MaxRelocations = 150000; + std::array relocations; + std::array newElements; + auto relocationsCount = 0; + auto elementToInsert = needle.at(intraIndex); + + // The very last element in the metadata will always have a psl of 0 + // this serves as a sentinel for insertions, the only place to make + // sure the table has not been exhausted is an eviction chain that + // ends in the sentinel + // Also, the encoding for the PSL may be exhausted + for(;;) { + // Loop invariant: + // deadline, index, swarIndex, intraIndex, elementToInsert correct + // mdp points to the haystack that gave the deadline + auto md = *mdp; + auto evictedPSL = md.PSLs().at(intraIndex); + if(0 == evictedPSL) { // end of eviction chain! 
+ if(SlotCount - 1 <= index) { + throw MaximumProbeSequenceLengthExceeded("full table"); + } + if(0 == relocationsCount) { // direct build of a new value + values_[index].build( + std::piecewise_construct, + std::tuple(std::forward(val).first), + std::tuple(std::forward(val).second) + ); + *mdp = mdp->blitElement(intraIndex, elementToInsert); + return std::pair{iterator(values_.data() + index), true}; + } + // the last element is special because it is a + // move-construction, not a move-assignment + --relocationsCount; + auto fromIndex = relocations[relocationsCount]; + values_[index].build( + std::move(values_[fromIndex].value()) + ); + md_[swarIndex] = + md_[swarIndex].blitElement( + intraIndex, elementToInsert + ); + elementToInsert = newElements[relocationsCount]; + index = fromIndex; + swarIndex = index / MD::NSlots; + intraIndex = index % MD::NSlots; + // do the pair relocations + while(relocationsCount--) { + fromIndex = relocations[relocationsCount]; + values_[index].value() = + std::move(values_[fromIndex].value()); + md_[swarIndex] = + md_[swarIndex].blitElement(intraIndex, elementToInsert); + elementToInsert = newElements[relocationsCount]; + index = fromIndex; + swarIndex = index / MD::NSlots; + intraIndex = index % MD::NSlots; + } + values_[index].value() = std::forward(val); + md_[swarIndex] = + md_[swarIndex].blitElement(intraIndex, elementToInsert); + return std::pair{iterator(values_.data() + index), true}; + } + if(HighestSafePSL < evictedPSL) { + throw MaximumProbeSequenceLengthExceeded("Encoding insertion"); + } + + // evict the "deadline" element: + // first, insert the element in its place (it "stole") + // find the place for the evicted: when Robin Hood breaks again. + + // for this search, we need to make a search needle with only + // the PSL being evicted. 
+ + // "push" the index of the element that will be evicted + relocations[relocationsCount] = index; + // we have a place for the element being inserted, at this index + newElements[relocationsCount++] = elementToInsert; + if(MaxRelocations <= relocationsCount) { + throw RelocationStackExhausted("Relocation Stack"); + } + + // now the insertion will be for the old metadata entry + elementToInsert = md.hashes().at(intraIndex); + + // now, where should the evicted element go to? + // assemble a new needle + + // Constants relevant for the rest + constexpr auto Ones = meta::BitmaskMaker::value; + // | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | + constexpr auto ProgressionFromOne = MD(Ones * Ones); + // | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | + constexpr auto ProgressionFromZero = + MD(ProgressionFromOne - MD(Ones)); + // | 0 | 1 | 2 | 3 | ... | 7 | + constexpr auto BroadcastSWAR_ElementCount = + MD(meta::BitmaskMaker::value); + // | 8 | 8 | 8 | 8 | ... | 8 | + constexpr auto SWARIterationAddendumBase = + ProgressionFromZero + BroadcastSWAR_ElementCount; + // | 8 | 9 | ... | 15 | + + auto broadcastedEvictedPSL = broadcast(MD(evictedPSL)); + auto evictedPSLWithProgressionFromZero = + broadcastedEvictedPSL + ProgressionFromZero; + // | ePSL+0 | ePSL+1 | ePSL+2 | ePSL+3 | ... | ePSL+7 | + auto needlePSLs = + evictedPSLWithProgressionFromZero.shiftLanesLeft(intraIndex); + // zeroes make the new needle + // "richer" in all elements lower than the deadline + // because of the progression starts with 0 + // the "deadline" element will have equal PSL, not + // "poorer". + // assuming the deadline happened in the index 2: + // needlePSLs = | 0 | 0 | ePSL | ePSL+1 | ... | ePSL+5 | + // find the place for the new needle, without checking the keys. + auto haystackPSLs = md.PSLs(); + // haystack < needle => !(haystack >= needle) + auto breaksRobinHood = + not greaterEqual_MSB_off(haystackPSLs, needlePSLs); + if(!bool(breaksRobinHood)) { + // no place for the evicted element found in this swar. 
+ // increment the PSLs in the needle to check the next haystack + + // for the next swar, we will want (continuing the assumption + // of the deadline happening at index 2) + // old needle: + // | 0 | 0 | ePSL | ePSL+1 | ... | ePSL+5 | + // desired new needle PSLs: + // | ePSL+6 | ePSL+7 | ePSL+8 | ePSL+9 | ... | ePSL+13 | + // from evictedPSLWithProgressionFromZero, + // shift "right" NLanes - intraIndex (keep the last two lanes): + // | ePSL+6 | ePSL+7 | 0 | ... | 0 | + auto lowerPart = + evictedPSLWithProgressionFromZero. + shiftLanesRight(MD::Lanes - intraIndex - 1). + shiftLanesRight(1); + // the other part, of +8 onwards, is BroadcastElementCount, + // shifted: + // | 8 | 8 | 8 | 8 | ... | 8 | + // shifted two lanes: + // | 0 | 0 | 8 | 8 | ... | 8 | + // + auto topAdd = + BroadcastSWAR_ElementCount.shiftLanesLeft(intraIndex); + needlePSLs = needlePSLs + lowerPart + topAdd; + for(;;) { // hunt for the next deadline + ++swarIndex; + // should the maintenance of `index` be replaced + // with pointer arithmetic on mdp? 
+ index += MD::NSlots; + ++mdp; + haystackPSLs = mdp->PSLs(); + breaksRobinHood = + not greaterEqual_MSB_off(haystackPSLs, needlePSLs); + if(breaksRobinHood) { break; } + evictedPSL += MD::NSlots; + if(HighestSafePSL < evictedPSL) { + throw MaximumProbeSequenceLengthExceeded("Scanning for eviction, insertion"); + } + needlePSLs = needlePSLs + BroadcastSWAR_ElementCount; + } + } + deadline = swar::isolateLSB(breaksRobinHood.value()); + intraIndex = breaksRobinHood.lsbIndex(); + index = swarIndex * MD::NSlots + intraIndex; + elementToInsert = elementToInsert | needlePSLs.at(intraIndex); + } + } + + struct const_iterator { + const KeyValuePairWrapper *p_; + + // note: ++ not yet implemented, we can't iterate ;-) + + const value_type &operator*() noexcept { return p_->value(); } + const value_type *operator->() noexcept { return &p_->value(); } + + bool operator==(const const_iterator &other) const noexcept { + return p_ == other.p_; + } + + bool operator!=(const const_iterator &other) const noexcept { + return p_ != other.p_; + } + + const_iterator(const KeyValuePairWrapper *p): p_(p) {} + const_iterator(const const_iterator &) = default; + }; + + struct iterator: const_iterator { + value_type *ncp() { return const_cast(&this->p_->value()); } + value_type &operator*() noexcept { return *ncp(); } + value_type *operator->() noexcept { return ncp(); } + using const_iterator::const_iterator; + }; + + const_iterator begin() const noexcept { return this->values_.data(); } + const_iterator end() const noexcept { + return this->values_.data() + this->values_.size(); + } + + inline iterator find(const K &k) noexcept __attribute__((always_inline)); + + const_iterator find(const K &k) const noexcept { + return const_cast(this)->find(k); + } + + auto displacement(const_iterator from, const_iterator to) { + return to.p_ - from.p_; + } +}; + +template< + typename K, + typename MV, + size_t RequestedSize_, + int PSL_Bits, int HashBits, + typename Hash, + typename KE, + typename U, +
typename Scatter, + typename RangeReduce, + typename HashReduce +> +auto +RH_Frontend_WithSkarupkeTail< + K, MV, RequestedSize_, PSL_Bits, HashBits, Hash, KE, U, Scatter, + RangeReduce, HashReduce +>::find(const K &k) noexcept -> iterator +{ + auto [hoisted, homeIndex, keyChecker] = findParameters(k); + Backend be{this->md_.data()}; + auto [index, deadline, dontcare] = + be.findMisaligned_assumesSkarupkeTail( + hoisted, homeIndex, keyChecker + ); + return deadline ? values_.end() : values_.data() + index; + } + +} // rh + +} // swar, zoo + +#endif diff --git a/inc/zoo/map/RobinHoodAlt.h b/inc/zoo/map/RobinHoodAlt.h index df869e4b..5f8fa484 100644 --- a/inc/zoo/map/RobinHoodAlt.h +++ b/inc/zoo/map/RobinHoodAlt.h @@ -1,194 +1,194 @@ - -#pragma once - -#include "zoo/swar/SWAR.h" - -#include -#include - -namespace zoo { - -namespace rh { - -using u64 = uint64_t; -using u32 = uint32_t; -using u16 = uint16_t; -using u8 = uint8_t; - -template struct SlotOperations { - using SSL = swar::SWARWithSubLanes; - using SM = swar::SWAR; - using BoolSM = swar::BooleanSWAR; - static constexpr inline auto SlotOnes = - meta::BitmaskMaker::value; - // for 64 bit size, 8 bit meta, 0x0807'0605'0403'0201ull - static constexpr inline auto PSLProgression = SlotOnes * SlotOnes; - static constexpr T allOnes = - meta::BitmaskMaker::value; - - static constexpr auto needlePSL(T currentPSL) { - return broadcast(SM{currentPSL}) + SM{PSLProgression}; - } - - // At the position the needle psl exceeds the haystack psl, a match becomes - // impossible. Only elements _before_ the exceeding element can match. - // pslNeedle must be PSLProgression + startingPSLValue - static constexpr auto deadline(SM pslHaystack, SM pslNeedle) { - // We must ensure the MSBs of the psl blocks are off. Since we store - // PSLs in a swar with sublanes in the least bits, this guarantee - // holds. 
- auto satisfied = greaterEqual_MSB_off(pslHaystack, pslNeedle); - auto broken = not satisfied; - return swar::isolateLSB(broken.value()); - } - - // Has no intrinsic binding to the metadata, just easier to write with - // using decls. - static constexpr auto attemptMatch( - SM haystack, SM needleHashes, SM needlePSL) { - const auto haystackPSL = SM{SSL::LeastMask.value() & haystack.value()}; - const auto d = deadline(haystackPSL, needlePSL); // breaks abstraction - const auto needle = needleHashes | needlePSL; - - const auto matches = equals(haystack, needle); - const auto searchEnds = d ? 1 : 0; - // Returned value is MSB boolean array with 'finality' bit on at - // position 0. Breaks if PSLs are width 1 (but so does everything else) - return SM{searchEnds | ((d - 1) & matches.value())}; - } -}; - -/// Slot metadata provides the affordances of -/// 'attempt to match this needle of hashes and PSLs' -/// Contains sizeof(T)/(NBitsHash,NBitsPSL) hashes and PSLs -template struct SlotMetadata { - using SSL = swar::SWARWithSubLanes; - using SM = swar::SWAR; - using BoolSM = swar::BooleanSWAR; - // PSL are stored in least sig bits, Hashes are stored in most sig bits. - // This allows us to do fast greaterequal checks for PSLs (with the hashes - // blitted out) and fast equality checks for hash bits (as equality checks - // do not need carry bits. 
- SSL data_; - - constexpr auto PSLs() const noexcept{ - return data_.least(); - } - - constexpr auto Hashes() const noexcept { - return data_.most(); - } - - constexpr auto attemptMatch(SM needleHashes, SM needlePSL) { - return SlotOperations::attemptMatch( - data_, needleHashes, needlePSL); - } -}; - -/// BlockProvider provides affordances of -/// 'give me the slot metadata that contains position p' -/// 'set slot metadata at index that contains position p' -/// 'set key at position p' -/// 'check this concrete key against position p' -template struct SlotSplitKeys { - using SSL = swar::SWARWithSubLanes; - using SM = swar::SWAR; - using BoolSM = swar::BooleanSWAR; - - std::array keys_; - std::array metadata_; - - Key keyAt(int pos) const { - return keys_[pos]; - } - void setKey(int pos, Key k) { - keys_[pos] = k; - } - constexpr int posToSlot(int pos) const { return pos/SM::Lanes; } - void setSlotAt(int idx, T v) { - metadata_[idx] = v; - } - - // Track to avoid second divide? - SSL slotAt(int pos) const { return metadata_[posToSlot(pos)]; } - bool keyCheck(int major, int minor, Key k) const { return k == keyAt(major * SM::Lanes + minor); } - bool keyCheck(int pos, Key k) const { return k == keyAt(pos); } -}; - -/// RobinHood tables provide affordances of -/// 'lookup this key' -/// 'insert this key' -/// 'delete this key' -/// Split locale of key value when both are present. 
-/// Position is major + minor - -template -struct Hasher { - Key operator()(Key k) { return k; } -}; - -template> -struct RH { - using SSL = swar::SWARWithSubLanes; - using SM = swar::SWAR; - using BoolSM = swar::BooleanSWAR; - Meta m_; - Hash h_; - - bool exists(Key k) { - return findSlot(k).second; - } - - struct MajorMinorInserted { - int major; - int minor; - bool inserted; - }; - - /// pos, hash - std::pair twoNumbers(Key k) { - auto hash = h_(k); - const u32 pos = mapToSlotLemireReduction(fibonacciIndexModulo(hash)); - const T thinhash = badMixer (hash); - return {pos, thinhash}; - } - - std::pair twoNumbersBad(Key k) { - return {1, 1}; - } - - /// Find slot can mean 'psl too short/no slot', 'found and in slot', 'not found but (richer|empty) slot' - /// Currently bug: 'psl too short/no slot' not handled correctly. - /// Returns major/minor to attempt to avoid divisions. - template - MajorMinorInserted findSlot(Key k, KeyCheck kc) { - const auto twoNum = twoNumbersBad(k); - const auto pos = twoNum.first; - const auto hash = twoNum.second; - const auto major = m_.posToSlot(pos); - const auto haystack = m_.slotAt(pos); - // PSL is off by one territory - constexpr auto exactlyOne = SM{1}; - auto minor = 0; - while(true) { - const auto matchResult = SlotOperations::matchAttempt(haystack, hash, 0); - auto finished = exactlyOne & matchResult; - auto matches = exactlyOne & ~matchResult; - while (matches) { - auto minor = matches.lsbIndex(); // Lane offset - if (m_.keyCheck(major + minor, k)) { - return {major, minor, true}; // 'found and in slot' - } - matches = SM{matches.clearLSB()}; - } - // minor points at slot that the key currently fits in. 
- if (finished) { - return {major, minor, false}; // 'not found, richer or empty slot' - } - } - } -}; - - -} // namespace rh -} // namespace zoo + +#pragma once + +#include "zoo/swar/SWAR.h" + +#include +#include + +namespace zoo { + +namespace rh { + +using u64 = uint64_t; +using u32 = uint32_t; +using u16 = uint16_t; +using u8 = uint8_t; + +template struct SlotOperations { + using SSL = swar::SWARWithSubLanes; + using SM = swar::SWAR; + using BoolSM = swar::BooleanSWAR; + static constexpr inline auto SlotOnes = + meta::BitmaskMaker::value; + // for 64 bit size, 8 bit meta, 0x0807'0605'0403'0201ull + static constexpr inline auto PSLProgression = SlotOnes * SlotOnes; + static constexpr T allOnes = + meta::BitmaskMaker::value; + + static constexpr auto needlePSL(T currentPSL) { + return broadcast(SM{currentPSL}) + SM{PSLProgression}; + } + + // At the position the needle psl exceeds the haystack psl, a match becomes + // impossible. Only elements _before_ the exceeding element can match. + // pslNeedle must be PSLProgression + startingPSLValue + static constexpr auto deadline(SM pslHaystack, SM pslNeedle) { + // We must ensure the MSBs of the psl blocks are off. Since we store + // PSLs in a swar with sublanes in the least bits, this guarantee + // holds. + auto satisfied = greaterEqual_MSB_off(pslHaystack, pslNeedle); + auto broken = not satisfied; + return swar::isolateLSB(broken.value()); + } + + // Has no intrinsic binding to the metadata, just easier to write with + // using decls. + static constexpr auto attemptMatch( + SM haystack, SM needleHashes, SM needlePSL) { + const auto haystackPSL = SM{SSL::LeastMask.value() & haystack.value()}; + const auto d = deadline(haystackPSL, needlePSL); // breaks abstraction + const auto needle = needleHashes | needlePSL; + + const auto matches = equals(haystack, needle); + const auto searchEnds = d ? 1 : 0; + // Returned value is MSB boolean array with 'finality' bit on at + // position 0. 
Breaks if PSLs are width 1 (but so does everything else) + return SM{searchEnds | ((d - 1) & matches.value())}; + } +}; + +/// Slot metadata provides the affordances of +/// 'attempt to match this needle of hashes and PSLs' +/// Contains sizeof(T)/(NBitsHash,NBitsPSL) hashes and PSLs +template struct SlotMetadata { + using SSL = swar::SWARWithSubLanes; + using SM = swar::SWAR; + using BoolSM = swar::BooleanSWAR; + // PSL are stored in least sig bits, Hashes are stored in most sig bits. + // This allows us to do fast greaterequal checks for PSLs (with the hashes + // blitted out) and fast equality checks for hash bits (as equality checks + // do not need carry bits. + SSL data_; + + constexpr auto PSLs() const noexcept{ + return data_.least(); + } + + constexpr auto Hashes() const noexcept { + return data_.most(); + } + + constexpr auto attemptMatch(SM needleHashes, SM needlePSL) { + return SlotOperations::attemptMatch( + data_, needleHashes, needlePSL); + } +}; + +/// BlockProvider provides affordances of +/// 'give me the slot metadata that contains position p' +/// 'set slot metadata at index that contains position p' +/// 'set key at position p' +/// 'check this concrete key against position p' +template struct SlotSplitKeys { + using SSL = swar::SWARWithSubLanes; + using SM = swar::SWAR; + using BoolSM = swar::BooleanSWAR; + + std::array keys_; + std::array metadata_; + + Key keyAt(int pos) const { + return keys_[pos]; + } + void setKey(int pos, Key k) { + keys_[pos] = k; + } + constexpr int posToSlot(int pos) const { return pos/SM::Lanes; } + void setSlotAt(int idx, T v) { + metadata_[idx] = v; + } + + // Track to avoid second divide? 
+ SSL slotAt(int pos) const { return metadata_[posToSlot(pos)]; } + bool keyCheck(int major, int minor, Key k) const { return k == keyAt(major * SM::Lanes + minor); } + bool keyCheck(int pos, Key k) const { return k == keyAt(pos); } +}; + +/// RobinHood tables provide affordances of +/// 'lookup this key' +/// 'insert this key' +/// 'delete this key' +/// Split locale of key value when both are present. +/// Position is major + minor + +template +struct Hasher { + Key operator()(Key k) { return k; } +}; + +template> +struct RH { + using SSL = swar::SWARWithSubLanes; + using SM = swar::SWAR; + using BoolSM = swar::BooleanSWAR; + Meta m_; + Hash h_; + + bool exists(Key k) { + return findSlot(k).second; + } + + struct MajorMinorInserted { + int major; + int minor; + bool inserted; + }; + + /// pos, hash + std::pair twoNumbers(Key k) { + auto hash = h_(k); + const u32 pos = mapToSlotLemireReduction(fibonacciIndexModulo(hash)); + const T thinhash = badMixer (hash); + return {pos, thinhash}; + } + + std::pair twoNumbersBad(Key k) { + return {1, 1}; + } + + /// Find slot can mean 'psl too short/no slot', 'found and in slot', 'not found but (richer|empty) slot' + /// Currently bug: 'psl too short/no slot' not handled correctly. + /// Returns major/minor to attempt to avoid divisions. 
+ template + MajorMinorInserted findSlot(Key k, KeyCheck kc) { + const auto twoNum = twoNumbersBad(k); + const auto pos = twoNum.first; + const auto hash = twoNum.second; + const auto major = m_.posToSlot(pos); + const auto haystack = m_.slotAt(pos); + // PSL is off by one territory + constexpr auto exactlyOne = SM{1}; + auto minor = 0; + while(true) { + const auto matchResult = SlotOperations::matchAttempt(haystack, hash, 0); + auto finished = exactlyOne & matchResult; + auto matches = exactlyOne & ~matchResult; + while (matches) { + auto minor = matches.lsbIndex(); // Lane offset + if (m_.keyCheck(major + minor, k)) { + return {major, minor, true}; // 'found and in slot' + } + matches = SM{matches.clearLSB()}; + } + // minor points at slot that the key currently fits in. + if (finished) { + return {major, minor, false}; // 'not found, richer or empty slot' + } + } + } +}; + + +} // namespace rh +} // namespace zoo diff --git a/inc/zoo/meta/log.h b/inc/zoo/meta/log.h index 84011561..cae6d2f1 100644 --- a/inc/zoo/meta/log.h +++ b/inc/zoo/meta/log.h @@ -19,6 +19,8 @@ constexpr int logFloor_WithoutIntrinsic(T value) { } #ifdef _MSC_VER +// change to use the relevant functions in C++ 20's header +// when bumping to C++ 20 constexpr int logFloor(uint64_t arg) { return logFloor_WithoutIntrinsic(arg); } diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index d6872632..9adf42f2 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -6,6 +6,8 @@ #include +#include + namespace zoo { namespace swar { using u64 = uint64_t; diff --git a/inc/zoo/traits/is_container.h b/inc/zoo/traits/is_container.h index 39a824ad..35e83070 100644 --- a/inc/zoo/traits/is_container.h +++ b/inc/zoo/traits/is_container.h @@ -3,6 +3,11 @@ #ifndef SIMPLIFY_INCLUDES #include + +#ifdef _MSC_VER +#include +#endif + #endif namespace zoo { diff --git a/inc/zoo/util/container_insertion.h b/inc/zoo/util/container_insertion.h index f1f6603a..d816f7eb 100644 --- 
a/inc/zoo/util/container_insertion.h +++ b/inc/zoo/util/container_insertion.h @@ -1,43 +1,43 @@ -#ifndef ZOO_CONTAINER_INSERTION -#define ZOO_CONTAINER_INSERTION - -#include -#include - -namespace zoo { - -template -struct is_insertable_impl: std::false_type {}; -template -struct is_insertable_impl< - T, - std::void_t() << std::declval() - )> ->: std::true_type {}; - -} - -namespace std { - -template -auto operator<<(std::ostream &out, const C &a) --> std::enable_if_t< - not(zoo::is_insertable_impl::value) && zoo::is_container_v, - std::ostream & -> { - out << '('; - auto current{cbegin(a)}, sentry{cend(a)}; - if(current != sentry) { - for(;;) { - out << *current++; - if(sentry == current) { break; } - out << ", "; - } - } - return out << ')'; -} - -} - -#endif +#ifndef ZOO_CONTAINER_INSERTION +#define ZOO_CONTAINER_INSERTION + +#include +#include + +namespace zoo { + +template +struct is_insertable_impl: std::false_type {}; +template +struct is_insertable_impl< + T, + std::void_t() << std::declval() + )> +>: std::true_type {}; + +} + +namespace std { + +template +auto operator<<(std::ostream &out, const C &a) +-> std::enable_if_t< + not(zoo::is_insertable_impl::value) && zoo::is_container_v, + std::ostream & +> { + out << '('; + auto current{cbegin(a)}, sentry{cend(a)}; + if(current != sentry) { + for(;;) { + out << *current++; + if(sentry == current) { break; } + out << ", "; + } + } + return out << ')'; +} + +} + +#endif diff --git a/inc/zoo/util/range_equivalence.h b/inc/zoo/util/range_equivalence.h index 6c514d79..7517b825 100644 --- a/inc/zoo/util/range_equivalence.h +++ b/inc/zoo/util/range_equivalence.h @@ -1,46 +1,46 @@ -#ifndef ZOO_RANGE_EQUIVALENCE -#define ZOO_RANGE_EQUIVALENCE - -#include - -namespace zoo { - -template -auto operator==(const C1 &l, const C2 &r) --> std::enable_if_t< - zoo::is_container_v and - zoo::is_container_v, - bool -> -{ - auto lb{cbegin(l)}, le{cend(l)}; - auto rb{cbegin(r)}, re{cend(r)}; - for(;;++lb, ++rb){ - if(lb == le) { 
return rb == re; } // termination at the same time - if(rb == re) { return false; } // r has fewer elements - if(not(*lb == *rb)) { return false; } - } - return true; -} - -template -auto weaklySame(const C1 &l, const C2 &r) --> std::enable_if_t< - zoo::is_container_v and - zoo::is_container_v, - bool -> -{ - auto lb{cbegin(l)}, le{cend(l)}; - auto rb{cbegin(r)}, re{cend(r)}; - for(;;++lb, ++rb){ - if(lb == le) { return rb == re; } // termination at the same time - if(rb == re) { return false; } // r has fewer elements - if(*lb < *rb || *rb < *lb) { return false; } - } - return true; -} - -} - -#endif +#ifndef ZOO_RANGE_EQUIVALENCE +#define ZOO_RANGE_EQUIVALENCE + +#include + +namespace zoo { + +template +auto operator==(const C1 &l, const C2 &r) +-> std::enable_if_t< + bool(zoo::is_container_v) and + bool(zoo::is_container_v), + bool +> +{ + auto lb{cbegin(l)}, le{cend(l)}; + auto rb{cbegin(r)}, re{cend(r)}; + for(;;++lb, ++rb){ + if(lb == le) { return rb == re; } // termination at the same time + if(rb == re) { return false; } // r has fewer elements + if(not(*lb == *rb)) { return false; } + } + return true; +} + +template +auto weaklySame(const C1 &l, const C2 &r) +-> std::enable_if_t< + zoo::is_container_v and + zoo::is_container_v, + bool +> +{ + auto lb{cbegin(l)}, le{cend(l)}; + auto rb{cbegin(r)}, re{cend(r)}; + for(;;++lb, ++rb){ + if(lb == le) { return rb == re; } // termination at the same time + if(rb == re) { return false; } // r has fewer elements + if(*lb < *rb || *rb < *lb) { return false; } + } + return true; +} + +} + +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0f1e47d3..db03bb4b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,7 +14,7 @@ if(MSVC) # MSVC specific configuration # Avoids multiple problems - # Due to multiple SFINAE bugs, forced upgrade to C++ 20 to use "concepts" + # Due to multiple bugs, forced upgrade to C++ 20 set(CMAKE_CXX_STANDARD 20) # Set the policy to use the new behavior From 
82398eff83f29db2d41e91a4ea5874822017caf5 Mon Sep 17 00:00:00 2001 From: Eddie Date: Wed, 20 Dec 2023 02:35:28 -0800 Subject: [PATCH 4/5] .h --- inc/zoo/algorithm/quicksort.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/zoo/algorithm/quicksort.h b/inc/zoo/algorithm/quicksort.h index 5e438812..46a2efc2 100644 --- a/inc/zoo/algorithm/quicksort.h +++ b/inc/zoo/algorithm/quicksort.h @@ -7,7 +7,7 @@ #include // for temporary storage #include -#include +#include namespace zoo { From 9ffd4c7f02fde8437c17b6124f9285d430d4ea1a Mon Sep 17 00:00:00 2001 From: Eddie Date: Wed, 20 Dec 2023 02:39:18 -0800 Subject: [PATCH 5/5] .h, again --- inc/zoo/algorithm/quicksort.h | 3 +++ inc/zoo/swar/SWAR.h | 4 +++- inc/zoo/traits/is_container.h | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/inc/zoo/algorithm/quicksort.h b/inc/zoo/algorithm/quicksort.h index 46a2efc2..980174f4 100644 --- a/inc/zoo/algorithm/quicksort.h +++ b/inc/zoo/algorithm/quicksort.h @@ -7,7 +7,10 @@ #include // for temporary storage #include + +#ifdef _MSC_VER #include +#endif namespace zoo { diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 9adf42f2..d2bdd6e8 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -6,7 +6,9 @@ #include -#include +#ifdef _MSC_VER +#include +#endif namespace zoo { namespace swar { diff --git a/inc/zoo/traits/is_container.h b/inc/zoo/traits/is_container.h index 35e83070..254f55fb 100644 --- a/inc/zoo/traits/is_container.h +++ b/inc/zoo/traits/is_container.h @@ -5,7 +5,7 @@ #include #ifdef _MSC_VER -#include +#include #endif #endif