diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 9e2d2c7b..fec26f22 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -27,7 +27,6 @@ constexpr uint64_t popcount(uint64_t a) noexcept { >::execute(a); } - /// Index into the bits of the type T that contains the MSB. template constexpr std::make_unsigned_t msbIndex(T v) noexcept { @@ -189,7 +188,7 @@ constexpr auto isolateLSB(T v) { template constexpr auto leastNBitsMask() { - return (T(1ull)< @@ -199,7 +198,7 @@ constexpr auto leastNBitsMask() { template constexpr T mostNBitsMask() { - return ~leastNBitsMask(); + return ~leastNBitsMask(); } @@ -297,6 +296,10 @@ struct BooleanSWAR: SWAR { friend constexpr BooleanSWAR greaterEqual_MSB_off(SWAR, SWAR) noexcept; + template + constexpr T + indexOfMostSignficantLaneSet(SWAR test) noexcept; + template friend constexpr BooleanSWAR convertToBooleanSWAR(SWAR arg) noexcept; @@ -362,6 +365,39 @@ constantIsGreaterEqual_MSB_off(SWAR subtrahend) noexcept { } } +template +constexpr T median(T x, U y, V z) { + return (x | y) & (y | z) & (x | z); +} + +template +constexpr BooleanSWAR +greaterEqual(SWAR left, SWAR right) noexcept { + // Adapted from TAOCP V4 P152 + // h is msbselector, x is right, l is lower/left. Sets MSB to 1 in lanes + // in test variable t for when xi < yi for lane i . Invert for greaterEqual. + // t = h & ~ + // z = (x|h) - (y&~h) + using S = swar::SWAR; + const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y + const auto z = (x|h) - (y&~h); + // bitwise ternary median! + const auto t = h & ~median(x, ~y, z); + return ~BooleanSWAR{static_cast(t)}; // ~(x= y +} + +// In the condition where only MSBs will be on, we can fast lookup with 1 multiply the index of the most signficant byte that is on. +// This appears to be a mapping from the (say) 256 unique values of a 64 bit int where only MSBs of each 8 bits can be on, but I don't fully understand it. +// Adapted from TAOCP Vol 4A Page 153 Eq 94. +template +constexpr T +indexOfMostSignficantLaneSet(SWAR test) noexcept { + const auto TypeWidth = sizeof(T) * 8; + const auto TopVal = (T{1}<<(TypeWidth-NBits))-1, BottomVal = (T{1}<<(NBits-1))-1; + const T MappingConstant = TopVal / BottomVal; + return (test.value() * MappingConstant) >> (TypeWidth - NBits); +} + template constexpr BooleanSWAR greaterEqual_MSB_off(SWAR left, SWAR right) noexcept { diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 2d784b42..1628e222 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -2,12 +2,19 @@ #include "catch2/catch.hpp" +#include +#include +#include #include using namespace zoo; using namespace zoo::swar; +using S2_64 = SWAR<2, uint64_t>; +using S2_32 = SWAR<2, uint32_t>; +using S2_16 = SWAR<2, uint16_t>; + using S4_64 = SWAR<4, uint64_t>; using S4_32 = SWAR<4, uint32_t>; using S4_16 = SWAR<4, uint16_t>; @@ -260,8 +267,8 @@ static_assert(8 == lsbIndex(1<<8)); static_assert(17 == lsbIndex(1<<17)); static_assert(30 == lsbIndex(1<<30)); -/* -These tests were not catching errors known to have been present + +/*These tests were not catching errors known to have been present static_assert(0x80880008 == greaterEqual<3>(SWAR<4, uint32_t>(0x3245'1027)).value()); static_assert(0x88888888 == greaterEqual<0>(SWAR<4, uint32_t>(0x0123'4567)).value()); static_assert(0x88888888 == greaterEqual<0>(SWAR<4, uint32_t>(0x7654'3210)).value()); @@ -269,35 +276,107 @@ static_assert(0x00000008 == greaterEqual<7>(SWAR<4, uint32_t>(0x0123'4567)).valu static_assert(0x80000000 == greaterEqual<7>(SWAR<4, uint32_t>(0x7654'3210)).value()); */ -// Unusual formatting for easy visual verification. -#define GE_MSB_TEST(left, right, result) static_assert(result== greaterEqual_MSB_off<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value()); - -GE_MSB_TEST(0x1000'0010, - 0x0111'1101, - 0x8000'0080) -GE_MSB_TEST(0x4333'3343, - 0x4444'4444, - 0x8000'0080) -GE_MSB_TEST(0x0550'0110, - 0x0110'0550, - 0x8888'8008) -GE_MSB_TEST(0x4771'1414, - 0x4641'1774, - 0x8888'8008) - -GE_MSB_TEST(0x0123'4567, - 0x0000'0000, - 0x8888'8888) -GE_MSB_TEST(0x0123'4567, - 0x7777'7777, - 0x0000'0008) - -GE_MSB_TEST(0x0000'0000, - 0x0123'4567, - 0x8000'0000) -GE_MSB_TEST(0x7777'7777, - 0x0123'4567, - 0x8888'8888) + +#define GE_MSB_TEST(left, right, result) static_assert(result == greaterEqual_MSB_off<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value()); + +GE_MSB_TEST( + 0x1000'0010, + 0x0111'1101, + 0x8000'0080) +GE_MSB_TEST( + 0x4333'3343, + 0x4444'4444, + 0x8000'0080) +GE_MSB_TEST( + 0x0550'0110, + 0x0110'0550, + 0x8888'8008) +GE_MSB_TEST( + 0x4771'1414, + 0x4641'1774, + 0x8888'8008) +GE_MSB_TEST( + 0x0123'4567, + 0x0000'0000, + 0x8888'8888) +GE_MSB_TEST( + 0x0123'4567, + 0x7777'7777, + 0x0000'0008) +GE_MSB_TEST( + 0x0000'0000, + 0x0123'4567, + 0x8000'0000) +GE_MSB_TEST( + 0x7777'7777, + 0x0123'4567, + 0x8888'8888) + +// Replicate the msb off tests with the greaterEqual that allows msb on +#define GE_MSB_ON_TEST(left, right, result) static_assert(result == greaterEqual<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value()); + +GE_MSB_ON_TEST( + 0x1000'0010, + 0x0111'1101, + 0x8000'0080) +GE_MSB_ON_TEST( + 0x4333'3343, + 0x4444'4444, + 0x8000'0080) +GE_MSB_ON_TEST( + 0x0550'0110, + 0x0110'0550, + 0x8888'8008) +GE_MSB_ON_TEST( + 0x4771'1414, + 0x4641'1774, + 0x8888'8008) +GE_MSB_ON_TEST( + 0x0123'4567, + 0x0000'0000, + 0x8888'8888) +GE_MSB_ON_TEST( + 0x0123'4567, + 0x7777'7777, + 0x0000'0008) +GE_MSB_ON_TEST( + 0x0000'0000, + 0x0123'4567, + 0x8000'0000) +GE_MSB_ON_TEST( + 0x7777'7777, + 0x0123'4567, + 0x8888'8888) + +TEST_CASE( + "greaterEqualMSBOn", + "[swar][unsigned-swar]" +) { + SECTION("single") { + for (uint32_t i = 1; i < 4; i++) { + const auto left = S2_16{0}.blitElement(1, i); + const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1); + const auto test = S2_16{0}.blitElement(1, 2); + CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); + } + } + SECTION("single") { + for (uint32_t i = 1; i < 15; i++) { + const auto large = S4_32{0}.blitElement(1, i+1); + const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1); + const auto test = S4_32{0}.blitElement(1, 8); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + } + } + SECTION("allLanes") { + for (uint32_t i = 1; i < 15; i++) { + const auto small = S4_32(S4_32::LeastSignificantBit * (i-1)); + const auto large = S4_32(S4_32::LeastSignificantBit * (i+1)); + const auto test = S4_32(S4_32::LeastSignificantBit * 8); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + } + } +} static_assert(0x123 == SWAR<4, uint32_t>(0x173).blitElement(1, 2).value()); static_assert(0 == isolateLSB(u32(0)));