Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Literals Utils #80

Merged
merged 16 commits into from
Jun 10, 2024
46 changes: 43 additions & 3 deletions inc/zoo/swar/SWAR.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@

namespace zoo { namespace swar {

template <int NBits, typename T> struct SWAR;

// Tag type carrying a lane width (NumBits) and a base integer type (BaseType).
// It is the first parameter of the array-literal constructors of SWAR and
// BooleanSWAR, and their deduction guides deduce the template arguments from it.
template <int NumBits, typename BaseType> struct Literals_t {
// Null pointer-to-member-function of the matching SWAR type; nothing visible
// in this header reads it.
constexpr static void (SWAR<NumBits, BaseType>::*value)() = nullptr;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not in use, remove

};

// Ready-made Literals_t<NumBits, BaseType> tag object, used as in
// SWAR{Literals<8, u32>, {8, 3, 2, 1}}.
template <int NumBits, typename BaseType>
constexpr Literals_t<NumBits, BaseType> Literals{};
Scottbruceheart marked this conversation as resolved.
Show resolved Hide resolved

using u64 = uint64_t;
using u32 = uint32_t;
using u16 = uint16_t;
Expand Down Expand Up @@ -70,6 +79,18 @@ struct SWAR {
// "don't care" bits when non-power-of-two bit lane sizes are supported
LowerBits = MostSignificantBit - LeastSignificantBit;

/// Builds a SWAR from an array literal holding one value per lane.
///
/// The first array element ends up in the most significant lane and the last
/// element in the least significant lane. The array length N must equal
/// Lanes; otherwise this constructor is removed from overload resolution.
/// Elements are OR-ed in without masking, so an element wider than NBits
/// bits also sets bits in the more significant lanes.
template <typename Arg, std::size_t N, typename = std::enable_if_t<N == Lanes, int>>
constexpr
SWAR(Literals_t<NBits, T>, const Arg (&values)[N]) : m_v{0} {
    auto packed = T{0};
    for (const auto laneValue : values) {
        // Shift by one lane in two steps: when NBits equals the width of T
        // (single-lane SWAR) a direct shift by NBits is undefined behavior
        // and is rejected during constant evaluation.
        const auto shifted =
            static_cast<T>(static_cast<T>(packed << (NBits - 1)) << 1);
        packed = static_cast<T>(shifted | static_cast<T>(laneValue));
    }
    m_v = packed;
}

// Largest unsigned value a single lane can hold: the low NBits bits set.
// All-ones of T is shifted left by NBits in two steps (a single shift by the
// full width of T would be undefined) and then complemented. Every
// intermediate is cast back to T so that, when T is narrower than int, the
// promoted value being shifted is never negative (left-shifting a negative
// value is undefined before C++20 and therefore not a constant expression).
constexpr static T MaxUnsignedLaneValue = static_cast<T>(
    ~static_cast<T>(static_cast<T>(static_cast<T>(~T{0}) << (NBits - 1)) << 1));
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree this constant is worth including, but not here, and not with this calculation. See there are a bunch of constants in SWAR, you may, for example, put it at the end of the list like this:

MaxUnsignedLaneValue = LeastSignificantLaneMask;


// Defaulted; m_v has no default member initializer, so default-initialization
// leaves it uninitialized.
SWAR() = default;
// Wraps a raw bit pattern of type T; explicit, so a T never converts implicitly.
constexpr explicit SWAR(T v): m_v(v) {}
// Explicit conversion back to the underlying T.
constexpr explicit operator T() const noexcept { return m_v; }
Expand Down Expand Up @@ -161,6 +182,9 @@ struct SWAR {
T m_v;
};

// Deduction guide: lets SWAR{Literals<NBits, T>, {...}} deduce SWAR<NBits, T>
// from the Literals_t tag; the array bound is that type's lane count.
template <int NBits, typename T, typename Arg>
SWAR(Literals_t<NBits, T>, const Arg (&values)[SWAR<NBits, T>::Lanes]) -> SWAR<NBits, T>;

/// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation.
template<int NBits, typename T = uint64_t>
constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) {
Expand Down Expand Up @@ -231,6 +255,19 @@ template<int NBits, typename T>
struct BooleanSWAR: SWAR<NBits, T> {
using Base = SWAR<NBits, T>;

/// Packs one boolean per lane into a BooleanSWAR: `true` sets the most
/// significant bit of its lane, `false` leaves the lane zero.
/// The first array element maps to the most significant lane.
///
/// Declared static because the array-literal constructor calls this helper
/// from its base-class initializer, and calling a non-static member function
/// before the base subobject is initialized is undefined behavior.
static constexpr auto toMsbBools(const bool (&values)[Base::Lanes]) {
    constexpr auto laneMsb = static_cast<T>(T{1} << (NBits - 1));
    auto packed = T{0};
    for (const bool flag : values) {
        // Two-step shift: a single shift by NBits is undefined when a lane
        // spans the whole of T (single-lane case).
        const auto shifted =
            static_cast<T>(static_cast<T>(packed << (NBits - 1)) << 1);
        packed = static_cast<T>(shifted | (flag ? laneMsb : T{0}));
    }
    return BooleanSWAR{packed};
}

// Array-literal constructor: one bool per lane. Only viable when the array
// length N equals Base::Lanes; the lanes are packed by toMsbBools.
template <std::size_t N, typename = std::enable_if_t<N == Base::Lanes, T>>
constexpr BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[N]) : Base(toMsbBools(values)) {}
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved

// Booleanness is stored in the MSBs
static constexpr auto MaskMSB =
broadcast<NBits, T>(Base(T(1) << (NBits -1)));
Expand All @@ -240,7 +277,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
static constexpr auto MaskNonLSB = ~MaskLSB;
static constexpr auto MaskNonMSB = ~MaskMSB;
constexpr explicit BooleanSWAR(T v): Base(v) {}

/// Returns a copy with the most significant bit of lane `bit` flipped via XOR.
/// NOTE(review): XOR clears the flag only when it is currently set; on a lane
/// whose flag is already clear it sets it. Confirm callers only pass set lanes.
constexpr BooleanSWAR clear(int bit) const noexcept {
    constexpr auto Bit = T(1) << (NBits - 1);
    // Construct explicitly: the BooleanSWAR(T) constructor is explicit, so a
    // bare `return <T expression>;` cannot use it.
    return BooleanSWAR(static_cast<T>(this->m_v ^ (Bit << (NBits * bit))));
}
Expand All @@ -256,7 +293,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
// XORs this value with Base::MostSignificantBit rather than inverting every
// bit. NOTE(review): presumably that constant has the MSB of each lane set,
// so only the boolean flags flip; confirm against its definition.
constexpr auto operator ~() const noexcept {
return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this);
}

// Logical negation: XORs this value with MaskMSB.
// NOTE(review): presumably flips the MSB flag of every lane; confirm against
// the definition of broadcast used to build MaskMSB.
constexpr auto operator not() const noexcept {
return BooleanSWAR(MaskMSB ^ *this);
}
Expand Down Expand Up @@ -305,6 +342,9 @@ struct BooleanSWAR: SWAR<NBits, T> {
convertToBooleanSWAR(SWAR<NB, TT> arg) noexcept;
};

// Deduction guide: lets BooleanSWAR{Literals<NBits, T>, {...}} deduce
// BooleanSWAR<NBits, T> from the Literals_t tag; the bool array bound is that
// type's lane count.
template <int NBits, typename T>
BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[BooleanSWAR<NBits, T>::Lanes]) -> BooleanSWAR<NBits, T>;

template<int NBits, typename T>
constexpr BooleanSWAR<NBits, T>
convertToBooleanSWAR(SWAR<NBits, T> arg) noexcept {
Expand Down Expand Up @@ -381,7 +421,7 @@ greaterEqual(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept {
using S = swar::SWAR<NBits, T>;
const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y
const auto z = (x|h) - (y&~h);
// bitwise ternary median!
// bitwise ternary median!
const auto t = h & ~median(x, ~y, z);
return ~BooleanSWAR<NBits, T>{static_cast<T>(t)}; // ~(x<y) === x >= y
}
Expand Down
32 changes: 26 additions & 6 deletions test/swar/BasicOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,26 @@ using S32_32 = SWAR<32, uint32_t>;

using S64_64 = SWAR<64, uint64_t>;

// Per-lane maxima, written in hex so the lane width is visible at a glance.
static_assert(SWAR<16, u64>::MaxUnsignedLaneValue == 0xFFFF);
static_assert(SWAR<16, u32>::MaxUnsignedLaneValue == 0xFFFF);
static_assert(SWAR<8, u32>::MaxUnsignedLaneValue == 0xFF);
static_assert(SWAR<4, u32>::MaxUnsignedLaneValue == 0xF);
static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 0x3);
Comment on lines +32 to +36
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Decimal is not the natural base for these tests.
I know we have a mess of decimals and hex and binary in these tests, but let's stop the mess.
You may create and assign a PR to me to clean this up if you're upset about this request.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand why we might want to have just one format, but decimal makes the repo a little more approachable and understandable.


static_assert(SWAR{Literals<8, u32>, {0, 0, 0, 0}}.value() == 0);
static_assert(SWAR{Literals<8, u32>, {0, 0, 0, 1}}.value() == 1);
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved
// Elements are listed from the most significant lane to the least significant.
static_assert(SWAR{Literals<8, u32>, {8, 3, 2, 1}}.value() == 0x08'03'02'01);
static_assert(SWAR{Literals<8, u32>, {42, 42, 42, 42}}.value() == 0x2A'2A'2A'2A);
// 4-bit lanes in a u32: eight elements, most significant lane first.
static_assert(SWAR{Literals<4, u32>, {0, 0, 0, 0, 0, 0, 0, 0}}.value() == 0);
static_assert(SWAR{Literals<4, u32>, {0, 0, 0, 0, 0, 0, 0, 1}}.value() == 1);
// The first element must land in the most significant lane.
static_assert(SWAR{Literals<4, u32>, {1, 0, 0, 0, 0, 0, 0, 0}}.value() == 0x1000'0000);
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved
static_assert(SWAR{Literals<4, u32>, {8, 7, 6, 5, 4, 3, 2, 1}}.value() == 0x8765'4321);
static_assert(SWAR{Literals<4, u32>, {8, 7, 6, 5, 4, 3, 2, 7}}.value() == 0x8765'4327);
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved

// Each `true` sets only the most significant bit of its lane; `false` leaves the lane zero.
static_assert(BooleanSWAR{Literals<4, u16>, {false, false, false, false}}.value() == 0);
static_assert(BooleanSWAR{Literals<4, u16>, {true, true, true, true}}.value() == 0b1000'1000'1000'1000);
static_assert(BooleanSWAR{Literals<8, u32>, {true, true, true, true}}.value() == 0b10000000'10000000'10000000'10000000);
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved

thecppzoo marked this conversation as resolved.
Show resolved Hide resolved
namespace Multiplication {

static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value());
Expand Down Expand Up @@ -357,23 +377,23 @@ TEST_CASE(
const auto left = S2_16{0}.blitElement(1, i);
const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1);
const auto test = S2_16{0}.blitElement(1, 2);
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
}
}
SECTION("single") {
for (uint32_t i = 1; i < 15; i++) {
const auto large = S4_32{0}.blitElement(1, i+1);
const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1);
const auto test = S4_32{0}.blitElement(1, 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
SECTION("allLanes") {
for (uint32_t i = 1; i < 15; i++) {
const auto small = S4_32(S4_32::LeastSignificantBit * (i-1));
const auto large = S4_32(S4_32::LeastSignificantBit * (i+1));
const auto test = S4_32(S4_32::LeastSignificantBit * 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
}
Expand Down Expand Up @@ -425,7 +445,7 @@ TEST_CASE(
"BooleanSWAR MSBtoLaneMask",
"[swar]"
) {
// BooleanSWAR as a mask:
// BooleanSWAR as a mask:
auto bswar =BooleanSWAR<4, u32>(0x0808'0000);
auto mask = S4_32(0x0F0F'0000);
CHECK(bswar.MSBtoLaneMask().value() == mask.value());
Expand All @@ -452,6 +472,6 @@ TEST_CASE(
CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
}