Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Literals Utils #80

Merged
merged 16 commits into from
Jun 10, 2024
49 changes: 45 additions & 4 deletions inc/zoo/swar/SWAR.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@

namespace zoo { namespace swar {

// Forward declaration: Literals_t (below) names SWAR<NumBits, BaseType>
// before the SWAR template itself is defined.
template <int NBits, typename T>
struct SWAR;

/// Tag type taken as first parameter by the literal constructors of SWAR and
/// BooleanSWAR; it carries the lane width and the base type as template
/// arguments so they can be deduced from the tag.
template <int NumBits, typename BaseType>
struct Literals_t {
    // NOTE(review): the PR reviewer reported this member as not in use and
    // asked for its removal.
    static constexpr void (SWAR<NumBits, BaseType>::*value)() = nullptr;
};

template <int NumBits, typename BaseType>
constexpr Literals_t<NumBits, BaseType> Literals{};
Scottbruceheart marked this conversation as resolved.
Show resolved Hide resolved

using u64 = uint64_t;
using u32 = uint32_t;
using u16 = uint16_t;
Expand Down Expand Up @@ -68,7 +78,26 @@ struct SWAR {
~(~T(0) << NBits),
// Use LowerBits in favor of ~MostSignificantBit to not pollute
// "don't care" bits when non-power-of-two bit lane sizes are supported
LowerBits = MostSignificantBit - LeastSignificantBit;
LowerBits = MostSignificantBit - LeastSignificantBit,
MaxUnsignedLaneValue = LeastSignificantLaneMask;

template <typename U, typename ManipulationFn>
constexpr auto loadIntoLanes(const U (&values)[Lanes],
const ManipulationFn&& manipulation) {
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved
auto result = T{0};
for (auto value : values) {
auto laneValue = manipulation(value);
result = (result << NBits) | laneValue;
}
return result;
}

/// Literal constructor: builds the SWAR from one value per lane, for example
/// `SWAR{Literals<16, u32>, {2, 1}}`. The first array element goes to the
/// highest lane. Only viable when the array has exactly Lanes elements.
template <typename Arg, std::size_t N, typename = std::enable_if_t<N == Lanes, int>>
constexpr SWAR(Literals_t<NBits, T>, const Arg (&values)[N])
    : m_v{loadIntoLanes(values, [](auto lane) { return lane; })} {}


SWAR() = default;
constexpr explicit SWAR(T v): m_v(v) {}
Expand Down Expand Up @@ -161,6 +190,9 @@ struct SWAR {
T m_v;
};

/// Deduction guide for the literal constructor: the lane width and base type
/// of the resulting SWAR are taken from the Literals tag.
template <int LaneBits, typename BaseType, typename Element>
SWAR(
    Literals_t<LaneBits, BaseType>,
    const Element (&values)[SWAR<LaneBits, BaseType>::Lanes]
) -> SWAR<LaneBits, BaseType>;

/// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation.
template<int NBits, typename T = uint64_t>
constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) {
Expand Down Expand Up @@ -231,6 +263,12 @@ template<int NBits, typename T>
struct BooleanSWAR: SWAR<NBits, T> {
using Base = SWAR<NBits, T>;

template <std::size_t N, typename = std::enable_if_t<N == Base::Lanes, T>>
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved
constexpr BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[N]) : Base{0} {
constexpr auto msbOfFirstLane = T{1} << (NBits - 1);
this->m_v = Base::loadIntoLanes(values, [](auto x) { return x ? msbOfFirstLane : 0; });
}
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved

// Booleanness is stored in the MSBs
static constexpr auto MaskMSB =
broadcast<NBits, T>(Base(T(1) << (NBits -1)));
Expand All @@ -240,7 +278,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
static constexpr auto MaskNonLSB = ~MaskLSB;
static constexpr auto MaskNonMSB = ~MaskMSB;
constexpr explicit BooleanSWAR(T v): Base(v) {}

/// Returns a copy in which the most significant bit of lane `bit` is flipped.
///
/// The flip is done with XOR: a lane whose boolean is set comes back cleared,
/// but a lane that is already clear comes back set.
/// NOTE(review): presumably callers only pass lanes known to be set — confirm.
///
/// \param bit index of the lane to operate on
constexpr BooleanSWAR clear(int bit) const noexcept {
    constexpr auto Bit = T(1) << (NBits - 1);
    // The constructor from T is explicit, so the result has to be constructed
    // explicitly; returning the raw T expression does not convert.
    return BooleanSWAR(static_cast<T>(this->m_v ^ (Bit << (NBits * bit))));
}
Expand All @@ -256,7 +294,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
// Complement: XORs *this with a Base holding Base::MostSignificantBit and
// wraps the result in a BooleanSWAR.
constexpr auto operator ~() const noexcept {
return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this);
}

// Logical negation: `not` is the alternative token for `!`, so this declares
// operator!. XORs *this with MaskMSB and wraps the result in a BooleanSWAR.
constexpr auto operator not() const noexcept {
return BooleanSWAR(MaskMSB ^ *this);
}
Expand Down Expand Up @@ -305,6 +343,9 @@ struct BooleanSWAR: SWAR<NBits, T> {
convertToBooleanSWAR(SWAR<NB, TT> arg) noexcept;
};

/// Deduction guide for the literal constructor: the lane width and base type
/// of the resulting BooleanSWAR are taken from the Literals tag.
template <int LaneBits, typename BaseType>
BooleanSWAR(
    Literals_t<LaneBits, BaseType>,
    const bool (&values)[BooleanSWAR<LaneBits, BaseType>::Lanes]
) -> BooleanSWAR<LaneBits, BaseType>;

template<int NBits, typename T>
constexpr BooleanSWAR<NBits, T>
convertToBooleanSWAR(SWAR<NBits, T> arg) noexcept {
Expand Down Expand Up @@ -381,7 +422,7 @@ greaterEqual(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept {
using S = swar::SWAR<NBits, T>;
const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y
const auto z = (x|h) - (y&~h);
// bitwise ternary median!
// bitwise ternary median!
const auto t = h & ~median(x, ~y, z);
return ~BooleanSWAR<NBits, T>{static_cast<T>(t)}; // ~(x<y) === x >= y
}
Expand Down
45 changes: 39 additions & 6 deletions test/swar/BasicOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <ios>
#include <iomanip>
#include <iostream>
#include <sys/wait.h>
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved
#include <type_traits>


Expand Down Expand Up @@ -33,6 +34,38 @@ using S32_32 = SWAR<32, uint32_t>;

using S64_64 = SWAR<64, uint64_t>;

// MaxUnsignedLaneValue is the all-ones value of a single lane; hexadecimal
// makes the lane width visible at a glance.
static_assert(SWAR<16, u64>::MaxUnsignedLaneValue == 0xFFFF);
static_assert(SWAR<16, u32>::MaxUnsignedLaneValue == 0xFFFF);
static_assert(SWAR<8, u32>::MaxUnsignedLaneValue == 0xFF);
static_assert(SWAR<4, u32>::MaxUnsignedLaneValue == 0xF);
static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 0x3);
Comment on lines +32 to +36
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Decimal is not the natural base for these tests.
I know we have a mess of decimals and hex and binary in these tests, but let's stop the mess.
You may create and assign a PR to me to clean this up if you're upset about this request.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand why we might want to have just one format, decimal makes the repo a little more approachable and understandable.


// SWAR literal construction: the first element of the initializer list ends
// up in the most significant lane, the last element in the least significant.

// 32-bit lanes in a 64-bit base
static_assert(SWAR{Literals<32, u64>, {2, 1}}.value() == 0x00000002'00000001);
static_assert(SWAR{Literals<32, u64>, {1, 2}}.value() == 0x00000001'00000002);

// 16-bit lanes in a 64-bit base
static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001);
static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == 0x0001'0002'0003'0004);

// 16-bit lanes in a 32-bit base
static_assert(SWAR{Literals<16, u32>, {2, 1}}.value() == 0x0002'0001);
static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002);

// 8-bit lanes in a 32-bit base
static_assert(SWAR{Literals<8, u32>, {4, 3, 2, 1}}.value() == 0x04'03'02'01);
static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04);

// 8-bit lanes in a 16-bit base
static_assert(SWAR{Literals<8, u16>, {2, 1}}.value() == 0x0201);
static_assert(SWAR{Literals<8, u16>, {1, 2}}.value() == 0x0102);

// 4-bit lanes in an 8-bit base
static_assert(SWAR{Literals<4, u8>, {2, 1}}.value() == 0x21);
static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12);

// BooleanSWAR literal construction: each `true` sets the most significant bit
// of its lane, and the first array element maps to the most significant lane.
//
// The booleans are spelled out rather than abbreviated through single-letter
// F/T macros: a macro named T would rewrite every later `T` token in this
// translation unit, including template parameter names.
static_assert(BooleanSWAR{Literals<4, u16>, {false, false, false, false}}.value() == 0b0000'0000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>, {true, false, false, false}}.value() == 0b1000'0000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>, {false, true, false, false}}.value() == 0b0000'1000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>, {false, false, true, false}}.value() == 0b0000'0000'1000'0000);
static_assert(BooleanSWAR{Literals<4, u16>, {false, false, false, true}}.value() == 0b0000'0000'0000'1000);
thecppzoo marked this conversation as resolved.
Show resolved Hide resolved

thecppzoo marked this conversation as resolved.
Show resolved Hide resolved
namespace Multiplication {

static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value());
Expand Down Expand Up @@ -357,23 +390,23 @@ TEST_CASE(
const auto left = S2_16{0}.blitElement(1, i);
const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1);
const auto test = S2_16{0}.blitElement(1, 2);
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
}
}
SECTION("single") {
for (uint32_t i = 1; i < 15; i++) {
const auto large = S4_32{0}.blitElement(1, i+1);
const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1);
const auto test = S4_32{0}.blitElement(1, 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
SECTION("allLanes") {
for (uint32_t i = 1; i < 15; i++) {
const auto small = S4_32(S4_32::LeastSignificantBit * (i-1));
const auto large = S4_32(S4_32::LeastSignificantBit * (i+1));
const auto test = S4_32(S4_32::LeastSignificantBit * 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
}
Expand Down Expand Up @@ -425,7 +458,7 @@ TEST_CASE(
"BooleanSWAR MSBtoLaneMask",
"[swar]"
) {
// BooleanSWAR as a mask:
// BooleanSWAR as a mask:
auto bswar =BooleanSWAR<4, u32>(0x0808'0000);
auto mask = S4_32(0x0F0F'0000);
CHECK(bswar.MSBtoLaneMask().value() == mask.value());
Expand All @@ -452,6 +485,6 @@ TEST_CASE(
CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
}
Loading