Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Literals Utils #80

Merged
merged 16 commits into from
Jun 10, 2024
50 changes: 46 additions & 4 deletions inc/zoo/swar/SWAR.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,22 @@
#include "zoo/meta/log.h"

#include <type_traits>
#include <assert.h>

#ifdef _MSC_VER
#include <iso646.h>
#endif

namespace zoo { namespace swar {

template <int NBits, typename T>
struct SWAR;

template <int NumBits, typename BaseType> struct Literals_t {};

template <int NumBits, typename BaseType>
constexpr Literals_t<NumBits, BaseType> Literals{};
Scottbruceheart marked this conversation as resolved.
Show resolved Hide resolved

using u64 = uint64_t;
using u32 = uint32_t;
using u16 = uint16_t;
Expand Down Expand Up @@ -68,7 +77,29 @@ struct SWAR {
~(~T(0) << NBits),
// Use LowerBits in favor of ~MostSignificantBit to not pollute
// "don't care" bits when non-power-of-two bit lane sizes are supported
LowerBits = MostSignificantBit - LeastSignificantBit;
LowerBits = MostSignificantBit - LeastSignificantBit,
MaxUnsignedLaneValue = LeastSignificantLaneMask;

/// \note breaks the camel case since other libraries have from_array
template <typename U>
constexpr static auto from_array(const U (&values)[Lanes]) noexcept {
auto result = T{0};
for (auto value : values) {
result = (result << NBits) | value;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blitElement?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I this does not quite blit the element, if it will move in an element, it shifts up what it got and then adds to the least significant lane

}
return result;
}

template<
typename Arg,
std::size_t N,
// Reject via SFINAE plain arrays with non-matching number of elements
typename = std::enable_if_t<N == Lanes>
>
constexpr
SWAR(Literals_t<NBits, T>, const Arg (&values)[N]):
m_v{from_array(values)}
{}

SWAR() = default;
constexpr explicit SWAR(T v): m_v(v) {}
Expand Down Expand Up @@ -161,6 +192,9 @@ struct SWAR {
T m_v;
};

template <int NBits, typename T, typename Arg>
SWAR(Literals_t<NBits, T>, const Arg (&values)[SWAR<NBits, T>::Lanes]) -> SWAR<NBits, T>;

/// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation.
template<int NBits, typename T = uint64_t>
constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) {
Expand Down Expand Up @@ -231,6 +265,11 @@ template<int NBits, typename T>
struct BooleanSWAR: SWAR<NBits, T> {
using Base = SWAR<NBits, T>;

template<std::size_t N, typename = std::enable_if_t<Base::Lanes == N>>
constexpr BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[N]):
Base(Literals<NBits, T>, values)
{ this->m_v <<= (NBits - 1); }

// Booleanness is stored in the MSBs
static constexpr auto MaskMSB =
broadcast<NBits, T>(Base(T(1) << (NBits -1)));
Expand All @@ -240,7 +279,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
static constexpr auto MaskNonLSB = ~MaskLSB;
static constexpr auto MaskNonMSB = ~MaskMSB;
constexpr explicit BooleanSWAR(T v): Base(v) {}

constexpr BooleanSWAR clear(int bit) const noexcept {
constexpr auto Bit = T(1) << (NBits - 1);
return this->m_v ^ (Bit << (NBits * bit)); }
Expand All @@ -256,7 +295,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
constexpr auto operator ~() const noexcept {
return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this);
}

constexpr auto operator not() const noexcept {
return BooleanSWAR(MaskMSB ^ *this);
}
Expand Down Expand Up @@ -305,6 +344,9 @@ struct BooleanSWAR: SWAR<NBits, T> {
convertToBooleanSWAR(SWAR<NB, TT> arg) noexcept;
};

template <int NBits, typename T>
BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[BooleanSWAR<NBits, T>::Lanes]) -> BooleanSWAR<NBits, T>;

template<int NBits, typename T>
constexpr BooleanSWAR<NBits, T>
convertToBooleanSWAR(SWAR<NBits, T> arg) noexcept {
Expand Down Expand Up @@ -381,7 +423,7 @@ greaterEqual(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept {
using S = swar::SWAR<NBits, T>;
const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y
const auto z = (x|h) - (y&~h);
// bitwise ternary median!
// bitwise ternary median!
const auto t = h & ~median(x, ~y, z);
return ~BooleanSWAR<NBits, T>{static_cast<T>(t)}; // ~(x<y) === x >= y
}
Expand Down
4 changes: 3 additions & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ if(MSVC)
# map/RobinHood.hybrid.test.cpp
algorithm/cfs.cpp
algorithm/quicksort.cpp
egyptian.cpp var.cpp variant.cpp CopyMoveAbilities.cpp
egyptian.cpp var.cpp
# variant.cpp investigate why this is failing
CopyMoveAbilities.cpp
)

add_subdirectory(third_party EXCLUDE_FROM_ALL)
Expand Down
137 changes: 127 additions & 10 deletions test/swar/BasicOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,8 @@

#include "catch2/catch.hpp"

#include <ios>
#include <iomanip>
#include <iostream>
#include <type_traits>


using namespace zoo;
using namespace zoo::swar;

Expand All @@ -33,6 +29,127 @@ using S32_32 = SWAR<32, uint32_t>;

using S64_64 = SWAR<64, uint64_t>;

static_assert(SWAR<16, u64>::MaxUnsignedLaneValue == 65535);
static_assert(SWAR<16, u32>::MaxUnsignedLaneValue == 65535);
static_assert(SWAR<8, u32>::MaxUnsignedLaneValue == 255);
static_assert(SWAR<4, u32>::MaxUnsignedLaneValue == 15);
static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 3);
Comment on lines +32 to +36
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Decimal is not the natural base for these tests.
I know we ahve a mess of decimals and hex and binary in these tests, but let's stop the mess.
You may create an assign a PR to me to clean this up if you're upset about this request.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand why we might want to have just one format, decimal makes the repo a little more approachable and understandable.



#define ZOO_PP_UNPARENTHESIZE(...) __VA_ARGS__
#define X(TYPE, av, expected) \
static_assert(\
SWAR{\
Literals<ZOO_PP_UNPARENTHESIZE TYPE>,\
{ZOO_PP_UNPARENTHESIZE av}\
}.value() ==\
expected\
);

/* Preserved to illustrate a technique, remove in a few revisions
static_assert(SWAR{Literals<32, u64>, {2, 1}}.value() == 0x00000002'00000001);
static_assert(SWAR{Literals<32, u64>, {1, 2}}.value() == 0x00000001'00000002);

static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001);
static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == 0x0001'0002'0003'0004);

static_assert(SWAR{Literals<16, u32>, {2, 1}}.value() == 0x0002'0001);
static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002);

static_assert(SWAR{Literals<8, u32>, {4, 3, 2, 1}}.value() == 0x04'03'02'01);
static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04);

static_assert(SWAR{Literals<8, u16>, {2, 1}}.value() == 0x0201);
static_assert(SWAR{Literals<8, u16>, {1, 2}}.value() == 0x0102);
*/
#define LITERALS_TESTS \
X(\
(32, u64),\
(2, 1),\
0x00000002'00000001\
);\
X(\
(32, u64),\
(1, 2),\
0x00000001'00000002\
);\
X(\
(16, u64),\
(4, 3, 2, 1),\
0x0004'0003'0002'0001\
);\
X(\
(16, u64),\
(1, 2, 3, 4),\
0x0001'0002'0003'0004\
)\
X(\
(16, u32),\
(2, 1),\
0x0002'0001\
)\
X(\
(16, u32),\
(1, 2),\
0x0001'0002\
)\
X(\
(8, u32),\
(4, 3, 2, 1),\
0x04'03'02'01\
)\
X(\
(8, u32),\
(1, 2, 3, 4),\
0x01'02'03'04\
)\
X(\
(8, u16),\
(2, 1),\
0x0201\
)\
X(\
(8, u16),\
(1, 2),\
0x0102\
)\
X(\
(4, u8),\
(2, 1),\
0x21\
)\
X(\
(4, u8),\
(1, 2),\
0x12\
)

LITERALS_TESTS


#define F false
#define T true
static_assert(BooleanSWAR{Literals<4, u16>,
{F, F, F, F}}.value() ==
0b0000'0000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>,
{T, F, F, F}}.value() ==
0b1000'0000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>,
{F, T, F, F}}.value() ==
0b0000'1000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>,
{F, F, T, F}}.value() ==
0b0000'0000'1000'0000);
static_assert(BooleanSWAR{Literals<4, u16>,
{F, F, F, T}}.value() ==
0b0000'0000'0000'1000);
static_assert(BooleanSWAR{Literals<4, u16>,
{T, F, F, F}}.value() ==
0b1000'0000'0000'0000);
#undef F
#undef T

thecppzoo marked this conversation as resolved.
Show resolved Hide resolved
namespace Multiplication {

static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value());
Expand Down Expand Up @@ -357,23 +474,23 @@ TEST_CASE(
const auto left = S2_16{0}.blitElement(1, i);
const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1);
const auto test = S2_16{0}.blitElement(1, 2);
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
}
}
SECTION("single") {
for (uint32_t i = 1; i < 15; i++) {
const auto large = S4_32{0}.blitElement(1, i+1);
const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1);
const auto test = S4_32{0}.blitElement(1, 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
SECTION("allLanes") {
for (uint32_t i = 1; i < 15; i++) {
const auto small = S4_32(S4_32::LeastSignificantBit * (i-1));
const auto large = S4_32(S4_32::LeastSignificantBit * (i+1));
const auto test = S4_32(S4_32::LeastSignificantBit * 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
}
Expand Down Expand Up @@ -425,7 +542,7 @@ TEST_CASE(
"BooleanSWAR MSBtoLaneMask",
"[swar]"
) {
// BooleanSWAR as a mask:
// BooleanSWAR as a mask:
auto bswar =BooleanSWAR<4, u32>(0x0808'0000);
auto mask = S4_32(0x0F0F'0000);
CHECK(bswar.MSBtoLaneMask().value() == mask.value());
Expand All @@ -452,6 +569,6 @@ TEST_CASE(
CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
}