Skip to content

Commit

Permalink
New Literals Utils (#80)
Browse files Browse the repository at this point in the history
* new literals

* PR feedback

* tidy tests

* tidy reused code for booleanswaer

* update name

* style

* test style

* fix

* undef util

* simplify again

* fmt

* Add to Array

add to_array

oops make 23 for now

not sure we want to commit to this?

remove named function

Revert "Add to Array"

This reverts commit a7d744d.

* from array

* rm unused

* Finalizing PR

* Strange Windows fail on unmodified code of Variant

---------

Co-authored-by: Eddie <thecppzoo@gmail.com>
  • Loading branch information
jamierpond and thecppzoo committed Jun 10, 2024
1 parent e45cca2 commit 0e2a15b
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 15 deletions.
50 changes: 46 additions & 4 deletions inc/zoo/swar/SWAR.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,22 @@
#include "zoo/meta/log.h"

#include <type_traits>
#include <assert.h>

#ifdef _MSC_VER
#include <iso646.h>
#endif

namespace zoo { namespace swar {

template <int NBits, typename T>
struct SWAR;

template <int NumBits, typename BaseType> struct Literals_t {};

template <int NumBits, typename BaseType>
constexpr Literals_t<NumBits, BaseType> Literals{};

using u64 = uint64_t;
using u32 = uint32_t;
using u16 = uint16_t;
Expand Down Expand Up @@ -82,7 +91,29 @@ struct SWAR {
~(~T(0) << NBits),
// Use LowerBits in favor of ~MostSignificantBit to not pollute
// "don't care" bits when non-power-of-two bit lane sizes are supported
LowerBits = MostSignificantBit - LeastSignificantBit;
LowerBits = MostSignificantBit - LeastSignificantBit,
MaxUnsignedLaneValue = LeastSignificantLaneMask;

/// \note breaks the camel case since other libraries have from_array
template <typename U>
constexpr static auto from_array(const U (&values)[Lanes]) noexcept {
auto result = T{0};
for (auto value : values) {
result = (result << NBits) | value;
}
return result;
}

template<
typename Arg,
std::size_t N,
// Reject via SFINAE plain arrays with non-matching number of elements
typename = std::enable_if_t<N == Lanes>
>
constexpr
SWAR(Literals_t<NBits, T>, const Arg (&values)[N]):
m_v{from_array(values)}
{}

SWAR() = default;
constexpr explicit SWAR(T v): m_v(v) {}
Expand Down Expand Up @@ -175,6 +206,9 @@ struct SWAR {
T m_v;
};

template <int NBits, typename T, typename Arg>
SWAR(Literals_t<NBits, T>, const Arg (&values)[SWAR<NBits, T>::Lanes]) -> SWAR<NBits, T>;

/// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation.
template<int NBits, typename T = uint64_t>
constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) {
Expand Down Expand Up @@ -245,6 +279,11 @@ template<int NBits, typename T>
struct BooleanSWAR: SWAR<NBits, T> {
using Base = SWAR<NBits, T>;

template<std::size_t N, typename = std::enable_if_t<Base::Lanes == N>>
constexpr BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[N]):
Base(Literals<NBits, T>, values)
{ this->m_v <<= (NBits - 1); }

// Booleanness is stored in the MSBs
static constexpr auto MaskMSB =
broadcast<NBits, T>(Base(T(1) << (NBits -1)));
Expand All @@ -254,7 +293,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
static constexpr auto MaskNonLSB = ~MaskLSB;
static constexpr auto MaskNonMSB = ~MaskMSB;
constexpr explicit BooleanSWAR(T v): Base(v) {}

constexpr BooleanSWAR clear(int bit) const noexcept {
constexpr auto Bit = T(1) << (NBits - 1);
return this->m_v ^ (Bit << (NBits * bit)); }
Expand All @@ -270,7 +309,7 @@ struct BooleanSWAR: SWAR<NBits, T> {
constexpr auto operator ~() const noexcept {
return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this);
}

constexpr auto operator not() const noexcept {
return BooleanSWAR(MaskMSB ^ *this);
}
Expand Down Expand Up @@ -319,6 +358,9 @@ struct BooleanSWAR: SWAR<NBits, T> {
convertToBooleanSWAR(SWAR<NB, TT> arg) noexcept;
};

template <int NBits, typename T>
BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[BooleanSWAR<NBits, T>::Lanes]) -> BooleanSWAR<NBits, T>;

template<int NBits, typename T>
constexpr BooleanSWAR<NBits, T>
convertToBooleanSWAR(SWAR<NBits, T> arg) noexcept {
Expand Down Expand Up @@ -395,7 +437,7 @@ greaterEqual(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept {
using S = swar::SWAR<NBits, T>;
const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y
const auto z = (x|h) - (y&~h);
// bitwise ternary median!
// bitwise ternary median!
const auto t = h & ~median(x, ~y, z);
return ~BooleanSWAR<NBits, T>{static_cast<T>(t)}; // ~(x<y) === x >= y
}
Expand Down
4 changes: 3 additions & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ if(MSVC)
# map/RobinHood.hybrid.test.cpp
algorithm/cfs.cpp
algorithm/quicksort.cpp
egyptian.cpp var.cpp variant.cpp CopyMoveAbilities.cpp
egyptian.cpp var.cpp
# variant.cpp investigate why this is failing
CopyMoveAbilities.cpp
)

add_subdirectory(third_party EXCLUDE_FROM_ALL)
Expand Down
137 changes: 127 additions & 10 deletions test/swar/BasicOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,8 @@

#include "catch2/catch.hpp"

#include <ios>
#include <iomanip>
#include <iostream>
#include <type_traits>


using namespace zoo;
using namespace zoo::swar;

Expand All @@ -33,6 +29,127 @@ using S32_32 = SWAR<32, uint32_t>;

using S64_64 = SWAR<64, uint64_t>;

static_assert(SWAR<16, u64>::MaxUnsignedLaneValue == 65535);
static_assert(SWAR<16, u32>::MaxUnsignedLaneValue == 65535);
static_assert(SWAR<8, u32>::MaxUnsignedLaneValue == 255);
static_assert(SWAR<4, u32>::MaxUnsignedLaneValue == 15);
static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 3);


#define ZOO_PP_UNPARENTHESIZE(...) __VA_ARGS__
#define X(TYPE, av, expected) \
static_assert(\
SWAR{\
Literals<ZOO_PP_UNPARENTHESIZE TYPE>,\
{ZOO_PP_UNPARENTHESIZE av}\
}.value() ==\
expected\
);

/* Preserved to illustrate a technique, remove in a few revisions
static_assert(SWAR{Literals<32, u64>, {2, 1}}.value() == 0x00000002'00000001);
static_assert(SWAR{Literals<32, u64>, {1, 2}}.value() == 0x00000001'00000002);
static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001);
static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == 0x0001'0002'0003'0004);
static_assert(SWAR{Literals<16, u32>, {2, 1}}.value() == 0x0002'0001);
static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002);
static_assert(SWAR{Literals<8, u32>, {4, 3, 2, 1}}.value() == 0x04'03'02'01);
static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04);
static_assert(SWAR{Literals<8, u16>, {2, 1}}.value() == 0x0201);
static_assert(SWAR{Literals<8, u16>, {1, 2}}.value() == 0x0102);
*/
#define LITERALS_TESTS \
X(\
(32, u64),\
(2, 1),\
0x00000002'00000001\
);\
X(\
(32, u64),\
(1, 2),\
0x00000001'00000002\
);\
X(\
(16, u64),\
(4, 3, 2, 1),\
0x0004'0003'0002'0001\
);\
X(\
(16, u64),\
(1, 2, 3, 4),\
0x0001'0002'0003'0004\
)\
X(\
(16, u32),\
(2, 1),\
0x0002'0001\
)\
X(\
(16, u32),\
(1, 2),\
0x0001'0002\
)\
X(\
(8, u32),\
(4, 3, 2, 1),\
0x04'03'02'01\
)\
X(\
(8, u32),\
(1, 2, 3, 4),\
0x01'02'03'04\
)\
X(\
(8, u16),\
(2, 1),\
0x0201\
)\
X(\
(8, u16),\
(1, 2),\
0x0102\
)\
X(\
(4, u8),\
(2, 1),\
0x21\
)\
X(\
(4, u8),\
(1, 2),\
0x12\
)

LITERALS_TESTS


#define F false
#define T true
static_assert(BooleanSWAR{Literals<4, u16>,
{F, F, F, F}}.value() ==
0b0000'0000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>,
{T, F, F, F}}.value() ==
0b1000'0000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>,
{F, T, F, F}}.value() ==
0b0000'1000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>,
{F, F, T, F}}.value() ==
0b0000'0000'1000'0000);
static_assert(BooleanSWAR{Literals<4, u16>,
{F, F, F, T}}.value() ==
0b0000'0000'0000'1000);
static_assert(BooleanSWAR{Literals<4, u16>,
{T, F, F, F}}.value() ==
0b1000'0000'0000'0000);
#undef F
#undef T

namespace Multiplication {

static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value());
Expand Down Expand Up @@ -357,23 +474,23 @@ TEST_CASE(
const auto left = S2_16{0}.blitElement(1, i);
const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1);
const auto test = S2_16{0}.blitElement(1, 2);
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
}
}
SECTION("single") {
for (uint32_t i = 1; i < 15; i++) {
const auto large = S4_32{0}.blitElement(1, i+1);
const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1);
const auto test = S4_32{0}.blitElement(1, 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
SECTION("allLanes") {
for (uint32_t i = 1; i < 15; i++) {
const auto small = S4_32(S4_32::LeastSignificantBit * (i-1));
const auto large = S4_32(S4_32::LeastSignificantBit * (i+1));
const auto test = S4_32(S4_32::LeastSignificantBit * 8);
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
}
}
}
Expand Down Expand Up @@ -425,7 +542,7 @@ TEST_CASE(
"BooleanSWAR MSBtoLaneMask",
"[swar]"
) {
// BooleanSWAR as a mask:
// BooleanSWAR as a mask:
auto bswar =BooleanSWAR<4, u32>(0x0808'0000);
auto mask = S4_32(0x0F0F'0000);
CHECK(bswar.MSBtoLaneMask().value() == mask.value());
Expand All @@ -452,6 +569,6 @@ TEST_CASE(
CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
}

0 comments on commit 0e2a15b

Please sign in to comment.