-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New Literals Utils #80
Changes from 4 commits
295d602
dbf8115
2327807
7c10d29
0b8b214
e3a802d
7ed6e88
03edd02
0648bb1
0d0a833
33183cd
1cba60e
8aa4ea5
2a383cb
1dc2d89
aabab90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,16 @@ | |
|
||
namespace zoo { namespace swar { | ||
|
||
template <int NBits, typename T> | ||
struct SWAR; | ||
|
||
/// Tag type that carries a lane width (NumBits) and a base type.  It is taken
/// as the first parameter of the "literal" constructors, which accept one
/// value per lane.
/// NOTE(review): `value` is a null pointer-to-member-function of
/// SWAR<NumBits, BaseType>; nothing in the visible code reads it -- confirm
/// it is still needed.
template <int NumBits, typename BaseType> struct Literals_t { | ||
constexpr static void (SWAR<NumBits, BaseType>::*value)() = nullptr; | ||
}; | ||
|
||
/// Ready-made tag object: `Literals<NBits, T>` is the value passed as the
/// first constructor argument, e.g. `SWAR{Literals<16, u32>, {2, 1}}`.
template <int NumBits, typename BaseType> | ||
constexpr Literals_t<NumBits, BaseType> Literals{}; | ||
Scottbruceheart marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
using u64 = uint64_t; | ||
using u32 = uint32_t; | ||
using u16 = uint16_t; | ||
|
@@ -68,7 +78,26 @@ struct SWAR { | |
~(~T(0) << NBits), | ||
// Use LowerBits in favor of ~MostSignificantBit to not pollute | ||
// "don't care" bits when non-power-of-two bit lane sizes are supported | ||
LowerBits = MostSignificantBit - LeastSignificantBit; | ||
LowerBits = MostSignificantBit - LeastSignificantBit, | ||
MaxUnsignedLaneValue = LeastSignificantLaneMask; | ||
|
||
template <typename U, typename ManipulationFn> | ||
constexpr auto loadBaseTypeIntoLanes(const U (&values)[Lanes], | ||
const ManipulationFn&& manipulation) { | ||
auto result = T{0}; | ||
for (auto value : values) { | ||
auto laneValue = manipulation(value); | ||
result = (result << NBits) | laneValue; | ||
} | ||
return result; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not 100% sure about the names for [name lost in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This might be a reasonable way to do this, but I'd use the appropriate algorithm in [name lost in extraction]. |
||
|
||
/// Literal constructor: builds the SWAR from one value per lane.
/// The enable_if only admits arrays whose length N equals Lanes.  Values are
/// stored unchanged (identity lambda) through loadBaseTypeIntoLanes, which
/// places values[0] in the most significant lane.
template <typename Arg, std::size_t N, typename = std::enable_if_t<N == Lanes, int>> | ||
constexpr | ||
SWAR(Literals_t<NBits, T>, const Arg (&values)[N]) : m_v{0} { | ||
m_v = loadBaseTypeIntoLanes(values, [](auto x) { return x; }); | ||
} | ||
|
||
|
||
SWAR() = default; | ||
constexpr explicit SWAR(T v): m_v(v) {} | ||
|
@@ -161,6 +190,9 @@ struct SWAR { | |
T m_v; | ||
}; | ||
|
||
/// Deduction guide: lets `SWAR{Literals<NBits, T>, {...}}` deduce
/// SWAR<NBits, T> from the tag argument, so call sites need not repeat the
/// template arguments.
template <int NBits, typename T, typename Arg> | ||
SWAR(Literals_t<NBits, T>, const Arg (&values)[SWAR<NBits, T>::Lanes]) -> SWAR<NBits, T>; | ||
|
||
/// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation. | ||
template<int NBits, typename T = uint64_t> | ||
constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) { | ||
|
@@ -231,6 +263,12 @@ template<int NBits, typename T> | |
struct BooleanSWAR: SWAR<NBits, T> { | ||
using Base = SWAR<NBits, T>; | ||
|
||
/// Literal constructor: builds a BooleanSWAR from one bool per lane.
/// The enable_if only admits arrays whose length N equals Base::Lanes.  A true
/// element contributes T{1} << (NBits - 1), i.e. the most significant bit of
/// its lane; a false element contributes 0.  values[0] lands in the most
/// significant lane.
template <std::size_t N, typename = std::enable_if_t<N == Base::Lanes, T>> | ||
thecppzoo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
constexpr BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[N]) : Base{0} { | ||
constexpr auto msbOfFirstLane = T{1} << (NBits - 1); | ||
this->m_v = Base::loadBaseTypeIntoLanes(values, [](auto x) { return x ? msbOfFirstLane : 0; }); | ||
} | ||
|
||
// Booleanness is stored in the MSBs | ||
static constexpr auto MaskMSB = | ||
broadcast<NBits, T>(Base(T(1) << (NBits -1))); | ||
|
@@ -240,7 +278,7 @@ struct BooleanSWAR: SWAR<NBits, T> { | |
static constexpr auto MaskNonLSB = ~MaskLSB; | ||
static constexpr auto MaskNonMSB = ~MaskMSB; | ||
constexpr explicit BooleanSWAR(T v): Base(v) {} | ||
|
||
/// Returns the stored value XOR-ed with the most significant bit of lane `bit`.
/// NOTE(review): XOR toggles that bit, so the lane is only cleared when its
/// MSB was set beforehand -- confirm callers guarantee this.
/// NOTE(review): the returned expression has type T while the T constructor
/// shown in this diff is explicit -- confirm this converts as intended.
constexpr BooleanSWAR clear(int bit) const noexcept { | ||
constexpr auto Bit = T(1) << (NBits - 1); | ||
return this->m_v ^ (Bit << (NBits * bit)); } | ||
|
@@ -256,7 +294,7 @@ struct BooleanSWAR: SWAR<NBits, T> { | |
/// Returns a BooleanSWAR holding this value XOR-ed with
/// Base::MostSignificantBit (presumably the MSB of every lane -- its
/// definition is outside this view).
constexpr auto operator ~() const noexcept { | ||
return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this); | ||
} | ||
|
||
/// Returns a BooleanSWAR holding this value XOR-ed with MaskMSB, the
/// broadcast of T(1) << (NBits - 1).
constexpr auto operator not() const noexcept { | ||
return BooleanSWAR(MaskMSB ^ *this); | ||
} | ||
|
@@ -305,6 +343,9 @@ struct BooleanSWAR: SWAR<NBits, T> { | |
convertToBooleanSWAR(SWAR<NB, TT> arg) noexcept; | ||
}; | ||
|
||
/// Deduction guide: lets `BooleanSWAR{Literals<NBits, T>, {...}}` deduce
/// BooleanSWAR<NBits, T> from the tag argument.
template <int NBits, typename T> | ||
BooleanSWAR(Literals_t<NBits, T>, const bool (&values)[BooleanSWAR<NBits, T>::Lanes]) -> BooleanSWAR<NBits, T>; | ||
|
||
template<int NBits, typename T> | ||
constexpr BooleanSWAR<NBits, T> | ||
convertToBooleanSWAR(SWAR<NBits, T> arg) noexcept { | ||
|
@@ -381,7 +422,7 @@ greaterEqual(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept { | |
using S = swar::SWAR<NBits, T>; | ||
const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y | ||
const auto z = (x|h) - (y&~h); | ||
// bitwise ternary median! | ||
// bitwise ternary median! | ||
const auto t = h & ~median(x, ~y, z); | ||
return ~BooleanSWAR<NBits, T>{static_cast<T>(t)}; // ~(x<y) === x >= y | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
#include <ios> | ||
#include <iomanip> | ||
#include <iostream> | ||
#include <sys/wait.h> | ||
thecppzoo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#include <type_traits> | ||
|
||
|
||
|
@@ -33,6 +34,38 @@ using S32_32 = SWAR<32, uint32_t>; | |
|
||
using S64_64 = SWAR<64, uint64_t>; | ||
|
||
// MaxUnsignedLaneValue is expected to be 2^NBits - 1 regardless of the base
// type: 0xFFFF for 16-bit lanes, 0xFF for 8, 0xF for 4 and 0x3 for 2.
static_assert(SWAR<16, u64>::MaxUnsignedLaneValue == 65535); | ||
static_assert(SWAR<16, u32>::MaxUnsignedLaneValue == 65535); | ||
static_assert(SWAR<8, u32>::MaxUnsignedLaneValue == 255); | ||
static_assert(SWAR<4, u32>::MaxUnsignedLaneValue == 15); | ||
static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 3); | ||
Comment on lines
+32
to
+36
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Decimal is not the natural base for these tests. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I understand why we might want to have just one format; decimal makes the repo a little more approachable and understandable. |
||
|
||
// Literal construction: the first array element is stored in the most
// significant lane, so the hexadecimal expectation reads in the same order as
// the initializer list.  Each lane width / base type pair is checked with an
// ascending and a descending sequence.
static_assert(SWAR{Literals<32, u64>, {2, 1}}.value() == 0x00000002'00000001); | ||
static_assert(SWAR{Literals<32, u64>, {1, 2}}.value() == 0x00000001'00000002); | ||
|
||
static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); | ||
static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == 0x0001'0002'0003'0004); | ||
|
||
static_assert(SWAR{Literals<16, u32>, {2, 1}}.value() == 0x0002'0001); | ||
static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002); | ||
|
||
static_assert(SWAR{Literals<8, u32>, {4, 3, 2, 1}}.value() == 0x04'03'02'01); | ||
static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04); | ||
|
||
static_assert(SWAR{Literals<8, u16>, {2, 1}}.value() == 0x0201); | ||
static_assert(SWAR{Literals<8, u16>, {1, 2}}.value() == 0x0102); | ||
|
||
static_assert(SWAR{Literals<4, u8>, {2, 1}}.value() == 0x21); | ||
static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12); | ||
|
||
// Short aliases that keep the boolean lane literals below aligned and easy to
// scan.  They are constants rather than macros: a macro named `T` would
// rewrite every later use of the identifier T in this translation unit,
// including template parameter names.
constexpr bool F = false;
constexpr bool T = true;
// The first array element is stored in the most significant lane; a true
// element sets the most significant bit of its 4-bit lane.
static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, F}}.value() == 0);
static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'0000);
static_assert(BooleanSWAR{Literals<4, u16>, {F, F, T, F}}.value() == 0b0000'0000'1000'0000);
static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, T}}.value() == 0b0000'0000'0000'1000);
thecppzoo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
thecppzoo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
namespace Multiplication { | ||
|
||
static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value()); | ||
|
@@ -357,23 +390,23 @@ TEST_CASE( | |
const auto left = S2_16{0}.blitElement(1, i); | ||
const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1); | ||
const auto test = S2_16{0}.blitElement(1, 2); | ||
CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); | ||
CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); | ||
} | ||
} | ||
SECTION("single") { | ||
for (uint32_t i = 1; i < 15; i++) { | ||
const auto large = S4_32{0}.blitElement(1, i+1); | ||
const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1); | ||
const auto test = S4_32{0}.blitElement(1, 8); | ||
CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); | ||
CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); | ||
} | ||
} | ||
SECTION("allLanes") { | ||
for (uint32_t i = 1; i < 15; i++) { | ||
const auto small = S4_32(S4_32::LeastSignificantBit * (i-1)); | ||
const auto large = S4_32(S4_32::LeastSignificantBit * (i+1)); | ||
const auto test = S4_32(S4_32::LeastSignificantBit * 8); | ||
CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); | ||
CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); | ||
} | ||
} | ||
} | ||
|
@@ -425,7 +458,7 @@ TEST_CASE( | |
"BooleanSWAR MSBtoLaneMask", | ||
"[swar]" | ||
) { | ||
// BooleanSWAR as a mask: | ||
// BooleanSWAR as a mask: | ||
auto bswar =BooleanSWAR<4, u32>(0x0808'0000); | ||
auto mask = S4_32(0x0F0F'0000); | ||
CHECK(bswar.MSBtoLaneMask().value() == mask.value()); | ||
|
@@ -452,6 +485,6 @@ TEST_CASE( | |
CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value()); | ||
CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value()); | ||
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value()); | ||
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); | ||
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); | ||
CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); | ||
CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not in use; please remove it.