Skip to content

Commit

Permalink
Implement any and all for intN and uintN vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
redorav committed Sep 12, 2020
1 parent fb35628 commit 0b370de
Show file tree
Hide file tree
Showing 7 changed files with 212 additions and 63 deletions.
10 changes: 10 additions & 0 deletions include/hlsl++_vector_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,16 @@ namespace hlslpp
// abs overloads for int3 / int4: hand the raw register to _hlslpp_abs_epi32
// and wrap the result back into the matching vector type.
hlslpp_inline int3 abs(const int3& i)
{
	return int3(_hlslpp_abs_epi32(i.vec));
}

hlslpp_inline int4 abs(const int4& i)
{
	return int4(_hlslpp_abs_epi32(i.vec));
}

// all(): true when every component of the signed integer vector is non-zero.
// Each overload delegates to the backend routine for its component count.
hlslpp_inline bool all(const int1& f)
{
	const bool everyLaneSet = _hlslpp_all1_epi32(f.vec);
	return everyLaneSet;
}

hlslpp_inline bool all(const int2& f)
{
	const bool everyLaneSet = _hlslpp_all2_epi32(f.vec);
	return everyLaneSet;
}

hlslpp_inline bool all(const int3& f)
{
	const bool everyLaneSet = _hlslpp_all3_epi32(f.vec);
	return everyLaneSet;
}

hlslpp_inline bool all(const int4& f)
{
	const bool everyLaneSet = _hlslpp_all4_epi32(f.vec);
	return everyLaneSet;
}

// any(): true when at least one component of the signed integer vector is non-zero.
// Each overload delegates to the backend routine for its component count.
hlslpp_inline bool any(const int1& f)
{
	const bool someLaneSet = _hlslpp_any1_epi32(f.vec);
	return someLaneSet;
}

hlslpp_inline bool any(const int2& f)
{
	const bool someLaneSet = _hlslpp_any2_epi32(f.vec);
	return someLaneSet;
}

hlslpp_inline bool any(const int3& f)
{
	const bool someLaneSet = _hlslpp_any3_epi32(f.vec);
	return someLaneSet;
}

hlslpp_inline bool any(const int4& f)
{
	const bool someLaneSet = _hlslpp_any4_epi32(f.vec);
	return someLaneSet;
}

// countbits overloads: forward the raw register to _hlslpp_countbits_epi32
// and wrap the result in the same vector width as the argument.
hlslpp_inline int1 countbits(const int1& i)
{
	return int1(_hlslpp_countbits_epi32(i.vec));
}

hlslpp_inline int2 countbits(const int2& i)
{
	return int2(_hlslpp_countbits_epi32(i.vec));
}

hlslpp_inline int3 countbits(const int3& i)
{
	return int3(_hlslpp_countbits_epi32(i.vec));
}
Expand Down
10 changes: 10 additions & 0 deletions include/hlsl++_vector_uint.h
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,16 @@ namespace hlslpp
// Compound XOR: computes i1 ^ i2, stores it in i1 and returns i1 by reference.
hlslpp_inline uint3& operator ^= (uint3& i1, const uint3& i2)
{
	i1 = i1 ^ i2;
	return i1;
}

hlslpp_inline uint4& operator ^= (uint4& i1, const uint4& i2)
{
	i1 = i1 ^ i2;
	return i1;
}

// all(): true when every component of the unsigned integer vector is non-zero.
// Each overload delegates to the backend routine for its component count.
hlslpp_inline bool all(const uint1& f)
{
	const bool everyLaneSet = _hlslpp_all1_epu32(f.vec);
	return everyLaneSet;
}

hlslpp_inline bool all(const uint2& f)
{
	const bool everyLaneSet = _hlslpp_all2_epu32(f.vec);
	return everyLaneSet;
}

hlslpp_inline bool all(const uint3& f)
{
	const bool everyLaneSet = _hlslpp_all3_epu32(f.vec);
	return everyLaneSet;
}

hlslpp_inline bool all(const uint4& f)
{
	const bool everyLaneSet = _hlslpp_all4_epu32(f.vec);
	return everyLaneSet;
}

// any(): true when at least one component of the unsigned integer vector is non-zero.
// Each overload delegates to the backend routine for its component count.
hlslpp_inline bool any(const uint1& f)
{
	const bool someLaneSet = _hlslpp_any1_epu32(f.vec);
	return someLaneSet;
}

hlslpp_inline bool any(const uint2& f)
{
	const bool someLaneSet = _hlslpp_any2_epu32(f.vec);
	return someLaneSet;
}

hlslpp_inline bool any(const uint3& f)
{
	const bool someLaneSet = _hlslpp_any3_epu32(f.vec);
	return someLaneSet;
}

hlslpp_inline bool any(const uint4& f)
{
	const bool someLaneSet = _hlslpp_any4_epu32(f.vec);
	return someLaneSet;
}

// countbits overloads: forward the raw register to _hlslpp_countbits_epu32
// and wrap the result in the same vector width as the argument.
hlslpp_inline uint1 countbits(const uint1& i)
{
	return uint1(_hlslpp_countbits_epu32(i.vec));
}

hlslpp_inline uint2 countbits(const uint2& i)
{
	return uint2(_hlslpp_countbits_epu32(i.vec));
}

hlslpp_inline uint3 countbits(const uint3& i)
{
	return uint3(_hlslpp_countbits_epu32(i.vec));
}
Expand Down
20 changes: 20 additions & 0 deletions include/platforms/hlsl++_360.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,16 @@ hlslpp_inline __vector4 __vrcp(__vector4 x)
// Integer shift macros forward straight to the __vslw / __vsrw intrinsics.
#define _hlslpp_sllv_epi32(x, y) __vslw((x), (y))
#define _hlslpp_srlv_epi32(x, y) __vsrw((x), (y))

// Integer any(): reuses the float any() routines of this backend unchanged.
#define _hlslpp_any1_epi32(x) _hlslpp_any1_ps((x))
#define _hlslpp_any2_epi32(x) _hlslpp_any2_ps((x))
#define _hlslpp_any3_epi32(x) _hlslpp_any3_ps((x))
#define _hlslpp_any4_epi32(x) _hlslpp_any4_ps((x))

// Integer all(): reuses the float all() routines of this backend unchanged.
#define _hlslpp_all1_epi32(x) _hlslpp_all1_ps((x))
#define _hlslpp_all2_epi32(x) _hlslpp_all2_ps((x))
#define _hlslpp_all3_epi32(x) _hlslpp_all3_ps((x))
#define _hlslpp_all4_epi32(x) _hlslpp_all4_ps((x))

//-----------------
// Unsigned Integer
//-----------------
Expand Down Expand Up @@ -244,6 +254,16 @@ hlslpp_inline __vector4 __vrcp(__vector4 x)
// Unsigned shift macros are aliases of the signed integer shift macros.
#define _hlslpp_sllv_epu32(x, y) _hlslpp_sllv_epi32((x), (y))
#define _hlslpp_srlv_epu32(x, y) _hlslpp_srlv_epi32((x), (y))

// Unsigned any(): a non-zero test does not depend on signedness, so alias the signed macros.
#define _hlslpp_any1_epu32(x) _hlslpp_any1_epi32((x))
#define _hlslpp_any2_epu32(x) _hlslpp_any2_epi32((x))
#define _hlslpp_any3_epu32(x) _hlslpp_any3_epi32((x))
#define _hlslpp_any4_epu32(x) _hlslpp_any4_epi32((x))

// Unsigned all(): a non-zero test does not depend on signedness, so alias the signed macros.
#define _hlslpp_all1_epu32(x) _hlslpp_all1_epi32((x))
#define _hlslpp_all2_epu32(x) _hlslpp_all2_epi32((x))
#define _hlslpp_all3_epu32(x) _hlslpp_all3_epi32((x))
#define _hlslpp_all4_epu32(x) _hlslpp_all4_epi32((x))

// Supply XM_CRMASK_CR6TRUE (bit 7) only when no earlier header has defined it.
#if !defined(XM_CRMASK_CR6TRUE)
#define XM_CRMASK_CR6TRUE (1 << 7)
#endif
Expand Down
144 changes: 82 additions & 62 deletions include/platforms/hlsl++_neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,69 +390,15 @@ hlslpp_inline n128 _hlslpp_dot4_ps(n128 x, n128 y)

//https://stackoverflow.com/questions/15389539/fastest-way-to-test-a-128-bit-neon-register-for-a-value-of-0-using-intrinsics

// any1 (float): true when the raw bits of lane 0 are not all zero.
hlslpp_inline bool _hlslpp_any1_ps(n128 x)
{
	const uint32x4_t bits = vreinterpretq_u32_f32(x);
	const uint32x2_t lowHalf = vget_low_u32(bits);
	return 0 != vget_lane_u32(lowHalf, 0);
}

// any2 (float): true when the raw bits of lane 0 or lane 1 are non-zero.
hlslpp_inline bool _hlslpp_any2_ps(n128 x)
{
	const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(x));
	// Pairwise max folds both lanes into lane 0.
	const uint32x2_t folded = vpmax_u32(xy, xy);
	return 0 != vget_lane_u32(folded, 0);
}

// any3 (float): true when the raw bits of lane 0, 1 or 2 are non-zero.
hlslpp_inline bool _hlslpp_any3_ps(n128 x)
{
	// Zero lane 3 so it can never make the result true.
	const uint32x4_t xyz0 = vsetq_lane_u32(0, vreinterpretq_u32_f32(x), 3);
#if !defined(HLSLPP_ARM64)
	// No horizontal max here: OR the two halves, then fold pairwise.
	const uint32x2_t merged = vorr_u32(vget_low_u32(xyz0), vget_high_u32(xyz0));
	const uint32x2_t folded = vpmax_u32(merged, merged);
	return 0 != vget_lane_u32(folded, 0);
#else
	return 0 != vmaxvq_u32(xyz0);
#endif
}

// any4 (float): true when the raw bits of any of the four lanes are non-zero.
hlslpp_inline bool _hlslpp_any4_ps(n128 x)
{
#if !defined(HLSLPP_ARM64)
	// No horizontal max here: OR the two halves, then fold pairwise.
	const uint32x2_t merged = vorr_u32(vget_low_u32(vreinterpretq_u32_f32(x)), vget_high_u32(vreinterpretq_u32_f32(x)));
	const uint32x2_t folded = vpmax_u32(merged, merged);
	return 0 != vget_lane_u32(folded, 0);
#else
	return 0 != vmaxvq_u32(vreinterpretq_u32_f32(x));
#endif
}

// all1 (float): with a single lane this is the same test as any1 - lane 0 bits non-zero.
hlslpp_inline bool _hlslpp_all1_ps(n128 x)
{
	const uint32x4_t bits = vreinterpretq_u32_f32(x);
	const uint32x2_t lowHalf = vget_low_u32(bits);
	return 0 != vget_lane_u32(lowHalf, 0);
}
// Float any(): reinterpret the lanes as 32-bit unsigned and reuse the integer routines.
#define _hlslpp_any1_ps(x) _hlslpp_any1_epi32(vreinterpretq_u32_f32((x)))
#define _hlslpp_any2_ps(x) _hlslpp_any2_epi32(vreinterpretq_u32_f32((x)))
#define _hlslpp_any3_ps(x) _hlslpp_any3_epi32(vreinterpretq_u32_f32((x)))
#define _hlslpp_any4_ps(x) _hlslpp_any4_epi32(vreinterpretq_u32_f32((x)))

// all2 (float): true when the raw bits of both lane 0 and lane 1 are non-zero.
hlslpp_inline bool _hlslpp_all2_ps(n128 x)
{
	const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(x));
	// Pairwise min leaves the smaller lane in lane 0; it is zero if either lane is zero.
	const uint32x2_t folded = vpmin_u32(xy, xy);
	return 0 != vget_lane_u32(folded, 0);
}

// all3 (float): true when the raw bits of lanes 0, 1 and 2 are all non-zero.
hlslpp_inline bool _hlslpp_all3_ps(n128 x)
{
	// Force lane 3 to all-ones so it can never make the result false.
	const uint32x4_t xyz1 = vsetq_lane_u32(0xffffffff, vreinterpretq_u32_f32(x), 3);
#if !defined(HLSLPP_ARM64)
	// No horizontal min here: two pairwise-min steps reduce four lanes to one.
	const uint32x2_t halfMin = vpmin_u32(vget_low_u32(xyz1), vget_high_u32(xyz1));
	const uint32x2_t folded = vpmin_u32(halfMin, halfMin);
	return 0 != vget_lane_u32(folded, 0);
#else
	return 0 != vminvq_u32(xyz1);
#endif
}

// all4 (float): true when the raw bits of all four lanes are non-zero.
hlslpp_inline bool _hlslpp_all4_ps(n128 x)
{
#if !defined(HLSLPP_ARM64)
	// No horizontal min here: two pairwise-min steps reduce four lanes to one.
	const uint32x2_t halfMin = vpmin_u32(vget_low_u32(vreinterpretq_u32_f32(x)), vget_high_u32(vreinterpretq_u32_f32(x)));
	const uint32x2_t folded = vpmin_u32(halfMin, halfMin);
	return 0 != vget_lane_u32(folded, 0);
#else
	return 0 != vminvq_u32(vreinterpretq_u32_f32(x));
#endif
}
// Float all(): reinterpret the lanes as 32-bit unsigned and reuse the integer routines.
#define _hlslpp_all1_ps(x) _hlslpp_all1_epi32(vreinterpretq_u32_f32((x)))
#define _hlslpp_all2_ps(x) _hlslpp_all2_epi32(vreinterpretq_u32_f32((x)))
#define _hlslpp_all3_ps(x) _hlslpp_all3_epi32(vreinterpretq_u32_f32((x)))
#define _hlslpp_all4_ps(x) _hlslpp_all4_epi32(vreinterpretq_u32_f32((x)))

//--------
// Storing
Expand Down Expand Up @@ -593,6 +539,70 @@ hlslpp_inline void _hlslpp_load4x4_ps(float* p, n128& x0, n128& x1, n128& x2, n1
// The left shift uses vshlq_s32 directly; the right shift reuses vshlq_s32
// with the count negated through _hlslpp_neg_epi32.
#define _hlslpp_sllv_epi32(x, y) vshlq_s32((x), (y))
#define _hlslpp_srlv_epi32(x, y) vshlq_s32((x), _hlslpp_neg_epi32(y))

// any1: true when lane 0 is non-zero.
// NOTE(review): _u32 intrinsics are applied to an n128i argument - confirm n128i
// is accepted where uint32x4_t is expected on every supported compiler.
hlslpp_inline bool _hlslpp_any1_epi32(n128i x)
{
	const uint32x2_t lowHalf = vget_low_u32(x);
	return 0 != vget_lane_u32(lowHalf, 0);
}

// any2: true when lane 0 or lane 1 is non-zero.
// NOTE(review): _u32 intrinsics are applied to an n128i argument - confirm n128i
// is accepted where uint32x4_t is expected on every supported compiler.
hlslpp_inline bool _hlslpp_any2_epi32(n128i x)
{
	const uint32x2_t xy = vget_low_u32(x);
	// Pairwise max folds both lanes into lane 0.
	const uint32x2_t folded = vpmax_u32(xy, xy);
	return 0 != vget_lane_u32(folded, 0);
}

// any3: true when lane 0, 1 or 2 is non-zero.
// NOTE(review): _u32 intrinsics are applied to an n128i argument - confirm n128i
// is accepted where uint32x4_t is expected on every supported compiler.
hlslpp_inline bool _hlslpp_any3_epi32(n128i x)
{
	// Zero lane 3 so it can never make the result true.
	const uint32x4_t xyz0 = vsetq_lane_u32(0, x, 3);
#if !defined(HLSLPP_ARM64)
	// No horizontal max here: OR the two halves, then fold pairwise.
	const uint32x2_t merged = vorr_u32(vget_low_u32(xyz0), vget_high_u32(xyz0));
	const uint32x2_t folded = vpmax_u32(merged, merged);
	return 0 != vget_lane_u32(folded, 0);
#else
	return 0 != vmaxvq_u32(xyz0);
#endif
}

// any4: true when any of the four lanes is non-zero.
// NOTE(review): _u32 intrinsics are applied to an n128i argument - confirm n128i
// is accepted where uint32x4_t is expected on every supported compiler.
hlslpp_inline bool _hlslpp_any4_epi32(n128i x)
{
#if !defined(HLSLPP_ARM64)
	// No horizontal max here: OR the two halves, then fold pairwise.
	const uint32x2_t merged = vorr_u32(vget_low_u32(x), vget_high_u32(x));
	const uint32x2_t folded = vpmax_u32(merged, merged);
	return 0 != vget_lane_u32(folded, 0);
#else
	return 0 != vmaxvq_u32(x);
#endif
}

// all1: with a single lane this is the same test as any1 - lane 0 non-zero.
// NOTE(review): _u32 intrinsics are applied to an n128i argument - confirm n128i
// is accepted where uint32x4_t is expected on every supported compiler.
hlslpp_inline bool _hlslpp_all1_epi32(n128i x)
{
	const uint32x2_t lowHalf = vget_low_u32(x);
	return 0 != vget_lane_u32(lowHalf, 0);
}

// all2: true when both lane 0 and lane 1 are non-zero.
// NOTE(review): _u32 intrinsics are applied to an n128i argument - confirm n128i
// is accepted where uint32x4_t is expected on every supported compiler.
hlslpp_inline bool _hlslpp_all2_epi32(n128i x)
{
	const uint32x2_t xy = vget_low_u32(x);
	// Pairwise min leaves the smaller lane in lane 0; it is zero if either lane is zero.
	const uint32x2_t folded = vpmin_u32(xy, xy);
	return 0 != vget_lane_u32(folded, 0);
}

// all3: true when lanes 0, 1 and 2 are all non-zero.
// NOTE(review): _u32 intrinsics are applied to an n128i argument - confirm n128i
// is accepted where uint32x4_t is expected on every supported compiler.
hlslpp_inline bool _hlslpp_all3_epi32(n128i x)
{
	// Force lane 3 to all-ones so it can never make the result false.
	const uint32x4_t xyz1 = vsetq_lane_u32(0xffffffff, x, 3);
#if !defined(HLSLPP_ARM64)
	// No horizontal min here: two pairwise-min steps reduce four lanes to one.
	const uint32x2_t halfMin = vpmin_u32(vget_low_u32(xyz1), vget_high_u32(xyz1));
	const uint32x2_t folded = vpmin_u32(halfMin, halfMin);
	return 0 != vget_lane_u32(folded, 0);
#else
	return 0 != vminvq_u32(xyz1);
#endif
}

// all4: true when all four lanes are non-zero.
// NOTE(review): _u32 intrinsics are applied to an n128i argument - confirm n128i
// is accepted where uint32x4_t is expected on every supported compiler.
hlslpp_inline bool _hlslpp_all4_epi32(n128i x)
{
#if !defined(HLSLPP_ARM64)
	// No horizontal min here: two pairwise-min steps reduce four lanes to one.
	const uint32x2_t halfMin = vpmin_u32(vget_low_u32(x), vget_high_u32(x));
	const uint32x2_t folded = vpmin_u32(halfMin, halfMin);
	return 0 != vget_lane_u32(folded, 0);
#else
	return 0 != vminvq_u32(x);
#endif
}

//-----------------
// Unsigned Integer
//-----------------
Expand Down Expand Up @@ -635,6 +645,16 @@ hlslpp_inline void _hlslpp_load4x4_ps(float* p, n128& x0, n128& x1, n128& x2, n1
// The left shift uses vshlq_u32 directly; the right shift reuses vshlq_u32
// with the count negated through _hlslpp_neg_epi32.
#define _hlslpp_sllv_epu32(x, y) vshlq_u32((x), (y))
#define _hlslpp_srlv_epu32(x, y) vshlq_u32((x), _hlslpp_neg_epi32(y))

// Unsigned any(): a non-zero test does not depend on signedness, so alias the signed routines.
#define _hlslpp_any1_epu32(x) _hlslpp_any1_epi32((x))
#define _hlslpp_any2_epu32(x) _hlslpp_any2_epi32((x))
#define _hlslpp_any3_epu32(x) _hlslpp_any3_epi32((x))
#define _hlslpp_any4_epu32(x) _hlslpp_any4_epi32((x))

// Unsigned all(): a non-zero test does not depend on signedness, so alias the signed routines.
#define _hlslpp_all1_epu32(x) _hlslpp_all1_epi32((x))
#define _hlslpp_all2_epu32(x) _hlslpp_all2_epi32((x))
#define _hlslpp_all3_epu32(x) _hlslpp_all3_epi32((x))
#define _hlslpp_all4_epu32(x) _hlslpp_all4_epi32((x))

#if defined(HLSLPP_DOUBLE)

//-------
Expand Down
21 changes: 20 additions & 1 deletion include/platforms/hlsl++_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,16 @@ namespace hlslpp
return vector_int4(v1.x >> v2.x, v1.y >> v2.y, v1.z >> v2.z, v1.w >> v2.w);
}

// Scalar any(): true unless every one of the first N components equals zero.
hlslpp_inline bool _hlslpp_any1_epi32(const vector_int4& v)
{
	return !(v.x == 0);
}

hlslpp_inline bool _hlslpp_any2_epi32(const vector_int4& v)
{
	return !(v.x == 0 && v.y == 0);
}

hlslpp_inline bool _hlslpp_any3_epi32(const vector_int4& v)
{
	return !(v.x == 0 && v.y == 0 && v.z == 0);
}

hlslpp_inline bool _hlslpp_any4_epi32(const vector_int4& v)
{
	return !(v.x == 0 && v.y == 0 && v.z == 0 && v.w == 0);
}

// Scalar all(): false as soon as one of the first N components equals zero.
hlslpp_inline bool _hlslpp_all1_epi32(const vector_int4& v)
{
	return !(v.x == 0);
}

hlslpp_inline bool _hlslpp_all2_epi32(const vector_int4& v)
{
	return !(v.x == 0 || v.y == 0);
}

hlslpp_inline bool _hlslpp_all3_epi32(const vector_int4& v)
{
	return !(v.x == 0 || v.y == 0 || v.z == 0);
}

hlslpp_inline bool _hlslpp_all4_epi32(const vector_int4& v)
{
	return !(v.x == 0 || v.y == 0 || v.z == 0 || v.w == 0);
}

//-----------------
// Unsigned Integer
//-----------------
Expand Down Expand Up @@ -782,6 +792,16 @@ namespace hlslpp
return vector_uint4(v1.x >> v2.x, v1.y >> v2.y, v1.z >> v2.z, v1.w >> v2.w);
}

// Scalar unsigned any(): true unless every one of the first N components equals zero.
hlslpp_inline bool _hlslpp_any1_epu32(const vector_uint4& v)
{
	return !(v.x == 0);
}

hlslpp_inline bool _hlslpp_any2_epu32(const vector_uint4& v)
{
	return !(v.x == 0 && v.y == 0);
}

hlslpp_inline bool _hlslpp_any3_epu32(const vector_uint4& v)
{
	return !(v.x == 0 && v.y == 0 && v.z == 0);
}

hlslpp_inline bool _hlslpp_any4_epu32(const vector_uint4& v)
{
	return !(v.x == 0 && v.y == 0 && v.z == 0 && v.w == 0);
}

// Scalar unsigned all(): false as soon as one of the first N components equals zero.
hlslpp_inline bool _hlslpp_all1_epu32(const vector_uint4& v)
{
	return !(v.x == 0);
}

hlslpp_inline bool _hlslpp_all2_epu32(const vector_uint4& v)
{
	return !(v.x == 0 || v.y == 0);
}

hlslpp_inline bool _hlslpp_all3_epu32(const vector_uint4& v)
{
	return !(v.x == 0 || v.y == 0 || v.z == 0);
}

hlslpp_inline bool _hlslpp_all4_epu32(const vector_uint4& v)
{
	return !(v.x == 0 || v.y == 0 || v.z == 0 || v.w == 0);
}

//--------
// Storing
//--------
Expand All @@ -806,7 +826,6 @@ namespace hlslpp
p[12] = v4.x; p[13] = v4.y; p[14] = v4.z; p[15] = v4.w;
}


// Scalar loads: copy the first N floats at p into the leading components of v.
// Components beyond N are left untouched.
hlslpp_inline void _hlslpp_load1_ps(float* p, n128& v)
{
	v.x = p[0];
}

hlslpp_inline void _hlslpp_load2_ps(float* p, n128& v)
{
	v.x = p[0];
	v.y = p[1];
}

hlslpp_inline void _hlslpp_load3_ps(float* p, n128& v)
{
	v.x = p[0];
	v.y = p[1];
	v.z = p[2];
}
Expand Down
50 changes: 50 additions & 0 deletions include/platforms/hlsl++_sse.h
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,46 @@ inline n128i _hlslpp_srlv_epi32(n128i x, n128i count)

#endif

// any1: true when lane 0 is non-zero.
hlslpp_inline bool _hlslpp_any1_epi32(n128i x)
{
	// Lanes equal to zero compare as all-ones; the byte mask then carries 4 bits per 32-bit lane.
	const n128i zeroLanes = _mm_cmpeq_epi32(x, _mm_setzero_si128());
	const int zeroBits = _mm_movemask_epi8(zeroLanes);
	// Low 4 bits all set means lane 0 was zero.
	return (zeroBits & 0xf) != 0xf;
}

// any2: true when lane 0 or lane 1 is non-zero.
hlslpp_inline bool _hlslpp_any2_epi32(n128i x)
{
	// Lanes equal to zero compare as all-ones; the byte mask then carries 4 bits per 32-bit lane.
	const n128i zeroLanes = _mm_cmpeq_epi32(x, _mm_setzero_si128());
	const int zeroBits = _mm_movemask_epi8(zeroLanes);
	// Low 8 bits all set means both lanes were zero.
	return (zeroBits & 0xff) != 0xff;
}

// any3: true when lane 0, 1 or 2 is non-zero.
hlslpp_inline bool _hlslpp_any3_epi32(n128i x)
{
	// Lanes equal to zero compare as all-ones; the byte mask then carries 4 bits per 32-bit lane.
	const n128i zeroLanes = _mm_cmpeq_epi32(x, _mm_setzero_si128());
	const int zeroBits = _mm_movemask_epi8(zeroLanes);
	// Low 12 bits all set means the first three lanes were zero.
	return (zeroBits & 0xfff) != 0xfff;
}

// any4: true when any of the four lanes is non-zero.
hlslpp_inline bool _hlslpp_any4_epi32(n128i x)
{
	// Lanes equal to zero compare as all-ones; the byte mask then carries 4 bits per 32-bit lane.
	const n128i zeroLanes = _mm_cmpeq_epi32(x, _mm_setzero_si128());
	const int zeroBits = _mm_movemask_epi8(zeroLanes);
	// All 16 bits set means every lane was zero.
	return (zeroBits & 0xffff) != 0xffff;
}

// all1: true when lane 0 is non-zero.
hlslpp_inline bool _hlslpp_all1_epi32(n128i x)
{
	// Lanes equal to zero compare as all-ones; the byte mask then carries 4 bits per 32-bit lane.
	const n128i zeroLanes = _mm_cmpeq_epi32(x, _mm_setzero_si128());
	const int zeroBits = _mm_movemask_epi8(zeroLanes);
	// No bit set in the low 4 means lane 0 was not zero.
	return (zeroBits & 0xf) == 0;
}

// all2: true when both lane 0 and lane 1 are non-zero.
hlslpp_inline bool _hlslpp_all2_epi32(n128i x)
{
	// Lanes equal to zero compare as all-ones; the byte mask then carries 4 bits per 32-bit lane.
	const n128i zeroLanes = _mm_cmpeq_epi32(x, _mm_setzero_si128());
	const int zeroBits = _mm_movemask_epi8(zeroLanes);
	// No bit set in the low 8 means neither lane was zero.
	return (zeroBits & 0xff) == 0;
}

// all3: true when lanes 0, 1 and 2 are all non-zero.
hlslpp_inline bool _hlslpp_all3_epi32(n128i x)
{
	// Lanes equal to zero compare as all-ones; the byte mask then carries 4 bits per 32-bit lane.
	const n128i zeroLanes = _mm_cmpeq_epi32(x, _mm_setzero_si128());
	const int zeroBits = _mm_movemask_epi8(zeroLanes);
	// No bit set in the low 12 means none of the first three lanes was zero.
	return (zeroBits & 0xfff) == 0;
}

// all4: true when all four lanes are non-zero.
hlslpp_inline bool _hlslpp_all4_epi32(n128i x)
{
	// Lanes equal to zero compare as all-ones; the byte mask then carries 4 bits per 32-bit lane.
	const n128i zeroLanes = _mm_cmpeq_epi32(x, _mm_setzero_si128());
	const int zeroBits = _mm_movemask_epi8(zeroLanes);
	// No bit set in the 16-bit mask means no lane was zero.
	return (zeroBits & 0xffff) == 0;
}

//------------
// Integer 256
//------------
Expand Down Expand Up @@ -934,6 +974,16 @@ hlslpp_inline n128i _hlslpp_min_epu32(n128u x, n128u y)
// Unsigned shift macros are aliases of the signed integer shift routines.
#define _hlslpp_sllv_epu32(x, y) _hlslpp_sllv_epi32((x), (y))
#define _hlslpp_srlv_epu32(x, y) _hlslpp_srlv_epi32((x), (y))

// Unsigned any(): a non-zero test does not depend on signedness, so alias the signed routines.
#define _hlslpp_any1_epu32(x) _hlslpp_any1_epi32((x))
#define _hlslpp_any2_epu32(x) _hlslpp_any2_epi32((x))
#define _hlslpp_any3_epu32(x) _hlslpp_any3_epi32((x))
#define _hlslpp_any4_epu32(x) _hlslpp_any4_epi32((x))

// Unsigned all(): a non-zero test does not depend on signedness, so alias the signed routines.
#define _hlslpp_all1_epu32(x) _hlslpp_all1_epi32((x))
#define _hlslpp_all2_epu32(x) _hlslpp_all2_epi32((x))
#define _hlslpp_all3_epu32(x) _hlslpp_all3_epi32((x))
#define _hlslpp_all4_epu32(x) _hlslpp_all4_epi32((x))

//-------
// Double
//-------
Expand Down
20 changes: 20 additions & 0 deletions src/hlsl++_unit_tests_vector_int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,26 @@ void RunUnitTestsVectorInt()
ivshr_3 = ivfoo3 >> ivshift_value_1; eq(ivshr_3, shift_right(ivfoo3.x, ivshift_value_1.x), shift_right(ivfoo3.y, ivshift_value_1.x), shift_right(ivfoo3.z, ivshift_value_1.x));
ivshr_4 = ivfoo4 >> ivshift_value_1; eq(ivshr_4, shift_right(ivfoo4.x, ivshift_value_1.x), shift_right(ivfoo4.y, ivshift_value_1.x), shift_right(ivfoo4.z, ivshift_value_1.x), shift_right(ivfoo4.w, ivshift_value_1.x));

bool ivall1 = all(ivfoo1); eq(ivall1, ivfoo1.x != 0);
bool ivall2 = all(ivfoo2); eq(ivall2, ivfoo2.x != 0 && ivfoo2.y != 0);
bool ivall3 = all(ivfoo3); eq(ivall3, ivfoo3.x != 0 && ivfoo3.y != 0 && ivfoo3.z != 0);
bool ivall4 = all(ivfoo4); eq(ivall4, ivfoo4.x != 0 && ivfoo4.y != 0 && ivfoo4.z != 0 && ivfoo4.w != 0);

bool ivall_swiz_1 = all(ivfoo1.r); eq(ivall_swiz_1, ivfoo1.r != 0);
bool ivall_swiz_2 = all(ivfoo2.yx); eq(ivall_swiz_2, ivfoo2.y != 0 && ivfoo2.x != 0);
bool ivall_swiz_3 = all(ivfoo3.bgr); eq(ivall_swiz_3, ivfoo3.b != 0 && ivfoo3.g != 0 && ivfoo3.r != 0);
bool ivall_swiz_4 = all(ivfoo4.wwww); eq(ivall_swiz_4, ivfoo4.w != 0);

bool ivany1 = any(ivfoo1); eq(ivany1, ivfoo1.x != 0);
bool ivany2 = any(ivfoo2); eq(ivany2, ivfoo2.x != 0 || ivfoo2.y != 0);
bool ivany3 = any(ivfoo3); eq(ivany3, ivfoo3.x != 0 || ivfoo3.y != 0 || ivfoo3.z != 0);
bool ivany4 = any(ivfoo4); eq(ivany4, ivfoo4.x != 0 || ivfoo4.y != 0 || ivfoo4.z != 0 || ivfoo4.w != 0);

bool ivany_swiz_1 = any(ivfoo1.r); eq(ivany_swiz_1, ivfoo1.x != 0);
bool ivany_swiz_2 = any(ivfoo2.yx); eq(ivany_swiz_2, ivfoo2.y != 0 || ivfoo2.x != 0);
bool ivany_swiz_3 = any(ivfoo3.bgr); eq(ivany_swiz_3, ivfoo3.b != 0 || ivfoo3.g != 0 || ivfoo3.r != 0);
bool ivany_swiz_4 = any(ivfoo4.wwww); eq(ivany_swiz_4, ivfoo4.w != 0);

// int4 sat4i = saturate(b);
//
// int3 c = a + b.zzw;
Expand Down

0 comments on commit 0b370de

Please sign in to comment.