stl/src/vector_algorithms.cpp (64 changes: 32 additions & 32 deletions)
@@ -2083,28 +2083,28 @@ namespace {
         }

         static _Vec_t _H_min(const _Vec_t _Cur) noexcept {
-            int64x2_t _Swapped = vextq_s64(_Cur, _Cur, 1);
-            uint64x2_t _Mask_lt = vcltq_s64(_Swapped, _Cur);
+            const int64x2_t _Swapped = vextq_s64(_Cur, _Cur, 1);
+            const uint64x2_t _Mask_lt = vcltq_s64(_Swapped, _Cur);
             return vbslq_s64(_Mask_lt, _Swapped, _Cur);
         }

         static _Vec_t _H_max(const _Vec_t _Cur) noexcept {
-            int64x2_t _Swapped = vextq_s64(_Cur, _Cur, 1);
-            uint64x2_t _Mask_gt = vcgtq_s64(_Swapped, _Cur);
+            const int64x2_t _Swapped = vextq_s64(_Cur, _Cur, 1);
+            const uint64x2_t _Mask_gt = vcgtq_s64(_Swapped, _Cur);
             return vbslq_s64(_Mask_gt, _Swapped, _Cur);
         }

         static _Vec_t _H_min_u(const _Vec_t _Cur) noexcept {
-            const uint64x2_t _Cur_u = vreinterpretq_u64_s64(_Cur);
-            uint64x2_t _Swapped = vextq_u64(_Cur_u, _Cur_u, 1);
-            uint64x2_t _Mask_lt = vcltq_u64(_Swapped, _Cur_u);
+            const uint64x2_t _Cur_u = vreinterpretq_u64_s64(_Cur);
+            const uint64x2_t _Swapped = vextq_u64(_Cur_u, _Cur_u, 1);
+            const uint64x2_t _Mask_lt = vcltq_u64(_Swapped, _Cur_u);
             return vreinterpretq_s64_u64(vbslq_u64(_Mask_lt, _Swapped, _Cur_u));
         }

         static _Vec_t _H_max_u(const _Vec_t _Cur) noexcept {
-            const uint64x2_t _Cur_u = vreinterpretq_u64_s64(_Cur);
-            uint64x2_t _Swapped = vextq_u64(_Cur_u, _Cur_u, 1);
-            uint64x2_t _Mask_gt = vcgtq_u64(_Swapped, _Cur_u);
+            const uint64x2_t _Cur_u = vreinterpretq_u64_s64(_Cur);
+            const uint64x2_t _Swapped = vextq_u64(_Cur_u, _Cur_u, 1);
+            const uint64x2_t _Mask_gt = vcgtq_u64(_Swapped, _Cur_u);
             return vreinterpretq_s64_u64(vbslq_u64(_Mask_gt, _Swapped, _Cur_u));
         }
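Aside, not part of the diff: the `_H_min`/`_H_max` helpers above exist because AArch64 NEON has no 64-bit cross-lane min/max reduction, so they fold the two lanes with an extract/compare/select sequence. Below is a minimal standalone sketch of that idiom, assuming an AArch64 target and <arm_neon.h>; the function name and demo scaffolding are illustrative, not STL code.

#include <arm_neon.h>
#include <cstdint>
#include <cstdio>

// Fold the two int64 lanes to their minimum: swap lanes, compare, select.
static int64_t horizontal_min_s64(const int64x2_t cur) noexcept {
    const int64x2_t swapped = vextq_s64(cur, cur, 1);          // lanes {1, 0}
    const uint64x2_t mask_lt = vcltq_s64(swapped, cur);        // all-ones where swapped < cur
    const int64x2_t folded = vbslq_s64(mask_lt, swapped, cur); // keep the smaller value per lane
    return vgetq_lane_s64(folded, 0);                          // both lanes now hold the minimum
}

int main() {
    const int64_t values[2] = {42, -7};
    std::printf("%lld\n", static_cast<long long>(horizontal_min_s64(vld1q_s64(values)))); // prints -7
}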

@@ -4094,13 +4094,13 @@ namespace {
         }

         static uint64_t _Match_mask_eq(const uint8x16_t _Cmp_lo, const uint8x16_t _Cmp_hi) noexcept {
-            auto _Cmp = vreinterpretq_u64_u8(vorrq_u8(_Cmp_lo, _Cmp_hi));
+            const auto _Cmp = vreinterpretq_u64_u8(vorrq_u8(_Cmp_lo, _Cmp_hi));
             return vgetq_lane_u64(vpaddq_u64(_Cmp, _Cmp), 0);
         }

         static uint64_t _Match_mask_ne(const uint8x16_t _Cmp_lo, const uint8x16_t _Cmp_hi) noexcept {
-            auto _Cmp = vminq_u8(_Cmp_lo, _Cmp_hi);
-            auto _Comb = vreinterpretq_u64_u8(vpminq_u8(_Cmp, _Cmp));
+            const auto _Cmp = vminq_u8(_Cmp_lo, _Cmp_hi);
+            const auto _Comb = vreinterpretq_u64_u8(vpminq_u8(_Cmp, _Cmp));
             return vgetq_lane_u64(_Comb, 0) ^ 0xFFFF'FFFF'FFFF'FFFF;
         }
     };
@@ -4141,13 +4141,13 @@ namespace {
         }

         static uint64_t _Match_mask_eq(const uint16x8_t _Cmp_lo, const uint16x8_t _Cmp_hi) noexcept {
-            uint8x8_t _Cmp = vaddhn_u16(_Cmp_lo, _Cmp_hi);
+            const uint8x8_t _Cmp = vaddhn_u16(_Cmp_lo, _Cmp_hi);
             return vget_lane_u64(vreinterpret_u64_u8(_Cmp), 0);
         }

         static uint64_t _Match_mask_ne(const uint16x8_t _Cmp_lo, const uint16x8_t _Cmp_hi) noexcept {
-            auto _Cmp = vminq_u16(_Cmp_lo, _Cmp_hi);
-            auto _Comb = vreinterpretq_u64_u16(vpminq_u16(_Cmp, _Cmp));
+            const auto _Cmp = vminq_u16(_Cmp_lo, _Cmp_hi);
+            const auto _Comb = vreinterpretq_u64_u16(vpminq_u16(_Cmp, _Cmp));
             return vgetq_lane_u64(_Comb, 0) ^ 0xFFFF'FFFF'FFFF'FFFF;
         }
     };
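Aside, not part of the diff: `_Match_mask_eq` above relies on vaddhn_u16 to compress two 8-lane compare masks (each lane 0x0000 or 0xFFFF) into one 64-bit value whose bytes are 0xFF wherever either half matched, so "any match?" becomes a scalar test against zero. Below is a minimal standalone sketch of that trick, assuming AArch64 and <arm_neon.h>; the wrapper function and demo scaffolding are illustrative, not STL code.

#include <arm_neon.h>
#include <cstdint>
#include <cstdio>

// Does any of 16 consecutive uint16_t elements equal val? Two vector compares
// are folded into one 64-bit scalar via the high-narrowing add.
static bool any_of_16_lanes_equal(const uint16_t* data, const uint16_t val) noexcept {
    const uint16x8_t comparand = vdupq_n_u16(val);
    const uint16x8_t cmp_lo = vceqq_u16(vld1q_u16(data + 0), comparand); // lanes: 0x0000 or 0xFFFF
    const uint16x8_t cmp_hi = vceqq_u16(vld1q_u16(data + 8), comparand);
    const uint8x8_t folded = vaddhn_u16(cmp_lo, cmp_hi); // byte i is 0xFF iff lane i matched in either half
    return vget_lane_u64(vreinterpret_u64_u8(folded), 0) != 0;
}

int main() {
    uint16_t buffer[16] = {};
    buffer[13] = 7;
    std::printf("%d %d\n", any_of_16_lanes_equal(buffer, 7), any_of_16_lanes_equal(buffer, 9)); // prints 1 0
}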
@@ -4188,13 +4188,13 @@ namespace {
         }

         static uint64_t _Match_mask_eq(const uint32x4_t _Cmp_lo, const uint32x4_t _Cmp_hi) noexcept {
-            uint8x8_t _Cmp = vaddhn_u16(vreinterpretq_u16_u32(_Cmp_lo), vreinterpretq_u16_u32(_Cmp_hi));
+            const uint8x8_t _Cmp = vaddhn_u16(vreinterpretq_u16_u32(_Cmp_lo), vreinterpretq_u16_u32(_Cmp_hi));
             return vget_lane_u64(vreinterpret_u64_u8(_Cmp), 0);
         }

         static uint64_t _Match_mask_ne(const uint32x4_t _Cmp_lo, const uint32x4_t _Cmp_hi) noexcept {
-            auto _Cmp = vminq_u32(_Cmp_lo, _Cmp_hi);
-            auto _Comb = vreinterpretq_u64_u32(vpminq_u32(_Cmp, _Cmp));
+            const auto _Cmp = vminq_u32(_Cmp_lo, _Cmp_hi);
+            const auto _Comb = vreinterpretq_u64_u32(vpminq_u32(_Cmp, _Cmp));
             return vgetq_lane_u64(_Comb, 0) ^ 0xFFFF'FFFF'FFFF'FFFF;
         }
     };
@@ -4219,7 +4219,7 @@ namespace {
         }

         static uint64_t _Match_mask_eq(const uint64x2_t _Cmp_lo, const uint64x2_t _Cmp_hi) noexcept {
-            uint8x8_t _Cmp = vaddhn_u16(vreinterpretq_u16_u64(_Cmp_lo), vreinterpretq_u16_u64(_Cmp_hi));
+            const uint8x8_t _Cmp = vaddhn_u16(vreinterpretq_u16_u64(_Cmp_lo), vreinterpretq_u16_u64(_Cmp_hi));
             return vget_lane_u64(vreinterpret_u64_u8(_Cmp), 0);
         }

@@ -4390,8 +4390,8 @@ namespace {
             const auto _Data_lo = _Traits::_Load_q(static_cast<const uint8_t*>(_First) + 0);
             const auto _Data_hi = _Traits::_Load_q(static_cast<const uint8_t*>(_First) + 16);

-            auto _Comparison_lo = _Traits::_Cmp_neon_q(_Data_lo, _Comparand);
-            auto _Comparison_hi = _Traits::_Cmp_neon_q(_Data_hi, _Comparand);
+            const auto _Comparison_lo = _Traits::_Cmp_neon_q(_Data_lo, _Comparand);
+            const auto _Comparison_hi = _Traits::_Cmp_neon_q(_Data_hi, _Comparand);

             // Use a fast check for the termination condition.
             uint64_t _Any_match = 0;
@@ -4431,7 +4431,7 @@ namespace {
             const auto _Comparand = _Traits::_Set_neon_q(_Val);
             const auto _Data = _Traits::_Load_q(_First);

-            auto _Comparison = _Traits::_Cmp_neon_q(_Data, _Comparand);
+            const auto _Comparison = _Traits::_Cmp_neon_q(_Data, _Comparand);

             auto _Match = _Traits::_Mask_q(_Comparison);
             if constexpr (_Pred == _Predicate::_Not_equal) {
@@ -4452,7 +4452,7 @@ namespace {
             const auto _Comparand = _Traits::_Set_neon(_Val);
             const auto _Data = _Traits::_Load(_First);

-            auto _Comparison = _Traits::_Cmp_neon(_Data, _Comparand);
+            const auto _Comparison = _Traits::_Cmp_neon(_Data, _Comparand);

             auto _Match = _Traits::_Mask(_Comparison);
             if constexpr (_Pred == _Predicate::_Not_equal) {
@@ -5390,12 +5390,12 @@ namespace {
         }

         static size_t _Reduce_sse(const __m128i _Val) noexcept {
-#ifdef _M_IX86
+#ifdef _WIN64
+            return _mm_cvtsi128_si64(_Val) + _mm_extract_epi64(_Val, 1);
+#else // ^^^ 64-bit / 32-bit vvv
             return static_cast<uint32_t>(_mm_cvtsi128_si32(_Val))
                 + static_cast<uint32_t>(_mm_extract_epi32(_Val, 2));
-#else // ^^^ defined(_M_IX86) / defined(_M_X64) vvv
-            return _mm_cvtsi128_si64(_Val) + _mm_extract_epi64(_Val, 1);
-#endif // ^^^ defined(_M_X64) ^^^
+#endif // ^^^ 32-bit ^^^
         }
     };
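Aside, not part of the diff: the hunk above changes the guard of `_Reduce_sse` from `_M_IX86` to `_WIN64` and puts the 64-bit branch first; neither computation changes. For illustration, a standalone sketch of the same two-lane horizontal sum, assuming an MSVC-style build where `_WIN64` marks 64-bit targets and SSE4.1 is available; the surrounding demo is illustrative, not STL code.

#include <intrin.h>
#include <cstdint>
#include <cstdio>

// Horizontal sum of the two 64-bit lanes of an __m128i accumulator.
static size_t reduce_two_lanes(const __m128i val) noexcept {
#ifdef _WIN64 // 64-bit: add the two 64-bit lanes directly
    return _mm_cvtsi128_si64(val) + _mm_extract_epi64(val, 1);
#else // 32-bit x86: _mm_cvtsi128_si64 is unavailable, and size_t is 32 bits,
      // so only the low dword of each 64-bit lane (elements 0 and 2) is summed
    return static_cast<uint32_t>(_mm_cvtsi128_si32(val))
        + static_cast<uint32_t>(_mm_extract_epi32(val, 2));
#endif
}

int main() {
    const __m128i counters = _mm_set_epi64x(5, 37); // lanes {37, 5}
    std::printf("%zu\n", reduce_two_lanes(counters)); // prints 42
}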

@@ -9250,8 +9250,8 @@ namespace {
     struct _Traits_8_avx : _Traits_avx {
         static __m256i _Broadcast(const uint64_t _Data) noexcept {
 #ifdef _WIN64
-            return _mm256_broadcastq_epi64(_mm_cvtsi64x_si128(_Data));
-#else // ^^^ defined(_WIN64) / !defined(_WIN64), workaround, _mm_cvtsi64x_si128 does not compile vvv
+            return _mm256_broadcastq_epi64(_mm_cvtsi64_si128(_Data));
+#else // ^^^ defined(_WIN64) / !defined(_WIN64), workaround, _mm_cvtsi64_si128 does not compile vvv
             return _mm256_set1_epi64x(_Data);
 #endif // ^^^ !defined(_WIN64) ^^^
         }
@@ -9334,8 +9334,8 @@ namespace {
     struct _Traits_8_sse : _Traits_sse {
         static __m128i _Broadcast(const uint64_t _Data) noexcept {
 #ifdef _WIN64
-            return _mm_shuffle_epi32(_mm_cvtsi64x_si128(_Data), _MM_SHUFFLE(1, 0, 1, 0));
-#else // ^^^ defined(_WIN64) / !defined(_WIN64), workaround, _mm_cvtsi64x_si128 does not compile vvv
+            return _mm_shuffle_epi32(_mm_cvtsi64_si128(_Data), _MM_SHUFFLE(1, 0, 1, 0));
+#else // ^^^ defined(_WIN64) / !defined(_WIN64), workaround, _mm_cvtsi64_si128 does not compile vvv
             return _mm_set1_epi64x(_Data);
 #endif // ^^^ !defined(_WIN64) ^^^
         }
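Aside, not part of the diff: both `_Broadcast` hunks rename the 64-bit-only intrinsic in the call and the comment (`_mm_cvtsi64x_si128` to `_mm_cvtsi64_si128`); the 32-bit fallback stays `_mm_set1_epi64x` / `_mm256_set1_epi64x` because that cvt intrinsic does not compile on 32-bit x86. Below is a standalone sketch of the SSE variant, assuming an MSVC-style build; names and demo scaffolding are illustrative, not STL code.

#include <intrin.h>
#include <cstdint>
#include <cstdio>

// Broadcast a 64-bit value into both lanes of an __m128i.
static __m128i broadcast_u64(const uint64_t data) noexcept {
#ifdef _WIN64 // move into lane 0, then duplicate the low/high dwords into both lanes
    return _mm_shuffle_epi32(_mm_cvtsi64_si128(data), _MM_SHUFFLE(1, 0, 1, 0));
#else // 32-bit fallback: the cvt intrinsic is unavailable
    return _mm_set1_epi64x(data);
#endif
}

int main() {
    uint64_t lanes[2];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), broadcast_u64(0x1122334455667788ULL));
    std::printf("%llx %llx\n", static_cast<unsigned long long>(lanes[0]),
        static_cast<unsigned long long>(lanes[1])); // both lanes: 1122334455667788
}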