Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standardize vector algorithms dispatch #4544

Merged
merged 27 commits into from Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
eb5d503
rename functions
AlexGuteniev Mar 31, 2024
e4e5dc7
Avoid recursion in dispatchers
AlexGuteniev Mar 31, 2024
68c7ec2
inline `_Find_last_trivial`
AlexGuteniev Mar 31, 2024
5cc03a5
extract reverse copies
AlexGuteniev Mar 31, 2024
17f0a26
missing `_STD`
AlexGuteniev Mar 31, 2024
0fe3958
clang format
AlexGuteniev Mar 31, 2024
626fc8e
missing variable
AlexGuteniev Mar 31, 2024
30eb5a0
types
AlexGuteniev Mar 31, 2024
e2532d4
types this way look safer
AlexGuteniev Mar 31, 2024
1203114
template param
AlexGuteniev Mar 31, 2024
c5f8d22
correct to_address use
AlexGuteniev Mar 31, 2024
3f8857e
Verify `_Nx == 8`.
StephanTLavavej Apr 3, 2024
9693997
Update comments for renamed functions.
StephanTLavavej Apr 3, 2024
2fe0e78
`_Count_trivial` => `_Count_vectorized`
StephanTLavavej Apr 3, 2024
e1634e7
`_Find_trivial` => `_Find_vectorized`
StephanTLavavej Apr 3, 2024
7e78deb
`_Find_first_of_trivial` => `_Find_first_of_vectorized`
StephanTLavavej Apr 3, 2024
e69d491
`_Reverse_copy_trivially_copyable` => `_Reverse_copy_vectorized`
StephanTLavavej Apr 3, 2024
e6c3897
`_Reverse_trivially_swappable` => `_Reverse_vectorized`
StephanTLavavej Apr 3, 2024
a4bf8cc
`_Mismatch` => `_Mismatch_vectorized`
StephanTLavavej Apr 3, 2024
6261ae2
`_Min_element` => `_Min_element_vectorized`
StephanTLavavej Apr 3, 2024
0a9b95f
`_Max_element` => `_Max_element_vectorized`
StephanTLavavej Apr 3, 2024
193c5a4
`_Minmax_element` => `_Minmax_element_vectorized`
StephanTLavavej Apr 3, 2024
b4bfc32
`_Min(` => `_Min_vectorized(` in `<xutility>`
StephanTLavavej Apr 3, 2024
8f0d5ae
`_Max(` => `_Max_vectorized(` in `<xutility>`
StephanTLavavej Apr 3, 2024
4d92cd5
`_Minmax` => `_Minmax_vectorized`
StephanTLavavej Apr 3, 2024
f88de77
Restore and rename `__std_find_last_trivial` => `_Find_last_vectorized`
StephanTLavavej Apr 3, 2024
3326718
Handle `_WIN64` in `_Find_last_vectorized` without recursing.
StephanTLavavej Apr 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
131 changes: 57 additions & 74 deletions stl/inc/algorithm
Expand Up @@ -76,8 +76,21 @@ __declspec(noalias) _Min_max_d __stdcall __std_minmax_d(const void* _First, cons
} // extern "C"

_STD_BEGIN
// Size-based dispatcher for reverse copies: forwards (_First, _Last, _Dest) unchanged to the
// global-scope __std_reverse_copy_trivially_copyable_N function whose suffix N equals _Nx.
// _Nx is expected to be the element size in bytes (callers pass a power of two no larger than 8).
// Only 1, 2, 4 and 8 have a matching routine; any other value is rejected at compile time
// instead of silently being handled as if it were 8.
template <size_t _Nx>
__declspec(noalias) void _Reverse_copy_trivially_copyable(const void* _First, const void* _Last, void* _Dest) {
    if constexpr (_Nx == 1) {
        ::__std_reverse_copy_trivially_copyable_1(_First, _Last, _Dest);
    } else if constexpr (_Nx == 2) {
        ::__std_reverse_copy_trivially_copyable_2(_First, _Last, _Dest);
    } else if constexpr (_Nx == 4) {
        ::__std_reverse_copy_trivially_copyable_4(_First, _Last, _Dest);
    } else {
        // The condition depends on _Nx, so it is only checked when this branch is instantiated.
        static_assert(_Nx == 8, "Unexpected size");
        ::__std_reverse_copy_trivially_copyable_8(_First, _Last, _Dest);
    }
}

template <class _Ty>
pair<_Ty*, _Ty*> __std_minmax_element(_Ty* const _First, _Ty* const _Last) noexcept {
pair<_Ty*, _Ty*> _Minmax_element(_Ty* const _First, _Ty* const _Last) noexcept {
constexpr bool _Signed = is_signed_v<_Ty>;

_Min_max_element_t _Res;
Expand All @@ -102,7 +115,7 @@ pair<_Ty*, _Ty*> __std_minmax_element(_Ty* const _First, _Ty* const _Last) noexc
}

template <class _Ty>
auto __std_minmax(_Ty* const _First, _Ty* const _Last) noexcept {
auto _Minmax(_Ty* const _First, _Ty* const _Last) noexcept {
constexpr bool _Signed = is_signed_v<_Ty>;

if constexpr (is_pointer_v<_Ty>) {
Expand Down Expand Up @@ -145,29 +158,8 @@ auto __std_minmax(_Ty* const _First, _Ty* const _Last) noexcept {
}
}

// Typed front end for the global-scope __std_find_last_trivial_N functions.
// The routine is chosen by sizeof(_Ty); _Val is converted to the unsigned integer of that width
// before the call, and the untyped result is cast back to _Ty*.
// A pointer or nullptr _Val is first turned into its uintptr_t representation, then the
// function re-enters itself so that the integer path below handles it.
// NOTE(review): judging by the names, the callee locates the last element equal to _Val in
// [_First, _Last); the exact not-found result is defined by the callee, not here.
template <class _Ty, class _TVal>
_Ty* __std_find_last_trivial(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noexcept {
    if constexpr (is_pointer_v<_TVal> || is_null_pointer_v<_TVal>) {
        const auto _Val_bits = reinterpret_cast<uintptr_t>(_Val);
        return _STD __std_find_last_trivial(_First, _Last, _Val_bits);
    } else {
        constexpr size_t _Elem_size = sizeof(_Ty);

        if constexpr (_Elem_size == 1) {
            const auto _Found = ::__std_find_last_trivial_1(_First, _Last, static_cast<uint8_t>(_Val));
            return const_cast<_Ty*>(static_cast<const _Ty*>(_Found));
        } else if constexpr (_Elem_size == 2) {
            const auto _Found = ::__std_find_last_trivial_2(_First, _Last, static_cast<uint16_t>(_Val));
            return const_cast<_Ty*>(static_cast<const _Ty*>(_Found));
        } else if constexpr (_Elem_size == 4) {
            const auto _Found = ::__std_find_last_trivial_4(_First, _Last, static_cast<uint32_t>(_Val));
            return const_cast<_Ty*>(static_cast<const _Ty*>(_Found));
        } else if constexpr (_Elem_size == 8) {
            const auto _Found = ::__std_find_last_trivial_8(_First, _Last, static_cast<uint64_t>(_Val));
            return const_cast<_Ty*>(static_cast<const _Ty*>(_Found));
        } else {
            static_assert(_Always_false<_Ty>, "Unexpected size");
        }
    }
}

template <class _Ty1, class _Ty2>
_Ty1* __std_find_first_of_trivial(
_Ty1* _Find_first_of_trivial(
_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, _Ty2* const _Last2) noexcept {
if constexpr (sizeof(_Ty1) == 1) {
return const_cast<_Ty1*>(
Expand Down Expand Up @@ -578,7 +570,7 @@ namespace ranges {
const auto _First_ptr = _STD _To_address(_First);
const auto _Last_ptr = _First_ptr + (_Last - _First);

return static_cast<iter_difference_t<_It>>(_STD __std_count_trivial(_First_ptr, _Last_ptr, _Val));
return static_cast<iter_difference_t<_It>>(_STD _Count_trivial(_First_ptr, _Last_ptr, _Val));
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
Expand Down Expand Up @@ -674,7 +666,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI
if (!_STD _Is_constant_evaluated()) {
constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>);

const size_t _Pos = _STD __std_mismatch<_Elem_size>(
const size_t _Pos = _STD _Mismatch<_Elem_size>(
_STD _To_address(_UFirst1), _STD _To_address(_UFirst2), static_cast<size_t>(_ULast1 - _UFirst1));

_UFirst1 += static_cast<_Iter_diff_t<_InIt1>>(_Pos);
Expand Down Expand Up @@ -738,7 +730,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(
if (!_STD _Is_constant_evaluated()) {
constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>);

const size_t _Pos = _STD __std_mismatch<_Elem_size>(
const size_t _Pos = _STD _Mismatch<_Elem_size>(
_STD _To_address(_UFirst1), _STD _To_address(_UFirst2), static_cast<size_t>(_Count));

_UFirst1 += static_cast<_Iter_diff_t<_InIt1>>(_Pos);
Expand Down Expand Up @@ -2976,7 +2968,33 @@ namespace ranges {
const auto _First_ptr = _STD _To_address(_First);
const auto _Last_ptr = _First_ptr + _Count;

const auto _Result = _STD __std_find_last_trivial(_First_ptr, _Last_ptr, _Value);
using _TVal = iter_value_t<_It>;
_TVal* _Result;

if constexpr (is_pointer_v<_Ty> || is_null_pointer_v<_Ty>) {
#ifdef _WIN64
_Result = const_cast<_TVal*>(static_cast<const _TVal*>(
::__std_find_last_trivial_8(_First_ptr, _Last_ptr, reinterpret_cast<uint64_t>(_Value))));
#else
_Result = const_cast<_TVal*>(static_cast<const _TVal*>(
::__std_find_last_trivial_4(_First_ptr, _Last_ptr, reinterpret_cast<uint32_t>(_Value))));
#endif
} else if constexpr (sizeof(_TVal) == 1) {
_Result = const_cast<_TVal*>(static_cast<const _TVal*>(
::__std_find_last_trivial_1(_First_ptr, _Last_ptr, static_cast<uint8_t>(_Value))));
} else if constexpr (sizeof(_TVal) == 2) {
_Result = const_cast<_TVal*>(static_cast<const _TVal*>(
::__std_find_last_trivial_2(_First_ptr, _Last_ptr, static_cast<uint16_t>(_Value))));
} else if constexpr (sizeof(_TVal) == 4) {
_Result = const_cast<_TVal*>(static_cast<const _TVal*>(
::__std_find_last_trivial_4(_First_ptr, _Last_ptr, static_cast<uint32_t>(_Value))));
} else if constexpr (sizeof(_TVal) == 8) {
_Result = const_cast<_TVal*>(static_cast<const _TVal*>(
::__std_find_last_trivial_8(_First_ptr, _Last_ptr, static_cast<uint64_t>(_Value))));
} else {
static_assert(_Always_false<_Ty>, "Unexpected size");
}

StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
if constexpr (is_pointer_v<_It>) {
return {_Result, _Last_ptr};
} else {
Expand Down Expand Up @@ -3387,7 +3405,7 @@ _NODISCARD _CONSTEXPR20 _FwdIt1 find_first_of(
if constexpr (_Vector_alg_in_find_first_of_is_safe<decltype(_UFirst1), decltype(_UFirst2), _Pr>) {
if (!_STD _Is_constant_evaluated() && _ULast1 - _UFirst1 >= _Threshold_find_first_of) {
const auto _First1_ptr = _STD _To_address(_UFirst1);
const auto _Result = _STD __std_find_first_of_trivial(
const auto _Result = _STD _Find_first_of_trivial(
_First1_ptr, _STD _To_address(_ULast1), _STD _To_address(_UFirst2), _STD _To_address(_ULast2));

if constexpr (is_pointer_v<decltype(_UFirst1)>) {
Expand Down Expand Up @@ -3490,8 +3508,7 @@ namespace ranges {
const auto _First2_ptr = _STD _To_address(_First2);
const auto _Last2_ptr = _First2_ptr + _Count2;

const auto _Result =
_STD __std_find_first_of_trivial(_First1_ptr, _Last1_ptr, _First2_ptr, _Last2_ptr);
const auto _Result = _STD _Find_first_of_trivial(_First1_ptr, _Last1_ptr, _First2_ptr, _Last2_ptr);

if constexpr (is_pointer_v<_It1>) {
return _Result;
Expand Down Expand Up @@ -4919,18 +4936,7 @@ namespace ranges {

if constexpr (_Allow_vectorization && _Nx <= 8 && (_Nx & (_Nx - 1)) == 0) {
if (!_STD is_constant_evaluated()) {
_Elem* const _First_addr = _STD to_address(_First);
_Elem* const _Last_addr = _STD to_address(_Last);
if constexpr (_Nx == 1) {
::__std_reverse_trivially_swappable_1(_First_addr, _Last_addr);
} else if constexpr (_Nx == 2) {
::__std_reverse_trivially_swappable_2(_First_addr, _Last_addr);
} else if constexpr (_Nx == 4) {
::__std_reverse_trivially_swappable_4(_First_addr, _Last_addr);
} else {
::__std_reverse_trivially_swappable_8(_First_addr, _Last_addr);
}

_STD _Reverse_trivially_swappable<_Nx>(_STD to_address(_First), _STD to_address(_Last));
return;
}
}
Expand Down Expand Up @@ -4989,20 +4995,8 @@ _CONSTEXPR20 _OutIt reverse_copy(_BidIt _First, _BidIt _Last, _OutIt _Dest) {
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
if constexpr (_Nx == 1) {
::__std_reverse_copy_trivially_copyable_1(
_STD _To_address(_UFirst), _STD _To_address(_ULast), _STD _To_address(_UDest));
} else if constexpr (_Nx == 2) {
::__std_reverse_copy_trivially_copyable_2(
_STD _To_address(_UFirst), _STD _To_address(_ULast), _STD _To_address(_UDest));
} else if constexpr (_Nx == 4) {
::__std_reverse_copy_trivially_copyable_4(
_STD _To_address(_UFirst), _STD _To_address(_ULast), _STD _To_address(_UDest));
} else {
::__std_reverse_copy_trivially_copyable_8(
_STD _To_address(_UFirst), _STD _To_address(_ULast), _STD _To_address(_UDest));
}

_STD _Reverse_copy_trivially_copyable<_Nx>(
_STD _To_address(_UFirst), _STD _To_address(_ULast), _STD _To_address(_UDest));
_UDest += _ULast - _UFirst;
_STD _Seek_wrapped(_Dest, _UDest);
return _Dest;
Expand Down Expand Up @@ -5078,19 +5072,8 @@ namespace ranges {

if constexpr (_Allow_vectorization && _Nx <= 8 && (_Nx & (_Nx - 1)) == 0) {
if (!_STD is_constant_evaluated()) {
_Elem* const _First_addr = _STD to_address(_First);
_Elem* const _Last_addr = _STD to_address(_Last);
_DestElem* const _Result_addr = _STD to_address(_Result);
if constexpr (_Nx == 1) {
::__std_reverse_copy_trivially_copyable_1(_First_addr, _Last_addr, _Result_addr);
} else if constexpr (_Nx == 2) {
::__std_reverse_copy_trivially_copyable_2(_First_addr, _Last_addr, _Result_addr);
} else if constexpr (_Nx == 4) {
::__std_reverse_copy_trivially_copyable_4(_First_addr, _Last_addr, _Result_addr);
} else {
::__std_reverse_copy_trivially_copyable_8(_First_addr, _Last_addr, _Result_addr);
}

_STD _Reverse_copy_trivially_copyable<_Nx>(
_STD to_address(_First), _STD to_address(_Last), _STD to_address(_Result));
_Result += _Last - _First;
return _Result;
}
Expand Down Expand Up @@ -10049,7 +10032,7 @@ constexpr pair<_FwdIt, _FwdIt> _Minmax_element_unchecked(_FwdIt _First, _FwdIt _
if constexpr (_Is_min_max_optimization_safe<_FwdIt, _Pr>) {
if (!_Is_constant_evaluated()) {
const auto _First_ptr = _STD _To_address(_First);
const auto _Result = _STD __std_minmax_element(_First_ptr, _STD _To_address(_Last));
const auto _Result = _STD _Minmax_element(_First_ptr, _STD _To_address(_Last));
if constexpr (is_pointer_v<_FwdIt>) {
return _Result;
} else {
Expand Down Expand Up @@ -10176,7 +10159,7 @@ namespace ranges {
if (!_STD is_constant_evaluated()) {
const auto _First_ptr = _STD to_address(_First);
const auto _Last_ptr = _First_ptr + (_Last - _First);
const auto _Result = _STD __std_minmax_element(_First_ptr, _Last_ptr);
const auto _Result = _STD _Minmax_element(_First_ptr, _Last_ptr);
if constexpr (is_pointer_v<_It>) {
return {_Result.first, _Result.second};
} else {
Expand Down Expand Up @@ -10254,7 +10237,7 @@ _NODISCARD constexpr pair<_Ty, _Ty> minmax(initializer_list<_Ty> _Ilist, _Pr _Pr
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Is_min_max_optimization_safe<const _Ty*, _Pr>) {
if (!_STD _Is_constant_evaluated()) {
const auto _Result = _STD __std_minmax(_Ilist.begin(), _Ilist.end());
const auto _Result = _STD _Minmax(_Ilist.begin(), _Ilist.end());
return {static_cast<_Ty>(_Result._Min), static_cast<_Ty>(_Result._Max)};
}
}
Expand Down Expand Up @@ -10386,7 +10369,7 @@ namespace ranges {
if (!_STD is_constant_evaluated()) {
const auto _First_ptr = _STD to_address(_First);
const auto _Last_ptr = _First_ptr + (_Last - _First);
const auto _Result = _STD __std_minmax(_First_ptr, _Last_ptr);
const auto _Result = _STD _Minmax(_First_ptr, _Last_ptr);
return {static_cast<_Vty>(_Result._Min), static_cast<_Vty>(_Result._Max)};
}
}
Expand Down