Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion benchmarks/bench-qsort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,49 @@ static void simdsort(benchmark::State &state, Args &&...args)
}
}

/*
 * Benchmark body: sort an array into descending order with std::sort.
 *
 * The first captured argument is read as the array size and the second as
 * the array-type string passed to get_array<T>(). Sorting the reverse
 * iterator range in ascending order leaves the array in descending order
 * when read front to back. After each timed sort the array is restored from
 * a backup copy while timing is paused.
 */
template <typename T, class... Args>
static void scalar_revsort(benchmark::State &state, Args &&...args)
{
    // Get args. `args` are forwarding references, so forward them instead of
    // moving: an lvalue passed by the caller must not be moved-from.
    auto args_tuple = std::make_tuple(std::forward<Args>(args)...);
    const size_t arrsize = std::get<0>(args_tuple);
    const std::string arrtype = std::get<1>(args_tuple);
    // set up array
    std::vector<T> arr = get_array<T>(arrtype, arrsize);
    const std::vector<T> arr_bkp = arr;
    // benchmark
    for (auto _ : state) {
        std::sort(arr.rbegin(), arr.rend());
        // restore the unsorted input outside of the timed region
        state.PauseTiming();
        arr = arr_bkp;
        state.ResumeTiming();
    }
}

/*
 * Benchmark body: sort an array with x86simdsort::qsort, passing `false` and
 * `true` as the third and fourth arguments.
 * NOTE(review): these are presumably hasnan = false and descending = true,
 * matching the (arr, arrsize, hasnan, descending) order used by the internal
 * qsort declarations -- confirm against the public x86simdsort header.
 *
 * The first captured argument is read as the array size and the second as
 * the array-type string passed to get_array<T>(). After each timed sort the
 * array is restored from a backup copy while timing is paused.
 */
template <typename T, class... Args>
static void simd_revsort(benchmark::State &state, Args &&...args)
{
    // Get args. `args` are forwarding references, so forward them instead of
    // moving: an lvalue passed by the caller must not be moved-from.
    auto args_tuple = std::make_tuple(std::forward<Args>(args)...);
    const size_t arrsize = std::get<0>(args_tuple);
    const std::string arrtype = std::get<1>(args_tuple);
    // set up array
    std::vector<T> arr = get_array<T>(arrtype, arrsize);
    const std::vector<T> arr_bkp = arr;
    // benchmark
    for (auto _ : state) {
        x86simdsort::qsort(arr.data(), arrsize, false, true);
        // restore the unsorted input outside of the timed region
        state.PauseTiming();
        arr = arr_bkp;
        state.ResumeTiming();
    }
}

// Expands BENCH_SORT once per benchmark body for the given element type:
// the SIMD and scalar sorts, followed by their reverse-order counterparts.
#define BENCH_BOTH_QSORT(type) \
    BENCH_SORT(simdsort, type) \
    BENCH_SORT(scalarsort, type) \
    BENCH_SORT(simd_revsort, type) \
    BENCH_SORT(scalar_revsort, type)

BENCH_BOTH_QSORT(uint64_t)
BENCH_BOTH_QSORT(int64_t)
Expand Down
14 changes: 8 additions & 6 deletions lib/x86simdsort-avx2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,21 @@

#define DEFINE_ALL_METHODS(type) \
template <> \
void qsort(type *arr, size_t arrsize, bool hasnan) \
void qsort(type *arr, size_t arrsize, bool hasnan, bool descending) \
{ \
avx2_qsort(arr, arrsize, hasnan); \
avx2_qsort(arr, arrsize, hasnan, descending); \
} \
template <> \
void qselect(type *arr, size_t k, size_t arrsize, bool hasnan) \
void qselect( \
type *arr, size_t k, size_t arrsize, bool hasnan, bool descending) \
{ \
avx2_qselect(arr, k, arrsize, hasnan); \
avx2_qselect(arr, k, arrsize, hasnan, descending); \
} \
template <> \
void partial_qsort(type *arr, size_t k, size_t arrsize, bool hasnan) \
void partial_qsort( \
type *arr, size_t k, size_t arrsize, bool hasnan, bool descending) \
{ \
avx2_partial_qsort(arr, k, arrsize, hasnan); \
avx2_partial_qsort(arr, k, arrsize, hasnan, descending); \
} \
template <> \
std::vector<size_t> argsort(type *arr, size_t arrsize, bool hasnan) \
Expand Down
40 changes: 28 additions & 12 deletions lib/x86simdsort-icl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,50 @@
namespace xss {
namespace avx512 {
template <>
void qsort(uint16_t *arr, size_t size, bool hasnan)
void qsort(uint16_t *arr, size_t size, bool hasnan, bool descending)
{
avx512_qsort(arr, size, hasnan);
avx512_qsort(arr, size, hasnan, descending);
}
template <>
void qselect(uint16_t *arr, size_t k, size_t arrsize, bool hasnan)
void qselect(uint16_t *arr,
size_t k,
size_t arrsize,
bool hasnan,
bool descending)
{
avx512_qselect(arr, k, arrsize, hasnan);
avx512_qselect(arr, k, arrsize, hasnan, descending);
}
template <>
void partial_qsort(uint16_t *arr, size_t k, size_t arrsize, bool hasnan)
void partial_qsort(uint16_t *arr,
size_t k,
size_t arrsize,
bool hasnan,
bool descending)
{
avx512_partial_qsort(arr, k, arrsize, hasnan);
avx512_partial_qsort(arr, k, arrsize, hasnan, descending);
}
template <>
void qsort(int16_t *arr, size_t size, bool hasnan)
void qsort(int16_t *arr, size_t size, bool hasnan, bool descending)
{
avx512_qsort(arr, size, hasnan);
avx512_qsort(arr, size, hasnan, descending);
}
template <>
void qselect(int16_t *arr, size_t k, size_t arrsize, bool hasnan)
void qselect(int16_t *arr,
size_t k,
size_t arrsize,
bool hasnan,
bool descending)
{
avx512_qselect(arr, k, arrsize, hasnan);
avx512_qselect(arr, k, arrsize, hasnan, descending);
}
template <>
void partial_qsort(int16_t *arr, size_t k, size_t arrsize, bool hasnan)
void partial_qsort(int16_t *arr,
size_t k,
size_t arrsize,
bool hasnan,
bool descending)
{
avx512_partial_qsort(arr, k, arrsize, hasnan);
avx512_partial_qsort(arr, k, arrsize, hasnan, descending);
}
} // namespace avx512
} // namespace xss
51 changes: 36 additions & 15 deletions lib/x86simdsort-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,26 @@ namespace xss {
namespace avx512 {
// quicksort; `descending` defaults to false, so three-argument calls still
// compile unchanged
template <typename T>
XSS_HIDE_SYMBOL void
qsort(T *arr, size_t arrsize, bool hasnan = false, bool descending = false);
// key-value quicksort
template <typename T1, typename T2>
XSS_EXPORT_SYMBOL void
keyvalue_qsort(T1 *key, T2 *val, size_t arrsize, bool hasnan = false);
// quickselect; `descending` defaults to false
template <typename T>
XSS_HIDE_SYMBOL void qselect(T *arr,
                             size_t k,
                             size_t arrsize,
                             bool hasnan = false,
                             bool descending = false);
// partial sort; `descending` defaults to false
template <typename T>
XSS_HIDE_SYMBOL void partial_qsort(T *arr,
                                   size_t k,
                                   size_t arrsize,
                                   bool hasnan = false,
                                   bool descending = false);
// argsort
template <typename T>
XSS_HIDE_SYMBOL std::vector<size_t>
Expand All @@ -33,19 +40,26 @@ namespace avx512 {
namespace avx2 {
// quicksort; `descending` defaults to false, so three-argument calls still
// compile unchanged
template <typename T>
XSS_HIDE_SYMBOL void
qsort(T *arr, size_t arrsize, bool hasnan = false, bool descending = false);
// key-value quicksort
template <typename T1, typename T2>
XSS_EXPORT_SYMBOL void
keyvalue_qsort(T1 *key, T2 *val, size_t arrsize, bool hasnan = false);
// quickselect; `descending` defaults to false
template <typename T>
XSS_HIDE_SYMBOL void qselect(T *arr,
                             size_t k,
                             size_t arrsize,
                             bool hasnan = false,
                             bool descending = false);
// partial sort; `descending` defaults to false
template <typename T>
XSS_HIDE_SYMBOL void partial_qsort(T *arr,
                                   size_t k,
                                   size_t arrsize,
                                   bool hasnan = false,
                                   bool descending = false);
// argsort
template <typename T>
XSS_HIDE_SYMBOL std::vector<size_t>
Expand All @@ -58,19 +72,26 @@ namespace avx2 {
namespace scalar {
// quicksort; `descending` defaults to false, so three-argument calls still
// compile unchanged
template <typename T>
XSS_HIDE_SYMBOL void
qsort(T *arr, size_t arrsize, bool hasnan = false, bool descending = false);
// key-value quicksort
template <typename T1, typename T2>
XSS_EXPORT_SYMBOL void
keyvalue_qsort(T1 *key, T2 *val, size_t arrsize, bool hasnan = false);
// quickselect; `descending` defaults to false
template <typename T>
XSS_HIDE_SYMBOL void qselect(T *arr,
                             size_t k,
                             size_t arrsize,
                             bool hasnan = false,
                             bool descending = false);
// partial sort; `descending` defaults to false
template <typename T>
XSS_HIDE_SYMBOL void partial_qsort(T *arr,
                                   size_t k,
                                   size_t arrsize,
                                   bool hasnan = false,
                                   bool descending = false);
// argsort
template <typename T>
XSS_HIDE_SYMBOL std::vector<size_t>
Expand Down
61 changes: 37 additions & 24 deletions lib/x86simdsort-scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

namespace xss {
namespace utils {
/* O(1) permute array in place: stolen from
* http://www.davidespataro.it/apply-a-permutation-to-a-vector */
/*
* O(1) permute array in place: stolen from
* http://www.davidespataro.it/apply-a-permutation-to-a-vector
*/
template <typename T>
void apply_permutation_in_place(T *arr, std::vector<size_t> arg)
{
Expand All @@ -21,40 +23,51 @@ namespace utils {
arg[curr] = curr;
}
}
} // namespace utils

namespace scalar {
template <typename T>
void qsort(T *arr, size_t arrsize, bool hasnan)
decltype(auto) get_cmp_func(bool hasnan, bool reverse)
{
std::function<bool(T, T)> cmp;
if (hasnan) {
std::sort(arr, arr + arrsize, compare<T, std::less<T>>());
if (reverse == true) { cmp = compare<T, std::greater<T>>(); }
else {
cmp = compare<T, std::less<T>>();
}
}
else {
std::sort(arr, arr + arrsize);
if (reverse == true) { cmp = std::greater<T>(); }
else {
cmp = std::less<T>();
}
}
return cmp;
}
} // namespace utils

namespace scalar {
template <typename T>
void qselect(T *arr, size_t k, size_t arrsize, bool hasnan)
void qsort(T *arr, size_t arrsize, bool hasnan, bool reversed)
{
if (hasnan) {
std::nth_element(
arr, arr + k, arr + arrsize, compare<T, std::less<T>>());
}
else {
std::nth_element(arr, arr + k, arr + arrsize);
}
std::sort(arr,
arr + arrsize,
xss::utils::get_cmp_func<T>(hasnan, reversed));
}

template <typename T>
void partial_qsort(T *arr, size_t k, size_t arrsize, bool hasnan)
void qselect(T *arr, size_t k, size_t arrsize, bool hasnan, bool reversed)
{
if (hasnan) {
std::partial_sort(
arr, arr + k, arr + arrsize, compare<T, std::less<T>>());
}
else {
std::partial_sort(arr, arr + k, arr + arrsize);
}
std::nth_element(arr,
arr + k,
arr + arrsize,
xss::utils::get_cmp_func<T>(hasnan, reversed));
}
/*
 * Scalar partial sort: runs std::partial_sort on arr[0, arrsize) with
 * arr + k as the middle position, ordered by the comparator chosen from the
 * hasnan / reversed flags.
 */
template <typename T>
void
partial_qsort(T *arr, size_t k, size_t arrsize, bool hasnan, bool reversed)
{
    T *const middle = arr + k;
    T *const last = arr + arrsize;
    auto ordering = xss::utils::get_cmp_func<T>(hasnan, reversed);
    std::partial_sort(arr, middle, last, ordering);
}
template <typename T>
std::vector<size_t> argsort(T *arr, size_t arrsize, bool hasnan)
Expand Down
14 changes: 8 additions & 6 deletions lib/x86simdsort-skx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,21 @@

#define DEFINE_ALL_METHODS(type) \
template <> \
void qsort(type *arr, size_t arrsize, bool hasnan) \
void qsort(type *arr, size_t arrsize, bool hasnan, bool descending) \
{ \
avx512_qsort(arr, arrsize, hasnan); \
avx512_qsort(arr, arrsize, hasnan, descending); \
} \
template <> \
void qselect(type *arr, size_t k, size_t arrsize, bool hasnan) \
void qselect( \
type *arr, size_t k, size_t arrsize, bool hasnan, bool descending) \
{ \
avx512_qselect(arr, k, arrsize, hasnan); \
avx512_qselect(arr, k, arrsize, hasnan, descending); \
} \
template <> \
void partial_qsort(type *arr, size_t k, size_t arrsize, bool hasnan) \
void partial_qsort( \
type *arr, size_t k, size_t arrsize, bool hasnan, bool descending) \
{ \
avx512_partial_qsort(arr, k, arrsize, hasnan); \
avx512_partial_qsort(arr, k, arrsize, hasnan, descending); \
} \
template <> \
std::vector<size_t> argsort(type *arr, size_t arrsize, bool hasnan) \
Expand Down
29 changes: 23 additions & 6 deletions lib/x86simdsort-spr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,36 @@
namespace xss {
namespace avx512 {
    /*
     * Explicit specializations of the avx512 entry points for _Float16.
     * The runtime `descending` flag picks between the <true> and <false>
     * instantiations of the corresponding avx512_* routine.
     */

    // quicksort
    template <>
    void qsort(_Float16 *arr, size_t size, bool hasnan, bool descending)
    {
        if (descending) { avx512_qsort<true>(arr, size, hasnan); }
        else {
            avx512_qsort<false>(arr, size, hasnan);
        }
    }
    // quickselect
    template <>
    void qselect(_Float16 *arr,
                 size_t k,
                 size_t arrsize,
                 bool hasnan,
                 bool descending)
    {
        if (descending) { avx512_qselect<true>(arr, k, arrsize, hasnan); }
        else {
            avx512_qselect<false>(arr, k, arrsize, hasnan);
        }
    }
    // partial sort
    template <>
    void partial_qsort(_Float16 *arr,
                       size_t k,
                       size_t arrsize,
                       bool hasnan,
                       bool descending)
    {
        if (descending) {
            avx512_partial_qsort<true>(arr, k, arrsize, hasnan);
        }
        else {
            avx512_partial_qsort<false>(arr, k, arrsize, hasnan);
        }
    }
} // namespace avx512
} // namespace xss
Loading