From 7c42a0dac1c5b6b173a41674080fb4f4bf46598d Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Wed, 15 Nov 2023 12:42:19 -0800 Subject: [PATCH 1/6] Add method to sort array/vector of custom objects --- lib/x86simdsort.h | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/lib/x86simdsort.h b/lib/x86simdsort.h index 907ba43d..0a5c3efa 100644 --- a/lib/x86simdsort.h +++ b/lib/x86simdsort.h @@ -3,11 +3,28 @@ #include #include #include +#include #define XSS_EXPORT_SYMBOL __attribute__((visibility("default"))) #define XSS_HIDE_SYMBOL __attribute__((visibility("hidden"))) #define UNUSED(x) (void)(x) +template +XSS_HIDE_SYMBOL void permute_array_in_place(T *A, std::vector P) +{ + for (size_t i = 0; i < P.size(); i++) { + size_t curr = i; + size_t next = P[curr]; + while (next != i) { + std::swap(A[curr], A[next]); + P[curr] = curr; + curr = next; + next = P[next]; + } + P[curr] = curr; + } +} + namespace x86simdsort { // quicksort @@ -34,10 +51,24 @@ template XSS_EXPORT_SYMBOL std::vector argselect(T *arr, size_t k, size_t arrsize, bool hasnan = false); -// argselect +// keyvalue sort template XSS_EXPORT_SYMBOL void keyvalue_qsort(T1 *key, T2* val, size_t arrsize, bool hasnan = false); +// sort an object +template +XSS_EXPORT_SYMBOL void object_qsort(T *arr, size_t arrsize, const F key_func) +{ + using return_type_of = + typename decltype(std::function {key_func})::result_type; + std::vector keys(arrsize); + for (size_t ii = 0; ii < arrsize; ++ii) { + keys[ii] = key_func(arr[ii]); + } + std::vector arg = x86simdsort::argsort(keys.data(), arrsize); + permute_array_in_place(arr, arg); +} + } // namespace x86simdsort #endif From 8f8dd4aa03ebfe0d34b627256fbe9c6f7c173750 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Wed, 15 Nov 2023 12:42:40 -0800 Subject: [PATCH 2/6] Add benchmarks for objsort --- benchmarks/bench-all.cpp | 1 + benchmarks/bench-objsort.hpp | 98 ++++++++++++++++++++++++++++++++++++ run-bench.py | 3 ++ 3 files changed, 102 insertions(+) create mode 100644 benchmarks/bench-objsort.hpp diff --git a/benchmarks/bench-all.cpp b/benchmarks/bench-all.cpp index 4862cb9f..11ed5082 100644 --- a/benchmarks/bench-all.cpp +++ b/benchmarks/bench-all.cpp @@ -4,3 +4,4 @@ #include "bench-qselect.hpp" #include "bench-qsort.hpp" #include "bench-keyvalue.hpp" +#include "bench-objsort.hpp" diff --git a/benchmarks/bench-objsort.hpp b/benchmarks/bench-objsort.hpp new file mode 100644 index 00000000..823eb4b5 --- /dev/null +++ b/benchmarks/bench-objsort.hpp @@ -0,0 +1,98 @@ +#include +struct Point3D { + double x; + double y; + double z; + Point3D() + { + x = (double)rand() / RAND_MAX; + y = (double)rand() / RAND_MAX; + z = (double)rand() / RAND_MAX; + } + double distance() + { + return sqrt(x * x + y * y + z * z); + } +}; + +struct Point2D { + double x; + double y; + Point2D() + { + x = (double)rand() / RAND_MAX; + y = (double)rand() / RAND_MAX; + } + double distance() + { + return sqrt(x * x + y * y); + } +}; + +template +std::vector init_data(const int size) +{ + srand(42); + std::vector arr; + for (auto ii = 0; ii < size; ++ii) { + T temp; + arr.push_back(temp); + } + return arr; +} + +template +struct less_than_key { + inline bool operator()(T &p1, T &p2) + { + return (p1.distance() < p2.distance()); + } +}; + +template +static void scalarobjsort(benchmark::State &state) +{ + // set up array + std::vector arr = init_data(state.range(0)); + std::vector arr_bkp = arr; + // benchmark + for (auto _ : state) { + std::sort(arr.begin(), arr.end(), less_than_key()); + state.PauseTiming(); + arr = arr_bkp; + state.ResumeTiming(); + } +} + +template +static void simdobjsort(benchmark::State &state) +{ + // set up array + std::vector arr = init_data(state.range(0)); + std::vector arr_bkp = arr; + // benchmark + for (auto _ : state) { + x86simdsort::object_qsort(arr.data(), arr.size(), [](T p) -> double { + return p.distance(); + }); + state.PauseTiming(); + if (!std::is_sorted(arr.begin(), arr.end(), less_than_key())) { + std::cout << "sorting failed \n"; + } + arr = arr_bkp; + state.ResumeTiming(); + } +} + +#define BENCHMARK_OBJSORT(func, T) \ + BENCHMARK_TEMPLATE(func, T) \ + ->Arg(10e2) \ + ->Arg(10e3) \ + ->Arg(10e4) \ + ->Arg(10e5) \ + ->Arg(10e6); + +BENCHMARK_OBJSORT(simdobjsort, Point2D) +BENCHMARK_OBJSORT(scalarobjsort, Point2D) +BENCHMARK_OBJSORT(simdobjsort, Point3D) +BENCHMARK_OBJSORT(scalarobjsort, Point3D) diff --git a/run-bench.py b/run-bench.py index 3ea27812..d18dfd64 100644 --- a/run-bench.py +++ b/run-bench.py @@ -37,6 +37,9 @@ elif "keyvalue" in args.benchcompare: baseline = "scalarkvsort.*" + filterb contender = "simdkvsort.*" + filterb + elif "objsort" in args.benchcompare: + baseline = "scalarobjsort.*" + filterb + contender = "simdobjsort.*" + filterb else: parser.print_help(sys.stderr) parser.error("ERROR: Unknown argument '%s'" % args.benchcompare) From 38fbbc8e22d591494f40bae8c3bc81528692123b Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Wed, 15 Nov 2023 13:39:06 -0800 Subject: [PATCH 3/6] Use the permute array in-line --- benchmarks/bench-objsort.hpp | 34 ++++++++++++++----------- lib/x86simdsort.h | 49 ++++++++++++++++++++---------------- 2 files changed, 47 insertions(+), 36 deletions(-) diff --git a/benchmarks/bench-objsort.hpp b/benchmarks/bench-objsort.hpp index 823eb4b5..60463c05 100644 --- a/benchmarks/bench-objsort.hpp +++ b/benchmarks/bench-objsort.hpp @@ -1,31 +1,31 @@ #include struct Point3D { - double x; - double y; - double z; + float x; + float y; + float z; Point3D() { - x = (double)rand() / RAND_MAX; - y = (double)rand() / RAND_MAX; - z = (double)rand() / RAND_MAX; + x = (float)rand() / RAND_MAX; + y = (float)rand() / RAND_MAX; + z = (float)rand() / RAND_MAX; } - double distance() + float distance() { - return sqrt(x * x + y * y + z * z); + return x; //sqrt(x * x + y * y + z * z); } }; struct Point2D { - double x; - double y; + float x; + float y; Point2D() { - x = (double)rand() / RAND_MAX; - y = (double)rand() / RAND_MAX; + x = (float)rand() / RAND_MAX; + y = (float)rand() / RAND_MAX; } - double distance() + float distance() { - return sqrt(x * x + y * y); + return x; //sqrt(x * x + y * y); } }; @@ -54,6 +54,8 @@ static void scalarobjsort(benchmark::State &state) { // set up array std::vector arr = init_data(state.range(0)); + //std::sort(arr.begin(), arr.end(), less_than_key()); + //std::reverse(arr.begin(), arr.end()); std::vector arr_bkp = arr; // benchmark for (auto _ : state) { @@ -69,10 +71,12 @@ static void simdobjsort(benchmark::State &state) { // set up array std::vector arr = init_data(state.range(0)); + //std::sort(arr.begin(), arr.end(), less_than_key()); + //std::reverse(arr.begin(), arr.end()); std::vector arr_bkp = arr; // benchmark for (auto _ : state) { - x86simdsort::object_qsort(arr.data(), arr.size(), [](T p) -> double { + x86simdsort::object_qsort(arr.data(), arr.size(), [](T p) -> float { return p.distance(); }); state.PauseTiming(); diff --git a/lib/x86simdsort.h b/lib/x86simdsort.h index 0a5c3efa..7327c5fa 100644 --- a/lib/x86simdsort.h +++ b/lib/x86simdsort.h @@ -9,22 +9,6 @@ #define XSS_HIDE_SYMBOL __attribute__((visibility("hidden"))) #define UNUSED(x) (void)(x) -template -XSS_HIDE_SYMBOL void permute_array_in_place(T *A, std::vector P) -{ - for (size_t i = 0; i < P.size(); i++) { - size_t curr = i; - size_t next = P[curr]; - while (next != i) { - std::swap(A[curr], A[next]); - P[curr] = curr; - curr = next; - next = P[next]; - } - P[curr] = curr; - } -} - namespace x86simdsort { // quicksort @@ -57,17 +41,40 @@ XSS_EXPORT_SYMBOL void keyvalue_qsort(T1 *key, T2* val, size_t arrsize, bool hasnan = false); // sort an object -template -XSS_EXPORT_SYMBOL void object_qsort(T *arr, size_t arrsize, const F key_func) +template +XSS_EXPORT_SYMBOL void object_qsort(T *arr, size_t arrsize, Func key_func) { + /* (1) Create a vector a keys */ using return_type_of = typename decltype(std::function {key_func})::result_type; - std::vector keys(arrsize); + std::vector keys; + keys.reserve(arrsize); for (size_t ii = 0; ii < arrsize; ++ii) { keys[ii] = key_func(arr[ii]); } - std::vector arg = x86simdsort::argsort(keys.data(), arrsize); - permute_array_in_place(arr, arg); + + /* (2) Call argsort based on the keys */ + std::vector arg = argsort(keys.data(), arrsize); + + /* (3) Permute array in-place */ + std::vector done(arrsize); + for (size_t i = 0; i < arrsize; ++i) + { + if (done[i]) + { + continue; + } + done[i] = true; + size_t prev_j = i; + size_t j = arg[i]; + while (i != j) + { + std::swap(arr[prev_j], arr[j]); + done[j] = true; + prev_j = j; + j = arg[j]; + } + } } } // namespace x86simdsort From 0d6ffa9bf119898ee72aeef2cab38b1349d34cc3 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Thu, 16 Nov 2023 12:29:25 -0800 Subject: [PATCH 4/6] Use distance --- benchmarks/bench-objsort.hpp | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/benchmarks/bench-objsort.hpp b/benchmarks/bench-objsort.hpp index 60463c05..30ae8b1f 100644 --- a/benchmarks/bench-objsort.hpp +++ b/benchmarks/bench-objsort.hpp @@ -1,31 +1,31 @@ #include struct Point3D { - float x; - float y; - float z; + double x; + double y; + double z; Point3D() { - x = (float)rand() / RAND_MAX; - y = (float)rand() / RAND_MAX; - z = (float)rand() / RAND_MAX; + x = (double)rand() / RAND_MAX; + y = (double)rand() / RAND_MAX; + z = (double)rand() / RAND_MAX; } - float distance() + double distance() { - return x; //sqrt(x * x + y * y + z * z); + return std::sqrt(x * x + y * y + z * z); } }; struct Point2D { - float x; - float y; + double x; + double y; Point2D() { - x = (float)rand() / RAND_MAX; - y = (float)rand() / RAND_MAX; + x = (double)rand() / RAND_MAX; + y = (double)rand() / RAND_MAX; } - float distance() + double distance() { - return x; //sqrt(x * x + y * y); + return std::sqrt(x * x + y * y); } }; @@ -54,8 +54,6 @@ static void scalarobjsort(benchmark::State &state) { // set up array std::vector arr = init_data(state.range(0)); - //std::sort(arr.begin(), arr.end(), less_than_key()); - //std::reverse(arr.begin(), arr.end()); std::vector arr_bkp = arr; // benchmark for (auto _ : state) { @@ -71,12 +69,10 @@ static void simdobjsort(benchmark::State &state) { // set up array std::vector arr = init_data(state.range(0)); - //std::sort(arr.begin(), arr.end(), less_than_key()); - //std::reverse(arr.begin(), arr.end()); std::vector arr_bkp = arr; // benchmark for (auto _ : state) { - x86simdsort::object_qsort(arr.data(), arr.size(), [](T p) -> float { + x86simdsort::object_qsort(arr.data(), arr.size(), [](T p) -> double { return p.distance(); }); state.PauseTiming(); @@ -90,6 +86,7 @@ static void simdobjsort(benchmark::State &state) #define BENCHMARK_OBJSORT(func, T) \ BENCHMARK_TEMPLATE(func, T) \ + ->Arg(10e1) \ ->Arg(10e2) \ ->Arg(10e3) \ ->Arg(10e4) \ From 9a92ab0efb87b800b10d54053dd762288f34653d Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Fri, 17 Nov 2023 13:56:29 -0800 Subject: [PATCH 5/6] Use key-value sort instead of argsort --- lib/x86simdsort.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/x86simdsort.h b/lib/x86simdsort.h index 7327c5fa..e5887bb0 100644 --- a/lib/x86simdsort.h +++ b/lib/x86simdsort.h @@ -4,6 +4,7 @@ #include #include #include +#include #define XSS_EXPORT_SYMBOL __attribute__((visibility("default"))) #define XSS_HIDE_SYMBOL __attribute__((visibility("hidden"))) @@ -53,10 +54,12 @@ XSS_EXPORT_SYMBOL void object_qsort(T *arr, size_t arrsize, Func key_func) keys[ii] = key_func(arr[ii]); } - /* (2) Call argsort based on the keys */ - std::vector arg = argsort(keys.data(), arrsize); + /* (2) Call arg based on keys using the keyvalue sort */ + std::vector arg(arrsize); + std::iota(arg.begin(), arg.end(), 0); + keyvalue_qsort(keys.data(), arg.data(), arrsize); - /* (3) Permute array in-place */ + /* (3) Permute obj array in-place */ std::vector done(arrsize); for (size_t i = 0; i < arrsize; ++i) { From fbc033e085f5298f5275b30456572d4b92a0cf4f Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Thu, 30 Nov 2023 09:49:11 -0800 Subject: [PATCH 6/6] Add more distance metrics --- benchmarks/bench-objsort.hpp | 47 +++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/benchmarks/bench-objsort.hpp b/benchmarks/bench-objsort.hpp index 30ae8b1f..d2f15990 100644 --- a/benchmarks/bench-objsort.hpp +++ b/benchmarks/bench-objsort.hpp @@ -1,8 +1,16 @@ #include + +static constexpr char x[] = "x"; +static constexpr char euclidean[] = "euclidean"; +static constexpr char taxicab[] = "taxicab"; +static constexpr char chebyshev[] = "chebyshev"; + +template struct Point3D { double x; double y; double z; + static constexpr std::string_view name {val}; Point3D() { x = (double)rand() / RAND_MAX; @@ -11,21 +19,18 @@ struct Point3D { } double distance() { - return std::sqrt(x * x + y * y + z * z); - } -}; - -struct Point2D { - double x; - double y; - Point2D() - { - x = (double)rand() / RAND_MAX; - y = (double)rand() / RAND_MAX; - } - double distance() - { - return std::sqrt(x * x + y * y); + if constexpr (name == "x") { + return x; + } + else if constexpr (name == "euclidean") { + return std::sqrt(x * x + y * y + z * z); + } + else if constexpr (name == "taxicab") { + return abs(x) + abs(y) + abs(z); + } + else if constexpr (name == "chebyshev") { + return std::max(std::max(x, y), z); + } } }; @@ -93,7 +98,11 @@ static void simdobjsort(benchmark::State &state) ->Arg(10e5) \ ->Arg(10e6); -BENCHMARK_OBJSORT(simdobjsort, Point2D) -BENCHMARK_OBJSORT(scalarobjsort, Point2D) -BENCHMARK_OBJSORT(simdobjsort, Point3D) -BENCHMARK_OBJSORT(scalarobjsort, Point3D) +BENCHMARK_OBJSORT(simdobjsort, Point3D) +BENCHMARK_OBJSORT(scalarobjsort, Point3D) +BENCHMARK_OBJSORT(simdobjsort, Point3D) +BENCHMARK_OBJSORT(scalarobjsort, Point3D) +BENCHMARK_OBJSORT(simdobjsort, Point3D) +BENCHMARK_OBJSORT(scalarobjsort, Point3D) +BENCHMARK_OBJSORT(simdobjsort, Point3D) +BENCHMARK_OBJSORT(scalarobjsort, Point3D)