diff --git a/benchmarks/bench-qsort-common.h b/benchmarks/bench-qsort-common.h index fe0decf7..87fba479 100644 --- a/benchmarks/bench-qsort-common.h +++ b/benchmarks/bench-qsort-common.h @@ -8,4 +8,26 @@ #include "avx512-32bit-qsort.hpp" #include "avx512-64bit-qsort.hpp" +#define MY_BENCHMARK_CAPTURE(func, T, test_case_name, ...) \ + BENCHMARK_PRIVATE_DECLARE(func) \ + = (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #func "/" #test_case_name "/" #T, \ + [](::benchmark::State &st) { \ + func(st, __VA_ARGS__); \ + }))) + +#define BENCH(func, type) \ + MY_BENCHMARK_CAPTURE( \ + func, type, random_10000, 10000, std::string("random")); \ + MY_BENCHMARK_CAPTURE( \ + func, type, random_100000, 100000, std::string("random")); \ + MY_BENCHMARK_CAPTURE( \ + func, type, sorted_10000, 10000, std::string("sorted")); \ + MY_BENCHMARK_CAPTURE( \ + func, type, constant_10000, 10000, std::string("constant")); \ + MY_BENCHMARK_CAPTURE( \ + func, type, reverse_10000, 10000, std::string("reverse")); + + #endif diff --git a/benchmarks/bench_qsort.hpp b/benchmarks/bench_qsort.hpp index 6659fdae..0f9c3c48 100644 --- a/benchmarks/bench_qsort.hpp +++ b/benchmarks/bench_qsort.hpp @@ -1,68 +1,94 @@ #include "bench-qsort-common.h" -template -static void avx512_qsort(benchmark::State& state) { - if (!cpu_has_avx512bw()) { - state.SkipWithMessage("Requires AVX512 BW ISA"); - } - if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) { - state.SkipWithMessage("Requires AVX512 VBMI2 ISA"); - } +template +static void stdsort(benchmark::State &state, Args &&...args) +{ + auto args_tuple = std::make_tuple(std::move(args)...); // Perform setup here - size_t ARRSIZE = state.range(0); + size_t ARRSIZE = std::get<0>(args_tuple); std::vector arr; std::vector arr_bkp; - /* Initialize elements */ - arr = get_uniform_rand_array(ARRSIZE); + std::string arrtype = std::get<1>(args_tuple); + if (arrtype == "random") { arr = get_uniform_rand_array(ARRSIZE); } + else if (arrtype == "sorted") { + arr = get_uniform_rand_array(ARRSIZE); + std::sort(arr.begin(), arr.end()); + } + else if (arrtype == "constant") { + T temp = get_uniform_rand_array(1)[0]; + for (size_t ii = 0; ii < ARRSIZE; ++ii) { + arr.push_back(temp); + } + } + else if (arrtype == "reverse") { + arr = get_uniform_rand_array(ARRSIZE); + std::sort(arr.begin(), arr.end()); + std::reverse(arr.begin(), arr.end()); + } arr_bkp = arr; /* call avx512 quicksort */ for (auto _ : state) { - avx512_qsort(arr.data(), ARRSIZE); + std::sort(arr.begin(), arr.end()); state.PauseTiming(); arr = arr_bkp; state.ResumeTiming(); } } -template -static void stdsort(benchmark::State& state) { +template +static void avx512qsort(benchmark::State &state, Args &&...args) +{ + auto args_tuple = std::make_tuple(std::move(args)...); + if (!cpu_has_avx512bw()) { + state.SkipWithMessage("Requires AVX512 BW ISA"); + } + if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) { + state.SkipWithMessage("Requires AVX512 VBMI2"); + } // Perform setup here - size_t ARRSIZE = state.range(0); + size_t ARRSIZE = std::get<0>(args_tuple); std::vector arr; std::vector arr_bkp; - /* Initialize elements */ - arr = get_uniform_rand_array(ARRSIZE); + std::string arrtype = std::get<1>(args_tuple); + if (arrtype == "random") { arr = get_uniform_rand_array(ARRSIZE); } + else if (arrtype == "sorted") { + arr = get_uniform_rand_array(ARRSIZE); + std::sort(arr.begin(), arr.end()); + } + else if (arrtype == "constant") { + T temp = get_uniform_rand_array(1)[0]; + for (size_t ii = 0; ii < ARRSIZE; ++ii) { + arr.push_back(temp); + } + } + else if (arrtype == "reverse") { + arr = get_uniform_rand_array(ARRSIZE); + std::sort(arr.begin(), arr.end()); + std::reverse(arr.begin(), arr.end()); + } arr_bkp = arr; - /* call std::sort */ + /* call avx512 quicksort */ for (auto _ : state) { - std::sort(arr.begin(), arr.end()); + avx512_qsort(arr.data(), ARRSIZE); state.PauseTiming(); arr = arr_bkp; state.ResumeTiming(); } } -// Register the function as a benchmark -BENCHMARK(avx512_qsort)->Arg(10000)->Arg(1000000); -BENCHMARK(stdsort)->Arg(10000)->Arg(1000000); -BENCHMARK(avx512_qsort)->Arg(10000)->Arg(1000000); -BENCHMARK(stdsort)->Arg(10000)->Arg(1000000); -BENCHMARK(avx512_qsort)->Arg(10000)->Arg(1000000); -BENCHMARK(stdsort)->Arg(10000)->Arg(1000000); - -BENCHMARK(avx512_qsort)->Arg(10000)->Arg(1000000); -BENCHMARK(stdsort)->Arg(10000)->Arg(1000000); -BENCHMARK(avx512_qsort)->Arg(10000)->Arg(1000000); -BENCHMARK(stdsort)->Arg(10000)->Arg(1000000); -BENCHMARK(avx512_qsort)->Arg(10000)->Arg(1000000); -BENCHMARK(stdsort)->Arg(10000)->Arg(1000000); +#define BENCH_ALL(type)\ + BENCH(avx512qsort, type)\ + BENCH(stdsort, type) -//BENCHMARK(avx512_qsort)->Arg(10000)->Arg(1000000); -BENCHMARK(avx512_qsort)->Arg(10000)->Arg(1000000); -BENCHMARK(stdsort)->Arg(10000)->Arg(1000000); -BENCHMARK(avx512_qsort)->Arg(10000)->Arg(1000000); -BENCHMARK(stdsort)->Arg(10000)->Arg(1000000); +BENCH_ALL(uint64_t) +BENCH_ALL(int64_t) +BENCH_ALL(uint32_t) +BENCH_ALL(int32_t) +BENCH_ALL(uint16_t) +BENCH_ALL(int16_t) +BENCH_ALL(float) +BENCH_ALL(double)