-
Notifications
You must be signed in to change notification settings - Fork 371
[Microbenchmarks] Add benchmark for conditional scalar assignment autovec #295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,222 @@ | ||
| #include <iostream> | ||
| #include <memory> | ||
| #include <random> | ||
|
|
||
| #include "benchmark/benchmark.h" | ||
|
|
||
// Number of elements in each benchmark array, and therefore the trip count of
// every kernel loop in this file.
#define ITERATIONS 100000

// Pointer type shared by all conditional-scalar-assignment (CSA) kernels:
// three element arrays plus a comparison threshold, returning the value
// produced by the kernel's conditional assignment(s).
template <typename ElemT>
using CSAFunc = ElemT (*)(ElemT *, ElemT *, ElemT *, ElemT);
|
|
||
| // Find the last element in A above the given threshold, | ||
| // with default loop vectorization settings. | ||
| template <typename T> | ||
| static T run_single_csa_only_autovec(T *A, T *B, T *C, T Threshold) { | ||
| // Pick a default value that's out of range of the uniform distribution | ||
| // created for 'A' in init_data below. | ||
| T Result = 101; | ||
| for (unsigned i = 0; i < ITERATIONS; i++) | ||
| if (A[i] > Threshold) | ||
| Result = A[i]; | ||
|
|
||
| return Result; | ||
| } | ||
|
|
||
| // Find the last element in A above the given threshold, | ||
| // with loop vectorization disabled. | ||
| template <typename T> | ||
| static T run_single_csa_only_novec(T *A, T *B, T *C, T Threshold) { | ||
| // Pick a default value that's out of range of the uniform distribution | ||
| // created for 'A' in init_data below. | ||
| T Result = 101; | ||
| #pragma clang loop vectorize(disable) interleave(disable) | ||
| for (unsigned i = 0; i < ITERATIONS; i++) | ||
| if (A[i] > Threshold) | ||
| Result = A[i]; | ||
|
|
||
| return Result; | ||
| } | ||
|
|
||
| // Find the last elements in A, B, and C above the given threshold, | ||
| // with default loop vectorization settings. | ||
| template <typename T> | ||
| static T run_multi_csa_only_autovec(T *A, T *B, T *C, T Threshold) { | ||
| // Pick a default value that's out of range of the uniform distribution | ||
| // created for 'A', 'B', and 'C' in init_data below. | ||
| T ResultA = 101; | ||
| T ResultB = 101; | ||
| T ResultC = 101; | ||
| for (unsigned i = 0; i < ITERATIONS; i++) { | ||
| if (A[i] > Threshold) | ||
| ResultA = A[i]; | ||
| if (B[i] > Threshold) | ||
| ResultB = B[i]; | ||
| if (C[i] > Threshold) | ||
| ResultC = C[i]; | ||
| } | ||
|
|
||
| return ResultA ^ ResultB ^ ResultC; | ||
| } | ||
|
|
||
| // Find the last elements in A, B, and C above the given threshold, | ||
| // with loop vectorization disabled. | ||
| template <typename T> | ||
| static T run_multi_csa_only_novec(T *A, T *B, T *C, T Threshold) { | ||
| // Pick a default value that's out of range of the uniform distribution | ||
| // created for 'A', 'B', and 'C' in init_data below. | ||
| T ResultA = 101; | ||
| T ResultB = 101; | ||
| T ResultC = 101; | ||
| #pragma clang loop vectorize(disable) interleave(disable) | ||
| for (unsigned i = 0; i < ITERATIONS; i++) { | ||
| if (A[i] > Threshold) | ||
| ResultA = A[i]; | ||
| if (B[i] > Threshold) | ||
| ResultB = B[i]; | ||
| if (C[i] > Threshold) | ||
| ResultC = C[i]; | ||
| } | ||
|
|
||
| return ResultA ^ ResultB ^ ResultC; | ||
| } | ||
|
|
||
| // Find the last element in A above the given threshold, | ||
| // with default loop vectorization settings. | ||
| template <typename T> | ||
| static T run_csa_with_arith_autovec(T *A, T *B, T *C, T Threshold) { | ||
| // Pick a default value that's out of range of the uniform distribution | ||
| // created for 'A' in init_data below. | ||
| T Result = 101; | ||
| for (unsigned i = 0; i < ITERATIONS; i++) { | ||
| // Do some work to make the difference noticeable | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could you add a few more variations, like the minimal case with just a CAS and multiple independent CAS?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
| C[i] = A[i] * 13 + B[i] * 5; | ||
| if (A[i] > Threshold) | ||
| Result = A[i]; | ||
| } | ||
|
|
||
| return Result; | ||
| } | ||
|
|
||
| // Find the last element in A above the given threshold, | ||
| // with loop vectorization disabled. | ||
| template <typename T> | ||
| static T run_csa_with_arith_novec(T *A, T *B, T* C, T Threshold) { | ||
| // Pick a default value that's out of range of the uniform distribution | ||
| // created for 'A' in init_data below. | ||
| T Result = 101; | ||
| #pragma clang loop vectorize(disable) interleave(disable) | ||
| for (unsigned i = 0; i < ITERATIONS; i++) { | ||
| // Do some work to make the difference noticeable | ||
| C[i] = A[i] * 13 + B[i] * 5; | ||
| if (A[i] > Threshold) | ||
| Result = A[i]; | ||
| } | ||
|
|
||
| return Result; | ||
| } | ||
|
|
||
| // Initialize arrays A, B, and C with random numbers | ||
| template <typename T> static void init_data(T *A, T* B, T *C) { | ||
| std::uniform_int_distribution<T> dist(0, 100); | ||
| std::mt19937 rng(12345); | ||
| for (unsigned i = 0; i < ITERATIONS; i++) { | ||
| A[i] = dist(rng); | ||
| B[i] = dist(rng); | ||
| C[i] = dist(rng); | ||
| } | ||
| } | ||
|
|
||
| // Benchmark auto-vectorized version. | ||
| template <typename T> | ||
| static void __attribute__((always_inline)) | ||
| benchmark_csa_autovec(benchmark::State &state, CSAFunc<T> VecFn, | ||
| CSAFunc<T> NoVecFn, T Threshold) { | ||
| std::unique_ptr<T[]> A(new T[ITERATIONS]); | ||
| std::unique_ptr<T[]> B(new T[ITERATIONS]); | ||
| std::unique_ptr<T[]> C(new T[ITERATIONS]); | ||
| init_data(&A[0], &B[0], &C[0]); | ||
|
|
||
| #ifdef BENCH_AND_VERIFY | ||
| // Verify the vectorized and un-vectorized versions produce the same results. | ||
| { | ||
| T VecRes = VecFn(&A[0], &B[0], &C[0], Threshold); | ||
| T NoVecRes = NoVecFn(&A[0], &B[0], &C[0], Threshold); | ||
| // We're only interested in whether the conditional assignment results | ||
| // were the same. | ||
| if (VecRes != NoVecRes) { | ||
| std::cerr << "ERROR: autovec result different to scalar result; " | ||
| << VecRes << " != " << NoVecRes << "\n"; | ||
| exit(1); | ||
| } | ||
| } | ||
| #endif | ||
|
|
||
| for (auto _ : state) { | ||
| VecFn(&A[0], &B[0], &C[0], Threshold); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure this is working as expected. I think we need something like below to make sure the CAS result is used: Without a use of the result, compiler is probably able completely remove the variantst that don't have stores in the loop and also remove the unused CAS chain after inlining? |
||
| benchmark::DoNotOptimize(A); | ||
| benchmark::DoNotOptimize(B); | ||
| benchmark::DoNotOptimize(C); | ||
| benchmark::ClobberMemory(); | ||
| } | ||
| } | ||
|
|
||
| // Benchmark version with vectorization disabled. | ||
| template <typename T> | ||
| static void __attribute__((always_inline)) | ||
| benchmark_csa_novec(benchmark::State &state, CSAFunc<T> NoVecFn, T Threshold) { | ||
| std::unique_ptr<T[]> A(new T[ITERATIONS]); | ||
| std::unique_ptr<T[]> B(new T[ITERATIONS]); | ||
| std::unique_ptr<T[]> C(new T[ITERATIONS]); | ||
| init_data(&A[0], &B[0], &C[0]); | ||
|
|
||
| for (auto _ : state) { | ||
| NoVecFn(&A[0], &B[0], &C[0], Threshold); | ||
| benchmark::DoNotOptimize(A); | ||
| benchmark::DoNotOptimize(B); | ||
| benchmark::DoNotOptimize(C); | ||
| } | ||
| } | ||
|
|
||
| // Add benchmarks with and without auto-vectorization | ||
| #define ADD_BENCHMARK(ty, Threshold) \ | ||
| void BENCHMARK_single_csa_only_autovec_##ty##_(benchmark::State &state) { \ | ||
| benchmark_csa_autovec<ty>(state, run_single_csa_only_autovec, \ | ||
| run_single_csa_only_novec, Threshold); \ | ||
| } \ | ||
| BENCHMARK(BENCHMARK_single_csa_only_autovec_##ty##_)->Unit( \ | ||
| benchmark::kNanosecond); \ | ||
| \ | ||
| void BENCHMARK_single_csa_only_novec_##ty##_(benchmark::State &state) { \ | ||
| benchmark_csa_novec<ty>(state, run_single_csa_only_novec, Threshold); \ | ||
| } \ | ||
| BENCHMARK(BENCHMARK_single_csa_only_novec_##ty##_)->Unit( \ | ||
| benchmark::kNanosecond); \ | ||
| void BENCHMARK_multi_csa_only_autovec_##ty##_(benchmark::State &state) { \ | ||
| benchmark_csa_autovec<ty>(state, run_multi_csa_only_autovec, \ | ||
| run_multi_csa_only_novec, Threshold); \ | ||
| } \ | ||
| BENCHMARK(BENCHMARK_multi_csa_only_autovec_##ty##_)->Unit( \ | ||
| benchmark::kNanosecond); \ | ||
| \ | ||
| void BENCHMARK_multi_csa_only_novec_##ty##_(benchmark::State &state) { \ | ||
| benchmark_csa_novec<ty>(state, run_multi_csa_only_novec, Threshold); \ | ||
| } \ | ||
| BENCHMARK(BENCHMARK_multi_csa_only_novec_##ty##_)->Unit( \ | ||
| benchmark::kNanosecond); \ | ||
| void BENCHMARK_csa_with_arith_autovec_##ty##_(benchmark::State &state) { \ | ||
| benchmark_csa_autovec<ty>(state, run_csa_with_arith_autovec, \ | ||
| run_csa_with_arith_novec, Threshold); \ | ||
| } \ | ||
| BENCHMARK(BENCHMARK_csa_with_arith_autovec_##ty##_)->Unit( \ | ||
| benchmark::kNanosecond); \ | ||
| \ | ||
| void BENCHMARK_csa_with_arith_novec_##ty##_(benchmark::State &state) { \ | ||
| benchmark_csa_novec<ty>(state, run_csa_with_arith_novec, Threshold); \ | ||
| } \ | ||
| BENCHMARK(BENCHMARK_csa_with_arith_novec_##ty##_)->Unit( \ | ||
| benchmark::kNanosecond); | ||
|
|
||
| ADD_BENCHMARK(int32_t, 75) | ||
| ADD_BENCHMARK(uint8_t, 90) | ||
| ADD_BENCHMARK(int64_t, 60) | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was going to comment about the license header, but it seems that's not done here (looking at other files).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I wondered about that too.