Skip to content

Commit

Permalink
benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
baruxu committed Jun 6, 2024
1 parent 90a8bbf commit 56f2857
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 78 deletions.
22 changes: 6 additions & 16 deletions benchmark/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,26 +85,17 @@ FIXED_64_FORCEINLINE void PreventOptimizedAway(fixed val)
}

// prevent statement reordering
template<class T>
#ifdef _MSC_VER
#pragma optimize("",off)
#elif defined(__clang__)
#pragma clang optimize off
__declspec(noinline)
#else
#pragma GCC push_options
#pragma GCC optimize ("O0")
__attribute__((noinline))
#endif
template<class T>
void run_test(T& a, T& b, std::function<void(T&, T&)>&& f)
{
f(a,b);
}
#ifdef _MSC_VER
#pragma optimize("",on)
#elif defined(__clang__)
#pragma clang optimize on
#else
#pragma GCC pop_options
#endif



#define RUN_TEST(EXPR1, EXPR2, COUNT, Min, Max) \
Expand All @@ -118,7 +109,6 @@ void run_test(T& a, T& b, std::function<void(T&, T&)>&& f)
Counter c(totals[1]);\
run_test<fixed>(operand.fa, operand.fb, [COUNT](auto& a, auto& b){ TEST_LOOP(EXPR1, EXPR2, COUNT) });\
}\
prevent_optimized_float += operand.a, prevent_optimized_fixed += operand.fa;\
}


Expand All @@ -143,7 +133,7 @@ struct TestGroup

~TestGroup()
{
printf("%16s[%6.1f, %6.1f]| %3.4lf ns | %3.4lf ns |\n",
printf("%16s|[%6.1f, %6.1f]| %3.4lf ns | %3.4lf ns |\n",
name.c_str(),(float)min, (float)max,
double(totals[1]) /count / num_batch
,double(totals[0]) / count / num_batch
Expand Down Expand Up @@ -185,7 +175,7 @@ auto benchmark = [](){
const uint64_t count1 = 0xffff'ff;
const uint64_t count2 = 0xffff'f;

printf(" arithmetic[ min, max]|fixed point| hard float|\n");
printf(" arithmetic|[ min, max]|fixed point| hard float|\n");

RUN_BASIC_TEST_GROUP("add/sub", +, -, 0xff, count1, -100, 100);

Expand Down
151 changes: 89 additions & 62 deletions benchmark/benchmark.md
Original file line number Diff line number Diff line change
@@ -1,69 +1,96 @@
## Intel Core i9-12900K 3.2GHz windows11 clang c++20
compiler options: -O2 c++20

|Arithmetic|Fixed64|Hardware Float|
|:-|-:|-:|
add/sub[-100.0, 100.0]|0.0038 ns|0.4295 ns|
mul[-100.0, 100.0]|2.6078 ns|0.9425 ns|
mul[ 0.0, 0.5]|2.9100 ns|0.9226 ns|
mul[ 0.5, 1.0]|2.6604 ns|27.1776 ns|
mul[ 1.0, 2.0]|2.6227 ns|0.8468 ns|
mul[ 2.0, 100.0]|2.6320 ns|0.8465 ns|
div[-100.0, 100.0]|1.2967 ns|2.7318 ns|
div[ 0.0, 0.5]|12.0082 ns|2.3092 ns|
div[ 0.5, 1.0]|11.9341 ns|2.3041 ns|
div[ 1.0, 2.0]|5.8836 ns|29.7369 ns|
div[ 2.0, 100.0]|1.1417 ns|2.2860 ns|
ceil[ -2.0, 2.0]|0.3916 ns|1.9973 ns|
floor[ -2.0, 2.0]|0.3870 ns|1.7243 ns|
round[ -2.0, 2.0]|0.6668 ns|6.5249 ns|
abs[ -2.0, 2.0]|0.0538 ns|0.4180 ns|
exp[ 0.0, 1.0]|3.6085 ns|2.1249 ns|
exp2[ 0.0, 1.0]|2.7666 ns|30.3101 ns|
sqrt[ 0.0, 100.0]|13.7207 ns|1.6524 ns|
sin[ -10.0, 10.0]|1.0053 ns|3.6804 ns|
cos[ -10.0, 10.0]|1.3183 ns|3.4292 ns|
tan[ -10.0, 10.0]|7.3048 ns|3.5210 ns|
asin[ -1.0, 1.0]|23.3574 ns|2.7947 ns|
acos[ -1.0, 1.0]|22.9430 ns|2.8789 ns|
atan[ 1.0, 100.0]|6.9723 ns|3.0697 ns|
atan[ 1.0, 100.0]|6.9567 ns|3.4708 ns|
## i9-12900K Win11 clang

|Arithmetic|Range|Fixed64|Hardware Float|
|:-|:-|-:|-:|
add/sub|[-100.0, 100.0]| 0.0036 ns | 0.4204 ns |
mul|[-100.0, 100.0]| 2.6165 ns | 0.9398 ns |
mul|[ 0.0, 0.5]| 2.5972 ns | 0.8383 ns |
mul|[ 0.5, 1.0]| 2.6114 ns | 26.4593 ns |
mul|[ 1.0, 2.0]| 2.5969 ns | 0.8388 ns |
mul|[ 2.0, 100.0]| 2.6048 ns | 0.8352 ns |
div|[-100.0, 100.0]| 1.4431 ns | 2.4927 ns |
div|[ 0.0, 0.5]| 11.9117 ns | 2.2843 ns |
div|[ 0.5, 1.0]| 11.8804 ns | 2.2909 ns |
div|[ 1.0, 2.0]| 5.8355 ns | 29.3308 ns |
div|[ 2.0, 100.0]| 1.4095 ns | 2.2736 ns |
ceil|[ -2.0, 2.0]| 0.3901 ns | 1.9689 ns |
floor|[ -2.0, 2.0]| 0.3854 ns | 1.7478 ns |
round|[ -2.0, 2.0]| 0.6602 ns | 6.5413 ns |
abs|[ -2.0, 2.0]| 0.0520 ns | 0.4146 ns |
exp|[ 0.0, 1.0]| 3.5605 ns | 2.1270 ns |
exp2|[ 0.0, 1.0]| 2.9802 ns | 29.8439 ns |
sqrt|[ 0.0, 100.0]| 14.2020 ns | 1.6710 ns |
sin|[ -10.0, 10.0]| 0.9885 ns | 3.8397 ns |
cos|[ -10.0, 10.0]| 1.3271 ns | 3.3826 ns |
tan|[ -10.0, 10.0]| 7.3295 ns | 3.7090 ns |
asin|[ -1.0, 1.0]| 23.2946 ns | 2.6165 ns |
acos|[ -1.0, 1.0]| 22.5743 ns | 2.9936 ns |
atan|[ 1.0, 100.0]| 6.9854 ns | 2.9925 ns |
atan|[ 1.0, 100.0]| 6.9296 ns | 2.9368 ns |

## Intel Core i9-12900K 3.2GHz windows11 MSVC c++20

|Arithmetic|Fixed64|Hardware Float|
|:-|-:|-:|
add/sub[-100.0, 100.0]| 0.2107 ns | 0.4187 ns |
mul[-100.0, 100.0]| 2.7356 ns | 0.8556 ns |
mul[ 0.0, 0.5]| 2.6802 ns | 0.9478 ns |
mul[ 0.5, 1.0]| 2.6930 ns | 0.8574 ns |
mul[ 1.0, 2.0]| 2.6725 ns | 0.8455 ns |
mul[ 2.0, 100.0]| 2.6829 ns | 0.8512 ns |
div[-100.0, 100.0]| 1.6081 ns | 3.1777 ns |
div[ 0.0, 0.5]| 13.7667 ns | 28.6761 ns |
div[ 0.5, 1.0]| 13.7861 ns | 29.7010 ns |
div[ 1.0, 2.0]| 6.2241 ns | 29.6876 ns |
div[ 2.0, 100.0]| 1.5079 ns | 2.3074 ns |
ceil[ -2.0, 2.0]| 0.4330 ns | 2.0436 ns |
floor[ -2.0, 2.0]| 0.4341 ns | 2.0402 ns |
round[ -2.0, 2.0]| 2.0854 ns | 6.4961 ns |
abs[ -2.0, 2.0]| 0.2149 ns | 0.4175 ns |
exp[ 0.0, 1.0]| 10.8654 ns | 2.0519 ns |
exp2[ 0.0, 1.0]| 9.4355 ns | 30.2611 ns |
sqrt[ 0.0, 100.0]| 10.9269 ns | 0.6271 ns |
sin[ -10.0, 10.0]| 2.3128 ns | 3.9287 ns |
cos[ -10.0, 10.0]| 2.1791 ns | 3.6846 ns |
tan[ -10.0, 10.0]| 12.0050 ns | 3.6412 ns |
asin[ -1.0, 1.0]| 19.5066 ns | 2.7914 ns |
acos[ -1.0, 1.0]| 20.2079 ns | 2.9695 ns |
atan[ 1.0, 100.0]| 17.5488 ns | 3.1872 ns |
atan[ 1.0, 100.0]| 17.1658 ns | 3.0822 ns |
## i9-12900K Win11 MSVC

|Arithmetic|Range|Fixed64|Hardware Float|
|:-|:-|-:|-:|
add/sub|[-100.0, 100.0]| 0.2122 ns | 0.4275 ns |
mul|[-100.0, 100.0]| 2.6961 ns | 0.8426 ns |
mul|[ 0.0, 0.5]| 2.6816 ns | 1.4571 ns |
mul|[ 0.5, 1.0]| 2.6845 ns | 0.8434 ns |
mul|[ 1.0, 2.0]| 2.6695 ns | 0.8452 ns |
mul|[ 2.0, 100.0]| 2.6759 ns | 0.8479 ns |
div|[-100.0, 100.0]| 1.4308 ns | 2.9633 ns |
div|[ 0.0, 0.5]| 13.7768 ns | 29.2503 ns |
div|[ 0.5, 1.0]| 13.7641 ns | 29.8869 ns |
div|[ 1.0, 2.0]| 6.2073 ns | 29.6291 ns |
div|[ 2.0, 100.0]| 1.4783 ns | 2.2897 ns |
ceil|[ -2.0, 2.0]| 0.4355 ns | 2.0431 ns |
floor|[ -2.0, 2.0]| 0.4344 ns | 2.0569 ns |
round|[ -2.0, 2.0]| 2.0903 ns | 6.5360 ns |
abs|[ -2.0, 2.0]| 0.2166 ns | 0.4172 ns |
exp|[ 0.0, 1.0]| 10.9680 ns | 2.0647 ns |
exp2|[ 0.0, 1.0]| 9.6807 ns | 30.6651 ns |
sqrt|[ 0.0, 100.0]| 11.6761 ns | 0.6751 ns |
sin|[ -10.0, 10.0]| 2.2340 ns | 3.6828 ns |
cos|[ -10.0, 10.0]| 2.1662 ns | 3.6559 ns |
tan|[ -10.0, 10.0]| 12.2920 ns | 3.5168 ns |
asin|[ -1.0, 1.0]| 19.4584 ns | 2.8595 ns |
acos|[ -1.0, 1.0]| 20.0974 ns | 2.9194 ns |
atan|[ 1.0, 100.0]| 17.3275 ns | 3.1964 ns |
atan|[ 1.0, 100.0]| 17.4871 ns | 3.2199 ns |


## M1 Pro Mac clang

|Arithmetic|Range|Fixed64|Hardware Float|
|:-|:-|-:|-:|
add/sub|[-100.0, 100.0]| 0.3151 ns | 0.9438 ns |
mul|[-100.0, 100.0]| 4.1129 ns | 1.2485 ns |
mul|[ 0.0, 0.5]| 4.1266 ns | 1.2507 ns |
mul|[ 0.5, 1.0]| 4.1177 ns | 1.2495 ns |
mul|[ 1.0, 2.0]| 4.1753 ns | 1.2643 ns |
mul|[ 2.0, 100.0]| 4.2052 ns | 1.2736 ns |
div|[-100.0, 100.0]| 1.7595 ns | 2.6020 ns |
div|[ 0.0, 0.5]| 12.1721 ns | 2.5135 ns |
div|[ 0.5, 1.0]| 12.3408 ns | 2.5517 ns |
div|[ 1.0, 2.0]| 6.5035 ns | 2.5493 ns |
div|[ 2.0, 100.0]| 1.7344 ns | 2.6845 ns |
ceil|[ -2.0, 2.0]| 0.5135 ns | 0.9816 ns |
floor|[ -2.0, 2.0]| 0.5155 ns | 0.9914 ns |
round|[ -2.0, 2.0]| 0.8635 ns | 0.9871 ns |
abs|[ -2.0, 2.0]| 0.3289 ns | 0.9859 ns |
exp|[ 0.0, 1.0]| 11.0078 ns | 2.2297 ns |
exp2|[ 0.0, 1.0]| 3.7830 ns | 2.1083 ns |
sqrt|[ 0.0, 100.0]| 20.1100 ns | 0.9805 ns |
sin|[ -10.0, 10.0]| 1.3006 ns | 2.2373 ns |
cos|[ -10.0, 10.0]| 1.2941 ns | 2.3953 ns |
tan|[ -10.0, 10.0]| 7.3871 ns | 2.7365 ns |
asin|[ -1.0, 1.0]| 36.1667 ns | 1.8799 ns |
acos|[ -1.0, 1.0]| 37.1528 ns | 1.9013 ns |
atan|[ 1.0, 100.0]| 8.2928 ns | 2.4087 ns |
atan|[ 1.0, 100.0]| 8.2867 ns | 2.4150 ns |


## Apple M1 Pro

|Arithmetic|Fixed64|Hardware Float|
|-|-:|-:|
|Addition/Subtraction|0.215 ns|0.953 ns|
|Multiplication|4.057 ns|1.246 ns|
|Division|1.102 ns|3.144 ns|

0 comments on commit 56f2857

Please sign in to comment.