-
Notifications
You must be signed in to change notification settings - Fork 12
Demos of the SWAR library #69
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
7b1fda3
VSCode interaction
2270ac7
Builds with update
5060743
Zooification of Lemire 8 bytes
dc03288
Benchmarks operative
thecppzoo fd782f3
Implementations tested & benchmarked with Catch2 too; refactoring of …
thecppzoo 7a8d1d6
Draft of strlen
51db77c
Strlen implemented
thecppzoo 616fedd
Improvement in strlen, SIMD strlen, improved CMake to support AVX in …
thecppzoo File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,28 +1,64 @@ | ||
| cmake_minimum_required (VERSION 2.6) | ||
| cmake_minimum_required (VERSION 3.8) | ||
|
|
||
| set(CMAKE_CXX_FLAGS "-std=c++17 -g3 -fno-unroll-loops") | ||
| project(ZooBenchmark) | ||
|
|
||
| set(CMAKE_BUILD_TYPE "Release") | ||
|
|
||
| project(ZooBenchmark) | ||
| set(CMAKE_CXX_FLAGS "-fno-unroll-loops") | ||
|
|
||
| set(CMAKE_CXX_STANDARD 17) | ||
| set(CMAKE_BUILD_TYPE "RelWithDebInfo") | ||
|
|
||
| if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo") | ||
| if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") | ||
| string(APPEND CMAKE_CXX_FLAGS_RELEASE " -march=native -fno-unroll-loops") | ||
| string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -march=native -fno-unroll-loops") | ||
| endif() | ||
| endif() | ||
|
|
||
| # Macro to set properties for Xcode targets | ||
| macro(set_xcode_properties TARGET_NAME) | ||
| if(CMAKE_GENERATOR STREQUAL Xcode) | ||
| set_target_properties(${TARGET_NAME} PROPERTIES | ||
| XCODE_ATTRIBUTE_ENABLE_AVX YES | ||
| XCODE_ATTRIBUTE_ENABLE_AVX2 YES | ||
| XCODE_ATTRIBUTE_OTHER_CPLUSPLUSFLAGS "-mavx -mavx2" | ||
| XCODE_ATTRIBUTE_OTHER_CFLAGS "-mavx -mavx2" | ||
| ) | ||
| endif() | ||
| endmacro() | ||
|
|
||
| include_directories( | ||
| ../test/inc | ||
| ../../benchmark/include | ||
| dependencies/google_benchmark/include | ||
| ../inc | ||
| ../junkyard/inc | ||
| ../test/third_party/Catch2/single_include | ||
| ) | ||
|
|
||
| # build google benchmark (target: benchmark) | ||
| # do not build tests of benchmarking lib | ||
| # set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Suppressing benchmark's tests" FORCE) | ||
| set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Suppressing benchmark's tests" FORCE) | ||
| set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Build type" FORCE) | ||
| add_subdirectory(dependencies/google_benchmark) | ||
|
|
||
| # add_subdirectory(dependencies/google_benchmark) | ||
| add_executable( | ||
| catch2Benchmark | ||
| catch2BenchmarkMain.cpp catch2Functions.cpp catch2swar-demo.cpp | ||
| atoi.cpp | ||
| egyptian.cpp | ||
| # RobinHood.benchmark.cpp | ||
| ) | ||
| set_xcode_properties(catch2Benchmark) | ||
|
|
||
| add_executable( | ||
| zoo-google-benchmark benchmark_main.cpp cfs.cpp cfs/cfs_utility.cpp | ||
| ) | ||
| set_xcode_properties(zoo-google-benchmark) | ||
|
|
||
| add_executable( | ||
| catch2Benchmark | ||
| catch2BenchmarkMain.cpp catch2Functions.cpp egyptian.cpp | ||
| RobinHood.benchmark.cpp | ||
| zoo-atoi-benchmark benchmark_main.cpp bm-swar.cpp atoi.cpp | ||
| ) | ||
| set_xcode_properties(zoo-atoi-benchmark) | ||
|
|
||
| target_link_libraries(zoo-google-benchmark benchmark::benchmark) | ||
| target_link_libraries(zoo-atoi-benchmark benchmark::benchmark) | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
#include "atoi.h"

#include <cstdio>
#include <cstring>
#include <random>
#include <string>
#include <utility>
#include <vector>
|
|
||
/// Benchmark corpus of fixed-width decimal numbers.
///
/// The same random integers, each in [0, 99'999'999], are stored twice: as
/// ints in asNumbers_ and as zero-padded, 8-digit, NUL-terminated strings laid
/// out back to back in characters_ (kEntryStride bytes per entry).
struct Corpus8DecimalDigits {
    /// Number of entries produced by makeCorpus.  1031 is a prime number; this
    /// helps to disable, in practice, the branch predictor.  The idea is to
    /// measure the performance of the code under measurement, not how the
    /// unrealistic conditions of microbenchmarking help or hurt it.
    static constexpr int kEntryCount = 1031;

    /// Bytes occupied by one textual entry: 8 digits plus the NUL terminator.
    static constexpr int kEntryStride = 9;

    std::vector<int> asNumbers_;
    std::string characters_;

    /// Takes ownership of the numeric values and of their textual form.
    Corpus8DecimalDigits(std::vector<int> aNs, std::string cs):
        asNumbers_(std::move(aNs)),
        characters_(std::move(cs))
    {}

    /// Builds a corpus of kEntryCount entries drawn from `generator`.
    ///
    /// @param generator uniform random bit generator; it is advanced once per
    ///        entry through a std::uniform_int_distribution.
    /// @return the populated corpus.
    template<typename G>
    static auto makeCorpus(G &generator) {
        // Pre-size the text buffer so every entry can be printed in place.
        std::string allCharacters(kEntryCount * kEntryStride, '\0');
        std::vector<int> inputs;
        inputs.reserve(kEntryCount);
        std::uniform_int_distribution<> range(0, 100*1000*1000 - 1);

        char *base = allCharacters.data();
        // The original loop tested `if(--count) break;`, which left after the
        // first entry; every one of the kEntryCount slots must be filled.
        for(int entry = 0; entry < kEntryCount; ++entry, base += kEntryStride) {
            const auto input = range(generator);
            // "%08d" writes exactly 8 digits for this range, plus the NUL.
            std::snprintf(base, kEntryStride, "%08d", input);
            inputs.push_back(input);
        }
        return Corpus8DecimalDigits(std::move(inputs), std::move(allCharacters));
    }

    /// Cursor over the corpus entries.
    ///
    /// Intended use: read the current entry with operator*, then call next(),
    /// which advances and reports whether another entry is available.
    struct Iterator {
        Corpus8DecimalDigits *thy; ///< corpus being traversed
        int *ip;                   ///< numeric form of the current entry
        char *cp;                  ///< textual form of the current entry

        /// Moves both cursors to the following entry.
        Iterator &operator++() {
            ++ip;
            cp += kEntryStride;
            return *this;
        }

        /// @return pointer to the NUL-terminated text of the current entry.
        char *operator*() {
            return cp;
        }

        /// Advances, then tells whether the cursor is still inside the text
        /// buffer (false once it reaches the end of characters_).
        auto next() noexcept {
            ++(*this);
            return cp != thy->characters_.data() + thy->characters_.size();
        }
    };

    /// @return an iterator positioned at the first entry of this corpus.
    Iterator commence() {
        return { this, asNumbers_.data(), characters_.data() };
    }
};

// X-list of the 8-digit parsers benchmarked against Corpus8DecimalDigits:
// each entry is X(name used to build the functor type, function to call).
#define PARSE8BYTES_CORPUS_X_LIST \
    X(Lemire, parse_eight_digits_swar)\
    X(Zoo, lemire_as_zoo_swar)\
    X(LIBC, atoi)
|
|
||
/// Benchmark corpus of NUL-terminated strings of random length.
///
/// characters_ holds the strings back to back, each followed by its NUL
/// terminator; skips_ holds the length of each string (terminator excluded),
/// in the same order.
struct CorpusStringLength {
    std::vector<int> skips_;
    std::string characters_;

    /// Takes ownership of the per-string lengths and of the character buffer.
    CorpusStringLength(std::vector<int> &&skips, std::string &&cs):
        skips_{std::move(skips)}, characters_{std::move(cs)}
    {}

    /// Builds a corpus of 1031 strings drawn from `generator`.
    ///
    /// @param generator uniform random bit generator; for every string it is
    ///        used first for the length, then once per character.
    /// @return the populated corpus.
    template<typename G>
    static auto makeCorpus(G &generator) {
        constexpr int stringCount = 1031; // see Corpus8DecimalDigits for why 1031
        std::vector<int> sizes;
        sizes.reserve(stringCount);
        std::string allCharacters;
        std::uniform_int_distribution<> strSize(0, 101); // again a prime
        std::uniform_int_distribution<> characters(1, 255); // notice 0 excluded

        for(int remaining = stringCount; remaining--; ) {
            const auto length = strSize(generator);
            sizes.push_back(length);
            for(auto i = length; i--; ) {
                // 0 is never drawn, so the only NUL is the terminator below.
                allCharacters.push_back(static_cast<char>(characters(generator)));
            }
            allCharacters.push_back('\0');
        }
        return CorpusStringLength(std::move(sizes), std::move(allCharacters));
    }

    /// Cursor over the strings of the corpus.
    ///
    /// Intended use: read the current string with operator*, then call
    /// next(), which advances and reports whether another string is available.
    struct Iterator {
        int *skips, *sentinel; ///< current length entry / one past the last
        char *cp;              ///< start of the current string

        /// Moves to the start of the following string.
        Iterator &operator++() {
            // Step over the characters of the current string AND its NUL
            // terminator; advancing by the length alone would leave cp on the
            // terminator instead of on the next string.
            cp += *skips++ + 1;
            return *this;
        }

        /// @return pointer to the NUL-terminated current string.
        char *operator*() {
            return cp;
        }

        /// Advances, then tells whether length entries remain.
        auto next() noexcept {
            ++(*this);
            return sentinel != skips;
        }
    };

    /// @return an iterator positioned at the first string of this corpus.
    Iterator commence() {
        return {
            skips_.data(), skips_.data() + skips_.size(), characters_.data()
        };
    }
};

// X-list of the string-length implementations benchmarked against
// CorpusStringLength: each entry is X(name used to build the functor type,
// function to call).
#define STRLEN_CORPUS_X_LIST \
    X(LIBC_STRLEN, strlen) \
    X(ZOO_STRLEN, zoo::c_strLength) \
    X(ZOO_NATURAL_STRLEN, zoo::c_strLength_natural) \
    X(ZOO_MANUAL_STRLEN, zoo::c_strLength_manualComparison) \
    X(ZOO_AVX, zoo::avx2_strlen) \
    X(GENERIC_GLIBC_STRLEN, STRLEN_old)
|
|
||
| #define X(Typename, FunctionToCall) \ | ||
| struct Invoke##Typename { int operator()(const char *p) { return FunctionToCall(p); } }; | ||
|
|
||
| PARSE8BYTES_CORPUS_X_LIST | ||
| STRLEN_CORPUS_X_LIST | ||
| #undef X | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will run this on my M1 machine later today, but I think you'll need another layer here of […] (the code snippet is missing from this capture).
I will find the code I've used before, which does this only when AVX is actually detected on the machine, but I think it will need to be updated to work on ARM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wait, doesn't Xcode on M1 allow you to cross-compile transparently to x86-64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I see: you're assuming it will make the dual ARM+Intel binary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would be cool to try to keep the code (where possible) architecture-independent; otherwise it could become a pain to go back and wrangle it to work in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Keeping the code as architecture-independent as possible seems critical if our objective is to work everywhere, which I think it is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, friends, architecture independence is a primary objective. But we have the freedom to draft and experiment with architecture-specific code.