Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 46 additions & 10 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,28 +1,64 @@
cmake_minimum_required (VERSION 2.6)
cmake_minimum_required (VERSION 3.8)

set(CMAKE_CXX_FLAGS "-std=c++17 -g3 -fno-unroll-loops")
project(ZooBenchmark)

set(CMAKE_BUILD_TYPE "Release")

project(ZooBenchmark)
set(CMAKE_CXX_FLAGS "-fno-unroll-loops")

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_BUILD_TYPE "RelWithDebInfo")

# For the optimized configurations, and only on GCC/Clang-family compilers,
# append -march=native and -fno-unroll-loops to the per-configuration flag
# variables. MATCHES is a regular-expression test, so any compiler id that
# merely contains "GNU" or "Clang" satisfies the inner condition.
if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo")
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
# Both variables are extended so the same options apply whichever of the
# two configurations is selected.
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -march=native -fno-unroll-loops")
string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -march=native -fno-unroll-loops")
endif()
endif()

# Macro to set properties for Xcode targets
macro(set_xcode_properties TARGET_NAME)
if(CMAKE_GENERATOR STREQUAL Xcode)
set_target_properties(${TARGET_NAME} PROPERTIES
XCODE_ATTRIBUTE_ENABLE_AVX YES
XCODE_ATTRIBUTE_ENABLE_AVX2 YES
XCODE_ATTRIBUTE_OTHER_CPLUSPLUSFLAGS "-mavx -mavx2"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will run this on my M1 machine later today, but I think you'll need another layer here, something like:

if (${TARGET_PLATFORM} == X86_64) 
    // enable avx
endif()

I will find the code I've used before, which does this only when AVX is actually detected on the machine, but I think this will need to be updated to work on ARM.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, doesn't Xcode on an M1 allow you to cross-compile transparently to x86-64?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see: you're assuming it will build the dual ARM + Intel binary?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be cool to try to keep the code architecture independent where possible; otherwise it could become a pain to go back and wrangle it into working in the future.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keeping code as arch independent as possible seems critical if our objectives are to work everywhere. Which I think they are.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, friends, architecture independence is a primary objective. But we have the freedom to draft and experiment with architecture-specific code.

XCODE_ATTRIBUTE_OTHER_CFLAGS "-mavx -mavx2"
)
endif()
endmacro()

include_directories(
../test/inc
../../benchmark/include
dependencies/google_benchmark/include
../inc
../junkyard/inc
../test/third_party/Catch2/single_include
)

# build google benchmark (target: benchmark)
# do not build tests of benchmarking lib
# set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Suppressing benchmark's tests" FORCE)
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Suppressing benchmark's tests" FORCE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Build type" FORCE)
add_subdirectory(dependencies/google_benchmark)

# add_subdirectory(dependencies/google_benchmark)
add_executable(
catch2Benchmark
catch2BenchmarkMain.cpp catch2Functions.cpp catch2swar-demo.cpp
atoi.cpp
egyptian.cpp
# RobinHood.benchmark.cpp
)
set_xcode_properties(catch2Benchmark)

add_executable(
zoo-google-benchmark benchmark_main.cpp cfs.cpp cfs/cfs_utility.cpp
)
set_xcode_properties(zoo-google-benchmark)

add_executable(
catch2Benchmark
catch2BenchmarkMain.cpp catch2Functions.cpp egyptian.cpp
RobinHood.benchmark.cpp
zoo-atoi-benchmark benchmark_main.cpp bm-swar.cpp atoi.cpp
)
set_xcode_properties(zoo-atoi-benchmark)

target_link_libraries(zoo-google-benchmark benchmark::benchmark)
target_link_libraries(zoo-atoi-benchmark benchmark::benchmark)
135 changes: 135 additions & 0 deletions benchmark/atoi-corpus.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#include "atoi.h"

#include <cstdio>
#include <cstring>
#include <random>
#include <string>
#include <utility>
#include <vector>

/// Corpus of random 8-digit decimal strings together with their values.
///
/// characters_ stores the entries back to back; every entry occupies 9 bytes:
/// 8 zero-padded decimal digits followed by the '\0' written by snprintf.
/// asNumbers_[i] is the integer whose text is entry i.
struct Corpus8DecimalDigits {
    std::vector<int> asNumbers_;
    std::string characters_;

    /// Takes ownership of the numbers and of their textual representation.
    Corpus8DecimalDigits(std::vector<int> aNs, std::string cs):
        asNumbers_(std::move(aNs)),
        characters_(std::move(cs))
    {}

    /// Builds a corpus of 1031 entries, each drawn from `generator` uniformly
    /// in [0, 99999999].
    /// \param generator a uniform random bit generator usable with
    ///        std::uniform_int_distribution
    template<typename G>
    static auto makeCorpus(G &generator) {
        // 1031 is a prime number, this helps to disable in practice the
        // branch predictor; the idea is to measure the performance of the
        // code under measurement, not how the unrealistic conditions of
        // microbenchmarking help/hurt the code under measurement
        constexpr auto EntryCount = 1031;
        constexpr auto BytesPerEntry = 9; // 8 digits + terminating '\0'

        std::string allCharacters;
        allCharacters.resize(EntryCount * BytesPerEntry);
        std::vector<int> inputs;
        inputs.reserve(EntryCount);
        std::uniform_int_distribution<> range(0, 100*1000*1000 - 1);

        char *base = allCharacters.data();
        // Every one of the EntryCount slots must be filled: the Iterator
        // below walks the whole character buffer and advances the number
        // pointer in lockstep with it.
        for(auto remaining = EntryCount; remaining--; base += BytesPerEntry) {
            auto input = range(generator);
            snprintf(base, BytesPerEntry, "%08d", input);
            inputs.push_back(input);
        }
        return Corpus8DecimalDigits(
            std::move(inputs), std::move(allCharacters)
        );
    }

    /// Cursor over the corpus; advances the number pointer and the text
    /// pointer together, one entry at a time.
    struct Iterator {
        Corpus8DecimalDigits *thy;
        int *ip;
        char *cp;

        /// Moves to the next entry (9 bytes further in the text).
        Iterator &operator++() {
            ++ip;
            cp += 9;
            return *this;
        }

        /// The NUL-terminated text of the current entry.
        char *operator*() {
            return cp;
        }

        /// Advances, then reports whether the new position is still before
        /// the end of the character buffer (i.e. may be dereferenced).
        auto next() noexcept {
            ++(*this);
            return cp != thy->characters_.data() + thy->characters_.size();
        }
    };

    /// Iterator positioned at the first entry.
    Iterator commence() {
        return { this, asNumbers_.data(), characters_.data() };
    }
};

// X-macro list of X(Label, function) pairs. It expands to nothing useful on
// its own: the user defines X before naming the list, and every entry is then
// expanded with that definition.
// NOTE(review): judging by the name, these are the functions to run against
// the 8-digit corpus; parse_eight_digits_swar and lemire_as_zoo_swar are
// expected to come from "atoi.h" -- confirm.
#define PARSE8BYTES_CORPUS_X_LIST \
X(Lemire, parse_eight_digits_swar)\
X(Zoo, lemire_as_zoo_swar)\
X(LIBC, atoi)

/// Corpus of random NUL-terminated strings of random length.
///
/// characters_ stores the strings back to back, each followed by its own
/// '\0'. skips_[i] is the length of string i, terminator NOT included, so
/// string i occupies skips_[i] + 1 bytes of characters_.
struct CorpusStringLength {
    std::vector<int> skips_;
    std::string characters_;

    /// Takes ownership of the lengths and of the concatenated strings.
    CorpusStringLength(std::vector<int> &&skips, std::string &&cs):
        skips_{std::move(skips)}, characters_{std::move(cs)}
    {}

    /// Builds a corpus of 1031 strings whose lengths are uniform in [0, 101]
    /// and whose bytes are uniform in [1, 255].
    /// \param generator a uniform random bit generator usable with
    ///        std::uniform_int_distribution
    template<typename G>
    static auto makeCorpus(G &generator) {
        // A prime count, so that the pattern of inputs does not line up with
        // the periodicities that microbenchmark loops offer to the branch
        // predictor.
        constexpr auto StringCount = 1031;
        std::vector<int> sizes;
        sizes.reserve(StringCount);
        std::string allCharacters;
        std::uniform_int_distribution<> strSize(0, 101); // again a prime
        std::uniform_int_distribution<> characters(1, 255); // notice 0 excluded

        for(auto remaining = StringCount; remaining--; ) {
            auto length = strSize(generator);
            sizes.push_back(length);
            for(auto i = length; i--; ) {
                // 0 is never generated, so the only NUL in each string is
                // the terminator appended below
                allCharacters.push_back(
                    static_cast<char>(characters(generator))
                );
            }
            allCharacters.push_back('\0');
        }
        return CorpusStringLength(std::move(sizes), std::move(allCharacters));
    }

    /// Cursor over the strings of the corpus.
    struct Iterator {
        int *skips, *sentinel;
        char *cp;

        /// Moves to the start of the next string: past the characters of
        /// the current one AND past its terminating '\0'.
        Iterator &operator++() {
            cp += *skips + 1;
            ++skips;
            return *this;
        }

        /// The current NUL-terminated string.
        char *operator*() {
            return cp;
        }

        /// Advances, then reports whether there is still a string at the
        /// new position (i.e. whether it may be dereferenced).
        auto next() noexcept {
            ++(*this);
            return sentinel != skips;
        }
    };

    /// Iterator positioned at the first string.
    Iterator commence() {
        return {
            skips_.data(), skips_.data() + skips_.size(), characters_.data()
        };
    }
};

// X-macro list of X(Label, function) pairs, expanded with whatever
// definition of X is active where the list is named.
// NOTE(review): judging by the names, these are the string-length
// implementations to run against CorpusStringLength; the zoo:: functions and
// STRLEN_old are not declared in this header and are expected to come from
// "atoi.h" -- confirm.
#define STRLEN_CORPUS_X_LIST \
X(LIBC_STRLEN, strlen) \
X(ZOO_STRLEN, zoo::c_strLength) \
X(ZOO_NATURAL_STRLEN, zoo::c_strLength_natural) \
X(ZOO_MANUAL_STRLEN, zoo::c_strLength_manualComparison) \
X(ZOO_AVX, zoo::avx2_strlen) \
X(GENERIC_GLIBC_STRLEN, STRLEN_old)

// Turns every X(Label, function) entry of the two lists into a functor type
// named Invoke<Label> whose call operator forwards a C string to `function`
// and returns the result converted to int (so a size_t result, as strlen
// produces, is narrowed).
#define X(Typename, FunctionToCall) \
struct Invoke##Typename { int operator()(const char *p) { return FunctionToCall(p); } };

// Instantiate the functors for both corpora.
PARSE8BYTES_CORPUS_X_LIST
STRLEN_CORPUS_X_LIST
// X is a deliberately short name; remove it so it cannot leak to includers.
#undef X
Loading