diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include index 246797a1b275e..e58bd9dfbf807 100644 --- a/src/Makefile.bench.include +++ b/src/Makefile.bench.include @@ -22,7 +22,9 @@ bench_bench_bitcoin_SOURCES = \ bench/mempool_eviction.cpp \ bench/verify_script.cpp \ bench/base58.cpp \ - bench/lockedpool.cpp + bench/lockedpool.cpp \ + bench/perf.cpp \ + bench/perf.h nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_TEST_FILES) diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp index 8942da8c74ac6..af3d152c9a4ce 100644 --- a/src/bench/bench.cpp +++ b/src/bench/bench.cpp @@ -3,6 +3,7 @@ // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "bench.h" +#include "perf.h" #include #include @@ -26,7 +27,9 @@ BenchRunner::BenchRunner(std::string name, BenchFunction func) void BenchRunner::RunAll(double elapsedTimeForOne) { - std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "\n"; + perf_init(); + std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "," + << "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n"; for (std::map::iterator it = benchmarks.begin(); it != benchmarks.end(); ++it) { @@ -35,6 +38,7 @@ BenchRunner::RunAll(double elapsedTimeForOne) BenchFunction& func = it->second; func(state); } + perf_fini(); } bool State::KeepRunning() @@ -44,8 +48,10 @@ bool State::KeepRunning() return true; } double now; + uint64_t nowCycles; if (count == 0) { lastTime = beginTime = now = gettimedouble(); + lastCycles = beginCycles = nowCycles = perf_cpucycles(); } else { now = gettimedouble(); @@ -53,6 +59,13 @@ bool State::KeepRunning() double elapsedOne = elapsed * countMaskInv; if (elapsedOne < minTime) minTime = elapsedOne; if (elapsedOne > maxTime) maxTime = elapsedOne; + + // We only use relative values, so don't have to handle 64-bit wrap-around specially + nowCycles = perf_cpucycles(); + uint64_t elapsedOneCycles = (nowCycles - lastCycles) * countMaskInv; + if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles; + if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles; + if (elapsed*128 < maxElapsed) { // If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing. // The restart avoids including the overhead of this code in the measurement. @@ -61,6 +74,8 @@ bool State::KeepRunning() count = 0; minTime = std::numeric_limits::max(); maxTime = std::numeric_limits::min(); + minCycles = std::numeric_limits::max(); + maxCycles = std::numeric_limits::min(); return true; } if (elapsed*16 < maxElapsed) { @@ -72,6 +87,7 @@ bool State::KeepRunning() } } lastTime = now; + lastCycles = nowCycles; ++count; if (now - beginTime < maxElapsed) return true; // Keep going @@ -80,7 +96,9 @@ bool State::KeepRunning() // Output results double average = (now-beginTime)/count; - std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "\n"; + int64_t averageCycles = (nowCycles-beginCycles)/count; + std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "," + << minCycles << "," << maxCycles << "," << averageCycles << "\n"; return false; } diff --git a/src/bench/bench.h b/src/bench/bench.h index f13b145aaf0c7..caf73e949b4f7 100644 --- a/src/bench/bench.h +++ b/src/bench/bench.h @@ -41,12 +41,18 @@ namespace benchmark { double maxElapsed; double beginTime; double lastTime, minTime, maxTime, countMaskInv; - int64_t count; - int64_t countMask; + uint64_t count; + uint64_t countMask; + uint64_t beginCycles; + uint64_t lastCycles; + uint64_t minCycles; + uint64_t maxCycles; public: State(std::string _name, double _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) { minTime = std::numeric_limits::max(); maxTime = std::numeric_limits::min(); + minCycles = std::numeric_limits::max(); + maxCycles = std::numeric_limits::min(); countMask = 1; countMaskInv = 1./(countMask + 1); } diff --git a/src/bench/perf.cpp b/src/bench/perf.cpp new file mode 100644 index 0000000000000..1f43e5d3ac6f1 --- /dev/null +++ b/src/bench/perf.cpp @@ -0,0 +1,53 @@ +// Copyright (c) 2016 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include "perf.h" + +#if defined(__i386__) || defined(__x86_64__) + +/* These architectures support quering the cycle counter + * from user space, no need for any syscall overhead. + */ +void perf_init(void) { } +void perf_fini(void) { } + +#elif defined(__linux__) + +#include +#include +#include + +static int fd = -1; +static struct perf_event_attr attr; + +void perf_init(void) +{ + attr.type = PERF_TYPE_HARDWARE; + attr.config = PERF_COUNT_HW_CPU_CYCLES; + fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); +} + +void perf_fini(void) +{ + if (fd != -1) { + close(fd); + } +} + +uint64_t perf_cpucycles(void) +{ + uint64_t result = 0; + if (fd == -1 || read(fd, &result, sizeof(result)) < (ssize_t)sizeof(result)) { + return 0; + } + return result; +} + +#else /* Unhandled platform */ + +void perf_init(void) { } +void perf_fini(void) { } +uint64_t perf_cpucycles(void) { return 0; } + +#endif diff --git a/src/bench/perf.h b/src/bench/perf.h new file mode 100644 index 0000000000000..681bd0c8a27e6 --- /dev/null +++ b/src/bench/perf.h @@ -0,0 +1,37 @@ +// Copyright (c) 2016 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +/** Functions for measurement of CPU cycles */ +#ifndef H_PERF +#define H_PERF + +#include + +#if defined(__i386__) + +static inline uint64_t perf_cpucycles(void) +{ + uint64_t x; + __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); + return x; +} + +#elif defined(__x86_64__) + +static inline uint64_t perf_cpucycles(void) +{ + uint32_t hi, lo; + __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); + return ((uint64_t)lo)|(((uint64_t)hi)<<32); +} +#else + +uint64_t perf_cpucycles(void); + +#endif + +void perf_init(void); +void perf_fini(void); + +#endif // H_PERF