Skip to content

Commit

Permalink
bench: Add support for measuring CPU cycles
Browse files Browse the repository at this point in the history
This adds cycle min/max/avg to the statistics.

Supported on x86 and x86_64 (natively through rdtsc), as well as Linux
(perf syscall).
  • Loading branch information
laanwj committed Nov 22, 2016
1 parent 55b2edd commit 3532818
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 5 deletions.
4 changes: 3 additions & 1 deletion src/Makefile.bench.include
Expand Up @@ -22,7 +22,9 @@ bench_bench_bitcoin_SOURCES = \
bench/mempool_eviction.cpp \
bench/verify_script.cpp \
bench/base58.cpp \
bench/lockedpool.cpp
bench/lockedpool.cpp \
bench/perf.cpp \
bench/perf.h

nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_TEST_FILES)

Expand Down
22 changes: 20 additions & 2 deletions src/bench/bench.cpp
Expand Up @@ -3,6 +3,7 @@
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include "bench.h"
#include "perf.h"

#include <iostream>
#include <iomanip>
Expand All @@ -26,7 +27,9 @@ BenchRunner::BenchRunner(std::string name, BenchFunction func)
void
BenchRunner::RunAll(double elapsedTimeForOne)
{
std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "\n";
perf_init();
std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << ","
<< "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n";

for (std::map<std::string,BenchFunction>::iterator it = benchmarks.begin();
it != benchmarks.end(); ++it) {
Expand All @@ -35,6 +38,7 @@ BenchRunner::RunAll(double elapsedTimeForOne)
BenchFunction& func = it->second;
func(state);
}
perf_fini();
}

bool State::KeepRunning()
Expand All @@ -44,15 +48,24 @@ bool State::KeepRunning()
return true;
}
double now;
uint64_t nowCycles;
if (count == 0) {
lastTime = beginTime = now = gettimedouble();
lastCycles = beginCycles = nowCycles = perf_cpucycles();
}
else {
now = gettimedouble();
double elapsed = now - lastTime;
double elapsedOne = elapsed * countMaskInv;
if (elapsedOne < minTime) minTime = elapsedOne;
if (elapsedOne > maxTime) maxTime = elapsedOne;

// We only use relative values, so don't have to handle 64-bit wrap-around specially
nowCycles = perf_cpucycles();
uint64_t elapsedOneCycles = (nowCycles - lastCycles) * countMaskInv;
if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles;
if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles;

if (elapsed*128 < maxElapsed) {
// If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing.
// The restart avoids including the overhead of this code in the measurement.
Expand All @@ -61,6 +74,8 @@ bool State::KeepRunning()
count = 0;
minTime = std::numeric_limits<double>::max();
maxTime = std::numeric_limits<double>::min();
minCycles = std::numeric_limits<uint64_t>::max();
maxCycles = std::numeric_limits<uint64_t>::min();
return true;
}
if (elapsed*16 < maxElapsed) {
Expand All @@ -72,6 +87,7 @@ bool State::KeepRunning()
}
}
lastTime = now;
lastCycles = nowCycles;
++count;

if (now - beginTime < maxElapsed) return true; // Keep going
Expand All @@ -80,7 +96,9 @@ bool State::KeepRunning()

// Output results
double average = (now-beginTime)/count;
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "\n";
int64_t averageCycles = (nowCycles-beginCycles)/count;
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << ","
<< minCycles << "," << maxCycles << "," << averageCycles << "\n";

return false;
}
10 changes: 8 additions & 2 deletions src/bench/bench.h
Expand Up @@ -41,12 +41,18 @@ namespace benchmark {
double maxElapsed;
double beginTime;
double lastTime, minTime, maxTime, countMaskInv;
int64_t count;
int64_t countMask;
uint64_t count;
uint64_t countMask;
uint64_t beginCycles;
uint64_t lastCycles;
uint64_t minCycles;
uint64_t maxCycles;
public:
State(std::string _name, double _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) {
minTime = std::numeric_limits<double>::max();
maxTime = std::numeric_limits<double>::min();
minCycles = std::numeric_limits<uint64_t>::max();
maxCycles = std::numeric_limits<uint64_t>::min();
countMask = 1;
countMaskInv = 1./(countMask + 1);
}
Expand Down
53 changes: 53 additions & 0 deletions src/bench/perf.cpp
@@ -0,0 +1,53 @@
// Copyright (c) 2016 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include "perf.h"

#if defined(__i386__) || defined(__x86_64__)

/* These architectures support querying the cycle counter
* from user space, no need for any syscall overhead.
*/
/** Nothing to set up on x86: this implementation is intentionally empty. */
void perf_init(void)
{
}
/** Nothing to tear down on x86: this implementation is intentionally empty. */
void perf_fini(void)
{
}

#elif defined(__linux__)

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Descriptor returned by the perf_event_open syscall in perf_init();
 * -1 is the sentinel for "no descriptor" checked by perf_fini() and perf_cpucycles(). */
static int fd = -1;
/* Event description passed to perf_event_open; fields not assigned in
 * perf_init() keep their zero static initialization. */
static struct perf_event_attr attr;

/**
 * Open a hardware CPU-cycle counter through the perf_event_open syscall.
 *
 * On success the descriptor is stored in `fd`; on failure the syscall
 * returns -1, which is the same value used to mean "no descriptor".
 * A descriptor left over from an earlier call is closed first so that
 * calling this function more than once does not leak it.
 */
void perf_init(void)
{
    /* Release a counter opened by a previous call before replacing it. */
    if (fd != -1) {
        close(fd);
        fd = -1;
    }

    attr.type = PERF_TYPE_HARDWARE;
    /* perf_event_open(2) asks callers to record the structure size here
     * for forward/backward compatibility with the kernel. */
    attr.size = sizeof(attr);
    attr.config = PERF_COUNT_HW_CPU_CYCLES;

    /* pid = 0 (calling process), cpu = -1 (any CPU),
     * group_fd = -1 (no event group), flags = 0. */
    fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

/**
 * Close the descriptor opened by perf_init(), if there is one.
 *
 * `fd` is reset to -1 afterwards so that a later perf_cpucycles() call
 * returns 0 instead of reading from a closed (or since reused) descriptor,
 * and so that calling perf_fini() twice does not close the same number twice.
 */
void perf_fini(void)
{
    if (fd != -1) {
        close(fd);
        fd = -1;
    }
}

/**
 * Read the current value from the descriptor held in `fd`.
 *
 * @return the 64-bit value read, or 0 when no descriptor is open or when
 *         read() fails or delivers fewer bytes than a full uint64_t.
 */
uint64_t perf_cpucycles(void)
{
    /* No descriptor available: nothing to read. */
    if (fd == -1) {
        return 0;
    }

    uint64_t cycles = 0;
    const ssize_t wanted = (ssize_t)sizeof(cycles);
    const ssize_t got = read(fd, &cycles, sizeof(cycles));

    /* Errors (negative return) and short reads are both reported as 0. */
    return (got < wanted) ? 0 : cycles;
}

#else /* Unhandled platform */

/** Unhandled platform: there is no counter to set up, so this does nothing. */
void perf_init(void)
{
}
/** Unhandled platform: there is no counter to release, so this does nothing. */
void perf_fini(void)
{
}
/** Unhandled platform: no cycle counter is available, so always report 0. */
uint64_t perf_cpucycles(void)
{
    const uint64_t no_counter = 0;
    return no_counter;
}

#endif
37 changes: 37 additions & 0 deletions src/bench/perf.h
@@ -0,0 +1,37 @@
// Copyright (c) 2016 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

/** Functions for measurement of CPU cycles */
#ifndef H_PERF
#define H_PERF

#include <stdint.h>

#if defined(__i386__)

/**
 * Read the CPU cycle counter directly from user space (32-bit x86).
 *
 * @return the 64-bit counter value produced by the instruction below.
 */
static inline uint64_t perf_cpucycles(void)
{
    uint64_t tsc;
    /* The bytes 0x0f 0x31 encode RDTSC; on i386 the "=A" constraint binds
     * the 64-bit output to the EDX:EAX register pair the instruction fills. */
    __asm__ volatile (".byte 0x0f, 0x31" : "=A" (tsc));
    return tsc;
}

#elif defined(__x86_64__)

/**
 * Read the CPU cycle counter directly from user space (x86_64).
 *
 * @return the 64-bit counter value assembled from the two 32-bit halves.
 */
static inline uint64_t perf_cpucycles(void)
{
    uint32_t high_word, low_word;
    /* RDTSC leaves the low half in EAX ("=a") and the high half in EDX ("=d"). */
    __asm__ __volatile__ ("rdtsc" : "=a"(low_word), "=d"(high_word));

    /* Stitch the halves together: high word in bits 63..32, low word in bits 31..0. */
    uint64_t tsc = high_word;
    tsc <<= 32;
    tsc |= low_word;
    return tsc;
}
#else

/** Return the current CPU cycle count. On targets other than i386/x86_64
 *  this header provides no inline definition, only this declaration. */
uint64_t perf_cpucycles(void);

#endif

/** Prepare cycle measurement; declared for every platform. */
void perf_init(void);
/** Counterpart to perf_init(); declared for every platform. */
void perf_fini(void);

#endif // H_PERF

0 comments on commit 3532818

Please sign in to comment.