43 changes: 14 additions & 29 deletions third-party/benchmark/src/complexity.cc
@@ -37,14 +37,12 @@ BigOFunc* FittingCurve(BigO complexity) {
return [](IterationCount n) -> double { return std::pow(n, 3); };
case oLogN:
/* Note: can't use log2 because Android's GNU STL lacks it */
return [](IterationCount n) {
return kLog2E * std::log(static_cast<double>(n));
};
return
[](IterationCount n) { return kLog2E * log(static_cast<double>(n)); };
case oNLogN:
/* Note: can't use log2 because Android's GNU STL lacks it */
return [](IterationCount n) {
return kLog2E * static_cast<double>(n) *
std::log(static_cast<double>(n));
return kLog2E * n * log(static_cast<double>(n));
};
case o1:
default:
@@ -77,12 +75,12 @@ std::string GetBigOString(BigO complexity) {
// given by the lambda expression.
// - n : Vector containing the size of the benchmark tests.
// - time : Vector containing the times for the benchmark tests.
// - fitting_curve : lambda expression (e.g. [](ComplexityN n) {return n; };).
// - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };).

// For a deeper explanation on the algorithm logic, please refer to
// https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics

LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
const std::vector<double>& time,
BigOFunc* fitting_curve) {
double sigma_gn_squared = 0.0;
@@ -107,12 +105,12 @@ LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
double rms = 0.0;
for (size_t i = 0; i < n.size(); ++i) {
double fit = result.coef * fitting_curve(n[i]);
rms += std::pow((time[i] - fit), 2);
rms += pow((time[i] - fit), 2);
}

// Normalized RMS by the mean of the observed values
double mean = sigma_time / static_cast<double>(n.size());
result.rms = std::sqrt(rms / static_cast<double>(n.size())) / mean;
double mean = sigma_time / n.size();
result.rms = sqrt(rms / n.size()) / mean;

return result;
}
@@ -124,7 +122,7 @@ LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
// - complexity : If different than oAuto, the fitting curve will stick to
// this one. If it is oAuto, it will be calculated the best
// fitting curve.
LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
const std::vector<double>& time, const BigO complexity) {
BM_CHECK_EQ(n.size(), time.size());
BM_CHECK_GE(n.size(), 2);  // Do not compute fitting curve if less than two
@@ -164,7 +162,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
if (reports.size() < 2) return results;

// Accumulators.
std::vector<ComplexityN> n;
std::vector<int64_t> n;
std::vector<double> real_time;
std::vector<double> cpu_time;

@@ -173,10 +171,8 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
BM_CHECK_GT(run.complexity_n, 0)
<< "Did you forget to call SetComplexityN?";
n.push_back(run.complexity_n);
real_time.push_back(run.real_accumulated_time /
static_cast<double>(run.iterations));
cpu_time.push_back(run.cpu_accumulated_time /
static_cast<double>(run.iterations));
real_time.push_back(run.real_accumulated_time / run.iterations);
cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
}

LeastSq result_cpu;
@@ -186,19 +182,8 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
} else {
const BigO* InitialBigO = &reports[0].complexity;
const bool use_real_time_for_initial_big_o =
reports[0].use_real_time_for_initial_big_o;
if (use_real_time_for_initial_big_o) {
result_real = MinimalLeastSq(n, real_time, *InitialBigO);
InitialBigO = &result_real.complexity;
// The Big-O complexity for CPU time must have the same Big-O function!
}
result_cpu = MinimalLeastSq(n, cpu_time, *InitialBigO);
InitialBigO = &result_cpu.complexity;
if (!use_real_time_for_initial_big_o) {
result_real = MinimalLeastSq(n, real_time, *InitialBigO);
}
result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
}

// Drop the 'args' when reporting complexity.
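For reference, both the removed and the restored MinimalLeastSq compute the same one-parameter least-squares fit: for a fixed curve g(n), the coefficient minimizing the squared error is coef = Σ g(n_i)·t_i / Σ g(n_i)². A minimal sketch of that computation (illustrative names, not the library's API):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Best-fit coefficient for time ~= coef * g(n), in the least-squares sense.
double FitCoef(const std::vector<int64_t>& n, const std::vector<double>& time,
               double (*g)(int64_t)) {
  double sigma_gn_squared = 0.0;
  double sigma_gt = 0.0;
  for (std::size_t i = 0; i < n.size(); ++i) {
    sigma_gn_squared += g(n[i]) * g(n[i]);
    sigma_gt += g(n[i]) * time[i];
  }
  // Setting d/dcoef of sum((time_i - coef*g(n_i))^2) to zero gives:
  return sigma_gt / sigma_gn_squared;
}
```

The normalized RMS reported alongside it is sqrt(Σ(t_i − coef·g(n_i))²/N) divided by the mean time, as visible in both versions of the hunk above.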
2 changes: 1 addition & 1 deletion third-party/benchmark/src/complexity.h
@@ -31,7 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports);

// This data structure will contain the result returned by MinimalLeastSq
// - coef : Estimated coefficient for the high-order term as
// - coef : Estimated coeficient for the high-order term as
// interpolated from data.
// - rms : Normalized Root Mean Squared Error.
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability
34 changes: 7 additions & 27 deletions third-party/benchmark/src/console_reporter.cc
@@ -33,7 +33,6 @@

namespace benchmark {

BENCHMARK_EXPORT
bool ConsoleReporter::ReportContext(const Context& context) {
name_field_width_ = context.name_field_width;
printed_header_ = false;
@@ -42,22 +41,17 @@ bool ConsoleReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);

#ifdef BENCHMARK_OS_WINDOWS
if ((output_options_ & OO_Color)) {
auto stdOutBuf = std::cout.rdbuf();
auto outStreamBuf = GetOutputStream().rdbuf();
if (stdOutBuf != outStreamBuf) {
GetErrorStream()
<< "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color);
}
if ((output_options_ & OO_Color) && &std::cout != &GetOutputStream()) {
GetErrorStream()
<< "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color);
}
#endif

return true;
}

BENCHMARK_EXPORT
void ConsoleReporter::PrintHeader(const Run& run) {
std::string str =
FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
@@ -75,7 +69,6 @@ void ConsoleReporter::PrintHeader(const Run& run) {
GetOutputStream() << line << "\n" << str << "\n" << line << "\n";
}

BENCHMARK_EXPORT
void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
for (const auto& run : reports) {
// print the header:
@@ -106,9 +99,6 @@ static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt,
}

static std::string FormatTime(double time) {
// For the time columns of the console printer 13 digits are reserved. One of
// them is a space and max two of them are the time unit (e.g. ns). That puts
// us at 10 digits usable for the number.
// Align decimal places...
if (time < 1.0) {
return FormatString("%10.3f", time);
@@ -119,15 +109,9 @@ static std::string FormatTime(double time) {
if (time < 100.0) {
return FormatString("%10.1f", time);
}
// Assuming the time is at max 9.9999e+99 and we have 10 digits for the
// number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print.
if (time > 9999999999 /*max 10 digit number*/) {
return FormatString("%1.4e", time);
}
return FormatString("%10.0f", time);
}

BENCHMARK_EXPORT
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
@@ -139,13 +123,9 @@ void ConsoleReporter::PrintRunData(const Run& result) {
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name().c_str());

if (internal::SkippedWithError == result.skipped) {
if (result.error_occurred) {
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
} else if (internal::SkippedWithMessage == result.skipped) {
printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
result.error_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
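The deleted comments above documented the console layout assumption FormatTime relies on: 13 characters per time column, of which one is a space and up to two are the unit, leaving 10 for digits. A hedged sketch of the tiering (the middle %10.2f tier is elided in the hunk and assumed here):

```cpp
#include <cstdio>
#include <string>

std::string FormatTimeSketch(double time) {
  char buf[32];
  if (time < 1.0) {
    std::snprintf(buf, sizeof(buf), "%10.3f", time);  // sub-unit times
  } else if (time < 10.0) {
    std::snprintf(buf, sizeof(buf), "%10.2f", time);  // assumed middle tier
  } else if (time < 100.0) {
    std::snprintf(buf, sizeof(buf), "%10.1f", time);
  } else {
    std::snprintf(buf, sizeof(buf), "%10.0f", time);  // integral rendering
  }
  return buf;
}
```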
4 changes: 2 additions & 2 deletions third-party/benchmark/src/counter.cc
@@ -27,10 +27,10 @@ double Finish(Counter const& c, IterationCount iterations, double cpu_time,
v /= num_threads;
}
if (c.flags & Counter::kIsIterationInvariant) {
v *= static_cast<double>(iterations);
v *= iterations;
}
if (c.flags & Counter::kAvgIterations) {
v /= static_cast<double>(iterations);
v /= iterations;
}

if (c.flags & Counter::kInvert) { // Invert is *always* last.
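The hunk is cut off before the kInvert branch completes; for orientation, the whole of Finish() is a chain of flag-driven scalings, sketched below with assumed flag values (the real constants live in benchmark's Counter and are not shown in this diff):

```cpp
// Hedged sketch of Counter::Finish-style scaling; flag constants assumed.
double FinishSketch(double v, double iterations, double cpu_time,
                    double num_threads, unsigned flags) {
  const unsigned kIsRate = 1u << 0;                // divide by elapsed time
  const unsigned kAvgThreads = 1u << 1;            // average over threads
  const unsigned kIsIterationInvariant = 1u << 2;  // scale up by iterations
  const unsigned kAvgIterations = 1u << 3;         // average over iterations
  const unsigned kInvert = 1u << 31;
  if (flags & kIsRate) v /= cpu_time;
  if (flags & kAvgThreads) v /= num_threads;
  if (flags & kIsIterationInvariant) v *= iterations;
  if (flags & kAvgIterations) v /= iterations;
  if (flags & kInvert) v = 1.0 / v;  // invert is always applied last
  return v;
}
```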
23 changes: 6 additions & 17 deletions third-party/benchmark/src/csv_reporter.cc
@@ -52,13 +52,11 @@ std::string CsvEscape(const std::string& s) {
return '"' + tmp + '"';
}

BENCHMARK_EXPORT
bool CSVReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);
return true;
}

BENCHMARK_EXPORT
void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
std::ostream& Out = GetOutputStream();

@@ -105,14 +103,13 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
}
}

BENCHMARK_EXPORT
void CSVReporter::PrintRunData(const Run& run) {
std::ostream& Out = GetOutputStream();
Out << CsvEscape(run.benchmark_name()) << ",";
if (run.skipped) {
if (run.error_occurred) {
Out << std::string(elements.size() - 3, ',');
Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
Out << CsvEscape(run.skip_message) << "\n";
Out << "true,";
Out << CsvEscape(run.error_message) << "\n";
return;
}

@@ -122,21 +119,13 @@ void CSVReporter::PrintRunData(const Run& run) {
}
Out << ",";

if (run.run_type != Run::RT_Aggregate ||
run.aggregate_unit == StatisticUnit::kTime) {
Out << run.GetAdjustedRealTime() << ",";
Out << run.GetAdjustedCPUTime() << ",";
} else {
assert(run.aggregate_unit == StatisticUnit::kPercentage);
Out << run.real_accumulated_time << ",";
Out << run.cpu_accumulated_time << ",";
}
Out << run.GetAdjustedRealTime() << ",";
Out << run.GetAdjustedCPUTime() << ",";

// Do not print timeLabel on bigO and RMS report
if (run.report_big_o) {
Out << GetBigOString(run.complexity);
} else if (!run.report_rms &&
run.aggregate_unit != StatisticUnit::kPercentage) {
} else if (!run.report_rms) {
Out << GetTimeUnitString(run.time_unit);
}
Out << ",";
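The top of CsvEscape is elided above; judging by the visible return statement, it follows the standard CSV convention of doubling embedded quotes and wrapping the field. A self-contained sketch:

```cpp
#include <string>

std::string CsvEscapeSketch(const std::string& s) {
  std::string tmp;
  for (char c : s) {
    if (c == '"')
      tmp += "\"\"";  // double each embedded quote
    else
      tmp += c;
  }
  return '"' + tmp + '"';  // wrap so commas/newlines stay inside the field
}
```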
40 changes: 12 additions & 28 deletions third-party/benchmark/src/cycleclock.h
@@ -36,8 +36,7 @@
// declarations of some other intrinsics, breaking compilation.
// Therefore, we simply declare __rdtsc ourselves. See also
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \
!defined(_M_ARM64EC)
#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64)
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
@@ -115,7 +114,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#elif defined(COMPILER_MSVC) && defined(_M_ARM64)
// See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
// and https://reviews.llvm.org/D53115
int64_t virtual_timer_value;
Expand All @@ -133,7 +132,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {

// Native Client does not provide any API to access cycle counter.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
// because it provides nanosecond resolution (which is noticeable at
// because is provides nanosecond resolution (which is noticable at
// least for PNaCl modules running on x86 Mac & Linux).
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = {0, 0};
@@ -174,7 +173,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__loongarch__) || defined(__csky__)
#elif defined(__loongarch__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
@@ -189,16 +188,15 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
#endif
return tsc;
#elif defined(__riscv) // RISC-V
// Use RDTIME (and RDTIMEH on riscv32).
// RDCYCLE is a privileged instruction since Linux 6.6.
// Use RDCYCLE (and RDCYCLEH on riscv32)
#if __riscv_xlen == 32
uint32_t cycles_lo, cycles_hi0, cycles_hi1;
// This asm also includes the PowerPC overflow handling strategy, as above.
// Implemented in assembly because Clang insisted on branching.
asm volatile(
"rdtimeh %0\n"
"rdtime %1\n"
"rdtimeh %2\n"
"rdcycleh %0\n"
"rdcycle %1\n"
"rdcycleh %2\n"
"sub %0, %0, %2\n"
"seqz %0, %0\n"
"sub %0, zero, %0\n"
@@ -207,31 +205,17 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
#else
uint64_t cycles;
asm volatile("rdtime %0" : "=r"(cycles));
asm volatile("rdcycle %0" : "=r"(cycles));
return cycles;
#endif
#elif defined(__e2k__) || defined(__elbrus__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__hexagon__)
uint64_t pcycle;
asm volatile("%0 = C15:14" : "=r"(pcycle));
return static_cast<double>(pcycle);
#elif defined(__alpha__)
// Alpha has a cycle counter, the PCC register, but it is an unsigned 32-bit
// integer and thus wraps every ~4s, making using it for tick counts
// unreliable beyond this time range. The real-time clock is low-precision,
// roughly ~1ms, but it is the only option that can reasonably count
// indefinitely.
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create
// a fast implementation and use generic version if nothing better is
// available.
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create
// a fast implementation and use generic version if nothing better is available.
#error You need to define CycleTimer for your OS and CPU
#endif
}
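The riscv32 branch above (in both its rdtime and rdcycle forms) uses the hi/lo/hi pattern for reading a 64-bit counter exposed as two 32-bit halves, with a branch-free mask instead of a retry loop. An equivalent C++ sketch, with the platform reads passed in so it stays self-contained:

```cpp
#include <cstdint>

// read_hi/read_lo stand in for e.g. rdcycleh/rdcycle or rdtimeh/rdtime.
uint64_t ReadCounter64(uint32_t (*read_hi)(), uint32_t (*read_lo)()) {
  const uint32_t hi0 = read_hi();
  const uint32_t lo = read_lo();
  const uint32_t hi1 = read_hi();
  // If the high half changed between the two reads, a carry occurred and
  // lo belongs to the old epoch; zero it out, exactly as the asm does.
  const uint32_t mask = (hi0 == hi1) ? ~uint32_t{0} : uint32_t{0};
  return (static_cast<uint64_t>(hi1) << 32) | (lo & mask);
}
```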
17 changes: 2 additions & 15 deletions third-party/benchmark/src/internal_macros.h
@@ -1,6 +1,8 @@
#ifndef BENCHMARK_INTERNAL_MACROS_H_
#define BENCHMARK_INTERNAL_MACROS_H_

#include "benchmark/benchmark.h"

/* Needed to detect STL */
#include <cstdlib>

@@ -42,19 +44,6 @@
#define BENCHMARK_OS_CYGWIN 1
#elif defined(_WIN32)
#define BENCHMARK_OS_WINDOWS 1
// WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
// We include windows.h which implicitly includes winapifamily.h for compatibility.
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#if defined(WINAPI_FAMILY_PARTITION)
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
#define BENCHMARK_OS_WINDOWS_WIN32 1
#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
#define BENCHMARK_OS_WINDOWS_RT 1
#endif
#endif
#if defined(__MINGW32__)
#define BENCHMARK_OS_MINGW 1
#endif
@@ -91,8 +80,6 @@
#define BENCHMARK_OS_QNX 1
#elif defined(__MVS__)
#define BENCHMARK_OS_ZOS 1
#elif defined(__hexagon__)
#define BENCHMARK_OS_QURT 1
#endif

#if defined(__ANDROID__) && defined(__GLIBCXX__)
36 changes: 16 additions & 20 deletions third-party/benchmark/src/json_reporter.cc
@@ -28,6 +28,10 @@
#include "timers.h"

namespace benchmark {
namespace internal {
extern std::map<std::string, std::string>* global_context;
}

namespace {

std::string StrEscape(const std::string& s) {
@@ -85,6 +89,12 @@ std::string FormatKV(std::string const& key, int64_t value) {
return ss.str();
}

std::string FormatKV(std::string const& key, IterationCount value) {
std::stringstream ss;
ss << '"' << StrEscape(key) << "\": " << value;
return ss.str();
}

std::string FormatKV(std::string const& key, double value) {
std::stringstream ss;
ss << '"' << StrEscape(key) << "\": ";
@@ -167,25 +177,15 @@ bool JSONReporter::ReportContext(const Context& context) {
}
out << "],\n";

out << indent << FormatKV("library_version", GetBenchmarkVersion());
out << ",\n";

#if defined(NDEBUG)
const char build_type[] = "release";
#else
const char build_type[] = "debug";
#endif
out << indent << FormatKV("library_build_type", build_type);
out << ",\n";

// NOTE: our json schema is not strictly tied to the library version!
out << indent << FormatKV("json_schema_version", int64_t(1));

std::map<std::string, std::string>* global_context =
internal::GetGlobalContext();

if (global_context != nullptr) {
for (const auto& kv : *global_context) {
if (internal::global_context != nullptr) {
for (const auto& kv : *internal::global_context) {
out << ",\n";
out << indent << FormatKV(kv.first, kv.second);
}
@@ -261,12 +261,9 @@ void JSONReporter::PrintRunData(Run const& run) {
BENCHMARK_UNREACHABLE();
}()) << ",\n";
}
if (internal::SkippedWithError == run.skipped) {
out << indent << FormatKV("error_occurred", true) << ",\n";
out << indent << FormatKV("error_message", run.skip_message) << ",\n";
} else if (internal::SkippedWithMessage == run.skipped) {
out << indent << FormatKV("skipped", true) << ",\n";
out << indent << FormatKV("skip_message", run.skip_message) << ",\n";
if (run.error_occurred) {
out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
out << indent << FormatKV("error_message", run.error_message) << ",\n";
}
if (!run.report_big_o && !run.report_rms) {
out << indent << FormatKV("iterations", run.iterations) << ",\n";
@@ -304,8 +301,7 @@ void JSONReporter::PrintRunData(Run const& run) {
out << ",\n"
<< indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);

auto report_if_present = [&out, &indent](const std::string& label,
int64_t val) {
auto report_if_present = [&out, &indent](const char* label, int64_t val) {
if (val != MemoryManager::TombstoneValue)
out << ",\n" << indent << FormatKV(label, val);
};
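For orientation, every FormatKV overload in this file renders a single JSON key/value pair; the diff only adds or removes overloads for different value types. A hedged sketch of the double overload (the real code also escapes the key and chooses its own precision):

```cpp
#include <limits>
#include <sstream>
#include <string>

std::string FormatKVSketch(const std::string& key, double value) {
  std::ostringstream ss;
  // Enough digits to round-trip a double through text.
  ss.precision(std::numeric_limits<double>::max_digits10);
  ss << '"' << key << "\": " << value;  // assumes key needs no escaping
  return ss.str();
}
```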
26 changes: 6 additions & 20 deletions third-party/benchmark/src/log.h
@@ -4,12 +4,7 @@
#include <iostream>
#include <ostream>

// NOTE: this is also defined in benchmark.h but we're trying to avoid a
// dependency.
// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
#define BENCHMARK_HAS_CXX11
#endif
#include "benchmark/benchmark.h"

namespace benchmark {
namespace internal {
@@ -28,16 +23,7 @@ class LogType {
private:
LogType(std::ostream* out) : out_(out) {}
std::ostream* out_;

// NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have
// a dependency on benchmark.h from here.
#ifndef BENCHMARK_HAS_CXX11
LogType(const LogType&);
LogType& operator=(const LogType&);
#else
LogType(const LogType&) = delete;
LogType& operator=(const LogType&) = delete;
#endif
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType);
};

template <class Tp>
@@ -61,13 +47,13 @@ inline int& LogLevel() {
}

inline LogType& GetNullLogInstance() {
static LogType null_log(static_cast<std::ostream*>(nullptr));
return null_log;
static LogType log(nullptr);
return log;
}

inline LogType& GetErrorLogInstance() {
static LogType error_log(&std::clog);
return error_log;
static LogType log(&std::clog);
return log;
}

inline LogType& GetLogInstanceForLevel(int level) {
242 changes: 46 additions & 196 deletions third-party/benchmark/src/perf_counters.cc
@@ -15,7 +15,6 @@
#include "perf_counters.h"

#include <cstring>
#include <memory>
#include <vector>

#if defined HAVE_LIBPFM
@@ -29,254 +28,105 @@ namespace internal {
constexpr size_t PerfCounterValues::kMaxCounters;

#if defined HAVE_LIBPFM

size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
// Create a pointer for multiple reads
const size_t bufsize = values_.size() * sizeof(values_[0]);
char* ptr = reinterpret_cast<char*>(values_.data());
size_t size = bufsize;
for (int lead : leaders) {
auto read_bytes = ::read(lead, ptr, size);
if (read_bytes >= ssize_t(sizeof(uint64_t))) {
// Actual data bytes are all bytes minus initial padding
std::size_t data_bytes = read_bytes - sizeof(uint64_t);
// This should be very cheap since it's in hot cache
std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
// Increment our counters
ptr += data_bytes;
size -= data_bytes;
} else {
int err = errno;
GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
<< " " << ::strerror(err) << "\n";
return 0;
}
}
return (bufsize - size) / sizeof(uint64_t);
}

const bool PerfCounters::kSupported = true;

// Initializes libpfm only on the first call. Returns whether that single
// initialization was successful.
bool PerfCounters::Initialize() {
// Function-scope static gets initialized only once on first call.
static const bool success = []() {
return pfm_initialize() == PFM_SUCCESS;
}();
return success;
}

bool PerfCounters::IsCounterSupported(const std::string& name) {
Initialize();
perf_event_attr_t attr;
std::memset(&attr, 0, sizeof(attr));
pfm_perf_encode_arg_t arg;
std::memset(&arg, 0, sizeof(arg));
arg.attr = &attr;
const int mode = PFM_PLM3; // user mode only
int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
&arg);
return (ret == PFM_SUCCESS);
}
bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; }

PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
Initialize();
if (counter_names.empty()) {
return NoCounters();
}
if (counter_names.size() > PerfCounterValues::kMaxCounters) {
GetErrorLogInstance()
<< counter_names.size()
<< " counters were requested. The minimum is 1, the maximum is "
<< PerfCounterValues::kMaxCounters << "\n";
return NoCounters();
}
std::vector<int> counter_ids(counter_names.size());

// Valid counters will populate these arrays but we start empty
std::vector<std::string> valid_names;
std::vector<int> counter_ids;
std::vector<int> leader_ids;

// Resize to the maximum possible
valid_names.reserve(counter_names.size());
counter_ids.reserve(counter_names.size());

const int kCounterMode = PFM_PLM3; // user mode only

// Group leads will be assigned on demand. The idea is that once we cannot
// create a counter descriptor, the reason is that this group has maxed out
// so we set the group_id again to -1 and retry - giving the algorithm a
// chance to create a new group leader to hold the next set of counters.
int group_id = -1;

// Loop through all performance counters
const int mode = PFM_PLM3; // user mode only
for (size_t i = 0; i < counter_names.size(); ++i) {
// we are about to push into the valid names vector
// check if we did not reach the maximum
if (valid_names.size() == PerfCounterValues::kMaxCounters) {
// Log a message if we maxed out and stop adding
GetErrorLogInstance()
<< counter_names.size() << " counters were requested. The maximum is "
<< PerfCounterValues::kMaxCounters << " and " << valid_names.size()
<< " were already added. All remaining counters will be ignored\n";
// stop the loop and return what we have already
break;
}

// Check if this name is empty
const bool is_first = i == 0;
struct perf_event_attr attr {};
attr.size = sizeof(attr);
const int group_id = !is_first ? counter_ids[0] : -1;
const auto& name = counter_names[i];
if (name.empty()) {
GetErrorLogInstance()
<< "A performance counter name was the empty string\n";
continue;
GetErrorLogInstance() << "A counter name was the empty string\n";
return NoCounters();
}

// Here first means first in group, ie the group leader
const bool is_first = (group_id < 0);

// This struct will be populated by libpfm from the counter string
// and then fed into the syscall perf_event_open
struct perf_event_attr attr {};
attr.size = sizeof(attr);

// This is the input struct to libpfm.
pfm_perf_encode_arg_t arg{};
arg.attr = &attr;
const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
PFM_OS_PERF_EVENT, &arg);

const int pfm_get =
pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
if (pfm_get != PFM_SUCCESS) {
GetErrorLogInstance()
<< "Unknown performance counter name: " << name << "\n";
continue;
GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
return NoCounters();
}

// We then proceed to populate the remaining fields in our attribute struct
// Note: the man page for perf_event_create suggests inherit = true and
attr.disabled = is_first;
// Note: the man page for perf_event_create suggests inerit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// case.
attr.disabled = is_first;
attr.inherit = true;
attr.pinned = is_first;
attr.exclude_kernel = true;
attr.exclude_user = false;
attr.exclude_hv = true;

// Read all counters in a group in one read.
// Read all counters in one read.
attr.read_format = PERF_FORMAT_GROUP;

int id = -1;
while (id < 0) {
static constexpr size_t kNrOfSyscallRetries = 5;
// Retry syscall as it was interrupted often (b/64774091).
for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
++num_retries) {
id = perf_event_open(&attr, 0, -1, group_id, 0);
if (id >= 0 || errno != EINTR) {
break;
}
}
if (id < 0) {
// If the file descriptor is negative we might have reached a limit
// in the current group. Set the group_id to -1 and retry
if (group_id >= 0) {
// Create a new group
group_id = -1;
} else {
// At this point we have already retried to set a new group id and
// failed. We then give up.
break;
}
static constexpr size_t kNrOfSyscallRetries = 5;
// Retry syscall as it was interrupted often (b/64774091).
for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
++num_retries) {
id = perf_event_open(&attr, 0, -1, group_id, 0);
if (id >= 0 || errno != EINTR) {
break;
}
}

// We failed to get a new file descriptor. We might have reached a hard
// hardware limit that cannot be resolved even with group multiplexing
if (id < 0) {
GetErrorLogInstance() << "***WARNING*** Failed to get a file descriptor "
"for performance counter "
<< name << ". Ignoring\n";

// We give up on this counter but try to keep going
// as the others would be fine
continue;
}
if (group_id < 0) {
// This is a leader, store and assign it to the current file descriptor
leader_ids.push_back(id);
group_id = id;
}
// This is a valid counter, add it to our descriptor's list
counter_ids.push_back(id);
valid_names.push_back(name);
}

// Loop through all group leaders activating them
// There is another option of starting ALL counters in a process but
// that would be a far-reaching intrusion. If the user is using PMCs
// by themselves then this would have a side effect on them. It is
// friendlier to loop through all groups individually.
for (int lead : leader_ids) {
if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
// This should never happen but if it does, we give up on the
// entire batch as recovery would be a mess.
GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
"Clearing out all counters.\n";

// Close all performance counters
for (int id : counter_ids) {
::close(id);
}

// Return an empty object so our internal state is still good and
// the process can continue normally without impact
GetErrorLogInstance()
<< "Failed to get a file descriptor for " << name << "\n";
return NoCounters();
}

counter_ids[i] = id;
}
if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) {
GetErrorLogInstance() << "Failed to start counters\n";
return NoCounters();
}

return PerfCounters(std::move(valid_names), std::move(counter_ids),
std::move(leader_ids));
return PerfCounters(counter_names, std::move(counter_ids));
}

void PerfCounters::CloseCounters() const {
PerfCounters::~PerfCounters() {
if (counter_ids_.empty()) {
return;
}
for (int lead : leader_ids_) {
ioctl(lead, PERF_EVENT_IOC_DISABLE);
}
ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE);
for (int fd : counter_ids_) {
close(fd);
}
}
#else // defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }

const bool PerfCounters::kSupported = false;

bool PerfCounters::Initialize() { return false; }

bool PerfCounters::IsCounterSupported(const std::string&) { return false; }

PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
GetErrorLogInstance() << "Performance counters not supported.\n";
GetErrorLogInstance() << "Performance counters not supported.";
}
return NoCounters();
}

void PerfCounters::CloseCounters() const {}
PerfCounters::~PerfCounters() = default;
#endif // defined HAVE_LIBPFM

PerfCountersMeasurement::PerfCountersMeasurement(
const std::vector<std::string>& counter_names)
: start_values_(counter_names.size()), end_values_(counter_names.size()) {
counters_ = PerfCounters::Create(counter_names);
}

PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
if (this != &other) {
CloseCounters();

counter_ids_ = std::move(other.counter_ids_);
leader_ids_ = std::move(other.leader_ids_);
counter_names_ = std::move(other.counter_names_);
}
return *this;
}
} // namespace internal
} // namespace benchmark
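Background for the Read()/Snapshot() difference above: with read_format = PERF_FORMAT_GROUP (and no time fields), a read() on a group leader yields the layout { u64 nr; u64 values[nr]; }, so each group contributes one leading count word. The removed Read() memmoves that word away per group leader; the restored Snapshot() simply reserves one padding slot. A sketch of parsing a single group:

```cpp
#include <cstddef>
#include <cstdint>
#include <unistd.h>
#include <vector>

// Reads nr_counters values from one perf event group leader fd.
bool ReadGroup(int leader_fd, std::size_t nr_counters,
               std::vector<uint64_t>* out) {
  std::vector<uint64_t> buf(1 + nr_counters);  // [nr, v0, v1, ...]
  const ssize_t want = static_cast<ssize_t>(buf.size() * sizeof(uint64_t));
  if (::read(leader_fd, buf.data(), buf.size() * sizeof(uint64_t)) != want)
    return false;
  out->assign(buf.begin() + 1, buf.end());  // drop the leading count word
  return true;
}
```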
108 changes: 40 additions & 68 deletions third-party/benchmark/src/perf_counters.h
@@ -17,25 +17,16 @@

#include <array>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"
#include "mutex.h"

#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
#endif

#if defined(_MSC_VER)
#pragma warning(push)
// C4251: <symbol> needs to have dll-interface to be used by clients of class
#pragma warning(disable : 4251)
#endif

namespace benchmark {
namespace internal {

@@ -45,21 +36,18 @@ namespace internal {
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
// to Snapshot(). The Read() method relocates individual reads, discarding
// the initial padding from each group leader in the values buffer such that
// all user accesses through the [] operator are correct.
class BENCHMARK_EXPORT PerfCounterValues {
// to Snapshot(). The values are populated such that
// perfCounters->names()[i]'s value is obtained at position i (as given by
// operator[]) of this object.
class PerfCounterValues {
public:
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
BM_CHECK_LE(nr_counters_, kMaxCounters);
}

// We are reading correctly now so the values don't need to skip padding
uint64_t operator[](size_t pos) const { return values_[pos]; }
uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }

// Increased the maximum to 32 only since the buffer
// is std::array<> backed
static constexpr size_t kMaxCounters = 32;
static constexpr size_t kMaxCounters = 3;

private:
friend class PerfCounters;
@@ -70,49 +58,35 @@ class BENCHMARK_EXPORT PerfCounterValues {
sizeof(uint64_t) * (kPadding + nr_counters_)};
}

// This reading is complex and as the goal of this class is to
// abstract away the intricacies of the reading process, this is
// a better place for it
size_t Read(const std::vector<int>& leaders);

// Move the padding to 2 due to the reading algorithm (1st padding plus a
// current read padding)
static constexpr size_t kPadding = 2;
static constexpr size_t kPadding = 1;
std::array<uint64_t, kPadding + kMaxCounters> values_;
const size_t nr_counters_;
};

// Collect PMU counters. The object, once constructed, is ready to be used by
// calling read(). PMU counter collection is enabled from the time create() is
// called, to obtain the object, until the object's destructor is called.
class BENCHMARK_EXPORT PerfCounters final {
class PerfCounters final {
public:
// True iff this platform supports performance counters.
static const bool kSupported;

// Returns an empty object
bool IsValid() const { return is_valid_; }
static PerfCounters NoCounters() { return PerfCounters(); }

~PerfCounters() { CloseCounters(); }
PerfCounters() = default;
~PerfCounters();
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
PerfCounters& operator=(PerfCounters&&) noexcept;
PerfCounters& operator=(const PerfCounters&) = delete;

// Platform-specific implementations may choose to do some library
// initialization here.
static bool Initialize();

// Check if the given counter is supported, if the app wants to
// check before passing
static bool IsCounterSupported(const std::string& name);

// Return a PerfCounters object ready to read the counters with the names
// specified. The values are user-mode only. The counter name format is
// implementation and OS specific.
// In case of failure, this method will in the worst case return an
// empty object whose state will still be valid.
// TODO: once we move to C++-17, this should be a std::optional, and then the
// IsValid() boolean can be dropped.
static PerfCounters Create(const std::vector<std::string>& counter_names);

// Take a snapshot of the current value of the counters into the provided
@@ -121,7 +95,10 @@ class BENCHMARK_EXPORT PerfCounters final {
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
#ifndef BENCHMARK_OS_WINDOWS
assert(values != nullptr);
return values->Read(leader_ids_) == counter_ids_.size();
assert(IsValid());
auto buffer = values->get_data_buffer();
auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second);
return static_cast<size_t>(read_bytes) == buffer.second;
#else
(void)values;
return false;
@@ -133,68 +110,63 @@

private:
PerfCounters(const std::vector<std::string>& counter_names,
std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
std::vector<int>&& counter_ids)
: counter_ids_(std::move(counter_ids)),
leader_ids_(std::move(leader_ids)),
counter_names_(counter_names) {}

void CloseCounters() const;
counter_names_(counter_names),
is_valid_(true) {}
PerfCounters() : is_valid_(false) {}

std::vector<int> counter_ids_;
std::vector<int> leader_ids_;
std::vector<std::string> counter_names_;
const std::vector<std::string> counter_names_;
const bool is_valid_;
};

// Typical usage of the above primitives.
class BENCHMARK_EXPORT PerfCountersMeasurement final {
class PerfCountersMeasurement final {
public:
PerfCountersMeasurement(const std::vector<std::string>& counter_names);

size_t num_counters() const { return counters_.num_counters(); }
PerfCountersMeasurement(PerfCounters&& c)
: counters_(std::move(c)),
start_values_(counters_.IsValid() ? counters_.names().size() : 0),
end_values_(counters_.IsValid() ? counters_.names().size() : 0) {}

std::vector<std::string> names() const { return counters_.names(); }
bool IsValid() const { return counters_.IsValid(); }

BENCHMARK_ALWAYS_INLINE bool Start() {
if (num_counters() == 0) return true;
BENCHMARK_ALWAYS_INLINE void Start() {
assert(IsValid());
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&start_values_);
counters_.Snapshot(&start_values_);
ClobberMemory();

return valid_read_;
}

BENCHMARK_ALWAYS_INLINE bool Stop(
std::vector<std::pair<std::string, double>>& measurements) {
if (num_counters() == 0) return true;
BENCHMARK_ALWAYS_INLINE std::vector<std::pair<std::string, double>>
StopAndGetMeasurements() {
assert(IsValid());
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&end_values_);
counters_.Snapshot(&end_values_);
ClobberMemory();

std::vector<std::pair<std::string, double>> ret;
for (size_t i = 0; i < counters_.names().size(); ++i) {
double measurement = static_cast<double>(end_values_[i]) -
static_cast<double>(start_values_[i]);
measurements.push_back({counters_.names()[i], measurement});
ret.push_back({counters_.names()[i], measurement});
}

return valid_read_;
return ret;
}

private:
PerfCounters counters_;
bool valid_read_ = true;
PerfCounterValues start_values_;
PerfCounterValues end_values_;
};

BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize();

} // namespace internal
} // namespace benchmark

#if defined(_MSC_VER)
#pragma warning(pop)
#endif

#endif // BENCHMARK_PERF_COUNTERS_H
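Putting the restored header's API together, a typical caller would drive it roughly as below (hedged: the event names are libpfm-dependent, and the runner code itself is not part of this diff):

```cpp
#include <string>
#include <utility>
#include <vector>

#include "perf_counters.h"

void MeasureRegionSketch() {
  using benchmark::internal::PerfCounters;
  using benchmark::internal::PerfCountersMeasurement;
  // Event names are passed straight to libpfm; "cycles"/"instructions" are
  // plausible but platform-dependent.
  PerfCountersMeasurement pcm(
      PerfCounters::Create({"cycles", "instructions"}));
  if (!pcm.IsValid()) return;
  pcm.Start();
  // ... code under measurement ...
  std::vector<std::pair<std::string, double>> deltas =
      pcm.StopAndGetMeasurements();  // per-counter (end - start)
  (void)deltas;
}
```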
2 changes: 1 addition & 1 deletion third-party/benchmark/src/re.h
@@ -33,7 +33,7 @@
// Prefer C regex libraries when compiling w/o exceptions so that we can
// correctly report errors.
#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \
defined(HAVE_STD_REGEX) && \
defined(BENCHMARK_HAVE_STD_REGEX) && \
(defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX))
#undef HAVE_STD_REGEX
#endif
14 changes: 5 additions & 9 deletions third-party/benchmark/src/reporter.cc
@@ -25,6 +25,9 @@
#include "timers.h"

namespace benchmark {
namespace internal {
extern std::map<std::string, std::string> *global_context;
}

BenchmarkReporter::BenchmarkReporter()
: output_stream_(&std::cout), error_stream_(&std::cerr) {}
@@ -36,11 +39,7 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
BM_CHECK(out) << "cannot be null";
auto &Out = *out;

#ifndef BENCHMARK_OS_QURT
// Date/time information is not available on QuRT.
// Attempting to get it via this call cause the binary to crash.
Out << LocalDateTimeString() << "\n";
#endif

if (context.executable_name)
Out << "Running " << context.executable_name << "\n";
@@ -68,11 +67,8 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Out << "\n";
}

std::map<std::string, std::string> *global_context =
internal::GetGlobalContext();

if (global_context != nullptr) {
for (const auto &kv : *global_context) {
if (internal::global_context != nullptr) {
for (const auto &kv : *internal::global_context) {
Out << kv.first << ": " << kv.second << "\n";
}
}
66 changes: 66 additions & 0 deletions third-party/benchmark/src/sleep.cc
@@ -0,0 +1,66 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sleep.h"

#include <cerrno>
#include <cstdlib>
#include <ctime>

#include "internal_macros.h"

#ifdef BENCHMARK_OS_WINDOWS
#include <windows.h>
#endif

#ifdef BENCHMARK_OS_ZOS
#include <unistd.h>
#endif

namespace benchmark {
#ifdef BENCHMARK_OS_WINDOWS
// Windows' Sleep takes a milliseconds argument.
void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
void SleepForSeconds(double seconds) {
SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
}
#else // BENCHMARK_OS_WINDOWS
void SleepForMicroseconds(int microseconds) {
#ifdef BENCHMARK_OS_ZOS
// z/OS does not support nanosleep. Instead call sleep() and then usleep() to
// sleep for the remaining microseconds because usleep() will fail if its
// argument is greater than 1000000.
div_t sleepTime = div(microseconds, kNumMicrosPerSecond);
int seconds = sleepTime.quot;
while (seconds != 0) seconds = sleep(seconds);
while (usleep(sleepTime.rem) == -1 && errno == EINTR)
;
#else
struct timespec sleep_time;
sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
; // Ignore signals and wait for the full interval to elapse.
#endif
}

void SleepForMilliseconds(int milliseconds) {
SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
}

void SleepForSeconds(double seconds) {
SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
}
#endif // BENCHMARK_OS_WINDOWS
} // end namespace benchmark
15 changes: 15 additions & 0 deletions third-party/benchmark/src/sleep.h
@@ -0,0 +1,15 @@
#ifndef BENCHMARK_SLEEP_H_
#define BENCHMARK_SLEEP_H_

namespace benchmark {
const int kNumMillisPerSecond = 1000;
const int kNumMicrosPerMilli = 1000;
const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
const int kNumNanosPerMicro = 1000;
const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;

void SleepForMilliseconds(int milliseconds);
void SleepForSeconds(double seconds);
} // end namespace benchmark

#endif // BENCHMARK_SLEEP_H_
38 changes: 16 additions & 22 deletions third-party/benchmark/src/statistics.cc
@@ -32,7 +32,7 @@ auto StatisticsSum = [](const std::vector<double>& v) {

double StatisticsMean(const std::vector<double>& v) {
if (v.empty()) return 0.0;
return StatisticsSum(v) * (1.0 / static_cast<double>(v.size()));
return StatisticsSum(v) * (1.0 / v.size());
}

double StatisticsMedian(const std::vector<double>& v) {
@@ -42,13 +42,13 @@ double StatisticsMedian(const std::vector<double>& v) {
auto center = copy.begin() + v.size() / 2;
std::nth_element(copy.begin(), center, copy.end());

// Did we have an odd number of samples? If yes, then center is the median.
// If not, then we are looking for the average between center and the value
// before. Instead of resorting, we just look for the max value before it,
// which is not necessarily the element immediately preceding `center` Since
// `copy` is only partially sorted by `nth_element`.
// did we have an odd number of samples?
// if yes, then center is the median
// if no, then we are looking for the average between center and the value
// before
if (v.size() % 2 == 1) return *center;
auto center2 = std::max_element(copy.begin(), center);
auto center2 = copy.begin() + v.size() / 2 - 1;
std::nth_element(copy.begin(), center2, copy.end());
return (*center + *center2) / 2.0;
}

@@ -71,11 +71,8 @@ double StatisticsStdDev(const std::vector<double>& v) {
// Sample standard deviation is undefined for n = 1
if (v.size() == 1) return 0.0;

const double avg_squares =
SumSquares(v) * (1.0 / static_cast<double>(v.size()));
return Sqrt(static_cast<double>(v.size()) /
(static_cast<double>(v.size()) - 1.0) *
(avg_squares - Sqr(mean)));
const double avg_squares = SumSquares(v) * (1.0 / v.size());
return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean)));
}

double StatisticsCV(const std::vector<double>& v) {
@@ -84,8 +81,6 @@ double StatisticsCV(const std::vector<double>& v) {
const auto stddev = StatisticsStdDev(v);
const auto mean = StatisticsMean(v);

if (std::fpclassify(mean) == FP_ZERO) return 0.0;

return stddev / mean;
}

@@ -94,8 +89,9 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;

auto error_count = std::count_if(reports.begin(), reports.end(),
[](Run const& run) { return run.skipped; });
auto error_count =
std::count_if(reports.begin(), reports.end(),
[](Run const& run) { return run.error_occurred; });

if (reports.size() - error_count < 2) {
// We don't report aggregated data if there was a single run.
@@ -122,13 +118,11 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
for (auto const& cnt : r.counters) {
auto it = counter_stats.find(cnt.first);
if (it == counter_stats.end()) {
it = counter_stats
.emplace(cnt.first,
CounterStat{cnt.second, std::vector<double>{}})
.first;
counter_stats.insert({cnt.first, {cnt.second, std::vector<double>{}}});
it = counter_stats.find(cnt.first);
it->second.s.reserve(reports.size());
} else {
BM_CHECK_EQ(it->second.c.flags, cnt.second.flags);
BM_CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
}
}
}
@@ -137,7 +131,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
for (Run const& run : reports) {
BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
BM_CHECK_EQ(run_iterations, run.iterations);
if (run.skipped) continue;
if (run.error_occurred) continue;
real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
// user counters
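On the median hunk above: after nth_element(begin, center, end), every element in [begin, center) is <= *center, so for an even-sized sample the other middle value is just the maximum of that prefix. That is why the removed version's max_element is equivalent to (and cheaper than) the restored version's second nth_element. A compact sketch:

```cpp
#include <algorithm>
#include <vector>

double MedianSketch(std::vector<double> v) {  // by value: nth_element mutates
  if (v.empty()) return 0.0;
  auto center = v.begin() + v.size() / 2;
  std::nth_element(v.begin(), center, v.end());
  if (v.size() % 2 == 1) return *center;
  auto below = std::max_element(v.begin(), center);  // max of lower half
  return (*center + *below) / 2.0;
}
```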
12 changes: 3 additions & 9 deletions third-party/benchmark/src/statistics.h
@@ -22,21 +22,15 @@

namespace benchmark {

// Return a vector containing the mean, median and standard deviation
// information (and any user-specified info) for the specified list of reports.
// If 'reports' contains less than two non-errored runs an empty vector is
// returned
BENCHMARK_EXPORT
// Return a vector containing the mean, median and standard deviation information
// (and any user-specified info) for the specified list of reports. If 'reports'
// contains less than two non-errored runs an empty vector is returned
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports);

BENCHMARK_EXPORT
double StatisticsMean(const std::vector<double>& v);
BENCHMARK_EXPORT
double StatisticsMedian(const std::vector<double>& v);
BENCHMARK_EXPORT
double StatisticsStdDev(const std::vector<double>& v);
BENCHMARK_EXPORT
double StatisticsCV(const std::vector<double>& v);

} // end namespace benchmark
65 changes: 38 additions & 27 deletions third-party/benchmark/src/string_util.cc
@@ -11,17 +11,16 @@
#include <sstream>

#include "arraysize.h"
#include "benchmark/benchmark.h"

namespace benchmark {
namespace {

// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta.
const char* const kBigSIUnits[] = {"k", "M", "G", "T", "P", "E", "Z", "Y"};
const char kBigSIUnits[] = "kMGTPEZY";
// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi.
const char* const kBigIECUnits[] = {"Ki", "Mi", "Gi", "Ti",
"Pi", "Ei", "Zi", "Yi"};
const char kBigIECUnits[] = "KMGTPEZY";
// milli, micro, nano, pico, femto, atto, zepto, yocto.
const char* const kSmallSIUnits[] = {"m", "u", "n", "p", "f", "a", "z", "y"};
const char kSmallSIUnits[] = "munpfazy";

// We require that all three arrays have the same size.
static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
@@ -31,8 +30,9 @@ static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),

static const int64_t kUnitsSize = arraysize(kBigSIUnits);

void ToExponentAndMantissa(double val, int precision, double one_k,
std::string* mantissa, int64_t* exponent) {
void ToExponentAndMantissa(double val, double thresh, int precision,
double one_k, std::string* mantissa,
int64_t* exponent) {
std::stringstream mantissa_stream;

if (val < 0) {
Expand All @@ -43,8 +43,8 @@ void ToExponentAndMantissa(double val, int precision, double one_k,
// Adjust threshold so that it never excludes things which can't be rendered
// in 'precision' digits.
const double adjusted_threshold =
std::max(1.0, 1.0 / std::pow(10.0, precision));
const double big_threshold = (adjusted_threshold * one_k) - 1;
std::max(thresh, 1.0 / std::pow(10.0, precision));
const double big_threshold = adjusted_threshold * one_k;
const double small_threshold = adjusted_threshold;
// Values in ]simple_threshold,small_threshold[ will be printed as-is
const double simple_threshold = 0.01;
@@ -92,20 +92,37 @@ std::string ExponentToPrefix(int64_t exponent, bool iec) {
const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1);
if (index >= kUnitsSize) return "";

const char* const* array =
const char* array =
(exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);

return std::string(array[index]);
if (iec)
return array[index] + std::string("i");
else
return std::string(1, array[index]);
}

std::string ToBinaryStringFullySpecified(double value, int precision,
Counter::OneK one_k) {
std::string ToBinaryStringFullySpecified(double value, double threshold,
int precision, double one_k = 1024.0) {
std::string mantissa;
int64_t exponent;
ToExponentAndMantissa(value, precision,
one_k == Counter::kIs1024 ? 1024.0 : 1000.0, &mantissa,
ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa,
&exponent);
return mantissa + ExponentToPrefix(exponent, one_k == Counter::kIs1024);
return mantissa + ExponentToPrefix(exponent, false);
}

} // end namespace

void AppendHumanReadable(int n, std::string* str) {
std::stringstream ss;
// Round down to the nearest SI prefix.
ss << ToBinaryStringFullySpecified(n, 1.0, 0);
*str += ss.str();
}

std::string HumanReadableNumber(double n, double one_k) {
// 1.1 means that figures up to 1.1k should be shown with the next unit down;
// this softens edge effects.
// 1 means that we should show one decimal place of precision.
return ToBinaryStringFullySpecified(n, 1.1, 1, one_k);
}

std::string StrFormatImp(const char* msg, va_list args) {
@@ -116,34 +133,28 @@ std::string StrFormatImp(const char* msg, va_list args) {
// TODO(ericwf): use std::array for first attempt to avoid one memory
// allocation guess what the size might be
std::array<char, 256> local_buff;

std::size_t size = local_buff.size();
// 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a limitation
// in the android-ndk
auto ret = vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp);
auto ret = vsnprintf(local_buff.data(), size, msg, args_cp);

va_end(args_cp);

// handle empty expansion
if (ret == 0) return std::string{};
if (static_cast<std::size_t>(ret) < local_buff.size())
if (static_cast<std::size_t>(ret) < size)
return std::string(local_buff.data());

// we did not provide a long enough buffer on our first attempt.
// add 1 to size to account for null-byte in size cast to prevent overflow
std::size_t size = static_cast<std::size_t>(ret) + 1;
size = static_cast<std::size_t>(ret) + 1;
auto buff_ptr = std::unique_ptr<char[]>(new char[size]);
// 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a limitation
// in the android-ndk
vsnprintf(buff_ptr.get(), size, msg, args);
return std::string(buff_ptr.get());
}

} // end namespace

std::string HumanReadableNumber(double n, Counter::OneK one_k) {
return ToBinaryStringFullySpecified(n, 1, one_k);
}

std::string StrFormat(const char* format, ...) {
va_list args;
va_start(args, format);
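Both versions of this file split a value into mantissa and exponent against a base (1000 for SI, 1024 for IEC) and then map the exponent to a prefix string. A hedged sketch of the big-value direction only (the real code also handles the sub-unit milli/micro/... prefixes and threshold softening):

```cpp
#include <cstdio>
#include <string>

std::string HumanReadableSketch(double v, double one_k) {
  static const char* const kPrefixes[] = {"",  "k", "M", "G", "T",
                                          "P", "E", "Z", "Y"};
  int exp = 0;
  while (v >= one_k && exp < 8) {  // divide down until the mantissa fits
    v /= one_k;
    ++exp;
  }
  char buf[32];
  std::snprintf(buf, sizeof(buf), "%.1f%s", v, kPrefixes[exp]);
  return buf;
}
```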
11 changes: 3 additions & 8 deletions third-party/benchmark/src/string_util.h
@@ -4,19 +4,15 @@
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "benchmark/benchmark.h"
#include "benchmark/export.h"
#include "check.h"
#include "internal_macros.h"

namespace benchmark {

BENCHMARK_EXPORT
std::string HumanReadableNumber(double n, Counter::OneK one_k);
void AppendHumanReadable(int n, std::string* str);

std::string HumanReadableNumber(double n, double one_k = 1024.0);

BENCHMARK_EXPORT
#if defined(__MINGW32__)
__attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2)))
#elif defined(__GNUC__)
@@ -42,7 +38,6 @@ inline std::string StrCat(Args&&... args) {
return ss.str();
}

BENCHMARK_EXPORT
std::vector<std::string> StrSplit(const std::string& str, char delim);

// Disable lint checking for this block since it re-implements C functions.
443 changes: 149 additions & 294 deletions third-party/benchmark/src/sysinfo.cc

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions third-party/benchmark/src/thread_manager.h
@@ -43,8 +43,8 @@ class ThreadManager {
double manual_time_used = 0;
int64_t complexity_n = 0;
std::string report_label_;
std::string skip_message_;
internal::Skipped skipped_ = internal::NotSkipped;
std::string error_message_;
bool has_error_ = false;
UserCounters counters;
};
GUARDED_BY(GetBenchmarkMutex()) Result results;
24 changes: 5 additions & 19 deletions third-party/benchmark/src/timers.cc
@@ -23,7 +23,7 @@
#include <windows.h>
#else
#include <fcntl.h>
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#ifndef BENCHMARK_OS_FUCHSIA
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -38,9 +38,6 @@
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#endif
#if defined(BENCHMARK_OS_QURT)
#include <qurt.h>
#endif
#endif

#ifdef BENCHMARK_OS_EMSCRIPTEN
@@ -59,6 +56,7 @@

#include "check.h"
#include "log.h"
#include "sleep.h"
#include "string_util.h"

namespace benchmark {
@@ -67,9 +65,6 @@ namespace benchmark {
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-function"
#endif
#if defined(__NVCOMPILER)
#pragma diag_suppress declared_but_not_referenced
#endif

namespace {
#if defined(BENCHMARK_OS_WINDOWS)
@@ -84,7 +79,7 @@ double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) {
static_cast<double>(user.QuadPart)) *
1e-7;
}
#elif !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#elif !defined(BENCHMARK_OS_FUCHSIA)
double MakeTime(struct rusage const& ru) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
@@ -102,8 +97,7 @@ double MakeTime(thread_basic_info_data_t const& info) {
#endif
#if defined(CLOCK_PROCESS_CPUTIME_ID) || defined(CLOCK_THREAD_CPUTIME_ID)
double MakeTime(struct timespec const& ts) {
return static_cast<double>(ts.tv_sec) +
(static_cast<double>(ts.tv_nsec) * 1e-9);
return ts.tv_sec + (static_cast<double>(ts.tv_nsec) * 1e-9);
}
#endif

@@ -125,15 +119,11 @@ double ProcessCPUUsage() {
&user_time))
return MakeTime(kernel_time, user_time);
DiagnoseAndExit("GetProccessTimes() failed");
#elif defined(BENCHMARK_OS_QURT)
return static_cast<double>(
qurt_timer_timetick_to_us(qurt_timer_get_ticks())) *
1.0e-6;
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten.
// Use Emscripten-specific API. Reported CPU time would be exactly the
// same as total time, but this is ok because there aren't long-latency
// synchronous system calls in Emscripten.
// syncronous system calls in Emscripten.
return emscripten_get_now() * 1e-3;
#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but its not available in MacOS 10.11.
@@ -159,10 +149,6 @@ double ThreadCPUUsage() {
GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time,
&user_time);
return MakeTime(kernel_time, user_time);
#elif defined(BENCHMARK_OS_QURT)
return static_cast<double>(
qurt_timer_timetick_to_us(qurt_timer_get_ticks())) *
1.0e-6;
#elif defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but its not available in MacOS 10.11.
// See https://github.com/google/benchmark/pull/292
21 changes: 0 additions & 21 deletions third-party/benchmark/test/AssemblyTests.cmake
@@ -1,23 +1,3 @@
set(CLANG_SUPPORTED_VERSION "5.0.0")
set(GCC_SUPPORTED_VERSION "5.5.0")

if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${CLANG_SUPPORTED_VERSION})
message (WARNING
"Unsupported Clang version " ${CMAKE_CXX_COMPILER_VERSION}
". Expected is " ${CLANG_SUPPORTED_VERSION}
". Assembly tests may be broken.")
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${GCC_SUPPORTED_VERSION})
message (WARNING
"Unsupported GCC version " ${CMAKE_CXX_COMPILER_VERSION}
". Expected is " ${GCC_SUPPORTED_VERSION}
". Assembly tests may be broken.")
endif()
else()
message (WARNING "Unsupported compiler. Assembly tests may be broken.")
endif()

include(split_list)

Expand All @@ -43,7 +23,6 @@ string(TOUPPER "${CMAKE_CXX_COMPILER_ID}" ASM_TEST_COMPILER)
macro(add_filecheck_test name)
cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGV})
add_library(${name} OBJECT ${name}.cc)
target_link_libraries(${name} PRIVATE benchmark::benchmark)
set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}")
set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s")
add_custom_target(copy_${name} ALL
Expand Down
127 changes: 43 additions & 84 deletions third-party/benchmark/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
# Enable the tests

set(THREADS_PREFER_PTHREAD_FLAG ON)

find_package(Threads REQUIRED)
include(CheckCXXCompilerFlag)

add_cxx_compiler_flag(-Wno-unused-variable)

# NOTE: Some tests use `<cassert>` to perform the test. Therefore we must
# strip -DNDEBUG from the default CMake flags in DEBUG mode.
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
Expand All @@ -26,10 +22,6 @@ if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" )
endforeach()
endif()

if (NOT BUILD_SHARED_LIBS)
add_definitions(-DBENCHMARK_STATIC_DEFINE)
endif()

check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG)
set(BENCHMARK_O3_FLAG "")
if (BENCHMARK_HAS_O3_FLAG)
Expand All @@ -43,14 +35,10 @@ if (DEFINED BENCHMARK_CXX_LINKER_FLAGS)
endif()

add_library(output_test_helper STATIC output_test_helper.cc output_test.h)
target_link_libraries(output_test_helper PRIVATE benchmark::benchmark)

macro(compile_benchmark_test name)
add_executable(${name} "${name}.cc")
target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT})
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
target_compile_options( ${name} PRIVATE --diag_suppress partial_override )
endif()
endmacro(compile_benchmark_test)

macro(compile_benchmark_test_with_main name)
Expand All @@ -60,43 +48,26 @@ endmacro(compile_benchmark_test_with_main)

macro(compile_output_test name)
add_executable(${name} "${name}.cc" output_test.h)
target_link_libraries(${name} output_test_helper benchmark::benchmark_main
target_link_libraries(${name} output_test_helper benchmark::benchmark
${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
endmacro(compile_output_test)

macro(benchmark_add_test)
add_test(${ARGV})
if(WIN32 AND BUILD_SHARED_LIBS)
cmake_parse_arguments(TEST "" "NAME" "" ${ARGN})
set_tests_properties(${TEST_NAME} PROPERTIES ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$<TARGET_FILE_DIR:benchmark::benchmark>")
endif()
endmacro(benchmark_add_test)

# Demonstration executable
compile_benchmark_test(benchmark_test)
benchmark_add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01s)
add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01)

compile_benchmark_test(spec_arg_test)
benchmark_add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen)

compile_benchmark_test(spec_arg_verbosity_test)
benchmark_add_test(NAME spec_arg_verbosity COMMAND spec_arg_verbosity_test --v=42)
add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen)

compile_benchmark_test(benchmark_setup_teardown_test)
benchmark_add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test)
add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test)

compile_benchmark_test(filter_test)
macro(add_filter_test name filter expect)
benchmark_add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01s --benchmark_filter=${filter} ${expect})
benchmark_add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect})
add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect})
add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect})
endmacro(add_filter_test)

compile_benchmark_test(benchmark_min_time_flag_time_test)
benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_test)

compile_benchmark_test(benchmark_min_time_flag_iters_test)
benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test)

add_filter_test(filter_simple "Foo" 3)
add_filter_test(filter_simple_negative "-Foo" 2)
add_filter_test(filter_suffix "BM_.*" 4)
Expand All @@ -117,83 +88,78 @@ add_filter_test(filter_regex_end ".*Ba$" 1)
add_filter_test(filter_regex_end_negative "-.*Ba$" 4)

compile_benchmark_test(options_test)
benchmark_add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s)
add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01)

compile_benchmark_test(basic_test)
benchmark_add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01s)
add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01)

compile_output_test(repetitions_test)
benchmark_add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01s --benchmark_repetitions=3)
add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01 --benchmark_repetitions=3)

compile_benchmark_test(diagnostics_test)
benchmark_add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01s)
add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01)

compile_benchmark_test(skip_with_error_test)
benchmark_add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01s)
add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01)

compile_benchmark_test(donotoptimize_test)
# Enable errors for deprecated declarations (DoNotOptimize(Tp const& value)).
check_cxx_compiler_flag(-Werror=deprecated-declarations BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG)
if (BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG)
target_compile_options (donotoptimize_test PRIVATE "-Werror=deprecated-declarations")
endif()
# Some of the issues with DoNotOptimize only occur when optimization is enabled
check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG)
if (BENCHMARK_HAS_O3_FLAG)
set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3")
endif()
benchmark_add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01s)
add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01)

compile_benchmark_test(fixture_test)
benchmark_add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01s)
add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01)

compile_benchmark_test(register_benchmark_test)
benchmark_add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01s)
add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01)

compile_benchmark_test(map_test)
benchmark_add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01s)
add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01)

compile_benchmark_test(multiple_ranges_test)
benchmark_add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01s)
add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01)

compile_benchmark_test(args_product_test)
benchmark_add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01s)
add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01)

compile_benchmark_test_with_main(link_main_test)
benchmark_add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01s)
add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01)

compile_output_test(reporter_output_test)
benchmark_add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01s)
add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01)

compile_output_test(templated_fixture_test)
benchmark_add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01s)
add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01)

compile_output_test(user_counters_test)
benchmark_add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01s)
add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01)

compile_output_test(perf_counters_test)
benchmark_add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01s --benchmark_perf_counters=CYCLES,INSTRUCTIONS)
add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01 --benchmark_perf_counters=CYCLES,BRANCHES)

compile_output_test(internal_threading_test)
benchmark_add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s)
add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01)

compile_output_test(report_aggregates_only_test)
benchmark_add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01s)
add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01)

compile_output_test(display_aggregates_only_test)
benchmark_add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01s)
add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01)

compile_output_test(user_counters_tabular_test)
benchmark_add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01s)
add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01)

compile_output_test(user_counters_thousands_test)
benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01s)
add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01)

compile_output_test(memory_manager_test)
benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s)
add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01)

# MSVC does not allow setting the language standard to C++98/03.
if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG)
if (BENCHMARK_HAS_CXX03_FLAG)
compile_benchmark_test(cxx03_test)
set_target_properties(cxx03_test
PROPERTIES
Expand All @@ -204,22 +170,22 @@ if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
# causing the test to fail to compile. To prevent this we explicitly disable
# the warning.
check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR)
check_cxx_compiler_flag(-Wno-lto-type-mismatch BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
# Cannot set_target_properties multiple times here because the warnings will
# be overwritten on each call
set (DISABLE_LTO_WARNINGS "")
if (BENCHMARK_HAS_WNO_ODR)
set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-odr")
if (BENCHMARK_ENABLE_LTO AND BENCHMARK_HAS_WNO_ODR)
set_target_properties(cxx03_test
PROPERTIES
LINK_FLAGS "-Wno-odr")
endif()
if (BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-lto-type-mismatch")
endif()
set_target_properties(cxx03_test PROPERTIES LINK_FLAGS "${DISABLE_LTO_WARNINGS}")
benchmark_add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01s)
add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01)
endif()

# Attempt to work around flaky test failures when running on Appveyor servers.
if (DEFINED ENV{APPVEYOR})
set(COMPLEXITY_MIN_TIME "0.5")
else()
set(COMPLEXITY_MIN_TIME "0.01")
endif()
compile_output_test(complexity_test)
benchmark_add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=1000000x)
add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME})

###############################################################################
# GoogleTest Unit Tests
Expand All @@ -234,12 +200,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)

macro(add_gtest name)
compile_gtest(${name})
benchmark_add_test(NAME ${name} COMMAND ${name})
if(WIN32 AND BUILD_SHARED_LIBS)
set_tests_properties(${name} PROPERTIES
ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$<TARGET_FILE_DIR:benchmark::benchmark>;PATH=path_list_prepend:$<TARGET_FILE_DIR:gmock_main>"
)
endif()
add_test(NAME ${name} COMMAND ${name})
endmacro()

add_gtest(benchmark_gtest)
Expand All @@ -249,8 +210,6 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
add_gtest(statistics_gtest)
add_gtest(string_util_gtest)
add_gtest(perf_counters_gtest)
add_gtest(time_unit_gtest)
add_gtest(min_time_parse_gtest)
endif(BENCHMARK_ENABLE_GTEST_TESTS)

###############################################################################
Expand Down
4 changes: 2 additions & 2 deletions third-party/benchmark/test/args_product_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class ArgsProductFixture : public ::benchmark::Fixture {
{2, 15, 10, 9},
{4, 5, 6, 11}}) {}

void SetUp(const ::benchmark::State& state) override {
void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE {
std::vector<int64_t> ranges = {state.range(0), state.range(1),
state.range(2), state.range(3)};

Expand All @@ -34,7 +34,7 @@ class ArgsProductFixture : public ::benchmark::Fixture {

// NOTE: This is not TearDown as we want to check after _all_ runs are
// complete.
~ArgsProductFixture() override {
virtual ~ArgsProductFixture() {
if (actualValues != expectedValues) {
std::cout << "EXPECTED\n";
for (const auto& v : expectedValues) {
Expand Down
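The fixture above checks that every expected argument tuple was visited; the tuples themselves come from ArgsProduct, which registers one run per element of the cross product of the argument lists. A minimal sketch of that registration (hypothetical benchmark name, public API only):

```cpp
#include <benchmark/benchmark.h>

static void BM_Grid(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.range(0) + state.range(1));
  }
}
// Registers one run per element of the cross product:
// {1,10}, {1,20}, {2,10}, {2,20}, {4,10}, {4,20}.
BENCHMARK(BM_Grid)->ArgsProduct({{1, 2, 4}, {10, 20}});

BENCHMARK_MAIN();
```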
9 changes: 4 additions & 5 deletions third-party/benchmark/test/basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@

void BM_empty(benchmark::State& state) {
for (auto _ : state) {
auto iterations = double(state.iterations()) * double(state.iterations());
benchmark::DoNotOptimize(iterations);
benchmark::DoNotOptimize(state.iterations());
}
}
BENCHMARK(BM_empty);
Expand Down Expand Up @@ -148,7 +147,7 @@ void BM_OneTemplateFunc(benchmark::State& state) {
auto arg = state.range(0);
T sum = 0;
for (auto _ : state) {
sum += static_cast<T>(arg);
sum += arg;
}
}
BENCHMARK(BM_OneTemplateFunc<int>)->Arg(1);
Expand All @@ -160,8 +159,8 @@ void BM_TwoTemplateFunc(benchmark::State& state) {
A sum = 0;
B prod = 1;
for (auto _ : state) {
sum += static_cast<A>(arg);
prod *= static_cast<B>(arg);
sum += arg;
prod *= arg;
}
}
BENCHMARK(BM_TwoTemplateFunc<int, double>)->Arg(1);
Expand Down
14 changes: 5 additions & 9 deletions third-party/benchmark/test/benchmark_gtest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
#include <vector>

#include "../src/benchmark_register.h"
#include "benchmark/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace benchmark {
namespace internal {
extern std::map<std::string, std::string>* global_context;

namespace {

Expand Down Expand Up @@ -38,9 +38,8 @@ TEST(AddRangeTest, Advanced64) {

TEST(AddRangeTest, FullRange8) {
std::vector<int8_t> dst;
AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), int8_t{8});
EXPECT_THAT(
dst, testing::ElementsAre(int8_t{1}, int8_t{8}, int8_t{64}, int8_t{127}));
AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), 8);
EXPECT_THAT(dst, testing::ElementsAre(1, 8, 64, 127));
}

TEST(AddRangeTest, FullRange64) {
Expand Down Expand Up @@ -130,13 +129,11 @@ TEST(AddRangeTest, FullNegativeRange64) {

TEST(AddRangeTest, Simple8) {
std::vector<int8_t> dst;
AddRange<int8_t>(&dst, int8_t{1}, int8_t{8}, int8_t{2});
EXPECT_THAT(dst,
testing::ElementsAre(int8_t{1}, int8_t{2}, int8_t{4}, int8_t{8}));
AddRange<int8_t>(&dst, 1, 8, 2);
EXPECT_THAT(dst, testing::ElementsAre(1, 2, 4, 8));
}

TEST(AddCustomContext, Simple) {
std::map<std::string, std::string> *&global_context = GetGlobalContext();
EXPECT_THAT(global_context, nullptr);

AddCustomContext("foo", "bar");
Expand All @@ -151,7 +148,6 @@ TEST(AddCustomContext, Simple) {
}

TEST(AddCustomContext, DuplicateKey) {
std::map<std::string, std::string> *&global_context = GetGlobalContext();
EXPECT_THAT(global_context, nullptr);

AddCustomContext("foo", "bar");
Expand Down
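The AddCustomContext tests above exercise the global context map. In user code the call looks like the sketch below (assuming a benchmark version that exports benchmark::AddCustomContext; the key/value pair is hypothetical and appears once in the report's context block):

```cpp
#include <benchmark/benchmark.h>

static void BM_Noop(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_Noop);

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  // Emitted once in the console header, or in the "context" object of the
  // JSON output; duplicate keys trigger the warning tested above.
  benchmark::AddCustomContext("scm_revision", "deadbeef");
  benchmark::RunSpecifiedBenchmarks();
  return 0;
}
```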
8 changes: 0 additions & 8 deletions third-party/benchmark/test/benchmark_name_gtest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,6 @@ TEST(BenchmarkNameTest, MinTime) {
EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_time:3.4s");
}

TEST(BenchmarkNameTest, MinWarmUpTime) {
auto name = BenchmarkName();
name.function_name = "function_name";
name.args = "some_args:3/4";
name.min_warmup_time = "min_warmup_time:3.5s";
EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_warmup_time:3.5s");
}

TEST(BenchmarkNameTest, Iterations) {
auto name = BenchmarkName();
name.function_name = "function_name";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ class BenchmarkTest : public testing::Test {
void Execute(const std::string& pattern) {
queue->Clear();

std::unique_ptr<BenchmarkReporter> reporter(new NullReporter());
BenchmarkReporter* reporter = new NullReporter;
FLAGS_benchmark_filter = pattern;
RunSpecifiedBenchmarks(reporter.get());
RunSpecifiedBenchmarks(reporter);
delete reporter;

queue->Put("DONE"); // End marker
}
Expand Down
18 changes: 9 additions & 9 deletions third-party/benchmark/test/benchmark_setup_teardown_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,19 @@

// Test that Setup() and Teardown() are called exactly once
// for each benchmark run (single-threaded).
namespace singlethreaded {
namespace single {
static int setup_call = 0;
static int teardown_call = 0;
} // namespace singlethreaded
} // namespace single
static void DoSetup1(const benchmark::State& state) {
++singlethreaded::setup_call;
++single::setup_call;

// Setup/Teardown should never be called with any thread_idx != 0.
assert(state.thread_index() == 0);
}

static void DoTeardown1(const benchmark::State& state) {
++singlethreaded::teardown_call;
++single::teardown_call;
assert(state.thread_index() == 0);
}

Expand Down Expand Up @@ -80,11 +80,11 @@ int fixture_setup = 0;

class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture {
public:
void SetUp(const ::benchmark::State&) override {
void SetUp(const ::benchmark::State&) BENCHMARK_OVERRIDE {
fixture_interaction::fixture_setup++;
}

~FIXTURE_BECHMARK_NAME() override {}
~FIXTURE_BECHMARK_NAME() {}
};

BENCHMARK_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)(benchmark::State& st) {
Expand Down Expand Up @@ -134,8 +134,8 @@ int main(int argc, char** argv) {
assert(ret > 0);

// Setup/Teardown is called once for each arg group (1,3,5,7).
assert(singlethreaded::setup_call == 4);
assert(singlethreaded::teardown_call == 4);
assert(single::setup_call == 4);
assert(single::teardown_call == 4);

// 3 group of threads calling this function (3,5,10).
assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3);
Expand All @@ -145,7 +145,7 @@ int main(int argc, char** argv) {

// Setup is called 4 times, once for each arg group (1,3,5,7)
assert(fixture_interaction::setup == 4);
// Fixture::Setup is called every time the bm routine is run.
// The exact number is nondeterministic, so we just assert that
// it's more than setup.
assert(fixture_interaction::fixture_setup > fixture_interaction::setup);
Expand Down
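The counters above verify the contract being tested: Setup/Teardown fire once per benchmark run (one pair per arg group), on thread 0 only, never per iteration. A minimal sketch of the registration this test exercises (hypothetical names, public API only):

```cpp
#include <benchmark/benchmark.h>

static void DoSetup(const benchmark::State&) { /* acquire a resource */ }
static void DoTeardown(const benchmark::State&) { /* release it */ }

static void BM_WithHooks(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.range(0));
  }
}
// One Setup/Teardown pair per run -- here, one pair for Arg(1) and one for
// Arg(3) -- not one pair per iteration.
BENCHMARK(BM_WithHooks)->Setup(DoSetup)->Teardown(DoTeardown)->Arg(1)->Arg(3);

BENCHMARK_MAIN();
```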
65 changes: 6 additions & 59 deletions third-party/benchmark/test/benchmark_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include <stdint.h>

#include <chrono>
#include <complex>
#include <cstdlib>
#include <iostream>
#include <limits>
Expand All @@ -16,7 +15,6 @@
#include <sstream>
#include <string>
#include <thread>
#include <type_traits>
#include <utility>
#include <vector>

Expand All @@ -28,7 +26,7 @@

namespace {

int BENCHMARK_NOINLINE Factorial(int n) {
int BENCHMARK_NOINLINE Factorial(uint32_t n) {
return (n == 1) ? 1 : n * Factorial(n - 1);
}

Expand Down Expand Up @@ -76,8 +74,7 @@ BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
static void BM_CalculatePi(benchmark::State& state) {
static const int depth = 1024;
for (auto _ : state) {
double pi = CalculatePi(static_cast<int>(depth));
benchmark::DoNotOptimize(pi);
benchmark::DoNotOptimize(CalculatePi(static_cast<int>(depth)));
}
}
BENCHMARK(BM_CalculatePi)->Threads(8);
Expand All @@ -93,8 +90,7 @@ static void BM_SetInsert(benchmark::State& state) {
for (int j = 0; j < state.range(1); ++j) data.insert(rand());
}
state.SetItemsProcessed(state.iterations() * state.range(1));
state.SetBytesProcessed(state.iterations() * state.range(1) *
static_cast<int64_t>(sizeof(int)));
state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int));
}

// Test many inserts at once to reduce the total iterations needed. Otherwise,
Expand All @@ -112,7 +108,7 @@ static void BM_Sequential(benchmark::State& state) {
}
const int64_t items_processed = state.iterations() * state.range(0);
state.SetItemsProcessed(items_processed);
state.SetBytesProcessed(items_processed * static_cast<int64_t>(sizeof(v)));
state.SetBytesProcessed(items_processed * sizeof(v));
}
BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)
->Range(1 << 0, 1 << 10);
Expand All @@ -126,10 +122,7 @@ static void BM_StringCompare(benchmark::State& state) {
size_t len = static_cast<size_t>(state.range(0));
std::string s1(len, '-');
std::string s2(len, '-');
for (auto _ : state) {
auto comp = s1.compare(s2);
benchmark::DoNotOptimize(comp);
}
for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2));
}
BENCHMARK(BM_StringCompare)->Range(1, 1 << 20);

Expand Down Expand Up @@ -176,7 +169,7 @@ static void BM_ParallelMemset(benchmark::State& state) {
for (int i = from; i < to; i++) {
// No need to lock test_vector_mu as ranges
// do not overlap between threads.
benchmark::DoNotOptimize(test_vector->at(static_cast<size_t>(i)) = 1);
benchmark::DoNotOptimize(test_vector->at(i) = 1);
}
}

Expand Down Expand Up @@ -227,31 +220,6 @@ void BM_non_template_args(benchmark::State& state, int, double) {
}
BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);

template <class T, class U, class... ExtraArgs>
void BM_template2_capture(benchmark::State& state, ExtraArgs&&... extra_args) {
static_assert(std::is_same<T, void>::value, "");
static_assert(std::is_same<U, char*>::value, "");
static_assert(std::is_same<ExtraArgs..., unsigned int>::value, "");
unsigned int dummy[sizeof...(ExtraArgs)] = {extra_args...};
assert(dummy[0] == 42);
for (auto _ : state) {
}
}
BENCHMARK_TEMPLATE2_CAPTURE(BM_template2_capture, void, char*, foo, 42U);
BENCHMARK_CAPTURE((BM_template2_capture<void, char*>), foo, 42U);

template <class T, class... ExtraArgs>
void BM_template1_capture(benchmark::State& state, ExtraArgs&&... extra_args) {
static_assert(std::is_same<T, void>::value, "");
static_assert(std::is_same<ExtraArgs..., unsigned long>::value, "");
unsigned long dummy[sizeof...(ExtraArgs)] = {extra_args...};
assert(dummy[0] == 24);
for (auto _ : state) {
}
}
BENCHMARK_TEMPLATE1_CAPTURE(BM_template1_capture, void, foo, 24UL);
BENCHMARK_CAPTURE(BM_template1_capture<void>, foo, 24UL);

#endif // BENCHMARK_HAS_CXX11

static void BM_DenseThreadRanges(benchmark::State& st) {
Expand All @@ -276,25 +244,4 @@ BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3);
BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2);
BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3);

static void BM_BenchmarkName(benchmark::State& state) {
for (auto _ : state) {
}

// Check that the benchmark name is passed correctly to `state`.
assert("BM_BenchmarkName" == state.name());
}
BENCHMARK(BM_BenchmarkName);

// regression test for #1446
template <typename type>
static void BM_templated_test(benchmark::State& state) {
for (auto _ : state) {
type created_string;
benchmark::DoNotOptimize(created_string);
}
}

static auto BM_templated_test_double = BM_templated_test<std::complex<double>>;
BENCHMARK(BM_templated_test_double);

BENCHMARK_MAIN();
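The capture-macro tests above (BENCHMARK_CAPTURE and the removed *_TEMPLATE*_CAPTURE variants) bind extra arguments at registration time and forward them to the benchmark function after the State&. A minimal BENCHMARK_CAPTURE sketch (hypothetical benchmark, public API only):

```cpp
#include <string>

#include <benchmark/benchmark.h>

// The two captured arguments are forwarded after the State&.
static void BM_Repeat(benchmark::State& state, std::string piece, int copies) {
  for (auto _ : state) {
    std::string s;
    for (int i = 0; i < copies; ++i) s += piece;
    benchmark::DoNotOptimize(s);
  }
}
// Registers "BM_Repeat/abc_x4" with piece="abc", copies=4.
BENCHMARK_CAPTURE(BM_Repeat, abc_x4, std::string("abc"), 4);

BENCHMARK_MAIN();
```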
1 change: 0 additions & 1 deletion third-party/benchmark/test/clobber_memory_assembly_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#ifdef __clang__
#pragma clang diagnostic ignored "-Wreturn-type"
#endif
BENCHMARK_DISABLE_DEPRECATED_WARNING

extern "C" {

Expand Down
158 changes: 57 additions & 101 deletions third-party/benchmark/test/complexity_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ int AddComplexityTest(const std::string &test_name,
AddCases(
TC_ConsoleOut,
{{"^%bigo_name %bigo_str %bigo_str[ ]*$"},
{"^%bigo_name", MR_Not}, // Assert we we didn't only matched a name.
{"^%bigo_name", MR_Not}, // Assert we didn't only matched a name.
{"^%rms_name %rms %rms[ ]*$", MR_Next}});
AddCases(
TC_JSONOut,
Expand Down Expand Up @@ -69,44 +69,35 @@ int AddComplexityTest(const std::string &test_name,

void BM_Complexity_O1(benchmark::State &state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
benchmark::DoNotOptimize(state.iterations());
long tmp = state.iterations();
benchmark::DoNotOptimize(tmp);
for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
benchmark::DoNotOptimize(state.iterations());
tmp *= state.iterations();
benchmark::DoNotOptimize(tmp);
for (int i = 0; i < 1024; ++i) {
benchmark::DoNotOptimize(&i);
}

// always 1ns per iteration
state.SetIterationTime(42 * 1e-9);
}
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1);
BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity();
BENCHMARK(BM_Complexity_O1)
->Range(1, 1 << 18)
->UseManualTime()
->Complexity(benchmark::o1);
BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->UseManualTime()->Complexity();
BENCHMARK(BM_Complexity_O1)
->Range(1, 1 << 18)
->UseManualTime()
->Complexity([](benchmark::IterationCount) { return 1.0; });

const char *one_test_name = "BM_Complexity_O1/manual_time";
const char *big_o_1_test_name = "BM_Complexity_O1/manual_time_BigO";
const char *rms_o_1_test_name = "BM_Complexity_O1/manual_time_RMS";
const char *enum_auto_big_o_1 = "\\([0-9]+\\)";
const char *one_test_name = "BM_Complexity_O1";
const char *big_o_1_test_name = "BM_Complexity_O1_BigO";
const char *rms_o_1_test_name = "BM_Complexity_O1_RMS";
const char *enum_big_o_1 = "\\([0-9]+\\)";
// FIXME: Tolerate both '(1)' and 'lgN' as output when the complexity is auto
// deduced.
// See https://github.com/google/benchmark/issues/272
const char *auto_big_o_1 = "(\\([0-9]+\\))|(lgN)";
const char *lambda_big_o_1 = "f\\(N\\)";

// Add enum tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
enum_auto_big_o_1, /*family_index=*/0);
enum_big_o_1, /*family_index=*/0);

// Add auto tests
// Add auto enum tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
enum_auto_big_o_1, /*family_index=*/1);
auto_big_o_1, /*family_index=*/1);

// Add lambda tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
Expand All @@ -116,102 +107,83 @@ ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
// --------------------------- Testing BigO O(N) --------------------------- //
// ========================================================================= //

std::vector<int> ConstructRandomVector(int64_t size) {
std::vector<int> v;
v.reserve(static_cast<int>(size));
for (int i = 0; i < size; ++i) {
v.push_back(static_cast<int>(std::rand() % size));
}
return v;
}

void BM_Complexity_O_N(benchmark::State &state) {
auto v = ConstructRandomVector(state.range(0));
// Test worst case scenario (item not in vector)
const int64_t item_not_in_vector = state.range(0) * 2;
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
benchmark::DoNotOptimize(state.iterations());
long tmp = state.iterations();
benchmark::DoNotOptimize(tmp);
for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
benchmark::DoNotOptimize(state.iterations());
tmp *= state.iterations();
benchmark::DoNotOptimize(tmp);
}

// 1ns per iteration per entry
state.SetIterationTime(static_cast<double>(state.range(0)) * 42.0 * 1e-9);
benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector));
}
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O_N)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20)
->UseManualTime()
->Range(1 << 10, 1 << 16)
->Complexity(benchmark::oN);
BENCHMARK(BM_Complexity_O_N)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20)
->UseManualTime()
->Complexity();
BENCHMARK(BM_Complexity_O_N)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20)
->UseManualTime()
->Range(1 << 10, 1 << 16)
->Complexity([](benchmark::IterationCount n) -> double {
return static_cast<double>(n);
});
BENCHMARK(BM_Complexity_O_N)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16)
->Complexity();

const char *n_test_name = "BM_Complexity_O_N/manual_time";
const char *big_o_n_test_name = "BM_Complexity_O_N/manual_time_BigO";
const char *rms_o_n_test_name = "BM_Complexity_O_N/manual_time_RMS";
const char *n_test_name = "BM_Complexity_O_N";
const char *big_o_n_test_name = "BM_Complexity_O_N_BigO";
const char *rms_o_n_test_name = "BM_Complexity_O_N_RMS";
const char *enum_auto_big_o_n = "N";
const char *lambda_big_o_n = "f\\(N\\)";

// Add enum tests
ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
enum_auto_big_o_n, /*family_index=*/3);

// Add auto tests
ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
enum_auto_big_o_n, /*family_index=*/4);

// Add lambda tests
ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
lambda_big_o_n, /*family_index=*/5);
lambda_big_o_n, /*family_index=*/4);

// ========================================================================= //
// ------------------------- Testing BigO O(NlgN) ------------------------- //
// ------------------------- Testing BigO O(N*lgN) ------------------------- //
// ========================================================================= //

static const double kLog2E = 1.44269504088896340736;
static void BM_Complexity_O_N_log_N(benchmark::State &state) {
auto v = ConstructRandomVector(state.range(0));
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
benchmark::DoNotOptimize(state.iterations());
long tmp = state.iterations();
benchmark::DoNotOptimize(tmp);
for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
benchmark::DoNotOptimize(state.iterations());
tmp *= state.iterations();
benchmark::DoNotOptimize(tmp);
}

state.SetIterationTime(static_cast<double>(state.range(0)) * kLog2E *
std::log(state.range(0)) * 42.0 * 1e-9);
std::sort(v.begin(), v.end());
}
state.SetComplexityN(state.range(0));
}
static const double kLog2E = 1.44269504088896340736;
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
->Range(1 << 10, 1U << 24)
->UseManualTime()
->Range(1 << 10, 1 << 16)
->Complexity(benchmark::oNLogN);
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
->Range(1 << 10, 1U << 24)
->UseManualTime()
->Complexity();
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
->Range(1 << 10, 1U << 24)
->UseManualTime()
->Range(1 << 10, 1 << 16)
->Complexity([](benchmark::IterationCount n) {
return kLog2E * static_cast<double>(n) * std::log(static_cast<double>(n));
return kLog2E * n * log(static_cast<double>(n));
});
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16)
->Complexity();

const char *n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time";
const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time_BigO";
const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time_RMS";
const char *n_lg_n_test_name = "BM_Complexity_O_N_log_N";
const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO";
const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS";
const char *enum_auto_big_o_n_lg_n = "NlgN";
const char *lambda_big_o_n_lg_n = "f\\(N\\)";

Expand All @@ -220,15 +192,10 @@ ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n,
/*family_index=*/6);

// NOTE: auto big-o is wrong.
ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n,
/*family_index=*/7);

//// Add lambda tests
// Add lambda tests
ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n,
/*family_index=*/8);
/*family_index=*/7);

// ========================================================================= //
// -------- Testing formatting of Complexity with captured args ------------ //
Expand All @@ -238,30 +205,19 @@ void BM_ComplexityCaptureArgs(benchmark::State &state, int n) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
benchmark::DoNotOptimize(state.iterations());
long tmp = state.iterations();
benchmark::DoNotOptimize(tmp);
for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
benchmark::DoNotOptimize(state.iterations());
tmp *= state.iterations();
benchmark::DoNotOptimize(tmp);
}

state.SetIterationTime(static_cast<double>(state.range(0)) * 42.0 * 1e-9);
}
state.SetComplexityN(n);
}

BENCHMARK_CAPTURE(BM_ComplexityCaptureArgs, capture_test, 100)
->UseManualTime()
->Complexity(benchmark::oN)
->Ranges({{1, 2}, {3, 4}});

const std::string complexity_capture_name =
"BM_ComplexityCaptureArgs/capture_test/manual_time";
"BM_ComplexityCaptureArgs/capture_test";

ADD_COMPLEXITY_CASES(complexity_capture_name, complexity_capture_name + "_BigO",
complexity_capture_name + "_RMS", "N",
/*family_index=*/9);
complexity_capture_name + "_RMS", "N", /*family_index=*/9);

// ========================================================================= //
// --------------------------- TEST CASES END ------------------------------ //
Expand Down
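All of these complexity tests follow the same pattern: the benchmark reports its problem size via SetComplexityN, and Complexity() (with an enum, a lambda, or auto-deduction) tells the framework which curve to fit against the per-iteration times, producing the _BigO and _RMS rows matched above. A minimal sketch of that pattern (hypothetical benchmark, public API only):

```cpp
#include <algorithm>
#include <vector>

#include <benchmark/benchmark.h>

static void BM_FindLinear(benchmark::State& state) {
  std::vector<int> v(static_cast<size_t>(state.range(0)), 1);
  for (auto _ : state) {
    // Worst case: 2 is never present, so std::find scans the whole vector.
    benchmark::DoNotOptimize(std::find(v.begin(), v.end(), 2));
  }
  // Report the problem size so the framework can fit time ~= coef * f(N).
  state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_FindLinear)
    ->RangeMultiplier(2)
    ->Range(1 << 10, 1 << 16)
    ->Complexity(benchmark::oN);

BENCHMARK_MAIN();
```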
15 changes: 2 additions & 13 deletions third-party/benchmark/test/diagnostics_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ void BM_diagnostic_test(benchmark::State& state) {
if (called_once == false) try_invalid_pause_resume(state);

for (auto _ : state) {
auto iterations = double(state.iterations()) * double(state.iterations());
benchmark::DoNotOptimize(iterations);
benchmark::DoNotOptimize(state.iterations());
}

if (called_once == false) try_invalid_pause_resume(state);
Expand All @@ -65,8 +64,7 @@ void BM_diagnostic_test_keep_running(benchmark::State& state) {
if (called_once == false) try_invalid_pause_resume(state);

while (state.KeepRunning()) {
auto iterations = double(state.iterations()) * double(state.iterations());
benchmark::DoNotOptimize(iterations);
benchmark::DoNotOptimize(state.iterations());
}

if (called_once == false) try_invalid_pause_resume(state);
Expand All @@ -76,16 +74,7 @@ void BM_diagnostic_test_keep_running(benchmark::State& state) {
BENCHMARK(BM_diagnostic_test_keep_running);

int main(int argc, char* argv[]) {
#ifdef NDEBUG
// This test is exercising functionality for debug builds, which are not
// available in release builds. Skip the test if we are in that environment
// to avoid a test failure.
std::cout << "Diagnostic test disabled in release build" << std::endl;
(void)argc;
(void)argv;
#else
benchmark::internal::GetAbortHandler() = &TestHandler;
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
#endif
}
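try_invalid_pause_resume above aborts (in debug builds) because PauseTiming/ResumeTiming are only legal while the benchmark is running. The well-formed usage they guard against misuse of looks like this sketch (hypothetical benchmark; the pause excludes the scratch allocation from the measurement):

```cpp
#include <vector>

#include <benchmark/benchmark.h>

static void BM_WithUntimedSetup(benchmark::State& state) {
  for (auto _ : state) {
    state.PauseTiming();  // legal only inside the running measurement loop
    std::vector<int> scratch(1024, 1);
    state.ResumeTiming();
    benchmark::DoNotOptimize(scratch.data());
  }
}
BENCHMARK(BM_WithUntimedSetup);

BENCHMARK_MAIN();
```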
40 changes: 0 additions & 40 deletions third-party/benchmark/test/donotoptimize_assembly_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,12 @@
#ifdef __clang__
#pragma clang diagnostic ignored "-Wreturn-type"
#endif
BENCHMARK_DISABLE_DEPRECATED_WARNING

extern "C" {

extern int ExternInt;
extern int ExternInt2;
extern int ExternInt3;
extern int BigArray[2049];

const int ConstBigArray[2049]{};

inline int Add42(int x) { return x + 42; }

Expand All @@ -27,15 +23,7 @@ struct Large {
int value;
int data[2];
};

struct ExtraLarge {
int arr[2049];
};
}

extern ExtraLarge ExtraLargeObj;
const ExtraLarge ConstExtraLargeObj{};

// CHECK-LABEL: test_with_rvalue:
extern "C" void test_with_rvalue() {
benchmark::DoNotOptimize(Add42(0));
Expand Down Expand Up @@ -80,22 +68,6 @@ extern "C" void test_with_large_lvalue() {
// CHECK: ret
}

// CHECK-LABEL: test_with_extra_large_lvalue_with_op:
extern "C" void test_with_extra_large_lvalue_with_op() {
ExtraLargeObj.arr[16] = 42;
benchmark::DoNotOptimize(ExtraLargeObj);
// CHECK: movl $42, ExtraLargeObj+64(%rip)
// CHECK: ret
}

// CHECK-LABEL: test_with_big_array_with_op
extern "C" void test_with_big_array_with_op() {
BigArray[16] = 42;
benchmark::DoNotOptimize(BigArray);
// CHECK: movl $42, BigArray+64(%rip)
// CHECK: ret
}

// CHECK-LABEL: test_with_non_trivial_lvalue:
extern "C" void test_with_non_trivial_lvalue() {
NotTriviallyCopyable NTC(ExternInt);
Expand Down Expand Up @@ -124,18 +96,6 @@ extern "C" void test_with_large_const_lvalue() {
// CHECK: ret
}

// CHECK-LABEL: test_with_const_extra_large_obj:
extern "C" void test_with_const_extra_large_obj() {
benchmark::DoNotOptimize(ConstExtraLargeObj);
// CHECK: ret
}

// CHECK-LABEL: test_with_const_big_array
extern "C" void test_with_const_big_array() {
benchmark::DoNotOptimize(ConstBigArray);
// CHECK: ret
}

// CHECK-LABEL: test_with_non_trivial_const_lvalue:
extern "C" void test_with_non_trivial_const_lvalue() {
const NotTriviallyCopyable Obj(ExternInt);
Expand Down
28 changes: 6 additions & 22 deletions third-party/benchmark/test/donotoptimize_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

namespace {
#if defined(__GNUC__)
std::int64_t double_up(const std::int64_t x) __attribute__((const));
std::uint64_t double_up(const std::uint64_t x) __attribute__((const));
#endif
std::int64_t double_up(const std::int64_t x) { return x * 2; }
std::uint64_t double_up(const std::uint64_t x) { return x * 2; }
} // namespace

// Using DoNotOptimize on types like BitRef seem to cause a lot of problems
Expand All @@ -29,15 +29,6 @@ struct BitRef {
int main(int, char*[]) {
// this test verifies compilation of DoNotOptimize() for some types

char buffer1[1] = "";
benchmark::DoNotOptimize(buffer1);

char buffer2[2] = "";
benchmark::DoNotOptimize(buffer2);

char buffer3[3] = "";
benchmark::DoNotOptimize(buffer3);

char buffer8[8] = "";
benchmark::DoNotOptimize(buffer8);

Expand All @@ -46,24 +37,17 @@ int main(int, char*[]) {

char buffer1024[1024] = "";
benchmark::DoNotOptimize(buffer1024);
char* bptr = &buffer1024[0];
benchmark::DoNotOptimize(bptr);
benchmark::DoNotOptimize(&buffer1024[0]);

int x = 123;
benchmark::DoNotOptimize(x);
int* xp = &x;
benchmark::DoNotOptimize(xp);
benchmark::DoNotOptimize(&x);
benchmark::DoNotOptimize(x += 42);

std::int64_t y = double_up(x);
benchmark::DoNotOptimize(y);
benchmark::DoNotOptimize(double_up(x));

// These tests are to e
benchmark::DoNotOptimize(BitRef::Make());
BitRef lval = BitRef::Make();
benchmark::DoNotOptimize(lval);

#ifdef BENCHMARK_HAS_CXX11
// Check that rvalues are accepted.
benchmark::DoNotOptimize(BitRef::Make());
#endif
}
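DoNotOptimize is the escape hatch these tests compile-check: it forces the compiler to treat its argument as read (and, for lvalues, written), so the surrounding computation cannot be deleted. A minimal sketch of the two call shapes (assumes a C++11 build for the rvalue overload):

```cpp
#include <benchmark/benchmark.h>

int main() {
  int x = 0;
  benchmark::DoNotOptimize(x);       // lvalue: x must stay observable
  benchmark::DoNotOptimize(x + 42);  // rvalue: the result must be materialized
  return 0;
}
```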
31 changes: 16 additions & 15 deletions third-party/benchmark/test/filter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,28 @@ namespace {

class TestReporter : public benchmark::ConsoleReporter {
public:
bool ReportContext(const Context& context) override {
virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
return ConsoleReporter::ReportContext(context);
};

void ReportRuns(const std::vector<Run>& report) override {
virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
++count_;
max_family_index_ = std::max(max_family_index_, report[0].family_index);
max_family_index_ =
std::max<size_t>(max_family_index_, report[0].family_index);
ConsoleReporter::ReportRuns(report);
};

TestReporter() : count_(0), max_family_index_(0) {}

~TestReporter() override {}
virtual ~TestReporter() {}

int GetCount() const { return count_; }
size_t GetCount() const { return count_; }

int64_t GetMaxFamilyIndex() const { return max_family_index_; }
size_t GetMaxFamilyIndex() const { return max_family_index_; }

private:
mutable int count_;
mutable int64_t max_family_index_;
mutable size_t count_;
mutable size_t max_family_index_;
};

} // end namespace
Expand Down Expand Up @@ -78,13 +79,13 @@ int main(int argc, char** argv) {
benchmark::Initialize(&argc, argv);

TestReporter test_reporter;
const int64_t returned_count =
static_cast<int64_t>(benchmark::RunSpecifiedBenchmarks(&test_reporter));
const size_t returned_count =
benchmark::RunSpecifiedBenchmarks(&test_reporter);

if (argc == 2) {
// Make sure we ran all of the tests
std::stringstream ss(argv[1]);
int64_t expected_return;
size_t expected_return;
ss >> expected_return;

if (returned_count != expected_return) {
Expand All @@ -94,17 +95,17 @@ int main(int argc, char** argv) {
return -1;
}

const int64_t expected_reports = list_only ? 0 : expected_return;
const int64_t reports_count = test_reporter.GetCount();
const size_t expected_reports = list_only ? 0 : expected_return;
const size_t reports_count = test_reporter.GetCount();
if (reports_count != expected_reports) {
std::cerr << "ERROR: Expected " << expected_reports
<< " tests to be run but reported_count = " << reports_count
<< std::endl;
return -1;
}

const int64_t max_family_index = test_reporter.GetMaxFamilyIndex();
const int64_t num_families = reports_count == 0 ? 0 : 1 + max_family_index;
const size_t max_family_index = test_reporter.GetMaxFamilyIndex();
const size_t num_families = reports_count == 0 ? 0 : 1 + max_family_index;
if (num_families != expected_reports) {
std::cerr << "ERROR: Expected " << expected_reports
<< " test families to be run but num_families = "
Expand Down
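The TestReporter above piggybacks on ConsoleReporter to count report batches, and the return value of RunSpecifiedBenchmarks is the number of benchmarks that matched the filter. A condensed sketch of the same wiring (hypothetical names; plain override is used, which BENCHMARK_OVERRIDE expands to on C++11 builds):

```cpp
#include <cstddef>
#include <vector>

#include <benchmark/benchmark.h>

class CountingReporter : public benchmark::ConsoleReporter {
 public:
  void ReportRuns(const std::vector<Run>& report) override {
    ++count;  // one call per batch of runs for a benchmark
    ConsoleReporter::ReportRuns(report);
  }
  int count = 0;
};

static void BM_Something(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_Something);

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  CountingReporter reporter;
  // Returns how many benchmarks matched --benchmark_filter and were run.
  size_t matched = benchmark::RunSpecifiedBenchmarks(&reporter);
  return matched == 0 ? 1 : 0;  // sketch: fail if the filter matched nothing
}
```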
6 changes: 3 additions & 3 deletions third-party/benchmark/test/fixture_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@

class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture {
public:
void SetUp(const ::benchmark::State& state) override {
void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE {
if (state.thread_index() == 0) {
assert(data.get() == nullptr);
data.reset(new int(42));
}
}

void TearDown(const ::benchmark::State& state) override {
void TearDown(const ::benchmark::State& state) BENCHMARK_OVERRIDE {
if (state.thread_index() == 0) {
assert(data.get() != nullptr);
data.reset();
}
}

~FIXTURE_BECHMARK_NAME() override { assert(data == nullptr); }
~FIXTURE_BECHMARK_NAME() { assert(data == nullptr); }

std::unique_ptr<int> data;
};
Expand Down
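The same SetUp/TearDown lifecycle in a self-contained form (hypothetical fixture; plain override is assumed here, where this diff's older tree spells it BENCHMARK_OVERRIDE):

```cpp
#include <memory>

#include <benchmark/benchmark.h>

class PtrFixture : public benchmark::Fixture {
 public:
  // Runs before/after each benchmark run, mirroring the asserts above.
  void SetUp(const benchmark::State&) override { data.reset(new int(42)); }
  void TearDown(const benchmark::State&) override { data.reset(); }
  std::unique_ptr<int> data;
};

BENCHMARK_F(PtrFixture, Read)(benchmark::State& st) {
  for (auto _ : st) {
    benchmark::DoNotOptimize(*data);
  }
}

BENCHMARK_MAIN();
```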
3 changes: 1 addition & 2 deletions third-party/benchmark/test/link_main_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

void BM_empty(benchmark::State& state) {
for (auto _ : state) {
auto iterations = double(state.iterations()) * double(state.iterations());
benchmark::DoNotOptimize(iterations);
benchmark::DoNotOptimize(state.iterations());
}
}
BENCHMARK(BM_empty);