GH-39303: [Archery][Benchmarking] Allow setting C++ repetition min time

We want to be able to increase the number of repetitions for each C++ micro-benchmark without increasing the total runtime.
pitrou committed Dec 20, 2023
1 parent 7265689 commit 9a5374d
Showing 4 changed files with 213 additions and 17 deletions.
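
For instance, with the options this commit adds one could run every C++ micro-benchmark ten times, cap each repetition at 0.1 seconds, and forward an extra flag to the benchmark executables (a hypothetical invocation; the forwarded Google Benchmark flag is illustrative and not part of this commit):

archery benchmark run --repetitions=10 --repetition-min-time=0.1 \
    --cpp-benchmark-extras=--benchmark_counters_tabular=true
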
18 changes: 11 additions & 7 deletions dev/archery/archery/benchmark/google.py
@@ -37,9 +37,10 @@ class GoogleBenchmarkCommand(Command):
     notably `--benchmark_filter`, `--benchmark_format`, etc...
     """

-    def __init__(self, benchmark_bin, benchmark_filter=None):
+    def __init__(self, benchmark_bin, benchmark_filter=None, benchmark_extras=None):
         self.bin = benchmark_bin
         self.benchmark_filter = benchmark_filter
+        self.benchmark_extras = benchmark_extras or []

     def list_benchmarks(self):
         argv = ["--benchmark_list_tests"]

@@ -49,16 +50,19 @@ def list_benchmarks(self):
                           stderr=subprocess.PIPE)
         return str.splitlines(result.stdout.decode("utf-8"))

-    def results(self, repetitions=1):
+    def results(self, repetitions=1, repetition_min_time=None):
         with NamedTemporaryFile() as out:
-            argv = ["--benchmark_repetitions={}".format(repetitions),
-                    "--benchmark_out={}".format(out.name),
+            argv = [f"--benchmark_repetitions={repetitions}",
+                    f"--benchmark_out={out.name}",
                     "--benchmark_out_format=json"]

+            if repetition_min_time is not None:
+                argv.append(f"--benchmark_min_time={repetition_min_time:.6f}")
+
             if self.benchmark_filter:
-                argv.append(
-                    "--benchmark_filter={}".format(self.benchmark_filter)
-                )
+                argv.append(f"--benchmark_filter={self.benchmark_filter}")

+            argv += self.benchmark_extras
+
             self.run(*argv, check=True)
             return json.load(out)
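
For illustration, here is roughly the argv that results(repetitions=10, repetition_min_time=0.05) now assembles before invoking the benchmark binary (a sketch, not code from this commit; the temporary path, filter value, and extra flag are made up):

# Illustrative only: the flags results() builds for Google Benchmark.
argv = [
    "--benchmark_repetitions=10",
    "--benchmark_out=/tmp/tmpa1b2c3",      # NamedTemporaryFile's path
    "--benchmark_out_format=json",
    "--benchmark_min_time=0.050000",       # repetition_min_time rendered with :.6f
    "--benchmark_filter=FloatParsing",     # only appended when a filter is set
]
argv += ["--benchmark_counters_tabular=true"]  # i.e. whatever benchmark_extras holds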

14 changes: 10 additions & 4 deletions dev/archery/archery/benchmark/runner.py

@@ -42,10 +42,11 @@ def regex_filter(re_expr):

 class BenchmarkRunner:
     def __init__(self, suite_filter=None, benchmark_filter=None,
-                 repetitions=DEFAULT_REPETITIONS):
+                 repetitions=DEFAULT_REPETITIONS, repetition_min_time=None):
         self.suite_filter = suite_filter
         self.benchmark_filter = benchmark_filter
         self.repetitions = repetitions
+        self.repetition_min_time = repetition_min_time

     @property
     def suites(self):

@@ -107,9 +108,10 @@ def __repr__(self):
 class CppBenchmarkRunner(BenchmarkRunner):
     """ Run suites from a CMakeBuild. """

-    def __init__(self, build, **kwargs):
+    def __init__(self, build, benchmark_extras, **kwargs):
         """ Initialize a CppBenchmarkRunner. """
         self.build = build
+        self.benchmark_extras = benchmark_extras
         super().__init__(**kwargs)

     @staticmethod

@@ -142,14 +144,17 @@ def suites_binaries(self):

     def suite(self, name, suite_bin):
         """ Returns the resulting benchmarks for a given suite. """
-        suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter)
+        suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter,
+                                           self.benchmark_extras)

         # Ensure there will be data
         benchmark_names = suite_cmd.list_benchmarks()
         if not benchmark_names:
             return None

-        results = suite_cmd.results(repetitions=self.repetitions)
+        results = suite_cmd.results(
+            repetitions=self.repetitions,
+            repetition_min_time=self.repetition_min_time)
         benchmarks = GoogleBenchmark.from_json(results.get("benchmarks"))
         return BenchmarkSuite(name, benchmarks)

@@ -252,6 +257,7 @@ def suite(self, name):
         if not benchmark_names:
             return None

+        # TODO: support `repetition_min_time`
         results = suite_cmd.results(repetitions=self.repetitions)
         benchmarks = JavaMicrobenchmarkHarness.from_json(results)
         return BenchmarkSuite(name, benchmarks)
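
Putting the pieces together, a minimal sketch (assumed values; only the new keyword arguments matter here) of how the C++ runner now forwards both knobs:

# Sketch, not code from this commit.
runner = CppBenchmarkRunner(
    build,                          # a CMakeBuild exposing the benchmark binaries
    benchmark_extras=["--benchmark_counters_tabular=true"],  # hypothetical extra flag
    repetitions=10,
    repetition_min_time=0.05)
# suite() hands benchmark_extras to GoogleBenchmarkCommand, and repetitions /
# repetition_min_time to its results() call; the Java runner just above leaves
# repetition_min_time unsupported for now (see the TODO).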

25 changes: 19 additions & 6 deletions dev/archery/archery/cli.py

@@ -377,7 +377,10 @@ def check_language(ctx, param, value):
                      "Can be stacked. For language=java"),
         click.option("--cmake-extras", type=str, multiple=True,
                      help="Extra flags/options to pass to cmake invocation. "
-                          "Can be stacked. For language=cpp")
+                          "Can be stacked. For language=cpp"),
+        click.option("--cpp-benchmark-extras", type=str, multiple=True,
+                     help="Extra flags/options to pass to C++ benchmark executables. "
+                          "Can be stacked. For language=cpp"),
     ]

     cmd = java_toolchain_options(cmd)

@@ -440,12 +443,16 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
 @click.option("--repetitions", type=int, default=-1,
               help=("Number of repetitions of each benchmark. Increasing "
                     "may improve result precision. "
-                    "[default: 1 for cpp, 5 for java"))
+                    "[default: 1 for cpp, 5 for java]"))
+@click.option("--repetition-min-time", type=float, default=None,
+              help=("Minimum duration of each repetition in seconds. "
+                    "Currently only supported for language=cpp. "
+                    "[default: use runner-specific defaults]"))
 @click.pass_context
 def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
                   java_home, java_options, build_extras, benchmark_extras,
                   language, suite_filter, benchmark_filter, repetitions,
-                  **kwargs):
+                  repetition_min_time, cpp_benchmark_extras, **kwargs):
     """ Run benchmark suite.

     This command will run the benchmark suite for a single build. This is

@@ -468,13 +475,18 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
     \b
     archery benchmark run

+    \b
+    # Run the benchmarks on an existing build directory
+    \b
+    archery benchmark run /build/cpp
+
     \b
     # Run the benchmarks on the previous commit
     \b
     archery benchmark run HEAD~1

     \b
-    # Run the benchmarks on current previous commit
+    # Run the benchmarks on the current git workspace and output results as a JSON file.
     \b
     archery benchmark run --output=run.json
     """

@@ -488,8 +500,9 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
         repetitions = repetitions if repetitions != -1 else 1
         runner_base = CppBenchmarkRunner.from_rev_or_path(
             src, root, rev_or_path, conf,
-            repetitions=repetitions,
-            suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+            repetitions=repetitions, repetition_min_time=repetition_min_time,
+            suite_filter=suite_filter, benchmark_filter=benchmark_filter,
+            benchmark_extras=cpp_benchmark_extras)

     elif language == "java":
         for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
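
Two details worth noting here: --repetitions uses -1 as a sentinel that the C++ branch maps to 1 repetition (the option help documents 5 as the Java default), and --cpp-benchmark-extras is a click multiple option, so it can be stacked, e.g. (hypothetical Google Benchmark flags, not taken from this commit):

archery benchmark run --cpp-benchmark-extras=--benchmark_counters_tabular=true \
    --cpp-benchmark-extras=--benchmark_time_unit=us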

173 changes: 173 additions & 0 deletions dev/archery/archery/tests/test_benchmarks.py

@@ -81,6 +81,53 @@ def test_static_runner_from_json_not_a_regression():
     assert not comparison.regression


+def test_static_runner_from_json_multiple_values_not_a_regression():
+    # Same as above, but with multiple repetitions
+    archery_result = {
+        "suites": [
+            {
+                "name": "arrow-value-parsing-benchmark",
+                "benchmarks": [
+                    {
+                        "name": "FloatParsing<DoubleType>",
+                        "unit": "items_per_second",
+                        "less_is_better": False,
+                        "values": [
+                            93588476.22327498,
+                            94873831.3818328,
+                            95593675.20810866,
+                            95797325.6543961,
+                            96134728.05794072
+                        ],
+                        "time_unit": "ns",
+                        "times": [
+                            10537.724568456104,
+                            10575.162068480413,
+                            10599.271208720838,
+                            10679.028059166194,
+                            10827.995119861762
+                        ],
+                        "counters": {
+                            "family_index": 0,
+                            "per_family_instance_index": 0,
+                            "run_name": "FloatParsing<DoubleType>",
+                            "repetitions": 5,
+                            "repetition_index": 0,
+                            "threads": 1,
+                            "iterations": 10656
+                        }
+                    }
+                ]
+            }
+        ]
+    }
+
+    contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+    baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+    [comparison] = RunnerComparator(contender, baseline).comparisons
+    assert not comparison.regression
+
+
 def test_static_runner_from_json_regression():
     archery_result = {
         "suites": [

@@ -114,6 +161,58 @@ def test_static_runner_from_json_regression():
     assert comparison.regression


+def test_static_runner_from_json_multiple_values_regression():
+    # Same as above, but with multiple repetitions
+    archery_result = {
+        "suites": [
+            {
+                "name": "arrow-value-parsing-benchmark",
+                "benchmarks": [
+                    {
+                        "name": "FloatParsing<DoubleType>",
+                        "unit": "items_per_second",
+                        "less_is_better": False,
+                        "values": [
+                            93588476.22327498,
+                            94873831.3818328,
+                            95593675.20810866,
+                            95797325.6543961,
+                            96134728.05794072
+                        ],
+                        "time_unit": "ns",
+                        "times": [
+                            10537.724568456104,
+                            10575.162068480413,
+                            10599.271208720838,
+                            10679.028059166194,
+                            10827.995119861762
+                        ],
+                        "counters": {
+                            "family_index": 0,
+                            "per_family_instance_index": 0,
+                            "run_name": "FloatParsing<DoubleType>",
+                            "repetitions": 5,
+                            "repetition_index": 0,
+                            "threads": 1,
+                            "iterations": 10656
+                        }
+                    }
+                ]
+            }
+        ]
+    }
+
+    contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+
+    # introduce artificial regression
+    values = archery_result['suites'][0]['benchmarks'][0]['values']
+    values[:] = [v * 2 for v in values]
+    baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+
+    [comparison] = RunnerComparator(contender, baseline).comparisons
+    assert comparison.regression
+
+
 def test_benchmark_median():
     assert median([10]) == 10
     assert median([1, 2, 3]) == 2
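
With multiple repetitions, a benchmark's "values" field carries one entry per repetition, and the comparator reduces that list to a single score; the median helper exercised just above suggests the median is that reduction (inferred from these tests rather than shown in this diff). A sketch of it, using Python's statistics.median as a stand-in for archery's own helper:

from statistics import median  # stand-in for the median under test here
values = [93588476.22327498, 94873831.3818328, 95593675.20810866,
          95797325.6543961, 96134728.05794072]
median(values)  # -> 95593675.20810866, the single score compared across runs
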
@@ -381,3 +480,77 @@ def test_omits_aggregates():
     benchmark = GoogleBenchmark(name, [observation1, observation2])
     result = json.dumps(benchmark, cls=JsonEncoder)
     assert json.loads(result) == archery_result
+
+
+def test_multiple_observations():
+    name = "FloatParsing<DoubleType>"
+    google_results = [
+        {
+            'cpu_time': 10627.38199641615,
+            'family_index': 0,
+            'items_per_second': 94096551.75067839,
+            'iterations': 9487,
+            'name': 'FloatParsing<DoubleType>',
+            'per_family_instance_index': 0,
+            'real_time': 10628.84905663701,
+            'repetition_index': 0,
+            'repetitions': 3,
+            'run_name': 'FloatParsing<DoubleType>',
+            'run_type': 'iteration',
+            'threads': 1,
+            'time_unit': 'ns'
+        },
+        {
+            'cpu_time': 10633.318014124594,
+            'family_index': 0,
+            'items_per_second': 94044022.63448404,
+            'iterations': 9487,
+            'name': 'FloatParsing<DoubleType>',
+            'per_family_instance_index': 0,
+            'real_time': 10634.858754122948,
+            'repetition_index': 1,
+            'repetitions': 3,
+            'run_name': 'FloatParsing<DoubleType>',
+            'run_type': 'iteration',
+            'threads': 1,
+            'time_unit': 'ns'
+        },
+        {
+            'cpu_time': 10664.315484347,
+            'family_index': 0,
+            'items_per_second': 93770669.24434038,
+            'iterations': 9487,
+            'name': 'FloatParsing<DoubleType>',
+            'per_family_instance_index': 0,
+            'real_time': 10665.584589337563,
+            'repetition_index': 2,
+            'repetitions': 3,
+            'run_name': 'FloatParsing<DoubleType>',
+            'run_type': 'iteration',
+            'threads': 1,
+            'time_unit': 'ns'
+        }
+    ]
+
+    archery_result = {
+        'counters': {
+            'family_index': 0,
+            'iterations': 9487,
+            'per_family_instance_index': 0,
+            'repetition_index': 2,
+            'repetitions': 3,
+            'run_name': 'FloatParsing<DoubleType>',
+            'threads': 1
+        },
+        'less_is_better': False,
+        'name': 'FloatParsing<DoubleType>',
+        'time_unit': 'ns',
+        'times': [10628.84905663701, 10634.858754122948, 10665.584589337563],
+        'unit': 'items_per_second',
+        'values': [93770669.24434038, 94044022.63448404, 94096551.75067839]
+    }
+
+    observations = [GoogleBenchmarkObservation(**g) for g in google_results]
+    benchmark = GoogleBenchmark(name, observations)
+    result = json.dumps(benchmark, cls=JsonEncoder)
+    assert json.loads(result) == archery_result
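
This last test also pins down the aggregation contract: repeated observations sharing a run_name collapse into one benchmark whose 'values' (items_per_second) and 'times' (real_time) come out in ascending order, while 'counters' keeps the metadata of a single observation. A one-line check in the same spirit, reusing the names from the test above (a sketch, not part of the commit):

# Sketch mirroring test_multiple_observations: three repetitions collapse into
# one benchmark whose values are the sorted per-repetition items_per_second.
observations = [GoogleBenchmarkObservation(**g) for g in google_results]
benchmark = GoogleBenchmark("FloatParsing<DoubleType>", observations)
assert json.loads(json.dumps(benchmark, cls=JsonEncoder))["values"] == sorted(
    g["items_per_second"] for g in google_results)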