diff --git a/libcxx/docs/TestingLibcxx.rst b/libcxx/docs/TestingLibcxx.rst
index 56cf4aca236f9..44463385b81a7 100644
--- a/libcxx/docs/TestingLibcxx.rst
+++ b/libcxx/docs/TestingLibcxx.rst
@@ -471,7 +471,7 @@ removed from the Standard. These tests should be written like:
 Benchmarks
 ==========
 
-Libc++'s test suite also contains benchmarks. The benchmarks are written using the `Google Benchmark`_
+Libc++'s test suite also contains benchmarks. Many benchmarks are written using the `Google Benchmark`_
 library, a copy of which is stored in the LLVM monorepo. For more information about using the Google
 Benchmark library, see the `official documentation `_.
 
@@ -490,27 +490,46 @@ run through ``check-cxx`` for anything, instead run the benchmarks manually usin
 the instructions for running individual tests.
 
 If you want to compare the results of different benchmark runs, we recommend using the
-``libcxx-compare-benchmarks`` helper tool. First, configure CMake in a build directory
-and run the benchmark:
+``compare-benchmarks`` helper tool. Note that the script has some dependencies, which can
+be installed with:
 
 .. code-block:: bash
 
-   $ cmake -S runtimes -B [...]
-   $ libcxx/utils/libcxx-lit libcxx/test/benchmarks/string.bench.cpp --param optimization=speed
+   $ python -m venv .venv && source .venv/bin/activate # Optional but recommended
+   $ pip install -r libcxx/utils/requirements.txt
 
-Then, do the same for the second configuration you want to test. Use a different build
-directory for that configuration:
+Once that's done, start by configuring CMake in a build directory and running one or
+more benchmarks, as usual:
 
 .. code-block:: bash
 
-   $ cmake -S runtimes -B [...]
-   $ libcxx/utils/libcxx-lit libcxx/test/benchmarks/string.bench.cpp --param optimization=speed
+   $ cmake -S runtimes -B [...]
+   $ libcxx/utils/libcxx-lit libcxx/test/benchmarks/string.bench.cpp --param optimization=speed
 
-Finally, use ``libcxx-compare-benchmarks`` to compare both:
+Then, get the consolidated benchmark output for that run using ``consolidate-benchmarks``:
 
 .. code-block:: bash
 
-   $ libcxx/utils/libcxx-compare-benchmarks libcxx/test/benchmarks/string.bench.cpp
+   $ libcxx/utils/consolidate-benchmarks <build-directory> > baseline.lnt
+
+The ``baseline.lnt`` file will contain a consolidation of all the benchmark results present in the build
+directory. You can then make the desired modifications to the code, run the benchmark(s) again, and then run:
+
+.. code-block:: bash
+
+   $ libcxx/utils/consolidate-benchmarks <build-directory> > candidate.lnt
+
+Finally, use ``compare-benchmarks`` to compare both:
+
+.. code-block:: bash
+
+   $ libcxx/utils/compare-benchmarks baseline.lnt candidate.lnt
+
+   # Useful one-liner when iterating locally:
+   $ libcxx/utils/compare-benchmarks baseline.lnt <(libcxx/utils/consolidate-benchmarks <build-directory>)
+
+The ``compare-benchmarks`` script provides some useful options, such as creating a chart to easily visualize
+differences in a browser window. Use ``compare-benchmarks --help`` for details.
 
 .. _`Google Benchmark`: https://github.com/google/benchmark
 
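For reference, the ``.lnt`` files produced and consumed above are plain text, with one ``<benchmark-name>.<metric> <value>`` entry per line; this is the format expected by the ``parse_lnt`` helper of ``compare-benchmarks`` shown below. When the same metric appears several times for a benchmark, ``compare-benchmarks`` aggregates the samples (median by default). The benchmark names and timings in the following sketch are invented for illustration only:

.. code-block:: bash

   $ cat baseline.lnt
   join_strings.execution_time 1023.0
   join_strings.execution_time 1027.5
   sort_ints.execution_time 88.2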
diff --git a/libcxx/utils/compare-benchmarks b/libcxx/utils/compare-benchmarks
new file mode 100755
index 0000000000000..9bda5f1a27949
--- /dev/null
+++ b/libcxx/utils/compare-benchmarks
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+
+import argparse
+import re
+import statistics
+import sys
+
+import plotly
+import tabulate
+
+def parse_lnt(lines):
+    """
+    Parse lines in LNT format and return a dictionary of the form:
+
+    {
+        'benchmark1': {
+            'metric1': [float],
+            'metric2': [float],
+            ...
+        },
+        'benchmark2': {
+            'metric1': [float],
+            'metric2': [float],
+            ...
+        },
+        ...
+    }
+
+    Each metric may have multiple values.
+    """
+    results = {}
+    for line in lines:
+        line = line.strip()
+        if not line:
+            continue
+
+        (identifier, value) = line.split(' ')
+        (name, metric) = identifier.split('.')
+        if name not in results:
+            results[name] = {}
+        if metric not in results[name]:
+            results[name][metric] = []
+        results[name][metric].append(float(value))
+    return results
+
+def plain_text_comparison(benchmarks, baseline, candidate):
+    """
+    Create a tabulated comparison of the baseline and the candidate.
+    """
+    headers = ['Benchmark', 'Baseline', 'Candidate', 'Difference', '% Difference']
+    fmt = (None, '.2f', '.2f', '.2f', '.2f')
+    table = []
+    for (bm, base, cand) in zip(benchmarks, baseline, candidate):
+        diff = (cand - base) if base and cand else None
+        percent = 100 * (diff / base) if base and cand else None
+        row = [bm, base, cand, diff, percent]
+        table.append(row)
+    return tabulate.tabulate(table, headers=headers, floatfmt=fmt, numalign='right')
+
+def create_chart(benchmarks, baseline, candidate):
+    """
+    Create a bar chart comparing 'baseline' and 'candidate'.
+    """
+    figure = plotly.graph_objects.Figure()
+    figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=baseline, name='Baseline'))
+    figure.add_trace(plotly.graph_objects.Bar(x=benchmarks, y=candidate, name='Candidate'))
+    return figure
+
+def prepare_series(baseline, candidate, metric, aggregate=statistics.median):
+    """
+    Prepare the data for being formatted or displayed as a chart.
+
+    Metrics that have more than one value are aggregated using the given aggregation function.
+    """
+    all_benchmarks = sorted(list(set(baseline.keys()) | set(candidate.keys())))
+    baseline_series = []
+    candidate_series = []
+    for bm in all_benchmarks:
+        baseline_series.append(aggregate(baseline[bm][metric]) if bm in baseline and metric in baseline[bm] else None)
+        candidate_series.append(aggregate(candidate[bm][metric]) if bm in candidate and metric in candidate[bm] else None)
+    return (all_benchmarks, baseline_series, candidate_series)
+
+def main(argv):
+    parser = argparse.ArgumentParser(
+        prog='compare-benchmarks',
+        description='Compare the results of two sets of benchmarks in LNT format.',
+        epilog='This script requires the `tabulate` and the `plotly` Python modules.')
+    parser.add_argument('baseline', type=argparse.FileType('r'),
+                        help='Path to an LNT-format file containing the benchmark results for the baseline.')
+    parser.add_argument('candidate', type=argparse.FileType('r'),
+                        help='Path to an LNT-format file containing the benchmark results for the candidate.')
+    parser.add_argument('--metric', type=str, default='execution_time',
+                        help='The metric to compare. LNT data may contain multiple metrics (e.g. code size, execution time, etc.) -- '
+                             'this option allows selecting which metric is being analyzed. The default is "execution_time".')
+    parser.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout,
+                        help='Path to a file where the resulting comparison should be written. Defaults to stdout.')
+    parser.add_argument('--filter', type=str, required=False,
+                        help='An optional regular expression used to filter the benchmarks included in the comparison. '
+                             'Only benchmarks whose names match the regular expression will be included.')
+    parser.add_argument('--format', type=str, choices=['text', 'chart'], default='text',
+                        help='Select the output format. "text" generates a plain-text comparison in tabular form, and "chart" '
+                             'generates a self-contained HTML graph that can be opened in a browser. The default is "text".')
+    args = parser.parse_args(argv)
+
+    baseline = parse_lnt(args.baseline.readlines())
+    candidate = parse_lnt(args.candidate.readlines())
+
+    if args.filter is not None:
+        regex = re.compile(args.filter)
+        baseline = {k: v for (k, v) in baseline.items() if regex.search(k)}
+        candidate = {k: v for (k, v) in candidate.items() if regex.search(k)}
+
+    (benchmarks, baseline_series, candidate_series) = prepare_series(baseline, candidate, args.metric)
+
+    if args.format == 'chart':
+        figure = create_chart(benchmarks, baseline_series, candidate_series)
+        plotly.io.write_html(figure, file=args.output)
+    else:
+        diff = plain_text_comparison(benchmarks, baseline_series, candidate_series)
+        args.output.write(diff)
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
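Beyond the plain-text comparison used in the documentation above, the options defined by this script can be combined to narrow down and visualize the results; one possible invocation (the filter pattern and output file name are arbitrary examples) is:

.. code-block:: bash

   $ libcxx/utils/compare-benchmarks baseline.lnt candidate.lnt --filter 'join_strings' --metric execution_time --format chart -o comparison.html

With ``--format chart``, the output is a self-contained HTML file that can be opened in a browser, as described in the ``--format`` help text.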
"text" generates a plain-text comparison in tabular form, and "chart" ' + 'generates a self-contained HTML graph that can be opened in a browser. The default is text.') + args = parser.parse_args(argv) + + baseline = parse_lnt(args.baseline.readlines()) + candidate = parse_lnt(args.candidate.readlines()) + + if args.filter is not None: + regex = re.compile(args.filter) + baseline = {k: v for (k, v) in baseline.items() if regex.search(k)} + candidate = {k: v for (k, v) in candidate.items() if regex.search(k)} + + (benchmarks, baseline_series, candidate_series) = prepare_series(baseline, candidate, args.metric) + + if args.format == 'chart': + figure = create_chart(benchmarks, baseline_series, candidate_series) + plotly.io.write_html(figure, file=args.output) + else: + diff = plain_text_comparison(benchmarks, baseline_series, candidate_series) + args.output.write(diff) + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/libcxx/utils/consolidate-benchmarks b/libcxx/utils/consolidate-benchmarks new file mode 100755 index 0000000000000..c84607f1991c1 --- /dev/null +++ b/libcxx/utils/consolidate-benchmarks @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +import argparse +import pathlib +import sys + +def main(argv): + parser = argparse.ArgumentParser( + prog='consolidate-benchmarks', + description='Consolidate benchmark result files (in LNT format) into a single LNT-format file.') + parser.add_argument('files_or_directories', type=str, nargs='+', + help='Path to files or directories containing LNT data to consolidate. Directories are searched ' + 'recursively for files with a .lnt extension.') + parser.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout, + help='Where to output the result. Default to stdout.') + args = parser.parse_args(argv) + + files = [] + for arg in args.files_or_directories: + path = pathlib.Path(arg) + if path.is_dir(): + for p in path.rglob('*.lnt'): + files.append(p) + else: + files.append(path) + + for file in files: + for line in file.open().readlines(): + line = line.strip() + if not line: + continue + args.output.write(line) + args.output.write('\n') + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/libcxx/utils/libcxx-benchmark-json b/libcxx/utils/libcxx-benchmark-json deleted file mode 100755 index 7f743c32caf40..0000000000000 --- a/libcxx/utils/libcxx-benchmark-json +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash - -set -e - -PROGNAME="$(basename "${0}")" -MONOREPO_ROOT="$(realpath $(dirname "${PROGNAME}"))" -function usage() { -cat < benchmarks... - -Print the path to the JSON files containing benchmark results for the given benchmarks. - -This requires those benchmarks to have already been run, i.e. this only resolves the path -to the benchmark .json file within the build directory. - - The path to the build directory. -benchmarks... Paths of the benchmarks to extract the results for. Those paths are relative to ''. - -Example -======= -$ cmake -S runtimes -B build/ -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi" -$ libcxx-lit build/ -sv libcxx/test/benchmarks/algorithms/for_each.bench.cpp -$ less \$(${PROGNAME} build/ libcxx/test/benchmarks/algorithms/for_each.bench.cpp) -EOF -} - -if [[ "${1}" == "-h" || "${1}" == "--help" ]]; then - usage - exit 0 -fi - -if [[ $# -lt 1 ]]; then - usage - exit 1 -fi - -build_dir="${1}" -shift - -for benchmark in ${@}; do - # Normalize the paths by turning all benchmarks paths into absolute ones and then making them - # relative to the root of the monorepo. 
- benchmark="$(realpath ${benchmark})" - relative=$(python -c "import os; import sys; print(os.path.relpath(sys.argv[1], sys.argv[2]))" "${benchmark}" "${MONOREPO_ROOT}") - - # Extract components of the benchmark path - directory="$(dirname ${relative})" - file="$(basename ${relative})" - - # Reconstruct the (slightly weird) path to the benchmark json file. This should be kept in sync - # whenever the test suite changes. - json="${build_dir}/${directory}/Output/${file}.dir/benchmark-result.json" - if [[ -f "${json}" ]]; then - echo "${json}" - fi -done diff --git a/libcxx/utils/libcxx-compare-benchmarks b/libcxx/utils/libcxx-compare-benchmarks deleted file mode 100755 index 08c53b2420c8e..0000000000000 --- a/libcxx/utils/libcxx-compare-benchmarks +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env bash - -set -e - -PROGNAME="$(basename "${0}")" -MONOREPO_ROOT="$(realpath $(dirname "${PROGNAME}"))" -function usage() { -cat < benchmarks... [-- gbench-args...] - -Compare the given benchmarks between the baseline and the candidate build directories. - -This requires those benchmarks to have already been generated in both build directories. - - The path to the build directory considered the baseline. - The path to the build directory considered the candidate. -benchmarks... Paths of the benchmarks to compare. Those paths are relative to ''. -[-- gbench-args...] Any arguments provided after '--' will be passed as-is to GoogleBenchmark's compare.py tool. - -Example -======= -$ libcxx-lit build1/ -sv libcxx/test/benchmarks/algorithms/for_each.bench.cpp -$ libcxx-lit build2/ -sv libcxx/test/benchmarks/algorithms/for_each.bench.cpp -$ ${PROGNAME} build1/ build2/ libcxx/test/benchmarks/algorithms/for_each.bench.cpp -EOF -} - -if [[ "${1}" == "-h" || "${1}" == "--help" ]]; then - usage - exit 0 -fi - -if [[ $# -lt 1 ]]; then - usage - exit 1 -fi - -baseline="${1}" -candidate="${2}" -shift; shift - -GBENCH="${MONOREPO_ROOT}/third-party/benchmark" - -python3 -m venv /tmp/libcxx-compare-benchmarks-venv -source /tmp/libcxx-compare-benchmarks-venv/bin/activate -pip3 install -r ${GBENCH}/tools/requirements.txt - -benchmarks="" -while [[ $# -gt 0 ]]; do - if [[ "${1}" == "--" ]]; then - shift - break - fi - benchmarks+=" ${1}" - shift -done - -for benchmark in ${benchmarks}; do - base="$(${MONOREPO_ROOT}/libcxx/utils/libcxx-benchmark-json ${baseline} ${benchmark})" - cand="$(${MONOREPO_ROOT}/libcxx/utils/libcxx-benchmark-json ${candidate} ${benchmark})" - - if [[ ! -e "${base}" ]]; then - echo "Benchmark ${benchmark} does not exist in the baseline" - continue - fi - if [[ ! -e "${cand}" ]]; then - echo "Benchmark ${benchmark} does not exist in the candidate" - continue - fi - - "${GBENCH}/tools/compare.py" benchmarks "${base}" "${cand}" ${@} -done diff --git a/libcxx/utils/requirements.txt b/libcxx/utils/requirements.txt new file mode 100644 index 0000000000000..de6e123eec54a --- /dev/null +++ b/libcxx/utils/requirements.txt @@ -0,0 +1,2 @@ +plotly +tabulate