
Conversation

@ldionne ldionne (Member) commented Sep 8, 2025

This is a step towards being able to easily report benchmark results to an LNT instance. The LNT format can also be the basis for other tools to perform A/B comparisons when doing experiments.
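For reference, the LNT text-file import format used by these scripts (see https://llvm.org/docs/lnt/importing_data.html#importing-data-in-a-text-file) is one `<benchmark-name>.<metric> <value>` line per measurement. A hypothetical results.lnt (benchmark names invented purely for illustration) would look like:

    BM_std_sort/1024.execution_time 98.1
    BM_vector_push_back/32.execution_time 12.5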

@ldionne ldionne requested a review from a team as a code owner September 8, 2025 13:53
@llvmbot llvmbot added the libc++ libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi. label Sep 8, 2025
@llvmbot llvmbot (Member) commented Sep 8, 2025

@llvm/pr-subscribers-libcxx

Author: Louis Dionne (ldionne)

Changes

This is a step towards being able to easily report benchmark results to an LNT instance. The LNT format can also be the basis for other tools to perform A/B comparisons when doing experiments.


Full diff: https://github.com/llvm/llvm-project/pull/157466.diff

4 Files Affected:

  • (modified) libcxx/test/benchmarks/spec.gen.py (+1-1)
  • (modified) libcxx/utils/libcxx/test/format.py (+4)
  • (added) libcxx/utils/parse-google-benchmark-results (+45)
  • (renamed) libcxx/utils/parse-spec-results (+1-1)
diff --git a/libcxx/test/benchmarks/spec.gen.py b/libcxx/test/benchmarks/spec.gen.py
index f7a7b7b047fdb..eb411823c25bb 100644
--- a/libcxx/test/benchmarks/spec.gen.py
+++ b/libcxx/test/benchmarks/spec.gen.py
@@ -74,5 +74,5 @@
 
     # Parse the results into a LNT-compatible format. This also errors out if there are no CSV files, which
     # means that the benchmark didn't run properly (the `runcpu` command above never reports a failure).
-    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-result %T/result/CPUv8.001.*.train.csv --output-format=lnt > %T/results.lnt')
+    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/CPUv8.001.*.train.csv --output-format=lnt > %T/results.lnt')
     print(f'RUN: cat %T/results.lnt')
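Assuming the standard `lnt importreport` workflow described in the LNT documentation linked from the parsing scripts in this patch (the machine name, run order, and test-suite name below are placeholders, and the exact options may vary with the LNT version), the generated results.lnt could then be turned into a submittable report roughly as follows:

    lnt importreport --machine=my-machine --order=1234 --testsuite=nts results.lnt report.json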
diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py
index a0b7b5bdb5b9f..5765afec399cf 100644
--- a/libcxx/utils/libcxx/test/format.py
+++ b/libcxx/utils/libcxx/test/format.py
@@ -12,6 +12,8 @@
 import os
 import re
 
+THIS_FILE = os.path.abspath(__file__)
+LIBCXX_UTILS = os.path.dirname(os.path.dirname(os.path.dirname(THIS_FILE)))
 
 def _getTempPaths(test):
     """
@@ -353,6 +355,8 @@ def execute(self, test, litConfig):
             ]
             if "enable-benchmarks=run" in test.config.available_features:
                 steps += ["%dbg(EXECUTED AS) %{exec} %t.exe --benchmark_out=%T/benchmark-result.json --benchmark_out_format=json"]
+                parse_results = os.path.join(LIBCXX_UTILS, 'parse-google-benchmark-results')
+                steps += [f"{parse_results} %T/benchmark-result.json --output-format=lnt > %T/results.lnt"]
             return self._executeShTest(test, litConfig, steps)
         elif re.search('[.]gen[.][^.]+$', filename): # This only happens when a generator test is not supported
             return self._executeShTest(test, litConfig, [])
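For context, the new LIBCXX_UTILS constant resolves to the libcxx/utils directory by walking three levels up from format.py. A minimal sketch of the equivalent computation (the intermediate variable names and the <repo> placeholder are only for illustration):

    import os

    THIS_FILE = os.path.abspath(__file__)        # <repo>/libcxx/utils/libcxx/test/format.py
    test_dir = os.path.dirname(THIS_FILE)        # <repo>/libcxx/utils/libcxx/test
    package_dir = os.path.dirname(test_dir)      # <repo>/libcxx/utils/libcxx
    LIBCXX_UTILS = os.path.dirname(package_dir)  # <repo>/libcxx/utils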
diff --git a/libcxx/utils/parse-google-benchmark-results b/libcxx/utils/parse-google-benchmark-results
new file mode 100755
index 0000000000000..280c8045db6c9
--- /dev/null
+++ b/libcxx/utils/parse-google-benchmark-results
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import json
+import sys
+
+def main(argv):
+    parser = argparse.ArgumentParser(
+        prog='parse-google-benchmark-results',
+        description='Parse Google Benchmark result files (in JSON format) into CSV or LNT compatible output.')
+    parser.add_argument('filename', type=argparse.FileType('r'), nargs='+',
+        help='One or more JSON files to extract the results from. The results parsed from each '
+             'file are concatenated together.')
+    parser.add_argument('--timing', type=str, choices=['real_time', 'cpu_time'], default='real_time',
+        help='The timing to extract from the Google Benchmark results. This can either be the '
+             '"real time" or the "CPU time". Default is "real time".')
+    parser.add_argument('--output-format', type=str, choices=['csv', 'lnt'], default='csv',
+        help='The desired output format for the data. `csv` is CSV format and `lnt` is a format compatible with '
+             '`lnt importreport` (see https://llvm.org/docs/lnt/importing_data.html#importing-data-in-a-text-file).')
+    args = parser.parse_args(argv)
+
+    # Parse the data from all files, aggregating the results
+    headers = ['Benchmark', args.timing]
+    rows = []
+    for file in args.filename:
+        js = json.load(file)
+        for bm in js['benchmarks']:
+            row = [bm['name'], bm[args.timing]]
+            rows.append(row)
+
+    # Print the results in the right format
+    if args.output_format == 'csv':
+        writer = csv.writer(sys.stdout)
+        writer.writerow(headers)
+        for row in rows:
+            writer.writerow(row)
+    elif args.output_format == 'lnt':
+        benchmark = headers.index('Benchmark')
+        time = headers.index(args.timing)
+        for row in rows:
+            print(f'{row[benchmark].replace(".", "_")}.execution_time {row[time]}')
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
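As a usage sketch for the new script (the benchmark names and timings below are invented for illustration), a Google Benchmark JSON result file such as:

    {
      "benchmarks": [
        { "name": "BM_std.sort/1024",       "real_time": 98.1, "cpu_time": 97.9 },
        { "name": "BM_vector_push_back/32", "real_time": 12.5, "cpu_time": 12.4 }
      ]
    }

produces, with the default --output-format=csv:

    Benchmark,real_time
    BM_std.sort/1024,98.1
    BM_vector_push_back/32,12.5

and, with --output-format=lnt (note that dots in the benchmark name are replaced with underscores, so that the only remaining dot separates the name from the metric):

    BM_std_sort/1024.execution_time 98.1
    BM_vector_push_back/32.execution_time 12.5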
diff --git a/libcxx/utils/parse-spec-result b/libcxx/utils/parse-spec-results
similarity index 98%
rename from libcxx/utils/parse-spec-result
rename to libcxx/utils/parse-spec-results
index bdfc966932255..3aff206f8959c 100755
--- a/libcxx/utils/parse-spec-result
+++ b/libcxx/utils/parse-spec-results
@@ -45,7 +45,7 @@ def main(argv):
         description='Parse SPEC result files (in CSV format) and extract the selected result table, in the selected format.')
     parser.add_argument('filename', type=argparse.FileType('r'), nargs='+',
        help='One or more CSV files to extract the results from. The results parsed from each file are concatenated '
-             'together, creating a single CSV table.')
+             'together.')
     parser.add_argument('--table', type=str, choices=['full', 'selected'], default='full',
         help='The name of the table to extract from SPEC results. `full` means extracting the Full Results Table '
              'and `selected` means extracting the Selected Results Table. Default is `full`.')


github-actions bot commented Sep 8, 2025

⚠️ The Python code formatter, darker, found issues in your code. ⚠️

You can test this locally with the following command:
darker --check --diff -r origin/main...HEAD libcxx/test/benchmarks/spec.gen.py libcxx/utils/libcxx/test/format.py

⚠️ The reproduction instructions above might return results for more than one PR in a stack if you are using a stacked PR workflow. You can limit the results by changing origin/main to the base branch/commit you want to compare against. ⚠️
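For example, in a stacked-PR workflow the comparison point could be narrowed to the parent change (placeholder revision shown):

    darker --check --diff -r <base-branch-or-commit>...HEAD libcxx/test/benchmarks/spec.gen.py libcxx/utils/libcxx/test/format.py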

View the diff from darker here.
--- test/benchmarks/spec.gen.py	2025-09-08 13:53:20.000000 +0000
+++ test/benchmarks/spec.gen.py	2025-09-08 13:55:45.875775 +0000
@@ -72,7 +72,9 @@
     print(f'RUN: %{{spec_dir}}/bin/runcpu --config %T/spec-config.cfg --size train --output-root %T --rebuild {benchmark}')
     print(f'RUN: rm -rf %T/benchspec') # remove the temporary directory, which can become quite large
 
     # Parse the results into a LNT-compatible format. This also errors out if there are no CSV files, which
     # means that the benchmark didn't run properly (the `runcpu` command above never reports a failure).
-    print(f'RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/CPUv8.001.*.train.csv --output-format=lnt > %T/results.lnt')
-    print(f'RUN: cat %T/results.lnt')
+    print(
+        f"RUN: %{{libcxx-dir}}/utils/parse-spec-results %T/result/CPUv8.001.*.train.csv --output-format=lnt > %T/results.lnt"
+    )
+    print(f"RUN: cat %T/results.lnt")
--- utils/libcxx/test/format.py	2025-09-08 13:53:20.000000 +0000
+++ utils/libcxx/test/format.py	2025-09-08 13:55:45.983884 +0000
@@ -352,13 +352,19 @@
                 )
             steps = [
                 "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} %{benchmark_flags} %{link_flags} -o %t.exe",
             ]
             if "enable-benchmarks=run" in test.config.available_features:
-                steps += ["%dbg(EXECUTED AS) %{exec} %t.exe --benchmark_out=%T/benchmark-result.json --benchmark_out_format=json"]
-                parse_results = os.path.join(LIBCXX_UTILS, 'parse-google-benchmark-results')
-                steps += [f"{parse_results} %T/benchmark-result.json --output-format=lnt > %T/results.lnt"]
+                steps += [
+                    "%dbg(EXECUTED AS) %{exec} %t.exe --benchmark_out=%T/benchmark-result.json --benchmark_out_format=json"
+                ]
+                parse_results = os.path.join(
+                    LIBCXX_UTILS, "parse-google-benchmark-results"
+                )
+                steps += [
+                    f"{parse_results} %T/benchmark-result.json --output-format=lnt > %T/results.lnt"
+                ]
             return self._executeShTest(test, litConfig, steps)
         elif re.search('[.]gen[.][^.]+$', filename): # This only happens when a generator test is not supported
             return self._executeShTest(test, litConfig, [])
         else:
             return lit.Test.Result(

@ldionne ldionne merged commit c424468 into llvm:main Sep 8, 2025
69 of 71 checks passed
@ldionne ldionne deleted the review/parse-gbench-results branch September 8, 2025 18:32