Skip to content

Commit d636dc8

Browse files
committed
[libc++] Support comparison of more than two data sets in compare-benchmarks
1 parent 88324f1 commit d636dc8

File tree

1 file changed

+45
-35
lines changed

1 file changed

+45
-35
lines changed

libcxx/utils/compare-benchmarks

Lines changed: 45 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python3
22

33
import argparse
4+
import functools
45
import pathlib
56
import re
67
import statistics
@@ -62,35 +63,35 @@ def plain_text_comparison(data, metric, baseline_name=None, candidate_name=None)
6263
"""
6364
Create a tabulated comparison of the baseline and the candidate for the given metric.
6465
"""
66+
# Compute additional info in new columns. In text mode, we can assume that we are
67+
# comparing exactly two data sets (suffixed _0 and _1).
68+
data['difference'] = data[f'{metric}_1'] - data[f'{metric}_0']
69+
data['percent'] = 100 * (data['difference'] / data[f'{metric}_0'])
70+
6571
data = data.replace(numpy.nan, None).sort_values(by='benchmark') # avoid NaNs in tabulate output
6672
headers = ['Benchmark', baseline_name, candidate_name, 'Difference', '% Difference']
6773
fmt = (None, '.2f', '.2f', '.2f', '.2f')
68-
table = data[['benchmark', f'{metric}_baseline', f'{metric}_candidate', 'difference', 'percent']].set_index('benchmark')
74+
table = data[['benchmark', f'{metric}_0', f'{metric}_1', 'difference', 'percent']].set_index('benchmark')
6975
return tabulate.tabulate(table, headers=headers, floatfmt=fmt, numalign='right')
7076

71-
def create_chart(data, metric, subtitle=None, baseline_name=None, candidate_name=None):
77+
def create_chart(data, metric, subtitle=None, series_names=None):
7278
"""
73-
Create a bar chart comparing the given metric between the baseline and the candidate.
79+
Create a bar chart comparing the given metric across the provided series.
7480
"""
75-
data = data.sort_values(by='benchmark').rename(columns={
76-
f'{metric}_baseline': baseline_name,
77-
f'{metric}_candidate': candidate_name
78-
})
79-
figure = plotly.express.bar(data, title=f'{baseline_name} vs {candidate_name}',
80-
subtitle=subtitle,
81-
x='benchmark', y=[baseline_name, candidate_name], barmode='group')
81+
data = data.sort_values(by='benchmark').rename(columns={f'{metric}_{i}': series_names[i] for i in range(len(series_names))})
82+
title = ' vs '.join(series_names)
83+
figure = plotly.express.bar(data, title=title, subtitle=subtitle, x='benchmark', y=series_names, barmode='group')
8284
figure.update_layout(xaxis_title='', yaxis_title='', legend_title='')
8385
return figure
8486

8587
def main(argv):
8688
parser = argparse.ArgumentParser(
8789
prog='compare-benchmarks',
88-
description='Compare the results of two sets of benchmarks in LNT format.',
90+
description='Compare the results of multiple sets of benchmarks in LNT format.',
8991
epilog='This script depends on the modules listed in `libcxx/utils/requirements.txt`.')
90-
parser.add_argument('baseline', type=argparse.FileType('r'),
91-
help='Path to a LNT format file containing the benchmark results for the baseline.')
92-
parser.add_argument('candidate', type=argparse.FileType('r'),
93-
help='Path to a LNT format file containing the benchmark results for the candidate.')
92+
parser.add_argument('files', type=argparse.FileType('r'), nargs='+',
93+
help='Path to LNT format files containing the benchmark results to compare. In the text format, '
94+
'exactly two files must be compared.')
9495
parser.add_argument('--output', '-o', type=pathlib.Path, required=False,
9596
help='Path of a file where to output the resulting comparison. If the output format is `text`, '
9697
'default to stdout. If the output format is `chart`, default to a temporary file which is '
@@ -107,43 +108,52 @@ def main(argv):
107108
parser.add_argument('--open', action='store_true',
108109
help='Whether to automatically open the generated HTML file when finished. This option only makes sense '
109110
'when the output format is `chart`.')
110-
parser.add_argument('--baseline-name', type=str, default='Baseline',
111-
help='Optional name to use for the "baseline" label.')
112-
parser.add_argument('--candidate-name', type=str, default='Candidate',
113-
help='Optional name to use for the "candidate" label.')
111+
parser.add_argument('--series-names', type=str, required=False,
112+
help='Optional comma-delimited list of names to use for the various series. By default, we use '
113+
'Baseline and Candidate for two input files, and CandidateN for subsequent inputs.')
114114
parser.add_argument('--subtitle', type=str, required=False,
115115
help='Optional subtitle to use for the chart. This can be used to help identify the contents of the chart. '
116116
'This option cannot be used with the plain text output.')
117117
args = parser.parse_args(argv)
118118

119-
if args.format == 'text' and args.subtitle is not None:
120-
parser.error('Passing --subtitle makes no sense with --format=text')
121-
122-
if args.format == 'text' and args.open:
123-
parser.error('Passing --open makes no sense with --format=text')
119+
if args.format == 'text':
120+
if len(args.files) != 2:
121+
parser.error('--format=text requires exactly two input files to compare')
122+
if args.subtitle is not None:
123+
parser.error('Passing --subtitle makes no sense with --format=text')
124+
if args.open:
125+
parser.error('Passing --open makes no sense with --format=text')
126+
127+
if args.series_names is None:
128+
args.series_names = ['Baseline']
129+
if len(args.files) == 2:
130+
args.series_names += ['Candidate']
131+
elif len(args.files) > 2:
132+
args.series_names.extend(f'Candidate{n}' for n in range(1, len(args.files)))
133+
else:
134+
args.series_names = args.series_names.split(',')
135+
if len(args.series_names) != len(args.files):
136+
parser.error(f'Passed incorrect number of series names: got {len(args.series_names)} series names but {len(args.files)} inputs to compare')
124137

125-
baseline = pandas.DataFrame(parse_lnt(args.baseline.readlines()))
126-
candidate = pandas.DataFrame(parse_lnt(args.candidate.readlines()))
138+
# Parse the raw LNT data and store each input in a dataframe
139+
lnt_inputs = [parse_lnt(file.readlines()) for file in args.files]
140+
inputs = [pandas.DataFrame(lnt).rename(columns={args.metric: f'{args.metric}_{i}'}) for (i, lnt) in enumerate(lnt_inputs)]
127141

128-
# Join the baseline and the candidate into a single dataframe and add some new columns
129-
data = baseline.merge(candidate, how='outer', on='benchmark', suffixes=('_baseline', '_candidate'))
130-
data['difference'] = data[f'{args.metric}_candidate'] - data[f'{args.metric}_baseline']
131-
data['percent'] = 100 * (data['difference'] / data[f'{args.metric}_baseline'])
142+
# Join the inputs into a single dataframe
143+
data = functools.reduce(lambda a, b: a.merge(b, how='outer', on='benchmark'), inputs)
132144

133145
if args.filter is not None:
134146
keeplist = [b for b in data['benchmark'] if re.search(args.filter, b) is not None]
135147
data = data[data['benchmark'].isin(keeplist)]
136148

137149
if args.format == 'chart':
138-
figure = create_chart(data, args.metric, subtitle=args.subtitle,
139-
baseline_name=args.baseline_name,
140-
candidate_name=args.candidate_name)
150+
figure = create_chart(data, args.metric, subtitle=args.subtitle, series_names=args.series_names)
141151
do_open = args.output is None or args.open
142152
output = args.output or tempfile.NamedTemporaryFile(suffix='.html').name
143153
plotly.io.write_html(figure, file=output, auto_open=do_open)
144154
else:
145-
diff = plain_text_comparison(data, args.metric, baseline_name=args.baseline_name,
146-
candidate_name=args.candidate_name)
155+
diff = plain_text_comparison(data, args.metric, baseline_name=args.series_names[0],
156+
candidate_name=args.series_names[1])
147157
diff += '\n'
148158
if args.output is not None:
149159
with open(args.output, 'w') as out:

0 commit comments

Comments (0)