[libc++] Add a utility to visualize historical benchmark data locally

ldionne · ldionne · commit 00333ed37779 · 2025-09-19T10:47:09.000-04:00
This should eventually be done using `lnt` instead, but for the time
being this makes it easy to visualize historical data without having
an instance of `lnt` running.
diff --git a/libcxx/utils/compare-benchmarks b/libcxx/utils/compare-benchmarks
@@ -89,11 +89,11 @@ def main(argv):
         help='Path to a LNT format file containing the benchmark results for the baseline.')
     parser.add_argument('candidate', type=argparse.FileType('r'),
         help='Path to a LNT format file containing the benchmark results for the candidate.')
+    parser.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout,
+        help='Path of a file where to output the resulting comparison. Default to stdout.')
     parser.add_argument('--metric', type=str, default='execution_time',
         help='The metric to compare. LNT data may contain multiple metrics (e.g. code size, execution time, etc) -- '
              'this option allows selecting which metric is being analyzed. The default is "execution_time".')
-    parser.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout,
-        help='Path of a file where to output the resulting comparison. Default to stdout.')
     parser.add_argument('--filter', type=str, required=False,
         help='An optional regular expression used to filter the benchmarks included in the comparison. '
              'Only benchmarks whose names match the regular expression will be included.')
diff --git a/libcxx/utils/requirements.txt b/libcxx/utils/requirements.txt
@@ -1,2 +1,3 @@
 plotly
 tabulate
+tqdm
diff --git a/libcxx/utils/visualize-historical b/libcxx/utils/visualize-historical
@@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+
+import argparse
+import functools
+import os
+import pathlib
+import re
+import statistics
+import subprocess
+import sys
+import tempfile
+
+import plotly
+import tqdm
+
+@functools.total_ordering
+class Commit:
+    """
+    This class represents a commit inside a given Git repository.
+
+    Commits compare equal when they resolve to the same full SHA, and `a < b` holds when `a` is
+    a strict ancestor of `b`. All the information is obtained by running `git` in the repository.
+    """
+
+    def __init__(self, git_repo, sha):
+        """
+        Create a commit from the path to a Git repository and a revision naming a commit in it.
+        """
+        self._git_repo = git_repo
+        self._sha = sha
+
+    def __eq__(self, other):
+        """
+        Return whether two commits refer to the same commit.
+
+        This doesn't take into account the content of the Git tree at those commits, only the
+        'identity' of the commits themselves.
+        """
+        if not isinstance(other, Commit):
+            return NotImplemented
+        return self.fullrev == other.fullrev
+
+    def __hash__(self):
+        """
+        Hash a commit consistently with `__eq__`, i.e. based on its full SHA.
+        """
+        return hash(self.fullrev)
+
+    def __lt__(self, other):
+        """
+        Return whether a commit is a strict ancestor of another commit in the Git repository.
+
+        Raises RuntimeError if Git fails to determine the relationship between both commits.
+        """
+        if not isinstance(other, Commit):
+            return NotImplemented
+
+        # `git merge-base --is-ancestor` treats a commit as an ancestor of itself. Rule that case
+        # out so that `a < a` is False, as expected from a strict ordering.
+        if self == other:
+            return False
+
+        # Is self._sha an ancestor of other._sha?
+        res = subprocess.run(['git', '-C', self._git_repo, 'merge-base', '--is-ancestor', self._sha, other._sha])
+        if res.returncode not in (0, 1):
+            raise RuntimeError(f'Error when trying to obtain the commit order for {self._sha} and {other._sha}')
+        return res.returncode == 0
+
+    def show(self, include_diff=False):
+        """
+        Return the commit information equivalent to `git show` associated to this commit.
+
+        The patch is only included in the output when `include_diff` is true.
+        """
+        cmd = ['git', '-C', self._git_repo, 'show', self._sha]
+        if not include_diff:
+            cmd.append('--no-patch')
+        return subprocess.check_output(cmd, text=True)
+
+    @functools.cached_property
+    def shortrev(self):
+        """
+        Return the shortened version of the given SHA.
+        """
+        return subprocess.check_output(['git', '-C', self._git_repo, 'rev-parse', '--short', self._sha], text=True).strip()
+
+    @functools.cached_property
+    def fullrev(self):
+        """
+        Return the full SHA associated to this commit.
+        """
+        return subprocess.check_output(['git', '-C', self._git_repo, 'rev-parse', self._sha], text=True).strip()
+
+    def prefetch(self):
+        """
+        Prefetch cached properties associated to this commit object.
+
+        This makes it possible to control when time is spent recovering that information from Git for
+        e.g. better reporting to the user.
+        """
+        # Accessing the cached properties is enough to compute and store them.
+        self.shortrev
+        self.fullrev
+
+    def __str__(self):
+        """
+        Return the revision exactly as it was given when creating the commit.
+        """
+        return self._sha
+
+def truncate_lines(string, n, marker=None):
+    """
+    Truncate the given string at a certain number of lines.
+
+    Optionally, add a marker on the last line to identify that truncation has happened. The marker
+    replaces the last retained line, so the result never has more than `n` lines. When `n` is zero
+    or negative, no line is retained and the empty string is returned.
+    """
+    lines = string.splitlines()
+    if len(lines) <= n:
+        # Everything fits, nothing is truncated and no marker is needed.
+        return '\n'.join(lines)
+
+    truncated = lines[:max(n, 0)]
+    # Without any retained line there is no room for the marker.
+    if marker is not None and truncated:
+        truncated[-1] = marker
+    return '\n'.join(truncated)
+
+def create_plot(commits, benchmarks, data):
+    """
+    Build a plotly figure with one data series per benchmark, plotted across the given commits.
+
+    `commits` provides the X axis (as short revisions) and `data` holds, for each commit and in
+    the same order, a mapping from benchmark name to the value to plot.
+    """
+    title = f'{commits[0].shortrev} to {commits[-1].shortrev}'
+    figure = plotly.graph_objects.Figure(layout_title_text=title)
+
+    # The X axis is shared by all the series, and so is the text shown when hovering a point.
+    labels = [c.shortrev for c in commits]
+    tooltips = []
+    for c in commits:
+        summary = truncate_lines(c.show(), 30, marker='...')
+        tooltips.append(summary.replace('\n', '<br>'))
+
+    # Add one series per benchmark. A commit without any value for a benchmark (e.g. runtime
+    # or compilation error) contributes None, which is handled properly by plotly.
+    for name in benchmarks:
+        values = [per_commit.get(name) for per_commit in data]
+        figure.add_trace(plotly.graph_objects.Scatter(x=labels, y=values, text=tooltips, name=name))
+
+    return figure
+
+def directory_path(string):
+    """
+    Convert a string to a `pathlib.Path`, requiring that it names an existing directory.
+
+    Raises NotADirectoryError otherwise.
+    """
+    if not os.path.isdir(string):
+        raise NotADirectoryError(string)
+    return pathlib.Path(string)
+
+def parse_lnt(lines):
+    """
+    Parse lines in LNT format and return a dictionary of the form:
+
+        {
+            'benchmark1': {
+                'metric1': [float],
+                'metric2': [float],
+                ...
+            },
+            'benchmark2': {
+                'metric1': [float],
+                'metric2': [float],
+                ...
+            },
+            ...
+        }
+
+    Each metric may have multiple values.
+
+    Each non-blank line is expected to look like `<benchmark>.<metric> <value>`. The value is the
+    last whitespace-separated field and the metric is whatever follows the last '.' before it, so
+    benchmark names may themselves contain dots or spaces. Blank lines are ignored.
+
+    Raises ValueError if a non-blank line doesn't follow that format or if its value is not a number.
+    """
+    results = {}
+    for line in lines:
+        line = line.strip()
+        if not line:
+            continue
+
+        # Split from the right: only the value and the metric have a fixed shape, whereas the
+        # benchmark name is free-form.
+        fields = line.rsplit(maxsplit=1)
+        if len(fields) != 2:
+            raise ValueError(f'Malformed LNT line, expected "<benchmark>.<metric> <value>": {line!r}')
+        (identifier, value) = fields
+        (name, dot, metric) = identifier.rpartition('.')
+        if not dot:
+            raise ValueError(f'Malformed LNT line, expected "<benchmark>.<metric> <value>": {line!r}')
+
+        results.setdefault(name, {}).setdefault(metric, []).append(float(value))
+    return results
+
+def main(argv):
+    """
+    Entry point of the script.
+
+    Parse `argv` (the command-line arguments, without the program name), load the benchmark data
+    from the `<commit>.lnt` files of the requested directory, order it according to the commits
+    of the Git repository and write the resulting interactive chart as a HTML file.
+    """
+    parser = argparse.ArgumentParser(
+        prog='visualize-historical',
+        description='Visualize historical data in LNT format. This program generates a HTML file that embeds an '
+                    'interactive plot with the provided data. The HTML file can then be opened in a browser to '
+                    'visualize the data as a chart.',
+        epilog='This script depends on the `plotly` and the `tqdm` Python modules.')
+    parser.add_argument('directory', type=directory_path,
+        help='Path to a valid directory containing benchmark data in LNT format, each file being named <commit>.lnt. '
+             'This is also the format generated by the `benchmark-historical` utility.')
+    parser.add_argument('--output', '-o', type=pathlib.Path, required=False,
+        help='Optional path where to output the resulting HTML file. If it already exists, it is overwritten. '
+             'Defaults to a temporary file which is opened automatically once generated, but not removed after '
+             'creation.')
+    parser.add_argument('--metric', type=str, default='execution_time',
+        help='The metric to compare. LNT data may contain multiple metrics (e.g. code size, execution time, etc) -- '
+             'this option allows selecting which metric is being visualized. The default is "execution_time".')
+    parser.add_argument('--filter', type=str, required=False,
+        help='An optional regular expression used to filter the benchmarks included in the chart. '
+             'Only benchmarks whose names match the regular expression will be included. '
+             'Since the chart is interactive, it generally makes most sense to include all the benchmarks '
+             'and to then filter them in the browser, but in some cases producing a chart with a reduced '
+             'number of data series is useful.')
+    parser.add_argument('--git-repo', type=directory_path, default=pathlib.Path(os.getcwd()),
+        help='Path to the git repository to use for ordering commits in time. '
+             'By default, the current working directory is used.')
+    parser.add_argument('--open', action='store_true',
+        help='Whether to automatically open the generated HTML file when finished. If no output file is provided, '
+             'the resulting benchmark is opened automatically by default.')
+    args = parser.parse_args(argv)
+
+    # Process the files in a deterministic order so that the result doesn't depend on the order
+    # in which the file system happens to enumerate them.
+    files = sorted(args.directory.glob('*.lnt'))
+    if not files:
+        # Without any data there is no commit range to plot, so report it as a usage error.
+        parser.error(f'no .lnt files found in {args.directory}')
+
+    # Extract benchmark data from the directory and keep only the metric we're interested in.
+    #
+    # Some data points may have multiple values associated to the metric (e.g. if we performed
+    # multiple runs to reduce noise), in which case we aggregate them using a median.
+    historical_data = []
+    for file in tqdm.tqdm(files, desc='Parsing LNT files'):
+        # Each file is named <commit>.lnt, so the stem is the revision it was generated for.
+        commit = Commit(args.git_repo, file.stem)
+        with open(file, 'r') as f:
+            lnt_data = parse_lnt(f)
+        commit_data = {
+            bm: statistics.median(metrics[args.metric]) if args.metric in metrics else None
+            for (bm, metrics) in lnt_data.items()
+        }
+        historical_data.append((commit, commit_data))
+
+    # Obtain commit information which is then cached throughout the program. Do this
+    # eagerly so we can provide a progress bar.
+    for (commit, _) in tqdm.tqdm(historical_data, desc='Prefetching Git information'):
+        commit.prefetch()
+
+    # Sort the data based on the ordering of commits inside the provided Git repository
+    historical_data.sort(key=lambda x: x[0])
+
+    # Filter the benchmarks if needed
+    benchmarks = {b for (_, commit_data) in historical_data for b in commit_data}
+    if args.filter is not None:
+        regex = re.compile(args.filter)
+        benchmarks = {b for b in benchmarks if regex.search(b)}
+
+    # Plot the data for all the required benchmarks
+    figure = create_plot([commit for (commit, _) in historical_data],
+                         sorted(benchmarks),
+                         [data for (_, data) in historical_data])
+
+    do_open = args.output is None or args.open
+    if args.output is not None:
+        output = args.output
+    else:
+        # Actually create the temporary file and keep it around (delete=False): it must outlive
+        # this script so the browser can load it, and reserving the name avoids reusing the path
+        # of a temporary file that has already been deleted.
+        with tempfile.NamedTemporaryFile(suffix='.html', delete=False) as tmp:
+            output = tmp.name
+    plotly.io.write_html(figure, file=output, auto_open=do_open)
+
+# Only run the tool when this file is executed as a script. The program name (argv[0]) is
+# dropped since main() expects the command-line arguments only.
+if __name__ == '__main__':
+    main(sys.argv[1:])