Skip to content

Commit 72c512f

Browse files
committed
[libc++] Major improvements to visualize-historical
This patch moves to using pandas.DataFrame for representing the data, which is a lot more expressive and better suited to computations than regular Python objects.

- Instead of producing line charts, produce a scatter plot with trendlines, which is immensely more useful due to the inherent nature of historical benchmark results, which contain noise.
- Greatly reduce the duration for sorting commits by using `git rev-list` and passing all commits at once instead of requiring one subprocess call for each comparison.
1 parent f99b7f9 commit 72c512f

File tree

2 files changed

+79
-85
lines changed

2 files changed

+79
-85
lines changed

libcxx/utils/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
numpy
2+
pandas
13
plotly
4+
statsmodels
25
tabulate
36
tqdm

libcxx/utils/visualize-historical

Lines changed: 76 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ import subprocess
1010
import sys
1111
import tempfile
1212

13+
import pandas
1314
import plotly
15+
import plotly.express
1416
import tqdm
1517

1618
@functools.total_ordering
@@ -48,6 +50,7 @@ class Commit:
4850
"""
4951
return hash(self.fullrev)
5052

53+
@functools.cache
5154
def show(self, include_diff=False):
5255
"""
5356
Return the commit information equivalent to `git show` associated to this commit.
@@ -78,8 +81,9 @@ class Commit:
7881
This makes it possible to control when time is spent recovering that information from Git for
7982
e.g. better reporting to the user.
8083
"""
81-
self.shortrev
8284
self.fullrev
85+
self.shortrev
86+
self.show()
8387

8488
def __str__(self):
8589
return self._sha
@@ -97,25 +101,20 @@ def truncate_lines(string, n, marker=None):
97101
assert len(truncated) <= n, "broken post-condition"
98102
return '\n'.join(truncated)
99103

100-
def create_plot(commits, benchmarks, data):
104+
def create_plot(data, metric):
101105
"""
102-
Create a plot object showing the evolution of each benchmark throughout the given commits.
106+
Create a plot object showing the evolution of each benchmark throughout the given commits for
107+
the given metric.
103108
"""
104-
figure = plotly.graph_objects.Figure(layout_title_text=f'{commits[0].shortrev} to {commits[-1].shortrev}')
105-
106-
# Create the X axis and the hover information
107-
x_axis = [commit.shortrev for commit in commits]
108-
hover_info = [truncate_lines(commit.show(), 30, marker='...').replace('\n', '<br>') for commit in commits]
109-
110-
# For each benchmark, get the metric for that benchmark for each commit.
111-
#
112-
# Some commits may not have any data associated to a benchmark (e.g. runtime or compilation error).
113-
# Use None, which is handled properly by plotly.
114-
for benchmark in benchmarks:
115-
series = [commit_data.get(benchmark, None) for commit_data in data]
116-
scatter = plotly.graph_objects.Scatter(x=x_axis, y=series, text=hover_info, name=benchmark)
117-
figure.add_trace(scatter)
118-
109+
data = data.sort_values(by='revlist_order')
110+
revlist = pandas.unique(data['commit']) # list of all commits in chronological order
111+
hover_info = {c: truncate_lines(c.show(), 30, marker='...').replace('\n', '<br>') for c in revlist}
112+
figure = plotly.express.scatter(data, title=f"{revlist[0].shortrev} to {revlist[-1].shortrev}",
113+
x='revlist_order', y=metric,
114+
symbol='benchmark',
115+
color='benchmark',
116+
hover_name=[hover_info[c] for c in data['commit']],
117+
trendline="ols")
119118
return figure
120119

121120
def directory_path(string):
@@ -124,63 +123,60 @@ def directory_path(string):
124123
else:
125124
raise NotADirectoryError(string)
126125

127-
def parse_lnt(lines):
126+
def parse_lnt(lines, aggregate=statistics.median):
128127
"""
129-
Parse lines in LNT format and return a dictionary of the form:
128+
Parse lines in LNT format and return a list of dictionaries of the form:
130129
131-
{
132-
'benchmark1': {
133-
'metric1': [float],
134-
'metric2': [float],
130+
[
131+
{
132+
'benchmark': <benchmark1>,
133+
<metric1>: float,
134+
<metric2>: float,
135135
...
136136
},
137-
'benchmark2': {
138-
'metric1': [float],
139-
'metric2': [float],
137+
{
138+
'benchmark': <benchmark2>,
139+
<metric1>: float,
140+
<metric2>: float,
140141
...
141142
},
142143
...
143-
}
144+
]
144145
145-
Each metric may have multiple values.
146+
If a metric has multiple values associated to it, they are aggregated into a single
147+
value using the provided aggregation function.
146148
"""
147-
results = {}
149+
results = []
148150
for line in lines:
149151
line = line.strip()
150152
if not line:
151153
continue
152154

153155
(identifier, value) = line.split(' ')
154-
(name, metric) = identifier.split('.')
155-
if name not in results:
156-
results[name] = {}
157-
if metric not in results[name]:
158-
results[name][metric] = []
159-
results[name][metric].append(float(value))
160-
return results
156+
(benchmark, metric) = identifier.split('.')
157+
if not any(x['benchmark'] == benchmark for x in results):
158+
results.append({'benchmark': benchmark})
161159

162-
def find_outliers(xs, ys, threshold):
163-
"""
164-
Given a list of x coordinates and a list of y coordinates, find (x, y) pairs where the y
165-
value differs from the previous y value by more than the given relative difference.
160+
entry = next(x for x in results if x['benchmark'] == benchmark)
161+
if metric not in entry:
162+
entry[metric] = []
163+
entry[metric].append(float(value))
166164

167-
The threshold is given as a floating point representing a percentage, e.g. 0.25 will result in
168-
detecting points that differ from their previous value by more than 25%. The difference is in
169-
absolute value, i.e. both positive and negative spikes are detected.
170-
"""
171-
outliers = []
172-
previous = None
173-
for (x, y) in zip(xs, ys):
174-
if y is None: # skip data points that don't contain values
175-
continue
165+
for entry in results:
166+
for metric in entry:
167+
if isinstance(entry[metric], list):
168+
entry[metric] = aggregate(entry[metric])
176169

177-
if previous is not None:
178-
diff = y - previous
179-
if (diff / previous) > threshold:
180-
outliers.append((x, y))
181-
previous = y
182-
return outliers
170+
return results
183171

172+
def sorted_revlist(git_repo, commits):
173+
"""
174+
Return the list of commits sorted by their chronological order (from oldest to newest) in the
175+
provided Git repository. Items earlier in the list are older than items later in the list.
176+
"""
177+
revlist_cmd = ['git', '-C', git_repo, 'rev-list', '--no-walk'] + list(commits)
178+
revlist = subprocess.check_output(revlist_cmd, text=True).strip().splitlines()
179+
return list(reversed(revlist))
184180

185181
def main(argv):
186182
parser = argparse.ArgumentParser(
@@ -206,7 +202,7 @@ def main(argv):
206202
'and to then filter them in the browser, but in some cases producing a chart with a reduced '
207203
'number of data series is useful.')
208204
parser.add_argument('--find-outliers', metavar='FLOAT', type=float, required=False,
209-
help='When building the chart, detect commits that show a large spike (more than the given relative threshold) '
205+
help='Instead of building a chart, detect commits that show a large spike (more than the given relative threshold) '
210206
'with the previous result and print those to standard output. This can be used to generate a list of '
211207
'potential outliers that we might want to re-generate the data for. The threshold is expressed as a '
212208
'floating point number, e.g. 0.25 will detect points that differ by more than 25%% from their previous '
@@ -220,50 +216,45 @@ def main(argv):
220216
'the resulting benchmark is opened automatically by default.')
221217
args = parser.parse_args(argv)
222218

223-
# Extract benchmark data from the directory and keep only the metric we're interested in.
224-
#
225-
# Some data points may have multiple values associated to the metric (e.g. if we performed
226-
# multiple runs to reduce noise), in which case we aggregate them using a median.
227-
historical_data = []
219+
# Extract benchmark data from the directory.
220+
data = []
228221
files = [f for f in args.directory.glob('*.lnt')]
229222
for file in tqdm.tqdm(files, desc='Parsing LNT files'):
230223
(commit, _) = os.path.splitext(os.path.basename(file))
231224
commit = Commit(args.git_repo, commit)
232225
with open(file, 'r') as f:
233-
lnt_data = parse_lnt(f.readlines())
234-
commit_data = {}
235-
for (bm, metrics) in lnt_data.items():
236-
commit_data[bm] = statistics.median(metrics[args.metric]) if args.metric in metrics else None
237-
historical_data.append((commit, commit_data))
226+
rows = parse_lnt(f.readlines())
227+
data.extend((commit, row) for row in rows)
238228

239229
# Obtain commit information which is then cached throughout the program. Do this
240230
# eagerly so we can provide a progress bar.
241-
for (commit, _) in tqdm.tqdm(historical_data, desc='Prefetching Git information'):
231+
for (commit, _) in tqdm.tqdm(data, desc='Prefetching Git information'):
242232
commit.prefetch()
243233

244-
# Sort the data based on the ordering of commits inside the provided Git repository
245-
historical_data.sort(key=lambda x: x[0])
234+
# Create a dataframe from the raw data and add some columns to it:
235+
# - 'commit' represents the Commit object associated to the results in that row
236+
# - `revlist_order` represents the order of the commit within the Git repository.
237+
data = pandas.DataFrame([row | {'commit': commit} for (commit, row) in data])
238+
revlist = sorted_revlist(args.git_repo, [c.fullrev for c in set(data['commit'])])
239+
data = data.join(pandas.DataFrame([{'revlist_order': revlist.index(c.fullrev)} for c in data['commit']]))
246240

247-
# Filter the benchmarks if needed
248-
benchmarks = {b for (_, commit_data) in historical_data for b in commit_data.keys()}
241+
# Filter the benchmarks if needed.
249242
if args.filter is not None:
250-
regex = re.compile(args.filter)
251-
benchmarks = {b for b in benchmarks if regex.search(b)}
243+
keeplist = [b for b in data['benchmark'] if re.search(args.filter, b) is not None]
244+
data = data[data['benchmark'].isin(keeplist)]
252245

253-
# If requested, perform a basic pass to detect outliers
246+
# If requested, perform a basic pass to detect outliers.
247+
# Note that we consider a commit to be an outlier if any of the benchmarks for that commit is an outlier.
254248
if args.find_outliers is not None:
255249
threshold = args.find_outliers
256250
outliers = set()
257-
for benchmark in benchmarks:
258-
commits = [commit for (commit, _) in historical_data]
259-
series = [commit_data.get(benchmark, None) for (_, commit_data) in historical_data]
260-
outliers |= set(commit for (commit, _) in find_outliers(commits, series, threshold=threshold))
261-
print(f'Outliers (more than {threshold * 100}%): {" ".join(str(x) for x in outliers)}')
262-
263-
# Plot the data for all the required benchmarks
264-
figure = create_plot([commit for (commit, _) in historical_data],
265-
sorted(list(benchmarks)),
266-
[commit_data for (_, commit_data) in historical_data])
251+
for (benchmark, series) in data.sort_values(by='revlist_order').groupby('benchmark'):
252+
outliers |= set(series[series[args.metric].pct_change() > threshold]['commit'])
253+
print(f'Outliers (more than {threshold * 100}%): {" ".join(c.shortrev for c in outliers)}')
254+
return
255+
256+
# Plot the data for all the required benchmarks.
257+
figure = create_plot(data, args.metric)
267258
do_open = args.output is None or args.open
268259
output = args.output if args.output is not None else tempfile.NamedTemporaryFile(suffix='.html').name
269260
plotly.io.write_html(figure, file=output, auto_open=do_open)

0 commit comments

Comments
 (0)