[analyzer][tests] Introduce analyzer benchmarking framework
Summary: This commit includes a couple of changes:
* Benchmark selected projects by analyzing them multiple times
* Compare two benchmarking results and visualize them on one chart
* Organize project build logging, so we can use the same code in benchmarks

Differential Revision: https://reviews.llvm.org/D83539
1 parent faa7e30 · commit 5b4f143 · 5 changed files with 281 additions and 58 deletions
@@ -0,0 +1,158 @@
""" | ||
Static Analyzer qualification infrastructure. | ||
This source file contains all the functionality related to benchmarking | ||
the analyzer on a set projects. Right now, this includes measuring | ||
execution time and peak memory usage. Benchmark runs analysis on every | ||
project multiple times to get a better picture about the distribution | ||
of measured values. | ||
Additionally, this file includes a comparison routine for two benchmarking | ||
results that plots the result together on one chart. | ||
""" | ||

import SATestUtils as utils
from SATestBuild import ProjectTester, stdout, TestInfo
from ProjectMap import ProjectInfo

import pandas as pd
from typing import List, Tuple


INDEX_COLUMN = "index"

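# Results are stored as plain .csv files: one row per iteration, with the
# columns "time", "memory", "iteration" and "project" (see Benchmark below),
# indexed by the "index" label column written by _save.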
def _save(data: pd.DataFrame, file_path: str):
    data.to_csv(file_path, index_label=INDEX_COLUMN)


def _load(file_path: str) -> pd.DataFrame:
    return pd.read_csv(file_path, index_col=INDEX_COLUMN)

class Benchmark:
    """
    The Benchmark class encapsulates one functionality: it runs the
    analysis multiple times for the given set of projects and stores
    the results in the specified file.
    """
    def __init__(self, projects: List[ProjectInfo], iterations: int,
                 output_path: str):
        self.projects = projects
        self.iterations = iterations
        self.out = output_path

    def run(self):
        results = [self._benchmark_project(project)
                   for project in self.projects]

        # pd.concat silently drops the None entries produced by skipped
        # (disabled) projects.
        data = pd.concat(results, ignore_index=True)
        _save(data, self.out)

    def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame:
        if not project.enabled:
            stdout(f" \n\n--- Skipping disabled project {project.name}\n")
            return None

        stdout(f" \n\n--- Benchmarking project {project.name}\n")

        test_info = TestInfo(project)
        tester = ProjectTester(test_info, silent=True)
        project_dir = tester.get_project_dir()
        output_dir = tester.get_output_dir()

        raw_data = []

        for i in range(self.iterations):
            stdout(f"Iteration #{i + 1}")
            time, mem = tester.build(project_dir, output_dir)
            raw_data.append({"time": time, "memory": mem,
                             "iteration": i, "project": project.name})
            stdout(f"time: {utils.time_to_str(time)}, "
                   f"peak memory: {utils.memory_to_str(mem)}")

        return pd.DataFrame(raw_data)

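# A minimal usage sketch (hypothetical iteration count and output path;
# `projects` is assumed to be a list of ProjectInfo objects loaded
# elsewhere, e.g. from the project map):
#
#     benchmark = Benchmark(projects, iterations=5, output_path="new.csv")
#     benchmark.run()
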
def compare(old_path: str, new_path: str, plot_file: str):
    """
    Compare two benchmarking results stored as .csv files
    and produce a plot in the specified file.
    """
    old = _load(old_path)
    new = _load(new_path)

    old_projects = set(old["project"])
    new_projects = set(new["project"])
    common_projects = old_projects & new_projects

    # Leave only rows for projects common to both dataframes.
    old = old[old["project"].isin(common_projects)]
    new = new[new["project"].isin(common_projects)]

    old, new = _normalize(old, new)

    # Seaborn prefers all the data to be in one dataframe.
    old["kind"] = "old"
    new["kind"] = "new"
    data = pd.concat([old, new], ignore_index=True)

    # TODO: compare data in old and new dataframes using statistical tests
    #       to check if they belong to the same distribution
    _plot(data, plot_file)

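# A usage sketch for comparing two saved runs (hypothetical file names):
#
#     compare("old.csv", "new.csv", "comparison.png")
#
# where "old.csv" and "new.csv" are the outputs of two Benchmark runs
# (e.g. before and after an analyzer change), and "comparison.png" is
# where the resulting chart is written.
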
def _normalize(old: pd.DataFrame,
               new: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    # Average all numerical columns of the old results, per project.
    # Both dataframes are normalized by these means, so the old results
    # center around 1.0 and the new ones show change relative to them.
    means = old.groupby("project").mean()
    return _normalize_impl(old, means), _normalize_impl(new, means)

def _normalize_impl(data: pd.DataFrame, means: pd.DataFrame):
    # Right now 'means' has one row per project, while 'data' has N rows
    # for each project (one for each iteration).
    #
    # To make this data easier to work with, we duplicate the 'means'
    # data to match the size of the 'data' dataframe.
    #
    # All the columns from 'data' keep their names, while the new columns
    # coming from 'means' get the "_mean" suffix.
    joined_data = data.merge(means, on="project", suffixes=("", "_mean"))
    _normalize_key(joined_data, "time")
    _normalize_key(joined_data, "memory")
    return joined_data


def _normalize_key(data: pd.DataFrame, key: str):
    norm_key = _normalized_name(key)
    mean_key = f"{key}_mean"
    data[norm_key] = data[key] / data[mean_key]
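    # For illustration (hypothetical numbers): if a project's mean "time"
    # in the old results is 10.0s, an iteration that took 11.0s gets
    # "normalized time" == 1.1, i.e. a 10% slowdown relative to the old
    # mean, while old iterations cluster around 1.0.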


def _normalized_name(name: str) -> str:
    return f"normalized {name}"

def _plot(data: pd.DataFrame, plot_file: str):
    import matplotlib
    import seaborn as sns
    from matplotlib import pyplot as plt
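    # When running without a display (e.g. on a CI machine), a
    # non-interactive backend may be required; calling
    # matplotlib.use("Agg") before the pyplot import is one option.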

    sns.set_style("whitegrid")
    # We want to have time and memory charts one above the other.
    figure, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))

    def _subplot(key: str, ax: matplotlib.axes.Axes):
        sns.boxplot(x="project", y=_normalized_name(key), hue="kind",
                    data=data, palette=sns.color_palette("BrBG", 2), ax=ax)

    _subplot("time", ax1)
    # No need to have xlabels on both top and bottom charts.
    ax1.set_xlabel("")

    _subplot("memory", ax2)
    # The legend on the top chart is enough.
    ax2.get_legend().remove()

    figure.savefig(plot_file)