[analyzer][tests] Introduce analyzer benchmarking framework
Summary:
This commit includes the following changes:
  * Benchmark selected projects by analyzing them multiple times
  * Compare two benchmarking results and visualize them on one chart
  * Organize project build logging, so we can use the same code
    in benchmarks
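
A minimal sketch of the workflow this enables, assuming it is driven
programmatically from clang/utils/analyzer so the helper modules are
importable (file names and the iteration count below are illustrative):

    from ProjectMap import ProjectMap
    from SATestBenchmark import Benchmark, compare

    projects = ProjectMap().projects                 # projects from the map file
    Benchmark(projects, iterations=20, output_path="old.csv").run()
    # ...rebuild the analyzer with the change under test, then...
    Benchmark(projects, iterations=20, output_path="new.csv").run()
    compare("old.csv", "new.csv", "comparison.png")  # plot both runs together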

Differential Revision: https://reviews.llvm.org/D83539
SavchenkoValeriy committed Jul 14, 2020
1 parent faa7e30 commit 5b4f143
Showing 5 changed files with 281 additions and 58 deletions.
89 changes: 69 additions & 20 deletions clang/utils/analyzer/SATest.py
@@ -34,29 +34,10 @@ def add(parser, args):

def build(parser, args):
import SATestBuild
from ProjectMap import ProjectMap

SATestBuild.VERBOSE = args.verbose

project_map = ProjectMap()
projects = project_map.projects

if args.projects:
projects_arg = args.projects.split(",")
available_projects = [project.name
for project in projects]

# validate that given projects are present in the project map file
for manual_project in projects_arg:
if manual_project not in available_projects:
parser.error("Project '{project}' is not found in "
"the project map file. Available projects are "
"{all}.".format(project=manual_project,
all=available_projects))

projects = [project.with_fields(enabled=project.name in projects_arg)
for project in projects]

projects = get_projects(parser, args.projects)
tester = SATestBuild.RegressionTester(args.jobs,
projects,
args.override_compiler,
@@ -100,6 +81,44 @@ def update(parser, args):
SATestUpdateDiffs.update_reference_results(project)


def benchmark(parser, args):
from SATestBenchmark import Benchmark

projects = get_projects(parser, args.projects)
benchmark = Benchmark(projects, args.iterations, args.output)
benchmark.run()


def benchmark_compare(parser, args):
import SATestBenchmark
SATestBenchmark.compare(args.old, args.new, args.output)


def get_projects(parser, projects_str):
from ProjectMap import ProjectMap

project_map = ProjectMap()
projects = project_map.projects

if projects_str:
projects_arg = projects_str.split(",")
available_projects = [project.name
for project in projects]

# validate that given projects are present in the project map file
for manual_project in projects_arg:
if manual_project not in available_projects:
parser.error("Project '{project}' is not found in "
"the project map file. Available projects are "
"{all}.".format(project=manual_project,
all=available_projects))

projects = [project.with_fields(enabled=project.name in projects_arg)
for project in projects]

return projects


def docker(parser, args):
if len(args.rest) > 0:
if args.rest[0] != "--":
@@ -284,6 +303,36 @@ def main():
"to the docker's entrypoint.")
dock_parser.set_defaults(func=docker)

# benchmark subcommand
bench_parser = subparsers.add_parser(
"benchmark",
help="Run benchmarks by building a set of projects multiple times.")

bench_parser.add_argument("-i", "--iterations", action="store",
type=int, default=20,
help="Number of iterations for building each "
"project.")
bench_parser.add_argument("-o", "--output", action="store",
default="benchmark.csv",
help="Output csv file for the benchmark results")
bench_parser.add_argument("--projects", action="store", default="",
help="Comma-separated list of projects to test")
bench_parser.set_defaults(func=benchmark)

bench_subparsers = bench_parser.add_subparsers()
bench_compare_parser = bench_subparsers.add_parser(
"compare",
help="Compare benchmark runs.")
bench_compare_parser.add_argument("--old", action="store", required=True,
                                      help="Benchmark reference results to "
                                      "compare against.")
bench_compare_parser.add_argument("--new", action="store", required=True,
help="New benchmark results to check.")
bench_compare_parser.add_argument("-o", "--output",
action="store", required=True,
help="Output file for plots.")
bench_compare_parser.set_defaults(func=benchmark_compare)

args = parser.parse_args()
args.func(parser, args)

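For reference, and assuming SATest.py is invoked the same way as for the
existing subcommands (the project name and file names below are purely
illustrative), a benchmarking session with the new subparsers might look like:

    ./SATest.py benchmark --projects tmux --iterations 10 --output old.csv
    # ...apply the analyzer change, rebuild, benchmark again into new.csv...
    ./SATest.py benchmark compare --old old.csv --new new.csv --output plots.png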
158 changes: 158 additions & 0 deletions clang/utils/analyzer/SATestBenchmark.py
@@ -0,0 +1,158 @@
"""
Static Analyzer qualification infrastructure.
This source file contains all the functionality related to benchmarking
the analyzer on a set of projects. Right now, this includes measuring
execution time and peak memory usage. The benchmark runs the analysis on
every project multiple times to get a better picture of the distribution
of measured values.
Additionally, this file includes a comparison routine for two benchmarking
results that plots the results together on one chart.
"""

import SATestUtils as utils
from SATestBuild import ProjectTester, stdout, TestInfo
from ProjectMap import ProjectInfo

import pandas as pd
from typing import List, Tuple


INDEX_COLUMN = "index"


def _save(data: pd.DataFrame, file_path: str):
data.to_csv(file_path, index_label=INDEX_COLUMN)


def _load(file_path: str) -> pd.DataFrame:
return pd.read_csv(file_path, index_col=INDEX_COLUMN)


class Benchmark:
"""
    The Benchmark class encapsulates one piece of functionality: it runs the
    analysis multiple times for the given set of projects and stores the
    results in the specified file.
"""
def __init__(self, projects: List[ProjectInfo], iterations: int,
output_path: str):
self.projects = projects
self.iterations = iterations
self.out = output_path

def run(self):
results = [self._benchmark_project(project)
for project in self.projects]

data = pd.concat(results, ignore_index=True)
_save(data, self.out)

def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame:
if not project.enabled:
stdout(f" \n\n--- Skipping disabled project {project.name}\n")
            # Return an empty frame so that pd.concat in run() still works
            # when some projects are disabled.
            return pd.DataFrame()

stdout(f" \n\n--- Benchmarking project {project.name}\n")

test_info = TestInfo(project)
tester = ProjectTester(test_info, silent=True)
project_dir = tester.get_project_dir()
output_dir = tester.get_output_dir()

raw_data = []

for i in range(self.iterations):
stdout(f"Iteration #{i + 1}")
time, mem = tester.build(project_dir, output_dir)
raw_data.append({"time": time, "memory": mem,
"iteration": i, "project": project.name})
stdout(f"time: {utils.time_to_str(time)}, "
f"peak memory: {utils.memory_to_str(mem)}")

return pd.DataFrame(raw_data)
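
# Purely illustrative (not part of this module): the rows collected above
# form a flat table, one row per iteration, which _save/_load round-trip
# through a CSV file. With made-up numbers:
#
#     example = pd.DataFrame([
#         {"time": 42.0, "memory": 511000, "iteration": 0, "project": "tmux"},
#         {"time": 41.5, "memory": 509500, "iteration": 1, "project": "tmux"},
#     ])
#     _save(example, "benchmark.csv")    # written with an "index" label column
#     restored = _load("benchmark.csv")  # same rows, indexed by that column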


def compare(old_path: str, new_path: str, plot_file: str):
"""
Compare two benchmarking results stored as .csv files
and produce a plot in the specified file.
"""
old = _load(old_path)
new = _load(new_path)

old_projects = set(old["project"])
new_projects = set(new["project"])
common_projects = old_projects & new_projects

# Leave only rows for projects common to both dataframes.
old = old[old["project"].isin(common_projects)]
new = new[new["project"].isin(common_projects)]

old, new = _normalize(old, new)

# Seaborn prefers all the data to be in one dataframe.
old["kind"] = "old"
new["kind"] = "new"
data = pd.concat([old, new], ignore_index=True)

# TODO: compare data in old and new dataframes using statistical tests
# to check if they belong to the same distribution
_plot(data, plot_file)


def _normalize(old: pd.DataFrame,
new: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
# This creates a dataframe with all numerical data averaged.
means = old.groupby("project").mean()
return _normalize_impl(old, means), _normalize_impl(new, means)


def _normalize_impl(data: pd.DataFrame, means: pd.DataFrame):
# Right now 'means' has one row corresponding to one project,
# while 'data' has N rows for each project (one for each iteration).
#
# In order for us to work easier with this data, we duplicate
# 'means' data to match the size of the 'data' dataframe.
#
# All the columns from 'data' will maintain their names, while
# new columns coming from 'means' will have "_mean" suffix.
joined_data = data.merge(means, on="project", suffixes=("", "_mean"))
_normalize_key(joined_data, "time")
_normalize_key(joined_data, "memory")
return joined_data
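
# A toy illustration of the merge above (made-up numbers, two iterations of
# one hypothetical project):
#
#     data  = pd.DataFrame({"project": ["tmux", "tmux"],
#                           "time": [10.0, 12.0]})
#     means = data.groupby("project").mean()           # mean time == 11.0
#     joined = data.merge(means, on="project", suffixes=("", "_mean"))
#     joined["time"] / joined["time_mean"]             # -> ~0.909, ~1.091
#
# Every measurement thus ends up expressed relative to the old run's mean,
# so old and new results live on one comparable scale around 1.0.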


def _normalize_key(data: pd.DataFrame, key: str):
norm_key = _normalized_name(key)
mean_key = f"{key}_mean"
data[norm_key] = data[key] / data[mean_key]


def _normalized_name(name: str) -> str:
return f"normalized {name}"


def _plot(data: pd.DataFrame, plot_file: str):
import matplotlib
import seaborn as sns
from matplotlib import pyplot as plt

sns.set_style("whitegrid")
# We want to have time and memory charts one above the other.
figure, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))

def _subplot(key: str, ax: matplotlib.axes.Axes):
sns.boxplot(x="project", y=_normalized_name(key), hue="kind",
data=data, palette=sns.color_palette("BrBG", 2), ax=ax)

_subplot("time", ax1)
# No need to have xlabels on both top and bottom charts.
ax1.set_xlabel("")

_subplot("memory", ax2)
# The legend on the top chart is enough.
ax2.get_legend().remove()

figure.savefig(plot_file)
