From 1ff321c91bde73afef2ddfe64bedde7139a95a1f Mon Sep 17 00:00:00 2001
From: Piotr Balcer
Date: Fri, 26 Jul 2024 14:15:34 +0200
Subject: [PATCH] improve benchmarks automation

This patch:
- adds an option to run each benchmark several times and report the median value
- adds a timeout for benchmarks, set at 10 minutes by default
- adds an option to filter benchmarks by name (regex)
- adds an option to pick a specific compiler commit to test with
- adds more compute benchmarks
- fixes cudaSift
- uses upstream Velocity Bench
- adds a simple summary table with results
---
 .github/workflows/benchmarks_compute.yml   |  21 +-
 scripts/benchmarks/benches/SobelFilter.py  |   3 +
 scripts/benchmarks/benches/api_overhead.py |  82 --------
 scripts/benchmarks/benches/base.py         |  15 +-
 scripts/benchmarks/benches/compute.py      | 212 +++++++++++++++++++++
 scripts/benchmarks/benches/cudaSift.py     |   7 +
 scripts/benchmarks/benches/easywave.py     |   2 +
 scripts/benchmarks/benches/hashtable.py    |   3 +
 scripts/benchmarks/benches/options.py      |   3 +
 scripts/benchmarks/benches/quicksilver.py  |   7 +-
 scripts/benchmarks/benches/result.py       |   1 +
 scripts/benchmarks/benches/velocity.py     |  16 +-
 scripts/benchmarks/main.py                 |  72 +++++--
 scripts/benchmarks/output.py               |  76 +++++---
 scripts/benchmarks/utils/utils.py          |  22 ++-
 15 files changed, 384 insertions(+), 158 deletions(-)
 delete mode 100644 scripts/benchmarks/benches/api_overhead.py
 create mode 100644 scripts/benchmarks/benches/compute.py

diff --git a/.github/workflows/benchmarks_compute.yml b/.github/workflows/benchmarks_compute.yml
index 619784b263..86fbb1ddc8 100644
--- a/.github/workflows/benchmarks_compute.yml
+++ b/.github/workflows/benchmarks_compute.yml
@@ -34,6 +34,16 @@ on:
         type: string
         required: false
         default: ''
+      sycl_repo:
+        description: 'Compiler repo'
+        type: string
+        required: true
+        default: 'intel/llvm'
+      sycl_commit:
+        description: 'Compiler commit'
+        type: string
+        required: false
+        default: ''
 
 permissions:
   contents: read
@@ -41,8 +51,6 @@ permissions:
 
 jobs:
   e2e-build-hw:
-    # Run only on upstream; forks will not have the HW
-    # if: github.repository == 'oneapi-src/unified-runtime'
     name: Build SYCL, UR, run Compute Benchmarks
     strategy:
       matrix:
@@ -105,12 +113,19 @@ jobs:
       - name: Checkout SYCL
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
         with:
-          repository: intel/llvm
+          repository: ${{inputs.sycl_repo}}
           ref: refs/heads/sycl
           path: sycl-repo
           fetch-depth: 1
           fetch-tags: false
 
+      - name: Fetch specific SYCL commit
+        if: inputs.sycl_commit != ''
+        working-directory: ./sycl-repo
+        run: |
+          git fetch --depth=1 origin ${{ inputs.sycl_commit }}
+          git checkout ${{ inputs.sycl_commit }}
+
       - name: Set CUDA env vars
         if: matrix.adapter.str_name == 'cuda'
         run: |
diff --git a/scripts/benchmarks/benches/SobelFilter.py b/scripts/benchmarks/benches/SobelFilter.py
index e976bfaee8..b28681c2ee 100644
--- a/scripts/benchmarks/benches/SobelFilter.py
+++ b/scripts/benchmarks/benches/SobelFilter.py
@@ -12,7 +12,10 @@ class SobelFilter(VelocityBase):
     def __init__(self, vb: VelocityBench):
         super().__init__("sobel_filter", "sobel_filter", vb)
+
+    def download_deps(self):
         self.download_untar("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz")
+        return
 
     def name(self):
         return "Velocity-Bench Sobel Filter"
 
diff --git a/scripts/benchmarks/benches/api_overhead.py b/scripts/benchmarks/benches/api_overhead.py
deleted file mode 100644
index d34f4c4ee8..0000000000
--- a/scripts/benchmarks/benches/api_overhead.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-# See LICENSE.TXT
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-import os
-import csv
-import io
-from utils.utils import run, git_clone
-from .base import Benchmark
-from .result import Result
-from .options import options
-
-class APIOverheadSYCL(Benchmark):
-    def __init__(self, directory):
-        super().__init__(directory)
-
-    def name(self):
-        return "api_overhead_benchmark_sycl, mean execution time per 10 kernels"
-
-    def unit(self):
-        return "μs"
-
-    def setup(self):
-        repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "0f758021dce9ba32341a503739b69db057433c59")
-        build_path = self.create_build_path('compute-benchmarks-build')
-
-        configure_command = [
-            "cmake",
-            f"-B {build_path}",
-            f"-S {repo_path}",
-            f"-DCMAKE_BUILD_TYPE=Release",
-            f"-DBUILD_SYCL=ON",
-            f"-DSYCL_COMPILER_ROOT={options.sycl}",
-            f"-DALLOW_WARNINGS=ON"
-        ]
-        run(configure_command, add_sycl=True)
-
-        run(f"cmake --build {build_path} -j", add_sycl=True)
-        self.benchmark_bin = f"{build_path}/bin/api_overhead_benchmark_sycl"
-
-    def run_internal(self, ioq, env_vars):
-        command = [
-            f"{self.benchmark_bin}",
-            "--test=SubmitKernel",
-            f"--Ioq={ioq}",
-            "--DiscardEvents=0",
-            "--MeasureCompletion=0",
-            "--iterations=100000",
-            "--Profiling=0",
-            "--NumKernels=10",
-            "--KernelExecTime=1",
-            "--csv",
-            "--noHeaders"
-        ]
-        result = self.run_bench(command, env_vars)
-        (label, mean) = self.parse_output(result)
-        return Result(label=label, value=mean, command=command, env=env_vars, stdout=result)
-
-    def run(self, env_vars) -> list[Result]:
-        results = []
-        for ioq in [0, 1]:
-            results.append(self.run_internal(ioq, env_vars))
-
-        return results
-
-    def parse_output(self, output):
-        csv_file = io.StringIO(output)
-        reader = csv.reader(csv_file)
-        next(reader, None)
-        data_row = next(reader, None)
-        if data_row is None:
-            raise ValueError("Benchmark output does not contain data.")
-        try:
-            label = data_row[0]
-            mean = float(data_row[1])
-            return (label, mean)
-        except (ValueError, IndexError) as e:
-            raise ValueError(f"Error parsing output: {e}")
-
-    def teardown(self):
-        return
diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py
index 25b5d2619f..c7f263c253 100644
--- a/scripts/benchmarks/benches/base.py
+++ b/scripts/benchmarks/benches/base.py
@@ -20,16 +20,6 @@ def __init__(self, directory):
     def run_bench(self, command, env_vars):
         return run(command=command, env_vars=env_vars, add_sycl=True, cwd=options.benchmark_cwd).stdout.decode()
 
-    def create_build_path(self, name):
-        build_path = os.path.join(self.directory, name)
-
-        if options.rebuild and Path(build_path).exists():
-            shutil.rmtree(build_path)
-
-        Path(build_path).mkdir(parents=True, exist_ok=True)
-
-        return build_path
-
     def create_data_path(self, name):
         data_path = os.path.join(self.directory, "data", name)
 
@@ -58,10 +48,13 @@ def name(self):
     def unit(self):
         raise NotImplementedError()
 
+    def lower_is_better(self):
+        return True
+
     def setup(self):
         raise NotImplementedError()
 
-    def run(self, env_vars):
+    def run(self, env_vars) -> Result:
         raise NotImplementedError()
 
     def teardown(self):
diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py
new file mode 100644
index 0000000000..19bc0b7fd0
--- /dev/null
+++ b/scripts/benchmarks/benches/compute.py
@@ -0,0 +1,212 @@
+# Copyright (C) 2024 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import os
+import csv
+import io
+from utils.utils import run, git_clone, create_build_path
+from .base import Benchmark
+from .result import Result
+from .options import options
+
+class ComputeBench:
+    def __init__(self, directory):
+        self.directory = directory
+        self.built = False
+        return
+
+    def setup(self):
+        if self.built:
+            return
+
+        repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "0f758021dce9ba32341a503739b69db057433c59")
+        build_path = create_build_path(self.directory, 'compute-benchmarks-build')
+
+        configure_command = [
+            "cmake",
+            f"-B {build_path}",
+            f"-S {repo_path}",
+            f"-DCMAKE_BUILD_TYPE=Release",
+            f"-DBUILD_SYCL=ON",
+            f"-DSYCL_COMPILER_ROOT={options.sycl}",
+            f"-DALLOW_WARNINGS=ON"
+        ]
+        run(configure_command, add_sycl=True)
+
+        run(f"cmake --build {build_path} -j", add_sycl=True)
+
+        self.built = True
+        self.bins = os.path.join(build_path, 'bin')
+
+class ComputeBenchmark(Benchmark):
+    def __init__(self, bench, name, test):
+        self.bench = bench
+        self.bench_name = name
+        self.test = test
+        super().__init__(bench.directory)
+
+    def bin_args(self) -> list[str]:
+        return []
+
+    def extra_env_vars(self) -> dict:
+        return {}
+
+    def unit(self):
+        return "μs"
+
+    def setup(self):
+        self.bench.setup()
+        self.benchmark_bin = os.path.join(self.bench.bins, self.bench_name)
+
+    def run(self, env_vars) -> Result:
+        command = [
+            f"{self.benchmark_bin}",
+            f"--test={self.test}",
+            "--csv",
+            "--noHeaders"
+        ]
+
+        command += self.bin_args()
+        env_vars.update(self.extra_env_vars())
+
+        result = self.run_bench(command, env_vars)
+        (label, mean) = self.parse_output(result)
+        return Result(label=label, value=mean, command=command, env=env_vars, stdout=result)
+
+    def parse_output(self, output):
+        csv_file = io.StringIO(output)
+        reader = csv.reader(csv_file)
+        next(reader, None)
+        data_row = next(reader, None)
+        if data_row is None:
+            raise ValueError("Benchmark output does not contain data.")
+        try:
+            label = data_row[0]
+            mean = float(data_row[1])
+            return (label, mean)
+        except (ValueError, IndexError) as e:
+            raise ValueError(f"Error parsing output: {e}")
+
+    def teardown(self):
+        return
+
+class SubmitKernelSYCL(ComputeBenchmark):
+    def __init__(self, bench, ioq):
+        self.ioq = ioq
+        super().__init__(bench, "api_overhead_benchmark_sycl", "SubmitKernel")
+
+    def name(self):
+        order = "in order" if self.ioq else "out of order"
+        return f"api_overhead_benchmark_sycl SubmitKernel {order}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            f"--Ioq={self.ioq}",
+            "--DiscardEvents=0",
+            "--MeasureCompletion=0",
+            "--iterations=100000",
+            "--Profiling=0",
+            "--NumKernels=10",
+            "--KernelExecTime=1"
+        ]
+
+class ExecImmediateCopyQueue(ComputeBenchmark):
+    def __init__(self, bench, ioq, isCopyOnly, source, destination, size):
+        self.ioq = ioq
+        self.isCopyOnly = isCopyOnly
+        self.source = source
+        self.destination = destination
+        self.size = size
+        super().__init__(bench, "api_overhead_benchmark_sycl", "ExecImmediateCopyQueue")
+
+    def name(self):
+        order = "in order" if self.ioq else "out of order"
+        return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=100000",
+            f"--ioq={self.ioq}",
+            f"--IsCopyOnly={self.isCopyOnly}",
+            "--MeasureCompletionTime=0",
+            f"--src={self.source}",
+            f"--dst={self.destination}",
+            f"--size={self.size}"
+        ]
+
+class QueueInOrderMemcpy(ComputeBenchmark):
+    def __init__(self, bench, isCopyOnly, source, destination, size):
+        self.isCopyOnly = isCopyOnly
+        self.source = source
+        self.destination = destination
+        self.size = size
+        super().__init__(bench, "memory_benchmark_sycl", "QueueInOrderMemcpy")
+
+    def name(self):
+        return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--IsCopyOnly={self.isCopyOnly}",
+            f"--sourcePlacement={self.source}",
+            f"--destinationPlacement={self.destination}",
+            f"--size={self.size}",
+            "--count=100"
+        ]
+
+class QueueMemcpy(ComputeBenchmark):
+    def __init__(self, bench, source, destination, size):
+        self.source = source
+        self.destination = destination
+        self.size = size
+        super().__init__(bench, "memory_benchmark_sycl", "QueueMemcpy")
+
+    def name(self):
+        return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--sourcePlacement={self.source}",
+            f"--destinationPlacement={self.destination}",
+            f"--size={self.size}",
+        ]
+
+class StreamMemory(ComputeBenchmark):
+    def __init__(self, bench, type, size, placement):
+        self.type = type
+        self.size = size
+        self.placement = placement
+        super().__init__(bench, "memory_benchmark_sycl", "StreamMemory")
+
+    def name(self):
+        return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--type={self.type}",
+            f"--size={self.size}",
+            f"--memoryPlacement={self.placement}",
+            "--useEvents=0",
+            "--contents=Zeros",
+        ]
+
+class VectorSum(ComputeBenchmark):
+    def __init__(self, bench):
+        super().__init__(bench, "miscellaneous_benchmark_sycl", "VectorSum")
+
+    def name(self):
+        return f"miscellaneous_benchmark_sycl VectorSum"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=1000",
+            "--numberOfElementsX=512",
+            "--numberOfElementsY=256",
+            "--numberOfElementsZ=256",
+        ]
+
diff --git a/scripts/benchmarks/benches/cudaSift.py b/scripts/benchmarks/benches/cudaSift.py
index 6f9c19040e..482d258052 100644
--- a/scripts/benchmarks/benches/cudaSift.py
+++ b/scripts/benchmarks/benches/cudaSift.py
@@ -9,11 +9,18 @@
 from utils.utils import run
 import os
 import re
+import shutil
 
 class CudaSift(VelocityBase):
     def __init__(self, vb: VelocityBench):
         super().__init__("cudaSift", "cudaSift", vb)
 
+    def download_deps(self):
+        images = os.path.join(self.vb.repo_path, self.bench_name, 'inputData')
+        dest = os.path.join(self.directory, 'inputData')
+        if not os.path.exists(dest):
+            shutil.copytree(images, dest)
+
     def name(self):
         return "Velocity-Bench CudaSift"
 
diff --git a/scripts/benchmarks/benches/easywave.py b/scripts/benchmarks/benches/easywave.py
index 2fa4d95685..2f89482329 100644
--- a/scripts/benchmarks/benches/easywave.py
+++ b/scripts/benchmarks/benches/easywave.py
@@ -14,6 +14,8 @@ class Easywave(VelocityBase):
     def __init__(self, vb: VelocityBench):
         super().__init__("easywave", "easyWave_sycl", vb)
+
+    def download_deps(self):
         self.download_untar("easywave", "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz")
 
     def name(self):
diff --git a/scripts/benchmarks/benches/hashtable.py b/scripts/benchmarks/benches/hashtable.py
index c8cb0bdb03..7558183bf0 100644
--- a/scripts/benchmarks/benches/hashtable.py
+++ b/scripts/benchmarks/benches/hashtable.py
@@ -23,6 +23,9 @@ def unit(self):
     def bin_args(self) -> list[str]:
         return ["--no-verify"]
 
+    def lower_is_better(self):
+        return False
+
     def parse_output(self, stdout: str) -> float:
         match = re.search(r'(\d+\.\d+) million keys/second', stdout)
         if match:
diff --git a/scripts/benchmarks/benches/options.py b/scripts/benchmarks/benches/options.py
index c990a44d5f..c035ce6800 100644
--- a/scripts/benchmarks/benches/options.py
+++ b/scripts/benchmarks/benches/options.py
@@ -5,6 +5,9 @@ class Options:
     sycl: str = ""
     rebuild: bool = True
     benchmark_cwd: str = "INVALID"
+    timeout: float = 600
+    iterations: int = 5
+    verbose: bool = False
 
 options = Options()
 
diff --git a/scripts/benchmarks/benches/quicksilver.py b/scripts/benchmarks/benches/quicksilver.py
index 383c8dd5be..7e1f65ee1d 100644
--- a/scripts/benchmarks/benches/quicksilver.py
+++ b/scripts/benchmarks/benches/quicksilver.py
@@ -15,10 +15,10 @@ def __init__(self, vb: VelocityBench):
         super().__init__("QuickSilver", "qs", vb)
         self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering")
 
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars) -> Result:
         # TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0
         if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0':
-            return []
+            return None
 
         return super().run(env_vars)
 
@@ -28,6 +28,9 @@ def name(self):
     def unit(self):
         return "MMS/CTT"
 
+    def lower_is_better(self):
+        return False
+
     def bin_args(self) -> list[str]:
         return ["-i", f"{self.data_path}/scatteringOnly.inp"]
 
diff --git a/scripts/benchmarks/benches/result.py b/scripts/benchmarks/benches/result.py
index 8dd2f4ba9c..896ff4da98 100644
--- a/scripts/benchmarks/benches/result.py
+++ b/scripts/benchmarks/benches/result.py
@@ -16,3 +16,4 @@ class Result:
     stdout: str
     unit: str = ""
     name: str = ""
+    lower_is_better: bool = True
diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py
index fec3abb842..e5601c6563 100644
--- a/scripts/benchmarks/benches/velocity.py
+++ b/scripts/benchmarks/benches/velocity.py
@@ -6,15 +6,14 @@
 from utils.utils import git_clone
 from .base import Benchmark
 from .result import Result
-from utils.utils import run
+from utils.utils import run, create_build_path
 import os
 import re
 
 class VelocityBench:
     def __init__(self, directory):
         self.directory = directory
-        # TODO: replace with https://github.com/oneapi-src/Velocity-Bench once all fixes land upstream
-        self.repo_path = git_clone(self.directory, "velocity-bench-repo", "https://github.com/pbalcer/Velocity-Bench.git", "ae0ae05c7fd1469779ecea4f36e4741b1d956eb4")
+        self.repo_path = git_clone(self.directory, "velocity-bench-repo", "https://github.com/oneapi-src/Velocity-Bench", "34ee4ebe18d91dfdd38b7d798fd986b41874fcbc")
 
 class VelocityBase(Benchmark):
     def __init__(self, name: str, bin_name: str, vb: VelocityBench):
@@ -24,8 +23,13 @@ def __init__(self, name: str, bin_name: str, vb: VelocityBench):
         self.bin_name = bin_name
         self.code_path = os.path.join(self.vb.repo_path, self.bench_name, 'SYCL')
 
+    def download_deps(self):
+        return
+
     def setup(self):
-        build_path = self.create_build_path(self.bench_name)
+        self.download_deps()
+
+        build_path = create_build_path(self.directory, self.bench_name)
 
         configure_command = [
             "cmake",
@@ -47,7 +51,7 @@ def extra_env_vars(self) -> dict:
     def parse_output(self, stdout: str) -> float:
         raise NotImplementedError()
 
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars) -> Result:
         env_vars.update(self.extra_env_vars())
 
         command = [
@@ -57,7 +61,7 @@ def run(self, env_vars) -> list[Result]:
 
         result = self.run_bench(command, env_vars)
 
-        return [Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result)]
+        return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result)
 
     def teardown(self):
         return
diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py
index 5dad40c7fe..34238f773c 100755
--- a/scripts/benchmarks/main.py
+++ b/scripts/benchmarks/main.py
@@ -5,9 +5,8 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-import os
 from utils.utils import prepare_workdir, load_benchmark_results, save_benchmark_results;
-from benches.api_overhead import APIOverheadSYCL
+from benches.compute import *
 from benches.hashtable import Hashtable
 from benches.bitcracker import Bitcracker
 from benches.cudaSift import CudaSift
@@ -18,46 +17,72 @@
 from benches.options import options
 from output import generate_markdown
 import argparse
+import re
 
 # Update this if you are changing the layout of the results files
-INTERNAL_WORKDIR_VERSION = '1.0'
-
-def main(directory, additional_env_vars, save_name, compare_names):
-    variants = [
-        ({'UR_L0_USE_IMMEDIATE_COMMANDLISTS': '0'}, "Imm-CmdLists-OFF"),
-        ({'UR_L0_USE_IMMEDIATE_COMMANDLISTS': '1'}, ""),
-    ]
+INTERNAL_WORKDIR_VERSION = '1.6'
 
+def main(directory, additional_env_vars, save_name, compare_names, filter):
     prepare_workdir(directory, INTERNAL_WORKDIR_VERSION)
 
     vb = VelocityBench(directory)
+    cb = ComputeBench(directory)
 
     benchmarks = [
-        APIOverheadSYCL(directory),
+        SubmitKernelSYCL(cb, 0),
+        SubmitKernelSYCL(cb, 1),
+        QueueInOrderMemcpy(cb, 0, 'Device', 'Device', 1024),
+        QueueInOrderMemcpy(cb, 0, 'Host', 'Device', 1024),
+        QueueMemcpy(cb, 'Device', 'Device', 1024),
+        StreamMemory(cb, 'Triad', 10 * 1024, 'Device'),
+        ExecImmediateCopyQueue(cb, 0, 1, 'Device', 'Device', 1024),
+        ExecImmediateCopyQueue(cb, 1, 1, 'Device', 'Host', 1024),
+        VectorSum(cb),
         Hashtable(vb),
         Bitcracker(vb),
-        #CudaSift(vb), TODO: the benchmark is passing, but is outputting "Failed to allocate device data"
+        CudaSift(vb),
         Easywave(vb),
         QuickSilver(vb),
         SobelFilter(vb)
     ]
 
+    if filter:
+        benchmarks = [benchmark for benchmark in benchmarks if filter.search(benchmark.name())]
+
     for benchmark in benchmarks:
+        print(f"setting up {benchmark.name()}... ", end='', flush=True)
         benchmark.setup()
+        print("complete.")
 
     results = []
     for benchmark in benchmarks:
-        for env_vars, extra_label in variants:
-            merged_env_vars = {**env_vars, **additional_env_vars}
+        merged_env_vars = {**additional_env_vars}
+        iteration_results = []
+        for iter in range(options.iterations):
+            print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True)
             bench_results = benchmark.run(merged_env_vars)
-            for res in bench_results:
-                res.unit = benchmark.unit()
-                res.name = benchmark.name()
-                res.label += f" {extra_label}"
-                results.append(res)
+            if bench_results is not None:
+                print(f"complete ({bench_results.value} {benchmark.unit()}).")
+                iteration_results.append(bench_results)
+            else:
+                print(f"did not finish.")
+
+        if len(iteration_results) == 0:
+            continue
+
+        iteration_results.sort(key=lambda res: res.value)
+        median_index = len(iteration_results) // 2
+        median_result = iteration_results[median_index]
+
+        median_result.unit = benchmark.unit()
+        median_result.name = benchmark.name()
+
+        results.append(median_result)
 
     for benchmark in benchmarks:
+        print(f"tearing down {benchmark.name()}... ", end='', flush=True)
         benchmark.teardown()
+        print("complete.")
 
     chart_data = {"This PR" : results}
 
@@ -93,11 +118,20 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[])
     parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.')
     parser.add_argument("--compare", type=str, help='Compare results against previously saved data.', action="append", default=["baseline"])
+    parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=5)
+    parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600)
+    parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None)
+    parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true")
 
     args = parser.parse_args()
     additional_env_vars = validate_and_parse_env_args(args.env)
 
+    options.verbose = args.verbose
     options.rebuild = not args.no_rebuild
     options.sycl = args.sycl
+    options.iterations = args.iterations
+    options.timeout = args.timeout
+
+    benchmark_filter = re.compile(args.filter) if args.filter else None
 
-    main(args.benchmark_directory, additional_env_vars, args.save, args.compare)
+    main(args.benchmark_directory, additional_env_vars, args.save, args.compare, benchmark_filter)
diff --git a/scripts/benchmarks/output.py b/scripts/benchmarks/output.py
index 9cfee303b1..26deabe099 100644
--- a/scripts/benchmarks/output.py
+++ b/scripts/benchmarks/output.py
@@ -5,6 +5,7 @@
 
 import collections
 from benches.base import Result
+import math
 
 # Function to generate the mermaid bar chart script
 def generate_mermaid_script(chart_data: dict[str, list[Result]]):
@@ -19,6 +20,9 @@ def generate_mermaid_script(chart_data: dict[str, list[Result]]):
         # remove duplicates
         labels = list(dict.fromkeys(labels))
         mermaid_script += f"""
+<details>
+<summary>{bname}</summary>
+
 ```mermaid
 ---
 config:
@@ -57,6 +61,8 @@ def generate_mermaid_script(chart_data: dict[str, list[Result]]):
         """
     mermaid_script += f"""
 ```
+</details>
+
 """
     return mermaid_script
 
@@ -83,44 +89,52 @@ def generate_markdown_details(results: list[Result]):
         """)
     return "\n".join(markdown_sections)
 
-def generate_summary(chart_data: dict[str, list[Result]]) -> str:
-    # Calculate the mean value of "This PR" for each benchmark
-    this_pr_means = {}
-    for res in chart_data["This PR"]:
-        if res.name not in this_pr_means:
-            this_pr_means[res.name] = []
-        this_pr_means[res.name].append(res.value)
-    for bname in this_pr_means:
-        this_pr_means[bname] = sum(this_pr_means[bname]) / len(this_pr_means[bname])
-
-    # Calculate the percentage for each entry relative to "This PR"
-    summary_data = {"This PR": 100}
-    for entry_name, results in chart_data.items():
-        if entry_name == "This PR":
-            continue
-        entry_sum = 0
-        for res in results:
-            if res.name in this_pr_means:
-                percentage = (res.value / this_pr_means[res.name]) * 100
-                entry_sum += percentage
-
-        entry_average = entry_sum / len(results) if results else 0
-        summary_data[entry_name] = entry_average
+def generate_summary_table(chart_data: dict[str, list[Result]]):
+    summary_table = "| Benchmark | " + " | ".join(chart_data.keys()) + " |\n"
+    summary_table += "|---" * (len(chart_data) + 1) + "|\n"
 
-    markdown_table = "| Name | Result % |\n| --- | --- |\n"
-    for entry_name, percentage in summary_data.items():
-        markdown_table += f"| {entry_name} | {percentage:.2f}% |\n"
-
-    return markdown_table
+    # Collect all benchmarks and their results
+    benchmark_results = collections.defaultdict(dict)
+    for key, results in chart_data.items():
+        for res in results:
+            benchmark_results[res.name][key] = res
+
+    # Generate the table rows
+    for bname, results in benchmark_results.items():
+        row = f"| {bname} |"
+        best_value = None
+        best_key = None
+
+        # Determine the best value
+        for key, res in results.items():
+            if best_value is None or (res.lower_is_better and res.value < best_value) or (not res.lower_is_better and res.value > best_value):
+                best_value = res.value
+                best_key = key
+
+        # Generate the row with the best value highlighted
+        for key in chart_data.keys():
+            if key in results:
+                value = results[key].value
+                if key == best_key:
+                    row += f" `**{value}**` |" # Highlight the best value
+                else:
+                    row += f" {value} |"
+            else:
+                row += " - |"
+
+        summary_table += row + "\n"
+
+    return summary_table
 
 def generate_markdown(chart_data: dict[str, list[Result]]):
     mermaid_script = generate_mermaid_script(chart_data)
+    summary_table = generate_summary_table(chart_data)
 
     return f"""
 # Summary
-{generate_summary(chart_data)}
-# Benchmark Results
+{summary_table}
+# Charts
 {mermaid_script}
-## Details
+# Details
 {generate_markdown_details(chart_data["This PR"])}
 """
diff --git a/scripts/benchmarks/utils/utils.py b/scripts/benchmarks/utils/utils.py
index 9dc3f23a9b..5c7beb95d0 100644
--- a/scripts/benchmarks/utils/utils.py
+++ b/scripts/benchmarks/utils/utils.py
@@ -28,9 +28,12 @@ def run(command, env_vars={}, cwd=None, add_sycl=False):
             env['LD_LIBRARY_PATH'] = sycl_lib_path + os.pathsep + env.get('LD_LIBRARY_PATH', '')
 
         env.update(env_vars)
-        result = subprocess.run(command, cwd=cwd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) # nosec B603
-        print(result.stdout.decode())
-        print(result.stderr.decode())
+        result = subprocess.run(command, cwd=cwd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, timeout=options.timeout) # nosec B603
+
+        if options.verbose:
+            print(result.stdout.decode())
+            print(result.stderr.decode())
+
         return result
     except subprocess.CalledProcessError as e:
         print(e.stdout.decode())
@@ -70,7 +73,8 @@ def load_benchmark_results(dir, compare_name) -> list[Result]:
         return None
 
 def prepare_bench_cwd(dir):
-    options.benchmark_cwd = os.path.join(dir, 'bcwd')
+    # we need to go two levels deep to work around a fixed relative path in cudaSift
+    options.benchmark_cwd = os.path.join(dir, 'bcwd', 'bcwd')
     if os.path.exists(options.benchmark_cwd):
         shutil.rmtree(options.benchmark_cwd)
     os.makedirs(options.benchmark_cwd)
@@ -97,3 +101,13 @@ def prepare_workdir(dir, version):
 
     with open(version_file_path, 'w') as version_file:
         version_file.write(version)
+
+def create_build_path(directory, name):
+    build_path = os.path.join(directory, name)
+
+    if options.rebuild and Path(build_path).exists():
+        shutil.rmtree(build_path)
+
+    Path(build_path).mkdir(parents=True, exist_ok=True)
+
+    return build_path
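
--
Example invocation (a sketch, not part of the patch): the new --iterations, --timeout,
--filter, and --verbose options added above can be combined with the existing
benchmark directory, --sycl, --save, and --compare arguments as shown below. The
workdir and SYCL build paths are placeholders, not taken from this patch.

    # Run only benchmarks whose names match the regex, taking the median of 5 runs,
    # with a 10-minute per-command timeout; paths below are hypothetical.
    ./scripts/benchmarks/main.py ~/benchmarks-workdir \
        --sycl ~/llvm/build \
        --iterations 5 \
        --timeout 600 \
        --filter "SubmitKernel|QueueMemcpy" \
        --verbose \
        --save this-patch --compare baseline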