Skip to content

Commit

Permalink
feat(minor): Support thresholds for absolute checks (#223)
Browse files Browse the repository at this point in the history
For benchmarks that are not completely stable (e.g. in syscall or malloc counts) due to the use of async or networking, it is desirable to be able to specify some leeway even for the absolute checks derived from thresholds.

Co-authored-by: dimlio <122263440+dimlio@users.noreply.github.com>
  • Loading branch information
hassila and dimlio committed Jan 11, 2024
1 parent 02b4da9 commit 3bb4e7d
Show file tree
Hide file tree
Showing 17 changed files with 253 additions and 229 deletions.
42 changes: 31 additions & 11 deletions Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ struct BenchmarkIdentifier: Codable, Hashable {
}
}

/// Convenience accessor for the identity used to key a benchmark in
/// baseline and threshold lookups.
extension Benchmark {
    /// A `BenchmarkIdentifier` composed of this benchmark's target and name.
    var benchmarkIdentifier: BenchmarkIdentifier {
        BenchmarkIdentifier(target: target, name: name)
    }
}

public extension Sequence where Iterator.Element: Hashable {
func unique() -> [Iterator.Element] {
var seen: Set<Iterator.Element> = []
Expand Down Expand Up @@ -386,19 +392,25 @@ extension BenchmarkTool {
}

extension BenchmarkBaseline {
func thresholdsForBenchmarks(_ benchmarks: [Benchmark], name: String, target: String, metric: BenchmarkMetric) -> BenchmarkThresholds {
func thresholdsForBenchmarks(
_ benchmarks: [Benchmark],
name: String,
target: String,
metric: BenchmarkMetric,
defaultThresholds: BenchmarkThresholds = BenchmarkThresholds.default
) -> BenchmarkThresholds {
let benchmark = benchmarks.filter { $0.name == name && $0.target == target }.first

guard let benchmark else {
return BenchmarkThresholds.default
return defaultThresholds
}

guard let thresholds = benchmark.configuration.thresholds else {
return BenchmarkThresholds.default
return defaultThresholds
}

guard let threshold = thresholds[metric] else {
return BenchmarkThresholds.default
return defaultThresholds
}

return threshold
Expand Down Expand Up @@ -445,20 +457,28 @@ extension BenchmarkBaseline: Equatable {
return allDeviationResults
}

public func failsAbsoluteThresholdChecks(benchmarks: [Benchmark]) -> BenchmarkResult.ThresholdDeviations {
public func failsAbsoluteThresholdChecks(benchmarks: [Benchmark],
p90Thresholds: [BenchmarkIdentifier :
[BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold]]) -> BenchmarkResult.ThresholdDeviations {
var allDeviationResults = BenchmarkResult.ThresholdDeviations()

for (lhsBenchmarkIdentifier, lhsBenchmarkResults) in results {
for lhsBenchmarkResult in lhsBenchmarkResults {
let thresholds = thresholdsForBenchmarks(benchmarks,
name: lhsBenchmarkIdentifier.name,
target: lhsBenchmarkIdentifier.target,
metric: lhsBenchmarkResult.metric)

let deviationResults = lhsBenchmarkResult.deviationsAgainstAbsoluteThresholds(thresholds,
name: lhsBenchmarkIdentifier.name,
target: lhsBenchmarkIdentifier.target)
allDeviationResults.append(deviationResults)
metric: lhsBenchmarkResult.metric,
defaultThresholds: BenchmarkThresholds.strict)

if let p90Thresholds = p90Thresholds[lhsBenchmarkIdentifier] {
if let p90Thresholds = p90Thresholds[lhsBenchmarkResult.metric] {
let deviationResults = lhsBenchmarkResult.deviationsAgainstAbsoluteThresholds(thresholds: thresholds,
p90Threshold: p90Thresholds,
name: lhsBenchmarkIdentifier.name,
target: lhsBenchmarkIdentifier.target)
allDeviationResults.append(deviationResults)
}
}
}
}

Expand Down
57 changes: 28 additions & 29 deletions Plugins/BenchmarkTool/BenchmarkTool+Operations.swift
Original file line number Diff line number Diff line change
Expand Up @@ -165,27 +165,18 @@ extension BenchmarkTool {
}
}

var p90Thresholds: [BenchmarkIdentifier : [BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold]] = [:]

if let benchmarkPath = checkAbsolutePath { // load statically defined thresholds for .p90
var thresholdsFound = false
benchmarks.forEach { benchmark in
let thresholds = BenchmarkTool.makeBenchmarkThresholds(path: benchmarkPath,
moduleName: benchmark.target,
benchmarkName: benchmark.name)
var transformed: [BenchmarkMetric: BenchmarkThresholds] = [:]
if let thresholds {
thresholdsFound = true
thresholds.forEach { key, value in
if let metric = BenchmarkMetric(argument: key) {
let absoluteThreshold: BenchmarkThresholds.AbsoluteThresholds = [.p90: value]
transformed[metric] = BenchmarkThresholds(absolute: absoluteThreshold)
}
}
if transformed.isEmpty == false {
benchmark.configuration.thresholds = transformed
}
if let thresholds = BenchmarkTool.makeBenchmarkThresholds(
path: benchmarkPath,
benchmarkIdentifier: benchmark.benchmarkIdentifier) {
p90Thresholds[benchmark.benchmarkIdentifier] = thresholds
}
}
if !thresholdsFound {

if p90Thresholds.isEmpty {
if benchmarks.count == 0 {
failBenchmark("No benchmarks matching filter selection, failing threshold check.",
exitCode: .thresholdRegression)
Expand All @@ -196,23 +187,31 @@ extension BenchmarkTool {
}
print("")

let deviationResults = currentBaseline.failsAbsoluteThresholdChecks(benchmarks: benchmarks)
let deviationResults = currentBaseline.failsAbsoluteThresholdChecks(benchmarks: benchmarks,
p90Thresholds: p90Thresholds)

if deviationResults.regressions.isEmpty {
if deviationResults.improvements.isEmpty {
print("Baseline '\(baselineName)' is EQUAL to the defined absolute baseline thresholds. (--check-absolute)")
if deviationResults.regressions.isEmpty && deviationResults.improvements.isEmpty {
print("Baseline '\(baselineName)' is EQUAL to the defined absolute baseline thresholds. (--check-absolute)")
} else {
if !deviationResults.regressions.isEmpty {
prettyPrintDeviation(baselineName: "p90 threshold",
comparingBaselineName: baselineName,
deviationResults: deviationResults.regressions,
deviationTitle: "Deviations worse than threshold")
}
if !deviationResults.improvements.isEmpty {
prettyPrintDeviation(baselineName: "p90 threshold",
comparingBaselineName: baselineName,
deviationResults: deviationResults.improvements,
deviationTitle: "Deviations better than threshold")
}
if !deviationResults.regressions.isEmpty {
failBenchmark("New baseline '\(baselineName)' is WORSE than the defined absolute baseline thresholds. (--check-absolute)",
exitCode: .thresholdRegression)
} else {
prettyPrintAbsoluteDeviation(baselineName: baselineName,
deviationResults: deviationResults.improvements)

failBenchmark("New baseline '\(baselineName)' is BETTER than the defined absolute baseline thresholds. (--check-absolute)",
exitCode: .thresholdImprovement)
}
} else {
prettyPrintAbsoluteDeviation(baselineName: baselineName,
deviationResults: deviationResults.regressions)
failBenchmark("New baseline '\(baselineName)' is WORSE than the defined absolute baseline thresholds. (--check-absolute)",
exitCode: .thresholdRegression)
}
} else {
guard benchmarkBaselines.count == 2 else {
Expand Down
48 changes: 3 additions & 45 deletions Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,8 @@ extension BenchmarkTool {

func prettyPrintDeviation(baselineName: String,
comparingBaselineName: String,
deviationResults: [BenchmarkResult.ThresholdDeviation]) {
deviationResults: [BenchmarkResult.ThresholdDeviation],
deviationTitle: String = "Threshold deviations") {
guard quiet == false else { return }

let metrics = deviationResults.map(\.metric).unique()
Expand All @@ -384,7 +385,7 @@ extension BenchmarkTool {
namesAndTargets.forEach { nameAndTarget in

printMarkdown("```")
"Threshold deviations for \(nameAndTarget.name):\(nameAndTarget.target)".printAsHeader(addWhiteSpace: false)
"\(deviationTitle) for \(nameAndTarget.name):\(nameAndTarget.target)".printAsHeader(addWhiteSpace: false)
printMarkdown("```")

metrics.forEach { metric in
Expand Down Expand Up @@ -435,47 +436,4 @@ extension BenchmarkTool {
}
}
}

func prettyPrintAbsoluteDeviation(baselineName: String,
deviationResults: [BenchmarkResult.ThresholdDeviation]) {
guard quiet == false else { return }

let metrics = deviationResults.map(\.metric).unique()
// Get a unique set of all name/target pairs that have threshold deviations, sorted lexically:
let namesAndTargets = deviationResults.map { NameAndTarget(name: $0.name, target: $0.target) }
.unique().sorted { ($0.target, $0.name) < ($1.target, $1.name) }

namesAndTargets.forEach { nameAndTarget in

printMarkdown("```")
"Absolute threshold deviations for \(nameAndTarget.name):\(nameAndTarget.target)".printAsHeader(addWhiteSpace: false)
printMarkdown("```")

metrics.forEach { metric in

let absoluteResults = deviationResults.filter { $0.name == nameAndTarget.name &&
$0.target == nameAndTarget.target &&
$0.metric == metric &&
$0.relative == false
}
let width = 40
let percentileWidth = 15

// The baseValue is the new baseline that we're using as the comparison base, so...
if absoluteResults.isEmpty == false {
let absoluteTable = TextTable<BenchmarkResult.ThresholdDeviation> {
[Column(title: "\(metric.description) (\(metric.countable ? $0.units.description : $0.units.timeDescription), Δ)",
value: $0.percentile, width: width, align: .left),
Column(title: "Threshold", value: $0.comparisonValue, width: percentileWidth, align: .right),
Column(title: "\(baselineName)", value: $0.baseValue, width: percentileWidth, align: .right),
Column(title: "Threshold Abs", value: $0.differenceThreshold, width: percentileWidth, align: .right)]
}

printMarkdown("```")
absoluteTable.print(absoluteResults, style: Style.fancy)
printMarkdown("```")
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,22 @@ extension BenchmarkTool {
/// `String("\(#fileID)".prefix(while: { $0 != "/" }))`
/// - benchmarkName: The name of the benchmark
/// - Returns: A dictionary with static benchmark thresholds per metric or nil if the file could not be found or read
static func makeBenchmarkThresholds(path: String,
moduleName: String,
benchmarkName: String) -> [String: BenchmarkThresholds.AbsoluteThreshold]? {
static func makeBenchmarkThresholds(
path: String,
benchmarkIdentifier: BenchmarkIdentifier
) -> [BenchmarkMetric : BenchmarkThresholds.AbsoluteThreshold]? {
var path = FilePath(path)
if path.isAbsolute {
path.append("\(moduleName).\(benchmarkName).p90.json")
path.append("\(benchmarkIdentifier.target).\(benchmarkIdentifier.name).p90.json")
} else {
var cwdPath = FilePath(FileManager.default.currentDirectoryPath)
cwdPath.append(path.components)
cwdPath.append("\(moduleName).\(benchmarkName).p90.json")
cwdPath.append("\(benchmarkIdentifier.target).\(benchmarkIdentifier.name).p90.json")
path = cwdPath
}

var p90Thresholds: [String: BenchmarkThresholds.AbsoluteThreshold]?
var p90Thresholds: [BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold] = [:]
var p90ThresholdsRaw: [String: BenchmarkThresholds.AbsoluteThreshold]?

do {
let fileDescriptor = try FileDescriptor.open(path, .readOnly, options: [], permissions: .ownerRead)
Expand All @@ -64,7 +66,18 @@ extension BenchmarkTool {
readBytes.append(contentsOf: nextBytes)
}

p90Thresholds = try JSONDecoder().decode([String: BenchmarkThresholds.AbsoluteThreshold].self, from: Data(readBytes))
p90ThresholdsRaw = try JSONDecoder().decode(
[String: BenchmarkThresholds.AbsoluteThreshold].self,
from: Data(readBytes)
)

if let p90ThresholdsRaw {
p90ThresholdsRaw.forEach { metric, threshold in
if let metric = BenchmarkMetric(argument: metric) {
p90Thresholds[metric] = threshold
}
}
}
} catch {
print("Failed to read file at \(path) [\(error)] \(Errno(rawValue: errno).description)")
}
Expand All @@ -77,6 +90,6 @@ extension BenchmarkTool {
print("Failed to open file \(path), errno = [\(errno)] \(Errno(rawValue: errno).description)")
}
}
return p90Thresholds
return p90Thresholds.isEmpty ? nil : p90Thresholds
}
}
15 changes: 15 additions & 0 deletions Sources/Benchmark/BenchmarkClock.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ import Glibc
#error("Unsupported Platform")
#endif

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
public struct BenchmarkClock {
/// A continuous point in time used for `BenchmarkClock`.
public struct Instant: Codable, Sendable {
Expand All @@ -35,6 +38,9 @@ public struct BenchmarkClock {
public init() {}
}

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
public extension Clock where Self == BenchmarkClock {
/// A clock that measures time that always increments but does not stop
/// incrementing while the system is asleep.
Expand All @@ -44,6 +50,9 @@ public extension Clock where Self == BenchmarkClock {
static var internalUTC: BenchmarkClock { BenchmarkClock() }
}

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
extension BenchmarkClock: Clock {
/// The current continuous instant.
public var now: BenchmarkClock.Instant {
Expand Down Expand Up @@ -114,6 +123,9 @@ extension BenchmarkClock: Clock {
}
}

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
extension BenchmarkClock.Instant: InstantProtocol {
public static var now: BenchmarkClock.Instant { BenchmarkClock.now }

Expand Down Expand Up @@ -177,6 +189,9 @@ extension BenchmarkClock.Instant: InstantProtocol {
}
}

#if swift(>=5.8)
@_documentation(visibility: internal)
#endif
public extension Duration {
func nanoseconds() -> Int64 {
(components.seconds * 1_000_000_000) + (components.attoseconds / 1_000_000_000)
Expand Down
6 changes: 3 additions & 3 deletions Sources/Benchmark/BenchmarkMetric+Defaults.swift
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,16 @@ public extension BenchmarkMetric {

// Nicer convenience extension for Array so one can write `.extended` instead of `BenchmarkMetric.extended`
public extension [BenchmarkMetric] {
/// A sutiable set of metrics for microbenchmarks that are CPU-oriented only.
/// A suitable set of metrics for microbenchmarks that are CPU-oriented only.
///
/// The defaults include ``wallClock`` and ``throughput``
/// The defaults include ``BenchmarkMetric/wallClock`` and ``BenchmarkMetric/throughput``
static var microbenchmark: [BenchmarkMetric] {
BenchmarkMetric.microbenchmark
}

/// The default collection of metrics used for a benchmark.
///
/// The defaults include ``wallClock``, ``cpuTotal``, ``mallocCountTotal``, ``throughput``, and ``peakMemoryResident``.
/// The defaults include ``BenchmarkMetric/wallClock``, ``BenchmarkMetric/cpuTotal``, ``BenchmarkMetric/mallocCountTotal``, ``BenchmarkMetric/throughput``, and ``BenchmarkMetric/peakMemoryResident``.
static var `default`: [BenchmarkMetric] {
BenchmarkMetric.default
}
Expand Down

0 comments on commit 3bb4e7d

Please sign in to comment.