From ef77a3bffc8763380f637258b8c3451757de977b Mon Sep 17 00:00:00 2001
From: sweeneyde
Date: Tue, 19 Oct 2021 17:23:41 -0400
Subject: [PATCH] Implement 'faster on x% of runs'

---
 pyperf/__main__.py         |  2 +
 pyperf/_compare.py         | 30 +++++++++++---
 pyperf/_utils.py           | 26 +++++++++++++
 pyperf/tests/test_utils.py | 78 ++++++++++++++++++++++++++++++++++++
 4 files changed, 130 insertions(+), 6 deletions(-)

diff --git a/pyperf/__main__.py b/pyperf/__main__.py
index 3e03565b..3862a1a8 100644
--- a/pyperf/__main__.py
+++ b/pyperf/__main__.py
@@ -108,6 +108,8 @@ def cpu_affinity(cmd):
                      help='Absolute minimum of speed in percent to '
                           'consider that a benchmark is significant '
                           '(default: 0%%)')
+    cmd.add_argument('--win_percentage', action="store_true",
+                     help="Show how often a benchmark is faster.")
     cmd.add_argument('--table', action="store_true",
                      help='Render a table')
     cmd.add_argument("--table-format", type=str, default="rest",
diff --git a/pyperf/_compare.py b/pyperf/_compare.py
index a83ee2a1..12ee46b1 100644
--- a/pyperf/_compare.py
+++ b/pyperf/_compare.py
@@ -1,5 +1,5 @@
 from pyperf._cli import display_title, format_result_value
-from pyperf._utils import is_significant, geometric_mean
+from pyperf._utils import is_significant, geometric_mean, percentage_less_than
 
 
 def is_significant_benchs(bench1, bench2):
@@ -18,6 +18,14 @@ def is_significant_benchs(bench1, bench2):
         # FIXME: fix the root bug, don't work around it
         return (True, None)
 
+def win_percentage_text(bench1, bench2):
+    values1 = bench1.get_values()
+    values2 = bench2.get_values()
+    p = percentage_less_than(values1, values2)
+    if p >= 0.5:
+        return " (slower on {:.0%} of runs)".format(p)
+    else:
+        return " (faster on {:.0%} of runs)".format(1 - p)
 
 class CompareData:
     def __init__(self, name, benchmark):
@@ -100,7 +108,8 @@ def norm_mean(self):
             self._compute_norm_mean()
         return self._norm_mean
 
-    def oneliner(self, verbose=True, show_name=True, check_significant=True):
+    def oneliner(self, verbose=True, show_name=True,
+                 check_significant=True, win_percentage=False):
         if check_significant and not self.significant:
             return "Not significant!"
@@ -119,10 +128,14 @@
             text = "%s -> %s" % (ref_text, chg_text)
         text = "%s: %s" % (text, format_normalized_mean(self.norm_mean))
 
+        if win_percentage:
+            text += win_percentage_text(self.ref.benchmark,
+                                        self.changed.benchmark)
         return text
 
-    def format(self, verbose=True, show_name=True):
-        text = self.oneliner(show_name=show_name, check_significant=False)
+    def format(self, verbose=True, show_name=True, win_percentage=False):
+        text = self.oneliner(show_name=show_name, check_significant=False,
+                             win_percentage=win_percentage)
         lines = [text]
 
         # significant?
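
With --win_percentage, the oneliner gains a suffix after the normalized
mean, produced by win_percentage_text() above. Illustrative output with
hypothetical run names and timings, not from a real benchmark run:

    Mean +- std dev: [py310] 1.52 ms +- 0.03 ms -> [py311] 1.30 ms +- 0.04 ms: 1.17x faster (faster on 94% of runs)
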
@@ -228,6 +241,7 @@ def __init__(self, benchmarks, args):
         self.group_by_speed = args.group_by_speed
         self.verbose = args.verbose
         self.quiet = args.quiet
+        self.win_percentage = args.win_percentage
 
         grouped_by_name = self.benchmarks.group_by_name()
         if not grouped_by_name:
@@ -297,6 +311,8 @@ def sort_key(results):
                     text = "%s: %s" % (bench.format_value(bench.mean()), text)
                 else:
                     text = "not significant"
+                if self.win_percentage:
+                    text += win_percentage_text(ref_bench, bench)
                 significants.append(significant)
                 all_norm_means[index].append(result.norm_mean)
                 row.append(text)
@@ -365,7 +381,8 @@ def sort_key(item):
             print()
             print("%s (%s):" % (title, len(results)))
             for name, result in results:
-                text = result.oneliner(verbose=False)
+                text = result.oneliner(verbose=False,
+                                       win_percentage=self.win_percentage)
                 print("- %s: %s" % (name, text))
             empty_line = True
@@ -383,7 +400,8 @@ def compare_suites_list(self):
         significant = any(result.significant for result in results)
         lines = []
         for result in results:
-            lines.extend(result.format(self.verbose))
+            lines.extend(result.format(self.verbose,
+                                       win_percentage=self.win_percentage))
 
         if not(significant or self.verbose):
             not_significant.append(results.name)
diff --git a/pyperf/_utils.py b/pyperf/_utils.py
index 11ec16dd..c2687a40 100644
--- a/pyperf/_utils.py
+++ b/pyperf/_utils.py
@@ -403,3 +403,29 @@ def geometric_mean(data):
     if not data:
         raise ValueError("empty data")
     return _geometric_mean(data)
+
+
+def _count_pairs_less_than(list1, list2):
+    # counts sum(x < y for x in list1 for y in list2);
+    # list1 and list2 must already be sorted
+    i = 0
+    count = 0
+    for x in list2:
+        while i < len(list1) and list1[i] < x:
+            i += 1
+        # x is bigger than all of list1[:i]
+        count += i
+    return count
+
+
+def percentage_less_than(list1, list2):
+    # measures mean(x < y for x in list1 for y in list2); ties count 1/2
+    list1 = sorted(list1)
+    list2 = sorted(list2)
+    list1_less = _count_pairs_less_than(list1, list2)
+    list2_less = _count_pairs_less_than(list2, list1)
+    product = len(list1) * len(list2)
+    if product == 0:
+        raise ValueError("Can't compute percentages of empty samples")
+    ties = product - list1_less - list2_less
+    return (list1_less + ties / 2) / product
diff --git a/pyperf/tests/test_utils.py b/pyperf/tests/test_utils.py
index 70bb8fac..7daee721 100644
--- a/pyperf/tests/test_utils.py
+++ b/pyperf/tests/test_utils.py
@@ -74,6 +74,84 @@ def test_geometric_mean(self):
         self.assertAlmostEqual(utils.geometric_mean([54, 24, 36]), 36.0)
 
 
+class TestWinPercentage(unittest.TestCase):
+
+    def test_count_pairs_less_than(self):
+        cplt = utils._count_pairs_less_than
+        self.assertEqual(cplt([], []), 0)
+        self.assertEqual(cplt([], [1]), 0)
+        self.assertEqual(cplt([], [1, 2]), 0)
+        self.assertEqual(cplt([1], []), 0)
+        self.assertEqual(cplt([1, 2], []), 0)
+        self.assertEqual(cplt([1], [1]), 0)
+        self.assertEqual(cplt([0], [1]), 1)
+        self.assertEqual(cplt([0, 1], [0, 1]), 1)
+        self.assertEqual(cplt([0, 1, 2, 3], [1, 1, 2, 2]), 6)
+
+        self.assertEqual(cplt([0, 1, 1, 2], [1, 2, 3, 4]), 12)
+        self.assertEqual(cplt([1, 2, 3, 4], [0, 1, 1, 2]), 1)
+        self.assertEqual(cplt([3, 4, 4, 5], [1, 2, 3, 4]), 1)
+        self.assertEqual(cplt([1, 2, 3, 4], [3, 4, 4, 5]), 12)
+
+        from random import random, randrange
+        from collections import Counter
+
+        for _ in range(1000):
+            # short test cases to brute-force
+            common = [random() for i in range(randrange(3))]
+            list1 = [random() for i in range(randrange(10))] + common
+            list2 = [random() for i in range(randrange(10))] + common
+            list1.sort()
+            list2.sort()
+
+            expected = sum(x < y for x in list1 for y in list2)
+            self.assertEqual(cplt(list1, list2), expected)
+
+        for _ in range(1000):
+            # longer test cases just to make sure things add up.
+            common = [random() for i in range(randrange(10))]
+            list1 = [random() for i in range(randrange(200))] + common
+            list2 = [random() for i in range(randrange(200))] + common
+            list1.sort()
+            list2.sort()
+
+            c1 = Counter(list1)
+            c2 = Counter(list2)
+            ties1 = sum(c2[t] * count1 for t, count1 in c1.items())
+            ties2 = sum(c1[t] * count2 for t, count2 in c2.items())
+            self.assertEqual(ties1, ties2)
+            self.assertGreaterEqual(ties1, len(common))
+
+            list1_less = cplt(list1, list2)
+            list2_less = cplt(list2, list1)
+            self.assertEqual(ties1 + list1_less + list2_less,
+                             len(list1) * len(list2))
+
+    def test_percentage_less_than(self):
+        plt = utils.percentage_less_than
+        self.assertRaises(ValueError, plt, [], [])
+        self.assertRaises(ValueError, plt, [1], [])
+        self.assertRaises(ValueError, plt, [], [1])
+        self.assertEqual(plt([0], [1]), 1.0)
+        self.assertEqual(plt([1], [0]), 0.0)
+        self.assertEqual(plt([0, 2], [1]), 0.5)
+        self.assertEqual(plt([0, 0], [1]), 1.0)
+        self.assertEqual(plt([0, 0], [1, 1]), 1.0)
+        self.assertEqual(plt([1, 2, 3], [2, 3]), 4.0/6)
+        self.assertEqual(plt([0, 0], [0, 1]), 0.75)
+        self.assertEqual(plt([0, 1], [0, 1]), 0.5)
+
+        from random import random, randrange
+        for _ in range(1000):
+            # no ties => count in the obvious way.
+            distinct = list({random() for i in range(randrange(20, 30))})
+            j = randrange(1, len(distinct))
+            left, right = distinct[:j], distinct[j:]
+            expected = (sum(x < y for x in left for y in right)
+                        / (len(left) * len(right)))
+            self.assertEqual(plt(left, right), expected)
+
+
 class TestUtils(unittest.TestCase):
     def test_parse_iso8601(self):
         # Default format using 'T' separator
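
The helpers added to pyperf/_utils.py amount to a Mann-Whitney-style
pairwise count: each (x, y) pair with x < y scores 1, each tie scores
1/2, and the total is divided by the number of pairs. A minimal sketch
of the semantics, assuming the patch is applied so percentage_less_than
is importable from pyperf._utils (the values are illustrative):

    # How percentage_less_than() scores pairs.
    from pyperf._utils import percentage_less_than

    ref = [1, 2, 3]    # e.g. timings from the reference benchmark
    chg = [2, 3]       # e.g. timings from the changed benchmark

    # Of the 6 (x, y) pairs, 3 have x < y and 2 are ties, so the score
    # is (3 + 2/2) / 6 == 4/6, the same case test_percentage_less_than
    # checks above.
    print(percentage_less_than(ref, chg))  # 0.666...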