Implement 'faster on x% of runs' #118

Open · wants to merge 1 commit into base: main
2 changes: 2 additions & 0 deletions pyperf/__main__.py
@@ -108,6 +108,8 @@ def cpu_affinity(cmd):
help='Absolute minimum of speed in percent to '
'consider that a benchmark is significant '
'(default: 0%%)')
cmd.add_argument('--win_percentage', action="store_true",
help="Show how often a benchmark is faster.")
cmd.add_argument('--table', action="store_true",
help='Render a table')
cmd.add_argument("--table-format", type=str, default="rest",
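For context, a hypothetical invocation of the compare command with the new flag (the JSON file names below are placeholders) could look like:

    python3 -m pyperf compare_to ref.json changed.json --win_percentage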
30 changes: 24 additions & 6 deletions pyperf/_compare.py
@@ -1,5 +1,5 @@
from pyperf._cli import display_title, format_result_value
from pyperf._utils import is_significant, geometric_mean
from pyperf._utils import is_significant, geometric_mean, percentage_less_than


def is_significant_benchs(bench1, bench2):
@@ -18,6 +18,14 @@ def is_significant_benchs(bench1, bench2):
# FIXME: fix the root bug, don't work around it
return (True, None)

def win_percentage_text(bench1, bench2):
values1 = bench1.get_values()
values2 = bench2.get_values()
p = percentage_less_than(values1, values2)
if p >= 0.5:
return " (slower on {:.0%} of runs)".format(p)
else:
return " (faster on {:.0%} of runs)".format(1 - p)

class CompareData:
def __init__(self, name, benchmark):
@@ -100,7 +108,8 @@ def norm_mean(self):
self._compute_norm_mean()
return self._norm_mean

def oneliner(self, verbose=True, show_name=True, check_significant=True):
def oneliner(self, verbose=True, show_name=True,
check_significant=True, win_percentage=False):
if check_significant and not self.significant:
return "Not significant!"

@@ -119,10 +128,14 @@ def oneliner(self, verbose=True, show_name=True, check_significant=True):
text = "%s -> %s" % (ref_text, chg_text)

text = "%s: %s" % (text, format_normalized_mean(self.norm_mean))
if win_percentage:
text += win_percentage_text(self.ref.benchmark,
self.changed.benchmark)
return text

def format(self, verbose=True, show_name=True):
text = self.oneliner(show_name=show_name, check_significant=False)
def format(self, verbose=True, show_name=True, win_percentage=False):
text = self.oneliner(show_name=show_name, check_significant=False,
win_percentage=win_percentage)
lines = [text]

# significant?
@@ -228,6 +241,7 @@ def __init__(self, benchmarks, args):
self.group_by_speed = args.group_by_speed
self.verbose = args.verbose
self.quiet = args.quiet
self.win_percentage = args.win_percentage

grouped_by_name = self.benchmarks.group_by_name()
if not grouped_by_name:
@@ -297,6 +311,8 @@ def sort_key(results):
text = "%s: %s" % (bench.format_value(bench.mean()), text)
else:
text = "not significant"
if self.win_percentage:
text += win_percentage_text(ref_bench, bench)
significants.append(significant)
all_norm_means[index].append(result.norm_mean)
row.append(text)
@@ -365,7 +381,8 @@ def sort_key(item):
print()
print("%s (%s):" % (title, len(results)))
for name, result in results:
text = result.oneliner(verbose=False)
text = result.oneliner(verbose=False,
win_percentage=self.win_percentage)
print("- %s: %s" % (name, text))
empty_line = True

@@ -383,7 +400,8 @@ def compare_suites_list(self):
significant = any(result.significant for result in results)
lines = []
for result in results:
lines.extend(result.format(self.verbose))
lines.extend(result.format(self.verbose,
win_percentage=self.win_percentage))

if not(significant or self.verbose):
not_significant.append(results.name)
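As a rough sketch of the wording logic in win_percentage_text(), here is the same threshold applied to plain lists instead of Benchmark objects (the timing values are hypothetical, and the import assumes this patch is applied):

    from pyperf._utils import percentage_less_than

    ref_values = [10.0, 11.0, 12.0]      # hypothetical reference timings
    changed_values = [11.5, 11.5, 11.5]  # hypothetical changed timings

    # fraction of (ref, changed) pairs where the reference value is smaller,
    # i.e. where the changed run took longer
    p = percentage_less_than(ref_values, changed_values)
    if p >= 0.5:
        text = " (slower on {:.0%} of runs)".format(p)
    else:
        text = " (faster on {:.0%} of runs)".format(1 - p)
    # here p = 6/9, so text == " (slower on 67% of runs)"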
26 changes: 26 additions & 0 deletions pyperf/_utils.py
@@ -403,3 +403,29 @@ def geometric_mean(data):
if not data:
raise ValueError("empty data")
return _geometric_mean(data)


def _count_pairs_less_than(list1, list2):
# counts sum(x < y for x in list1 for y in list2)
# list1 and list2 should be sorted
i = 0
count = 0
for x in list2:
while i < len(list1) and list1[i] < x:
i += 1
# x is bigger than all of list1[:i]
count += i
return count


def percentage_less_than(list1, list2):
# measures mean(x < y for x in list1 for y in list2)
list1 = sorted(list1)
list2 = sorted(list2)
list1_less = _count_pairs_less_than(list1, list2)
list2_less = _count_pairs_less_than(list2, list1)
product = len(list1) * len(list2)
if product == 0:
raise ValueError("Can't compute percentages of empty samples")
ties = product - list1_less - list2_less
return (list1_less + ties / 2) / product
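For a small worked example of the tie handling (a sketch using the helpers above, mirroring one of the new test cases), each tied pair contributes half a "win" to each side:

    # percentage_less_than([0, 0], [0, 1]) considers 4 pairs:
    #   (0, 0) tie, (0, 1) less, (0, 0) tie, (0, 1) less
    # list1_less = 2, list2_less = 0, ties = 2
    # result = (2 + 2 / 2) / 4 = 0.75
    from pyperf._utils import percentage_less_than
    assert percentage_less_than([0, 0], [0, 1]) == 0.75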
80 changes: 80 additions & 0 deletions pyperf/tests/test_utils.py
@@ -74,6 +74,86 @@ def test_geometric_mean(self):
self.assertAlmostEqual(utils.geometric_mean([54, 24, 36]), 36.0)


class TestWinPercentage(unittest.TestCase):

def test_count_pairs_less_than(self):
cplt = utils._count_pairs_less_than
self.assertEqual(cplt([], []), 0)
self.assertEqual(cplt([], [1]), 0)
self.assertEqual(cplt([], []), 0)
self.assertEqual(cplt([], [1]), 0)
self.assertEqual(cplt([], [1, 2]), 0)
self.assertEqual(cplt([1], []), 0)
self.assertEqual(cplt([1, 2], []), 0)
self.assertEqual(cplt([1], [1]), 0)
self.assertEqual(cplt([0], [1]), 1)
self.assertEqual(cplt([0, 1], [0, 1]), 1)
self.assertEqual(cplt([0, 1, 2, 3], [1, 1, 2, 2]), 6)

self.assertEqual(cplt([0, 1, 1, 2], [1, 2, 3, 4]), 12)
self.assertEqual(cplt([1, 2, 3, 4], [0, 1, 1, 2]), 1)
self.assertEqual(cplt([3, 4, 4, 5], [1, 2, 3, 4]), 1)
self.assertEqual(cplt([1, 2, 3, 4], [3, 4, 4, 5]), 12)

from random import random, randrange
from collections import Counter

for _ in range(1000):
# short test cases to brute-force
common = [random() for i in range(randrange(3))]
list1 = [random() for i in range(randrange(10))] + common
list2 = [random() for i in range(randrange(10))] + common
list1.sort()
list2.sort()

expected = sum(x < y for x in list1 for y in list2)
self.assertEqual(cplt(list1, list2), expected)

for _ in range(1000):
# longer test cases just to make sure things add up.
common = [random() for i in range(randrange(10))]
list1 = [random() for i in range(randrange(200))] + common
list2 = [random() for i in range(randrange(200))] + common
list1.sort()
list2.sort()

c1 = Counter(list1)
c2 = Counter(list2)
ties1 = sum(c2[t] * count1 for t, count1 in c1.items())
ties2 = sum(c1[t] * count2 for t, count2 in c2.items())
self.assertEqual(ties1, ties2)
self.assertGreaterEqual(ties1, len(common))

list1_less = cplt(list1, list2)
list2_less = cplt(list2, list1)
self.assertEqual(ties1 + list1_less + list2_less,
len(list1) * len(list2))

def test_count_percentage_less_than(self):
plt = utils.percentage_less_than
self.assertRaises(ValueError, plt, [], [])
self.assertRaises(ValueError, plt, [1], [])
self.assertRaises(ValueError, plt, [], [1])
self.assertEqual(plt([0], [1]), 1.0)
self.assertEqual(plt([1], [0]), 0.0)
self.assertEqual(plt([0, 2], [1]), 0.5)
self.assertEqual(plt([0, 0], [1]), 1.0)
self.assertEqual(plt([0, 0], [1, 1]), 1.0)
self.assertEqual(plt([1, 2, 3], [2, 3]), 4.0/6)
self.assertEqual(plt([0, 0], [0, 1]), 0.75)
self.assertEqual(plt([0, 1], [0, 1]), 0.5)

from random import random, randrange
for _ in range(1000):
# no ties => count in the obvious way.
distinct = list({random() for i in range(randrange(20, 30))})
j = randrange(1, len(distinct))
left, right = distinct[:j], distinct[j:]
expected = (sum(x < y for x in left for y in right)
/ (len(left) * len(right)))
self.assertEqual(plt(left, right), expected)


class TestUtils(unittest.TestCase):
def test_parse_iso8601(self):
# Default format using 'T' separator
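To exercise the new helpers, the added test class can presumably be run on its own with the standard unittest runner (assuming the pyperf package is importable in the current environment):

    python3 -m unittest pyperf.tests.test_utils.TestWinPercentage -v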