First dummy benchmark
sdpython committed Mar 5, 2019
1 parent 16b1626 commit 5d739ed
Showing 11 changed files with 406 additions and 2 deletions.
2 changes: 0 additions & 2 deletions HISTORY.rst
@@ -1,5 +1,4 @@


.. _l-HISTORY:

=======
@@ -9,7 +8,6 @@ History
current - 2019-03-05 - 0.00Mb
=============================


0.0.0 - 2019-03-05 - 0.00Mb
===========================

7 changes: 7 additions & 0 deletions _doc/sphinxdoc/source/api/benchmark.rst
@@ -0,0 +1,7 @@

benchmark
=========

.. autosignature:: pymlbenchmark.benchmark.benchmark_perf.BenchPerfTest

.. autosignature:: pymlbenchmark.benchmark.benchmark_perf.BenchPerf
5 changes: 5 additions & 0 deletions _doc/sphinxdoc/source/api/datasets.rst
@@ -0,0 +1,5 @@

datasets
========

.. autosignature:: pymlbenchmark.datasets.artificial.random_binary_classification
2 changes: 2 additions & 0 deletions _doc/sphinxdoc/source/api/index.rst
@@ -6,3 +6,5 @@ API Summary
:maxdepth: 2

context
datasets
benchmark
1 change: 1 addition & 0 deletions _doc/sphinxdoc/source/conf.py
@@ -38,5 +38,6 @@
'onnxruntime': 'https://github.com/Microsoft/onnxruntime',
'PolynomialFeatures': 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html',
'Python': 'https://www.python.org/',
'tqdm': 'https://github.com/tqdm/tqdm',
'scikit-learn': 'https://scikit-learn.org/stable/',
})
116 changes: 116 additions & 0 deletions _unittests/ut_benchmark/test_benchmark_perf.py
@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
"""
@brief test log(time=2s)
"""
import io
import contextlib
import sys
import os
import unittest
import numpy
from pyquickhelper.pycode import ExtTestCase


try:
import src
except ImportError:
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..")))
if path not in sys.path:
sys.path.append(path)
import src


from src.pymlbenchmark.benchmark import BenchPerf, BenchPerfTest
from src.pymlbenchmark.datasets import random_binary_classification


class TestBenchPerf(ExtTestCase):

def test_filter_conf(self):
pbefore = dict(a=[0, 1], b=['a', 'b', 'c'])
bp = BenchPerf(pbefore, None, None)
opts = list(bp.enumerate_tests(pbefore))
self.assertEqual(len(opts), 6)
self.assertEqual(opts, [{'a': 0, 'b': 'a'}, {'a': 0, 'b': 'b'},
{'a': 0, 'b': 'c'}, {'a': 1, 'b': 'a'},
{'a': 1, 'b': 'b'}, {'a': 1, 'b': 'c'}])

def test_perf_benchmark_vfalse(self):
self.do_test_perf_benchmark(False)

def test_perf_benchmark_vtrue(self):
st = io.StringIO()
with contextlib.redirect_stdout(st):
with contextlib.redirect_stderr(st):
self.do_test_perf_benchmark(True)
self.assertIn("/24", st.getvalue())

def do_test_perf_benchmark(self, verbose):

class dummycl:
def __init__(self, alpha):
self.alpha = alpha

def fit(self, X, y):
self.mean_ = X.mean(axis=0) # pylint: disable=W0201
return self

def predict(self, X):
return self.predict_proba(X) > 0

def predict_proba(self, X):
return numpy.sum(X - self.mean_[numpy.newaxis, :], axis=1) * self.alpha

class dummycl2(dummycl):
def predict_proba(self, X):
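                # calls the parent implementation twice: this dummy variant is
                # slower and returns doubled scores, so timings and outputs differ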
r = dummycl.predict_proba(self, X)
return dummycl.predict_proba(self, X) + r

class myBenchPerfTest(BenchPerfTest):
def __init__(self, N=10, dim=4, alpha=3):
BenchPerfTest.__init__(self)
X, y = random_binary_classification(N, dim)
self.skl = dummycl(alpha).fit(X, y)
self.ort = dummycl2(alpha).fit(X, y)

def fcts(self, **kwargs):

def predict_skl_predict(X, model=self.skl):
return model.predict(X)

def predict_skl_predict_proba(X, model=self.skl):
return model.predict_proba(X)

def predict_ort_predict(X, model=self.ort):
return model.predict(X)

def predict_ort_predict_proba(X, model=self.ort):
return model.predict_proba(X)

return [{'lib': 'skl', 'method': 'predict', 'fct': predict_skl_predict},
{'lib': 'skl', 'method': 'predict_proba',
'fct': predict_skl_predict_proba},
{'lib': 'ort', 'method': 'predict',
'fct': predict_ort_predict},
{'lib': 'ort', 'method': 'predict_proba', 'fct': predict_ort_predict_proba}]

def data(self, N=10, dim=4, **kwargs): # pylint: disable=W0221
return random_binary_classification(N, dim)[:1]

pbefore = dict(alpha=[0, 1, 2], dim=[1, 10])
pafter = dict(method=["predict", "predict_proba"],
N=[1, 10])
bp = BenchPerf(pbefore, pafter, myBenchPerfTest)
res = list(bp.enumerate_run_benchs(verbose=verbose))
self.assertEqual(len(res), 96)
self.assertLesser(res[0]['min'], res[0]['max'])
self.assertEqual(set(_['N'] for _ in res), {1, 10})


if __name__ == "__main__":
unittest.main()
39 changes: 39 additions & 0 deletions _unittests/ut_datasets/test_artificial.py
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
"""
@brief test log(time=2s)
"""

import sys
import os
import unittest
from pyquickhelper.pycode import ExtTestCase


try:
import src
except ImportError:
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..")))
if path not in sys.path:
sys.path.append(path)
import src


from src.pymlbenchmark.datasets import random_binary_classification


class TestArtificial(ExtTestCase):

def test_random_binary_classification(self):
X, y = random_binary_classification(40, 4)
self.assertEqual(X.shape, (40, 4))
self.assertEqual(y.shape, (40, ))
self.assertEqual(len(set(y)), 2)


if __name__ == "__main__":
unittest.main()
6 changes: 6 additions & 0 deletions src/pymlbenchmark/benchmark/__init__.py
@@ -0,0 +1,6 @@
"""
@file
@brief Shortcut to *benchmark*.
"""

from .benchmark_perf import BenchPerf, BenchPerfTest
192 changes: 192 additions & 0 deletions src/pymlbenchmark/benchmark/benchmark_perf.py
@@ -0,0 +1,192 @@
"""
@file
@brief Implements a benchmark about performance.
"""
from time import perf_counter as time_perf
import numpy


class BenchPerfTest:
"""
Defines a bench perf test.
.. faqref::
:title: Conventions for N, dim
In all the package, *N* refers to the number of observations,
*dim* the dimension or the number of features.
"""

def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)

def data(self, **opts):
"""
        Generates one testing dataset.
        @return dataset, a list or tuple of arrays (such as *X, y*)
            unpacked as the positional arguments of every benchmarked function
"""
raise NotImplementedError()

def fcts(self, **opts):
"""
        Returns the functions to test as a list of dictionaries.
        Each dictionary must contain the key ``'fct'``, the function
        to benchmark, plus any keys identifying it such as the
        library or the method being measured.
"""
raise NotImplementedError()

def validate(self, results):
"""
        Runs validations after the test to make sure
        its results are valid.
        @param results results to validate, list of tuples
            ``(parameters, results)``
        The method raises an exception if the results are not valid.
"""
pass


class BenchPerf:
"""
Factorizes code to compare two implementations.
"""

def __init__(self, pbefore, pafter, btest, filter_test=None):
"""
        @param pbefore parameters before calling *fct*,
            dictionary ``{name: [list of values]}``,
            these parameters are sent to the constructor
            of the @see cl BenchPerfTest class to test
        @param pafter parameters after calling *fct*,
            dictionary ``{name: [list of values]}``,
            these parameters are sent to method
            :meth:`BenchPerfTest.fcts
            <pymlbenchmark.benchmark.benchmark_perf.BenchPerfTest.fcts>`
        @param btest a class inheriting from @see cl BenchPerfTest,
            instantiated for every combination of *pbefore* parameters
        @param filter_test function which tells if a configuration
            must be tested or not, None to test them all

        Every parameter which specifies a function is called through
        a method so that the user can overwrite it.
"""
self.pbefore = pbefore
self.pafter = pafter
self.btest = btest
self.filter_test = filter_test

def fct_filter_test(self, **conf):
"""
        Tells whether the configuration *conf* must be tested or not.
        @param conf dictionary ``{name: value}``
@return boolean
"""
if self.filter_test is None:
return True
return self.filter_test(**conf)

def enumerate_tests(self, options):
"""
Enumerates all possible options.
@param options dictionary ``{name: list of values}``
        @return iterator on dictionaries ``{name: value}``
The function applies the method *fct_filter_test*.
"""
keys = list(sorted(options))
mx = [len(options[k]) for k in keys]
if min(mx) == 0:
mi = min(zip(mx, keys))
raise ValueError("Parameter '{0}' has no values.".format(mi[1]))
pos = [0 for _ in keys]
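        # odometer-style walk over the cartesian product of all option values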
while pos[0] < mx[0]:
opts = {k: options[k][pos[i]] for i, k in enumerate(keys)}
if self.fct_filter_test(**opts):
yield opts
p = len(pos) - 1
pos[p] += 1
while p > 0 and pos[p] >= mx[p]:
pos[p] = 0
p -= 1
pos[p] += 1

def enumerate_run_benchs(self, repeat=10, verbose=False):
"""
Runs the benchmark.
        @param repeat number of repetitions of the same call
with different datasets
        @param verbose if True, uses :epkg:`tqdm` to show progress
@return yields dictionaries with all the metrics
"""
all_opts = self.pbefore.copy()
all_opts.update(self.pafter)
all_tests = list(self.enumerate_tests(all_opts))

if verbose:
from tqdm import tqdm
loop = iter(tqdm(range(len(all_tests))))
else:
loop = iter(all_tests)

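        # one instance of btest is created per combination of the 'before' parameters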
for a_opt in self.enumerate_tests(self.pbefore):
if not self.fct_filter_test(**a_opt):
continue

inst = self.btest(**a_opt)

for b_opt in self.enumerate_tests(self.pafter):
obs = b_opt.copy()
obs.update(a_opt)
if not self.fct_filter_test(**obs):
continue

fcts = inst.fcts(**obs)
if not isinstance(fcts, list):
raise TypeError(
"Method fcts must return a list of dictionaries (name, fct) not {}".format(fcts))

data = [inst.data(**obs) for r in range(repeat)]
if not isinstance(data, (list, tuple)):
raise ValueError(
"Method *data* must return a list or a tuple.")
obs["repeat"] = len(data)
results = []

for fct in fcts:
if not isinstance(fct, dict) or 'fct' not in fct:
raise ValueError(
"Method fcts must return a list of dictionaries (name, fct) not {}".format(fct))
f = fct['fct']
del fct['fct']
times = []
fct.update(obs)

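                    # one timing measurement per generated dataset (*repeat* in total)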
for dt in data:
st = time_perf()
r = f(*dt)
d = time_perf() - st
times.append(d)

results.append((fct, r))
times.sort()
fct['min'] = times[0]
fct['max'] = times[-1]
if len(times) > 5:
fct['min3'] = times[3]
fct['max3'] = times[-3]
times = numpy.array(times)
fct['mean'] = times.mean()
fct['median'] = numpy.median(times)
yield fct

inst.validate(results)
next(loop) # pylint: disable=R1708
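
For reference, here is a minimal usage sketch of the API added by this commit,
adapted from the unit test above. It assumes the package is installed as
pymlbenchmark; the class name, the scikit-learn model and the parameter grids
are illustrative choices, not part of the commit.

from sklearn.linear_model import LogisticRegression
from pymlbenchmark.benchmark import BenchPerf, BenchPerfTest
from pymlbenchmark.datasets import random_binary_classification


class LogRegBenchPerfTest(BenchPerfTest):
    # instantiated once per combination of the 'before' parameters (here: dim)
    def __init__(self, dim=10, **kwargs):
        BenchPerfTest.__init__(self, **kwargs)
        X, y = random_binary_classification(10000, dim)
        self.model = LogisticRegression(solver="liblinear").fit(X, y)

    def fcts(self, **kwargs):
        # one dictionary per benchmarked function, the callable goes under 'fct'
        return [{'lib': 'skl', 'method': 'predict',
                 'fct': lambda X, m=self.model: m.predict(X)}]

    def data(self, N=10, dim=10, **kwargs):
        # returns only the inputs given to the benchmarked functions
        return random_binary_classification(N, dim)[:1]


pbefore = dict(dim=[5, 10])
pafter = dict(N=[1, 100])
bp = BenchPerf(pbefore, pafter, LogRegBenchPerfTest)
for obs in bp.enumerate_run_benchs(repeat=10, verbose=False):
    print(obs)  # 'min', 'max', 'mean', 'median', 'repeat', 'N', 'dim', 'lib', ...

With repeat=10, more than five timings are collected per configuration, so the
'min3' and 'max3' columns computed above are present as well.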
