This repository has been archived by the owner on Jan 13, 2024. It is now read-only.
Showing 11 changed files with 406 additions and 2 deletions.
@@ -0,0 +1,7 @@

benchmark
=========

.. autosignature:: pymlbenchmark.benchmark.benchmark_perf.BenchPerfTest

.. autosignature:: pymlbenchmark.benchmark.benchmark_perf.BenchPerf
@@ -0,0 +1,5 @@

datasets
========

.. autosignature:: pymlbenchmark.datasets.artificial.random_binary_classification
@@ -6,3 +6,5 @@ API Summary
    :maxdepth: 2

    context
    datasets
    benchmark
@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
"""
@brief      test log(time=2s)
"""
import io
import contextlib
import sys
import os
import unittest
import numpy
from pyquickhelper.pycode import ExtTestCase


try:
    import src
except ImportError:
    path = os.path.normpath(
        os.path.abspath(
            os.path.join(
                os.path.split(__file__)[0],
                "..",
                "..")))
    if path not in sys.path:
        sys.path.append(path)
    import src


from src.pymlbenchmark.benchmark import BenchPerf, BenchPerfTest
from src.pymlbenchmark.datasets import random_binary_classification


class TestBenchPerf(ExtTestCase):

    def test_filter_conf(self):
        pbefore = dict(a=[0, 1], b=['a', 'b', 'c'])
        bp = BenchPerf(pbefore, None, None)
        opts = list(bp.enumerate_tests(pbefore))
        self.assertEqual(len(opts), 6)
        self.assertEqual(opts, [{'a': 0, 'b': 'a'}, {'a': 0, 'b': 'b'},
                                {'a': 0, 'b': 'c'}, {'a': 1, 'b': 'a'},
                                {'a': 1, 'b': 'b'}, {'a': 1, 'b': 'c'}])

    def test_perf_benchmark_vfalse(self):
        self.do_test_perf_benchmark(False)

    def test_perf_benchmark_vtrue(self):
        st = io.StringIO()
        with contextlib.redirect_stdout(st):
            with contextlib.redirect_stderr(st):
                self.do_test_perf_benchmark(True)
        self.assertIn("/24", st.getvalue())

    def do_test_perf_benchmark(self, verbose):

        class dummycl:
            def __init__(self, alpha):
                self.alpha = alpha

            def fit(self, X, y):
                self.mean_ = X.mean(axis=0)  # pylint: disable=W0201
                return self

            def predict(self, X):
                return self.predict_proba(X) > 0

            def predict_proba(self, X):
                return numpy.sum(X - self.mean_[numpy.newaxis, :], axis=1) * self.alpha

        class dummycl2(dummycl):
            def predict_proba(self, X):
                r = dummycl.predict_proba(self, X)
                return dummycl.predict_proba(self, X) + r

        class myBenchPerfTest(BenchPerfTest):
            def __init__(self, N=10, dim=4, alpha=3):
                BenchPerfTest.__init__(self)
                X, y = random_binary_classification(N, dim)
                self.skl = dummycl(alpha).fit(X, y)
                self.ort = dummycl2(alpha).fit(X, y)

            def fcts(self, **kwargs):

                def predict_skl_predict(X, model=self.skl):
                    return model.predict(X)

                def predict_skl_predict_proba(X, model=self.skl):
                    return model.predict_proba(X)

                def predict_ort_predict(X, model=self.ort):
                    return model.predict(X)

                def predict_ort_predict_proba(X, model=self.ort):
                    return model.predict_proba(X)

                return [{'lib': 'skl', 'method': 'predict', 'fct': predict_skl_predict},
                        {'lib': 'skl', 'method': 'predict_proba',
                         'fct': predict_skl_predict_proba},
                        {'lib': 'ort', 'method': 'predict',
                         'fct': predict_ort_predict},
                        {'lib': 'ort', 'method': 'predict_proba', 'fct': predict_ort_predict_proba}]

            def data(self, N=10, dim=4, **kwargs):  # pylint: disable=W0221
                return random_binary_classification(N, dim)[:1]

        pbefore = dict(alpha=[0, 1, 2], dim=[1, 10])
        pafter = dict(method=["predict", "predict_proba"],
                      N=[1, 10])
        bp = BenchPerf(pbefore, pafter, myBenchPerfTest)
        res = list(bp.enumerate_run_benchs(verbose=verbose))
        self.assertEqual(len(res), 96)
        self.assertLesser(res[0]['min'], res[0]['max'])
        self.assertEqual(set(_['N'] for _ in res), {1, 10})


if __name__ == "__main__":
    unittest.main()
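For reference, the expected figures in this test follow from the Cartesian products it defines: *pbefore* yields 3 × 2 = 6 configurations, *pafter* yields 2 × 2 = 4, and *fcts* returns 4 functions, hence 6 × 4 × 4 = 96 benchmark rows; the combined grid has 3 × 2 × 2 × 2 = 24 configurations, which is why the verbose progress bar prints "/24".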
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
"""
@brief      test log(time=2s)
"""

import sys
import os
import unittest
from pyquickhelper.pycode import ExtTestCase


try:
    import src
except ImportError:
    path = os.path.normpath(
        os.path.abspath(
            os.path.join(
                os.path.split(__file__)[0],
                "..",
                "..")))
    if path not in sys.path:
        sys.path.append(path)
    import src


from src.pymlbenchmark.datasets import random_binary_classification


class TestArtificial(ExtTestCase):

    def test_random_binary_classification(self):
        X, y = random_binary_classification(40, 4)
        self.assertEqual(X.shape, (40, 4))
        self.assertEqual(y.shape, (40, ))
        self.assertEqual(len(set(y)), 2)


if __name__ == "__main__":
    unittest.main()
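As a point of reference only, a generator satisfying the contract checked above (an (N, dim) feature matrix and an (N,) label vector with two classes) could look like the following sketch; it assumes numpy and is not necessarily the package's actual implementation.

import numpy

def random_binary_classification_sketch(N, dim):
    # Illustrative only: N observations with dim features,
    # labelled by whether the feature sum is above its mean.
    X = numpy.random.rand(N, dim)
    y = (X.sum(axis=1) >= numpy.mean(X.sum(axis=1))).astype(numpy.int64)
    return X, y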
@@ -0,0 +1,6 @@
"""
@file
@brief Shortcut to *benchmark*.
"""

from .benchmark_perf import BenchPerf, BenchPerfTest
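The shortcut simply re-exports the two classes, so both import paths below resolve to the same objects (assuming the package is importable as pymlbenchmark):

from pymlbenchmark.benchmark import BenchPerf
from pymlbenchmark.benchmark.benchmark_perf import BenchPerf as BenchPerfDirect

# The shortcut re-exports the class, so both names refer to the same object.
assert BenchPerf is BenchPerfDirect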
@@ -0,0 +1,192 @@
"""
@file
@brief Implements a benchmark about performance.
"""
from time import perf_counter as time_perf
import numpy


class BenchPerfTest:
    """
    Defines a performance benchmark test.

    .. faqref::
        :title: Conventions for N, dim

        Throughout the package, *N* refers to the number of observations,
        *dim* to the dimension or the number of features.
    """

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)

    def data(self, **opts):
        """
        Generates one testing dataset.

        @return         dataset, usually a list of arrays
                        such as *X, y*
        """
        raise NotImplementedError()

    def fcts(self, **opts):
        """
        Returns the functions to test as a list of dictionaries;
        every dictionary must contain the key ``fct`` with the
        function to benchmark, the other keys describe it.
        """
        raise NotImplementedError()

    def validate(self, results):
        """
        Runs validations after the test to make sure the results
        are valid. The function raises an exception if they are not.

        @param      results     results to validate, list of tuples
                                ``(parameters, results)``
        """
        pass


class BenchPerf:
    """
    Factorizes code to compare two implementations.
    """

    def __init__(self, pbefore, pafter, btest, filter_test=None):
        """
        @param      pbefore     parameters used before calling *fct*,
                                dictionary ``{name: [list of values]}``,
                                these parameters are sent to the instance
                                of @see cl BenchPerfTest to test
        @param      pafter      parameters used after calling *fct*,
                                dictionary ``{name: [list of values]}``,
                                these parameters are sent to method
                                :meth:`BenchPerfTest.fcts
                                <pymlbenchmark.benchmark.benchmark_perf.BenchPerfTest.fcts>`
        @param      btest       class of @see cl BenchPerfTest, instantiated
                                for every tested configuration
        @param      filter_test function which tells if a configuration
                                must be tested or not, None to test them all

        Every function given as a parameter is called through a method
        so that the user can overwrite it.
        """
        self.pbefore = pbefore
        self.pafter = pafter
        self.btest = btest
        self.filter_test = filter_test

    def fct_filter_test(self, **conf):
        """
        Tells if the configuration *conf* is valid or not.

        @param      conf        dictionary ``{name: value}``
        @return                 boolean
        """
        if self.filter_test is None:
            return True
        return self.filter_test(**conf)

    def enumerate_tests(self, options):
        """
        Enumerates all possible options.

        @param      options     dictionary ``{name: list of values}``
        @return                 list of dictionaries ``{name: value}``

        The function applies the method *fct_filter_test*.
        """
        keys = list(sorted(options))
        mx = [len(options[k]) for k in keys]
        if min(mx) == 0:
            mi = min(zip(mx, keys))
            raise ValueError("Parameter '{0}' has no values.".format(mi[1]))
        pos = [0 for _ in keys]
        while pos[0] < mx[0]:
            opts = {k: options[k][pos[i]] for i, k in enumerate(keys)}
            if self.fct_filter_test(**opts):
                yield opts
            p = len(pos) - 1
            pos[p] += 1
            while p > 0 and pos[p] >= mx[p]:
                pos[p] = 0
                p -= 1
                pos[p] += 1

    def enumerate_run_benchs(self, repeat=10, verbose=False):
        """
        Runs the benchmark.

        @param      repeat      number of repetitions of the same call
                                with different datasets
        @param      verbose     if True, use :epkg:`tqdm`
        @return                 yields dictionaries with all the metrics
        """
        all_opts = self.pbefore.copy()
        all_opts.update(self.pafter)
        all_tests = list(self.enumerate_tests(all_opts))

        if verbose:
            from tqdm import tqdm
            loop = iter(tqdm(range(len(all_tests))))
        else:
            loop = iter(all_tests)

        for a_opt in self.enumerate_tests(self.pbefore):
            if not self.fct_filter_test(**a_opt):
                continue

            inst = self.btest(**a_opt)

            for b_opt in self.enumerate_tests(self.pafter):
                obs = b_opt.copy()
                obs.update(a_opt)
                if not self.fct_filter_test(**obs):
                    continue

                fcts = inst.fcts(**obs)
                if not isinstance(fcts, list):
                    raise TypeError(
                        "Method fcts must return a list of dictionaries (name, fct) not {}".format(fcts))

                data = [inst.data(**obs) for r in range(repeat)]
                if not isinstance(data, (list, tuple)):
                    raise ValueError(
                        "Method *data* must return a list or a tuple.")
                obs["repeat"] = len(data)
                results = []

                for fct in fcts:
                    if not isinstance(fct, dict) or 'fct' not in fct:
                        raise ValueError(
                            "Method fcts must return a list of dictionaries (name, fct) not {}".format(fct))
                    f = fct['fct']
                    del fct['fct']
                    times = []
                    fct.update(obs)

                    for dt in data:
                        st = time_perf()
                        r = f(*dt)
                        d = time_perf() - st
                        times.append(d)

                    results.append((fct, r))
                    times.sort()
                    fct['min'] = times[0]
                    fct['max'] = times[-1]
                    if len(times) > 5:
                        fct['min3'] = times[3]
                        fct['max3'] = times[-3]
                    times = numpy.array(times)
                    fct['mean'] = times.mean()
                    fct['median'] = numpy.median(times)
                    yield fct

                inst.validate(results)
                next(loop)  # pylint: disable=R1708
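To make the flow of *enumerate_run_benchs* concrete, here is a minimal sketch of how BenchPerfTest and BenchPerf are meant to be combined, including a *filter_test* callback. The class name, the centring function and the parameter values are illustrative only, and the snippet assumes the package is importable as pymlbenchmark.

import numpy
from pymlbenchmark.benchmark import BenchPerf, BenchPerfTest
from pymlbenchmark.datasets import random_binary_classification


class MeanBenchPerfTest(BenchPerfTest):
    # Hypothetical test: benchmarks a single function which centres the data.

    def __init__(self, dim=4, **kwargs):
        BenchPerfTest.__init__(self, **kwargs)
        X, _ = random_binary_classification(100, dim)
        self.mean_ = X.mean(axis=0)

    def data(self, N=10, dim=4, **kwargs):
        # One tuple per repetition, one array per argument of the tested functions.
        return random_binary_classification(N, dim)[:1]

    def fcts(self, **kwargs):
        def center(X, mean=self.mean_):
            return X - mean[numpy.newaxis, :]
        # Each dictionary must contain 'fct'; the other keys end up in the metrics.
        return [{'lib': 'numpy', 'fct': center}]


pbefore = dict(dim=[1, 10])    # sent to MeanBenchPerfTest.__init__
pafter = dict(N=[1, 100])      # sent to data and fcts
bp = BenchPerf(pbefore, pafter, MeanBenchPerfTest,
               filter_test=lambda **cfg: cfg.get('N', 10) >= cfg.get('dim', 1))
for metrics in bp.enumerate_run_benchs(repeat=5, verbose=False):
    print(metrics['dim'], metrics['N'], metrics['min'], metrics['mean'])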