Skip to content
This repository was archived by the owner on Jan 13, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
13817c9
Einsum decomposition
sdpython Apr 20, 2021
da470a1
Update test_einsum.py
sdpython Apr 20, 2021
9f760b9
fix a couple of issues
sdpython Apr 21, 2021
c2e0f47
improve dot graph
sdpython Apr 22, 2021
6758a9a
documentation
sdpython Apr 22, 2021
14eba46
remove optimize=True in some cases
sdpython Apr 22, 2021
7943226
Add diagonal
sdpython Apr 23, 2021
8d9ded8
refactoring, fix duplicates indices
sdpython Apr 23, 2021
9290046
refactoring
sdpython Apr 23, 2021
9a8d385
fix python implemented, one broadcast case is still missing
sdpython Apr 24, 2021
f298ac6
fix python version of matmul
sdpython Apr 24, 2021
60650d9
update unit tests
sdpython Apr 25, 2021
84b3618
decompose matmul into smaller pieces
sdpython Apr 26, 2021
8dd3a65
update, still fixes to do
sdpython Apr 27, 2021
32e356d
Fix matrix multiplication
sdpython Apr 28, 2021
2035bae
fix decomposition of einsum
sdpython Apr 28, 2021
1b660dd
documentation
sdpython Apr 29, 2021
f0efa47
remove useless transpose
sdpython Apr 29, 2021
222bad2
merge expand_dims node into a single one
sdpython Apr 29, 2021
801366d
simplifies output graph
sdpython Apr 29, 2021
218f031
add onnx export to einsum
sdpython Apr 30, 2021
50747a3
fix issue with sparse
sdpython Apr 30, 2021
41f6273
fix matmul computation
sdpython Apr 30, 2021
9f3885c
fix issue with latest version of scikit-learn
sdpython Apr 30, 2021
662727f
finalize conversion to onnx
sdpython Apr 30, 2021
91ec893
Update test_einsum.py
sdpython Apr 30, 2021
b28dd2d
remove duplicate transpose
sdpython May 1, 2021
93aca50
code coverage
sdpython May 1, 2021
8fe01ff
Merge branch 'master' of https://github.com/sdpython/mlprodict into e…
sdpython May 2, 2021
e864e7f
Merge branch 'master' of https://github.com/sdpython/mlprodict into e…
sdpython May 2, 2021
8050dc5
Add function to benchmark einsum decomposition
sdpython May 2, 2021
67910d5
fix ir_version
sdpython May 3, 2021
b2f5a71
support initializers
sdpython May 3, 2021
7f0c756
lint
sdpython May 3, 2021
516ccee
documentation
sdpython May 3, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions _doc/sphinxdoc/source/api/testing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ Einsum

.. autosignature:: mlprodict.testing.experimental_c.custom_einsum_double

.. autosignature:: mlprodict.testing.einsum_bench.einsum_benchmark

.. autosignature:: mlprodict.testing.einsum_impl_ext.numpy_diagonal

.. autosignature:: mlprodict.testing.einsum_impl_ext.numpy_extended_dot
Expand Down
77 changes: 77 additions & 0 deletions _unittests/ut_cli/test_cli_einsum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
@brief test tree node (time=4s)
"""
import os
import unittest
from pyquickhelper.loghelper import BufferedPrint
from pyquickhelper.pycode import ExtTestCase, get_temp_folder
from mlprodict.__main__ import main


class TestCliEinsum(ExtTestCase):
    """Tests command line ``einsum_test`` with several output formats."""

    def _run_einsum(self, temp_name, output_name, extra_args):
        # Shared driver: runs the command line into a temporary folder,
        # then checks the output file exists and the log mentions it.
        temp = get_temp_folder(__file__, temp_name)
        name = os.path.join(temp, output_name)
        st = BufferedPrint()
        args = ["einsum_test", "--equation", "abc,cd->ad",
                "--output", name] + extra_args
        main(args=args, fLOG=st.fprint)
        self.assertExists(name)
        self.assertIn("wrote", str(st))

    def test_cli_einsum(self):
        # --help must display the documented parameters.
        st = BufferedPrint()
        main(args=["einsum_test", "--help"], fLOG=st.fprint)
        self.assertIn("verbose", str(st))

    def test_cli_excel(self):
        self._run_einsum("temp_cli_excel", "res.xlsx",
                         ["--shape", "5", "--verbose", "0"])

    def test_cli_csv(self):
        self._run_einsum("temp_cli_csv", "res.csv",
                         ["--shape", "(5,5,5);(5,5)", "--verbose", "0"])

    def test_cli_csv_n(self):
        self._run_einsum("temp_cli_csvn", "res.csv",
                         ["--shape", "5,5", "--verbose", "0"])

    def test_cli_csv_rt(self):
        self._run_einsum("temp_cli_csv_rt", "res.csv",
                         ["--shape", "(5,5,5);(5,5)", "--verbose", "0",
                          "--runtime", "onnxruntime"])

    def test_cli_csv_perm(self):
        self._run_einsum("temp_cli_csv_perm", "res.csv",
                         ["--shape", "(5,5,5);(5,5)", "--verbose", "0",
                          "--perm", "1"])


if __name__ == "__main__":
    unittest.main()
34 changes: 33 additions & 1 deletion _unittests/ut_testing/test_einsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from contextlib import redirect_stdout
import itertools
import numpy
from onnx import numpy_helper
from onnxruntime import (
InferenceSession, GraphOptimizationLevel, SessionOptions)
from pyquickhelper.pycode import ExtTestCase
Expand Down Expand Up @@ -628,7 +629,38 @@ def test_exc(self):
r = repr(EinsumSubOp(2, 'transpose', 0, perm=(1, 0)))
self.assertIn("EinsumSubOp('transpose', 0, perm=(1, 0))", r)

def test_bid_nd_bin(self):

def local_test(inp1, inp2):
exp = numpy.einsum('bid,nd->bin', inp1, inp2)
seq = decompose_einsum_equation(
'bid,nd->bin', clean=True, strategy='numpy')
got = apply_einsum_sequence(seq, inp1, inp2)
self.assertEqualArray(exp, got, decimal=3)

onx = seq.to_onnx('Y', 'X1', 'X2')
oinf = OnnxInference(onx)
got = oinf.run({'X1': inp1, 'X2': inp2})['Y']
self.assertEqualArray(exp, got, decimal=3)

onx = seq.to_onnx(
'Y', 'X1', 'X2',
initializer=[numpy_helper.from_array(inp2, name="X2")])
oinf = OnnxInference(onx)
got = oinf.run({'X1': inp1})['Y']
self.assertEqualArray(exp, got, decimal=3)

inp1 = numpy.arange(2 * 3 * 5).reshape((2, 3, 5)).astype(numpy.float32)
inp2 = numpy.arange(5 * 7).reshape((5, 7)).astype(numpy.float32)
local_test(inp1, inp2.T)

inp1 = numpy.random.uniform(size=[4, 5, 7]).astype(numpy.float32)
inp2 = numpy.random.uniform(size=[7, 8]).astype(numpy.float32)
local_test(inp1, inp2.T)

self.optimize_compare('bid,nd->bin')


if __name__ == "__main__":
    # Uncomment one of the lines below to debug a single test case.
    # TestEinsum().test_many_2()
    # TestEinsum().test_bid_nd_bin()
    unittest.main()
37 changes: 37 additions & 0 deletions _unittests/ut_testing/test_einsum_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
@brief test log(time=8s)
"""
import unittest
from pyquickhelper.pycode import ExtTestCase
from mlprodict.testing.einsum_bench import einsum_benchmark


class TestEinsumBenchmark(ExtTestCase):
    """Checks :func:`einsum_benchmark` for every supported runtime."""

    def test_benchmark1(self):
        # Single integer shape: every dimension gets the same size.
        for rt in ['numpy', 'python', 'onnxruntime']:
            with self.subTest(rt=rt):
                # FIX: `rt` was declared in subTest but never forwarded,
                # so every iteration benchmarked the default runtime.
                res = list(einsum_benchmark(shape=5, runtime=rt))
                self.assertEqual(len(res), 2)

    def test_benchmark2(self):
        # Two integer sizes: twice as many measurements.
        for rt in ['numpy', 'python', 'onnxruntime']:
            with self.subTest(rt=rt):
                res = list(einsum_benchmark(shape=[5, 6], runtime=rt))
                self.assertEqual(len(res), 4)

    def test_benchmark1_shape(self):
        # Explicit tuple shapes, one per input of the equation.
        for rt in ['numpy', 'python', 'onnxruntime']:
            with self.subTest(rt=rt):
                res = list(einsum_benchmark(
                    shape=[(5, 5, 5), (5, 5)], runtime=rt))
                self.assertEqual(len(res), 2)

    def test_benchmarkn(self):
        # Permutations multiply the number of measured equations.
        for rt in ['numpy']:
            with self.subTest(rt=rt):
                res = list(einsum_benchmark(shape=5, perm=True, runtime=rt))
                self.assertEqual(len(res), 48)


if __name__ == "__main__":
    unittest.main()
5 changes: 4 additions & 1 deletion mlprodict/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,24 @@ def main(args, fLOG=print):
from .cli.asv_bench import asv_bench
from .cli.asv2csv import asv2csv
from .cli.replay import benchmark_replay
from .cli.einsum import einsum_test
except ImportError: # pragma: no cover
from mlprodict.cli.validate import validate_runtime
from mlprodict.cli.convert_validate import convert_validate
from mlprodict.cli.optimize import onnx_optim, onnx_stats
from mlprodict.cli.asv_bench import asv_bench
from mlprodict.cli.asv2csv import asv2csv
from mlprodict.cli.replay import benchmark_replay
from mlprodict.cli.einsum import einsum_test

fcts = dict(validate_runtime=validate_runtime,
convert_validate=convert_validate,
onnx_optim=onnx_optim,
onnx_stats=onnx_stats,
asv_bench=asv_bench,
asv2csv=asv2csv,
benchmark_replay=benchmark_replay)
benchmark_replay=benchmark_replay,
einsum_test=einsum_test)
try:
from pyquickhelper.cli import cli_main_helper
except ImportError: # pragma: no cover
Expand Down
1 change: 1 addition & 0 deletions mlprodict/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
@brief Shortcut to *cli*.
"""
from .convert_validate import convert_validate
from .einsum import einsum_test
from .optimize import onnx_optim
from .validate import validate_runtime
76 changes: 76 additions & 0 deletions mlprodict/cli/einsum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
@file
@brief Command line to check einsum scenarios.
"""
import os


def einsum_test(equation="abc,cd->abd", shape="30", perm=False,
                runtime='python', verbose=1, fLOG=print,
                output=None, number=5, repeat=5):
    """
    Investigates whether or not the decomposing einsum is faster.

    :param equation: einsum equation to test
    :param shape: an integer (all dimensions get the same size) or
        a list of shapes in a string separated with `;` or
        a list of integers to try out multiple sizes,
        example: `5`, `(5,5,5);(5,5)`, `5,6`
    :param perm: check on permutation or all letter permutations
    :param runtime: `'numpy'`, `'python'`, `'onnxruntime'`
    :param verbose: verbose
    :param fLOG: logging function
    :param output: output file (usually a csv file or an excel file),
        it requires pandas
    :param number: usual parameter to measure a function
    :param repeat: usual parameter to measure a function

    .. cmdref::
        :title: Investigates whether or not the decomposing einsum is faster.
        :cmd: -m mlprodict einsum_test --help
        :lid: l-cmd-einsum_test

        The command checks whether or not decomposing an einsum function
        is faster than einsum implementation.

        Example::

            python -m mlprodict einsum_test --equation="abc,cd->abd" --output=res.csv
    """
    from ..testing.einsum_bench import einsum_benchmark  # pylint: disable=E0402

    # Command-line arguments arrive as strings: normalize them first.
    perm = perm in ('True', '1', 1, True)
    # FIX: only parse `shape` when it is a string; the previous code
    # assumed a string and crashed on programmatic calls passing an
    # integer or a list of shapes directly.
    if isinstance(shape, str):
        if "(" not in shape:
            if "," in shape:
                # a comma-separated list of sizes, e.g. "5,6"
                shape = list(map(int, shape.split(",")))
            else:
                # a single size, e.g. "30"
                shape = int(shape)
        else:
            # explicit tuple shapes, e.g. "(5,5,5);(5,5)"
            shapes = shape.replace('(', '').replace(')', '').split(";")
            shape = [tuple(map(int, sh.split(','))) for sh in shapes]
    verbose = int(verbose)
    number = int(number)
    repeat = int(repeat)

    res = einsum_benchmark(equation=equation, shape=shape, perm=perm,
                           runtime=runtime, use_tqdm=verbose > 0,
                           number=number, repeat=repeat)
    if output not in ('', None):
        # pandas is only required when results must be dumped to a file.
        import pandas
        df = pandas.DataFrame(res)
        ext = os.path.splitext(output)[-1]
        if ext == '.csv':
            df.to_csv(output, index=False)
            fLOG('[einsum_test] wrote file %r.' % output)
        elif ext == '.xlsx':
            df.to_excel(output, index=False)
            fLOG('[einsum_test] wrote file %r.' % output)
        else:
            raise ValueError(
                "Unknown extension %r in file %r." % (ext, output))
    else:
        # no output file: print every measurement.
        for r in res:
            fLOG(r)
48 changes: 48 additions & 0 deletions mlprodict/testing/bench_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
@file
@brief Helpers for benchmarks.
"""
from timeit import Timer
import numpy


def measure_time(stmt, *x, repeat=5, number=5, div_by_number=True,
                 first_run=True):
    """
    Measures a statement and returns the results as a dictionary.

    :param stmt: callable to measure, called as ``stmt(*x)``
    :param x: inputs forwarded to *stmt*
    :param repeat: average over *repeat* experiments
    :param number: number of executions in one row
    :param div_by_number: divide by the number of executions
    :param first_run: if True, runs the function once before measuring
    :return: dictionary with keys ``average``, ``deviation``, ``min_exec``,
        ``max_exec``, ``repeat``, ``number``, ``total``

    See `Timer.repeat
    <https://docs.python.org/3/library/timeit.html?timeit.Timer.repeat>`_
    for a better understanding of parameter *repeat* and *number*.
    The function returns a duration corresponding to
    *number* times the execution of the main statement.
    """
    # Dry run to surface obvious failures before timing starts.
    try:
        stmt(*x)
    except RuntimeError as e:  # pragma: no cover
        # FIX: `x` is a tuple of inputs and has no `dtype` attribute;
        # the previous message (`x.dtype`) raised an AttributeError that
        # masked the original error. Describe each input instead.
        desc = ", ".join(
            str(getattr(v, 'dtype', type(v))) for v in x)
        raise RuntimeError(
            "Unable to run stmt with inputs ({})".format(desc)) from e

    def fct():
        stmt(*x)

    if first_run:
        # warm-up call excluded from the measurements
        fct()
    tim = Timer(fct)
    res = numpy.array(tim.repeat(repeat=repeat, number=number))
    total = numpy.sum(res)
    if div_by_number:
        res /= number
    mean = numpy.mean(res)
    # standard deviation from E[X^2] - E[X]^2, clipped at 0 to guard
    # against tiny negative values caused by rounding errors
    dev = max(0, numpy.mean(res ** 2) - mean ** 2) ** 0.5
    return dict(average=mean, deviation=dev, min_exec=numpy.min(res),
                max_exec=numpy.max(res), repeat=repeat, number=number,
                total=total)
Loading