Skip to content
This repository was archived by the owner on Jan 13, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
13817c9
Einsum decomposition
sdpython Apr 20, 2021
da470a1
Update test_einsum.py
sdpython Apr 20, 2021
9f760b9
fix a couple of issues
sdpython Apr 21, 2021
c2e0f47
improve dot graph
sdpython Apr 22, 2021
6758a9a
documentation
sdpython Apr 22, 2021
14eba46
remove optimize=True in some cases
sdpython Apr 22, 2021
7943226
Add diagonal
sdpython Apr 23, 2021
8d9ded8
refactoring, fix duplicates indices
sdpython Apr 23, 2021
9290046
refactoring
sdpython Apr 23, 2021
9a8d385
fix python implemented, one broadcast case is still missing
sdpython Apr 24, 2021
f298ac6
fix python version of matmul
sdpython Apr 24, 2021
60650d9
update unit tests
sdpython Apr 25, 2021
84b3618
decompose matmul into smaller pieces
sdpython Apr 26, 2021
8dd3a65
update, still fixes to do
sdpython Apr 27, 2021
32e356d
Fix matrix multiplication
sdpython Apr 28, 2021
2035bae
fix decomposition of einsum
sdpython Apr 28, 2021
1b660dd
documentation
sdpython Apr 29, 2021
f0efa47
remove useless transpose
sdpython Apr 29, 2021
222bad2
merge expand_dims node into a single one
sdpython Apr 29, 2021
801366d
simplifies output graph
sdpython Apr 29, 2021
218f031
add onnx export to einsum
sdpython Apr 30, 2021
50747a3
fix issue with sparse
sdpython Apr 30, 2021
41f6273
fix matmul computation
sdpython Apr 30, 2021
9f3885c
fix issue with latest version of scikit-learn
sdpython Apr 30, 2021
662727f
finalize conversion to onnx
sdpython Apr 30, 2021
91ec893
Update test_einsum.py
sdpython Apr 30, 2021
b28dd2d
remove duplicate transpose
sdpython May 1, 2021
93aca50
code coverage
sdpython May 1, 2021
8fe01ff
Merge branch 'master' of https://github.com/sdpython/mlprodict into e…
sdpython May 2, 2021
e864e7f
Merge branch 'master' of https://github.com/sdpython/mlprodict into e…
sdpython May 2, 2021
8050dc5
Add function to benchmark einsum decomposition
sdpython May 2, 2021
67910d5
fix ir_version
sdpython May 3, 2021
b2f5a71
support initializers
sdpython May 3, 2021
7f0c756
lint
sdpython May 3, 2021
516ccee
documentation
sdpython May 3, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions _doc/sphinxdoc/source/api/testing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ Einsum

.. autosignature:: mlprodict.testing.experimental_c.custom_einsum_double

.. autosignature:: mlprodict.testing.einsum_bench.einsum_benchmark

.. autosignature:: mlprodict.testing.einsum_impl_ext.numpy_diagonal

.. autosignature:: mlprodict.testing.einsum_impl_ext.numpy_extended_dot
Expand Down
77 changes: 77 additions & 0 deletions _unittests/ut_cli/test_cli_einsum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
@brief test tree node (time=4s)
"""
import os
import unittest
from pyquickhelper.loghelper import BufferedPrint
from pyquickhelper.pycode import ExtTestCase, get_temp_folder
from mlprodict.__main__ import main


class TestCliEinsum(ExtTestCase):
    """Tests command line ``einsum_test`` with several output formats."""

    def _run_einsum(self, temp_name, output_name, extra_args):
        # Shared driver: runs the command line into a temporary folder,
        # then checks the output file exists and the log mentions it.
        temp = get_temp_folder(__file__, temp_name)
        name = os.path.join(temp, output_name)
        st = BufferedPrint()
        args = ["einsum_test", "--equation", "abc,cd->ad",
                "--output", name] + extra_args
        main(args=args, fLOG=st.fprint)
        self.assertExists(name)
        self.assertIn("wrote", str(st))

    def test_cli_einsum(self):
        # --help must display the documented parameters.
        st = BufferedPrint()
        main(args=["einsum_test", "--help"], fLOG=st.fprint)
        self.assertIn("verbose", str(st))

    def test_cli_excel(self):
        self._run_einsum("temp_cli_excel", "res.xlsx",
                         ["--shape", "5", "--verbose", "0"])

    def test_cli_csv(self):
        self._run_einsum("temp_cli_csv", "res.csv",
                         ["--shape", "(5,5,5);(5,5)", "--verbose", "0"])

    def test_cli_csv_n(self):
        self._run_einsum("temp_cli_csvn", "res.csv",
                         ["--shape", "5,5", "--verbose", "0"])

    def test_cli_csv_rt(self):
        self._run_einsum("temp_cli_csv_rt", "res.csv",
                         ["--shape", "(5,5,5);(5,5)", "--verbose", "0",
                          "--runtime", "onnxruntime"])

    def test_cli_csv_perm(self):
        self._run_einsum("temp_cli_csv_perm", "res.csv",
                         ["--shape", "(5,5,5);(5,5)", "--verbose", "0",
                          "--perm", "1"])


if __name__ == "__main__":
    unittest.main()
34 changes: 33 additions & 1 deletion _unittests/ut_testing/test_einsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from contextlib import redirect_stdout
import itertools
import numpy
from onnx import numpy_helper
from onnxruntime import (
InferenceSession, GraphOptimizationLevel, SessionOptions)
from pyquickhelper.pycode import ExtTestCase
Expand Down Expand Up @@ -628,7 +629,38 @@ def test_exc(self):
r = repr(EinsumSubOp(2, 'transpose', 0, perm=(1, 0)))
self.assertIn("EinsumSubOp('transpose', 0, perm=(1, 0))", r)

def test_bid_nd_bin(self):

def local_test(inp1, inp2):
exp = numpy.einsum('bid,nd->bin', inp1, inp2)
seq = decompose_einsum_equation(
'bid,nd->bin', clean=True, strategy='numpy')
got = apply_einsum_sequence(seq, inp1, inp2)
self.assertEqualArray(exp, got, decimal=3)

onx = seq.to_onnx('Y', 'X1', 'X2')
oinf = OnnxInference(onx)
got = oinf.run({'X1': inp1, 'X2': inp2})['Y']
self.assertEqualArray(exp, got, decimal=3)

onx = seq.to_onnx(
'Y', 'X1', 'X2',
initializer=[numpy_helper.from_array(inp2, name="X2")])
oinf = OnnxInference(onx)
got = oinf.run({'X1': inp1})['Y']
self.assertEqualArray(exp, got, decimal=3)

inp1 = numpy.arange(2 * 3 * 5).reshape((2, 3, 5)).astype(numpy.float32)
inp2 = numpy.arange(5 * 7).reshape((5, 7)).astype(numpy.float32)
local_test(inp1, inp2.T)

inp1 = numpy.random.uniform(size=[4, 5, 7]).astype(numpy.float32)
inp2 = numpy.random.uniform(size=[7, 8]).astype(numpy.float32)
local_test(inp1, inp2.T)

self.optimize_compare('bid,nd->bin')


if __name__ == "__main__":
    # Uncomment one of the lines below to debug a single test case.
    # TestEinsum().test_many_2()
    # TestEinsum().test_bid_nd_bin()
    unittest.main()
37 changes: 37 additions & 0 deletions _unittests/ut_testing/test_einsum_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
@brief test log(time=8s)
"""
import unittest
from pyquickhelper.pycode import ExtTestCase
from mlprodict.testing.einsum_bench import einsum_benchmark


class TestEinsumBenchmark(ExtTestCase):
    """Checks :func:`einsum_benchmark` for every supported runtime."""

    def test_benchmark1(self):
        # Single integer shape: every dimension gets the same size.
        for rt in ['numpy', 'python', 'onnxruntime']:
            with self.subTest(rt=rt):
                # FIX: `rt` was declared in subTest but never forwarded,
                # so every iteration benchmarked the default runtime.
                res = list(einsum_benchmark(shape=5, runtime=rt))
                self.assertEqual(len(res), 2)

    def test_benchmark2(self):
        # Two integer sizes: twice as many measurements.
        for rt in ['numpy', 'python', 'onnxruntime']:
            with self.subTest(rt=rt):
                res = list(einsum_benchmark(shape=[5, 6], runtime=rt))
                self.assertEqual(len(res), 4)

    def test_benchmark1_shape(self):
        # Explicit tuple shapes, one per input of the equation.
        for rt in ['numpy', 'python', 'onnxruntime']:
            with self.subTest(rt=rt):
                res = list(einsum_benchmark(
                    shape=[(5, 5, 5), (5, 5)], runtime=rt))
                self.assertEqual(len(res), 2)

    def test_benchmarkn(self):
        # Permutations multiply the number of measured equations.
        for rt in ['numpy']:
            with self.subTest(rt=rt):
                res = list(einsum_benchmark(shape=5, perm=True, runtime=rt))
                self.assertEqual(len(res), 48)


if __name__ == "__main__":
    unittest.main()
5 changes: 4 additions & 1 deletion mlprodict/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,24 @@ def main(args, fLOG=print):
from .cli.asv_bench import asv_bench
from .cli.asv2csv import asv2csv
from .cli.replay import benchmark_replay
from .cli.einsum import einsum_test
except ImportError: # pragma: no cover
from mlprodict.cli.validate import validate_runtime
from mlprodict.cli.convert_validate import convert_validate
from mlprodict.cli.optimize import onnx_optim, onnx_stats
from mlprodict.cli.asv_bench import asv_bench
from mlprodict.cli.asv2csv import asv2csv
from mlprodict.cli.replay import benchmark_replay
from mlprodict.cli.einsum import einsum_test

fcts = dict(validate_runtime=validate_runtime,
convert_validate=convert_validate,
onnx_optim=onnx_optim,
onnx_stats=onnx_stats,
asv_bench=asv_bench,
asv2csv=asv2csv,
benchmark_replay=benchmark_replay)
benchmark_replay=benchmark_replay,
einsum_test=einsum_test)
try:
from pyquickhelper.cli import cli_main_helper
except ImportError: # pragma: no cover
Expand Down
1 change: 1 addition & 0 deletions mlprodict/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
@brief Shortcut to *cli*.
"""
from .convert_validate import convert_validate
from .einsum import einsum_test
from .optimize import onnx_optim
from .validate import validate_runtime
76 changes: 76 additions & 0 deletions mlprodict/cli/einsum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
@file
@brief Command line to check einsum scenarios.
"""
import os


def einsum_test(equation="abc,cd->abd", shape="30", perm=False,
                runtime='python', verbose=1, fLOG=print,
                output=None, number=5, repeat=5):
    """
    Investigates whether or not the decomposing einsum is faster.

    :param equation: einsum equation to test
    :param shape: an integer (all dimensions get the same size) or
        a list of shapes in a string separated with `;` or
        a list of integers to try out multiple sizes,
        example: `5`, `(5,5,5);(5,5)`, `5,6`
    :param perm: check on permutation or all letter permutations
    :param runtime: `'numpy'`, `'python'`, `'onnxruntime'`
    :param verbose: verbose
    :param fLOG: logging function
    :param output: output file (usually a csv file or an excel file),
        it requires pandas
    :param number: usual parameter to measure a function
    :param repeat: usual parameter to measure a function

    .. cmdref::
        :title: Investigates whether or not the decomposing einsum is faster.
        :cmd: -m mlprodict einsum_test --help
        :lid: l-cmd-einsum_test

        The command checks whether or not decomposing an einsum function
        is faster than einsum implementation.

        Example::

            python -m mlprodict einsum_test --equation="abc,cd->abd" --output=res.csv
    """
    from ..testing.einsum_bench import einsum_benchmark  # pylint: disable=E0402

    # Command-line arguments arrive as strings: normalize them first.
    perm = perm in ('True', '1', 1, True)
    # FIX: only parse `shape` when it is a string; the previous code
    # assumed a string and crashed on programmatic calls passing an
    # integer or a list of shapes directly.
    if isinstance(shape, str):
        if "(" not in shape:
            if "," in shape:
                # a comma-separated list of sizes, e.g. "5,6"
                shape = list(map(int, shape.split(",")))
            else:
                # a single size, e.g. "30"
                shape = int(shape)
        else:
            # explicit tuple shapes, e.g. "(5,5,5);(5,5)"
            shapes = shape.replace('(', '').replace(')', '').split(";")
            shape = [tuple(map(int, sh.split(','))) for sh in shapes]
    verbose = int(verbose)
    number = int(number)
    repeat = int(repeat)

    res = einsum_benchmark(equation=equation, shape=shape, perm=perm,
                           runtime=runtime, use_tqdm=verbose > 0,
                           number=number, repeat=repeat)
    if output not in ('', None):
        # pandas is only required when results must be dumped to a file.
        import pandas
        df = pandas.DataFrame(res)
        ext = os.path.splitext(output)[-1]
        if ext == '.csv':
            df.to_csv(output, index=False)
            fLOG('[einsum_test] wrote file %r.' % output)
        elif ext == '.xlsx':
            df.to_excel(output, index=False)
            fLOG('[einsum_test] wrote file %r.' % output)
        else:
            raise ValueError(
                "Unknown extension %r in file %r." % (ext, output))
    else:
        # no output file: print every measurement.
        for r in res:
            fLOG(r)
48 changes: 48 additions & 0 deletions mlprodict/testing/bench_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
@file
@brief Helpers for benchmarks.
"""
from timeit import Timer
import numpy


def measure_time(stmt, *x, repeat=5, number=5, div_by_number=True,
                 first_run=True):
    """
    Measures a statement and returns the results as a dictionary.

    :param stmt: callable to measure, called as ``stmt(*x)``
    :param x: inputs forwarded to *stmt*
    :param repeat: average over *repeat* experiments
    :param number: number of executions in one row
    :param div_by_number: divide by the number of executions
    :param first_run: if True, runs the function once before measuring
    :return: dictionary with keys ``average``, ``deviation``, ``min_exec``,
        ``max_exec``, ``repeat``, ``number``, ``total``

    See `Timer.repeat
    <https://docs.python.org/3/library/timeit.html?timeit.Timer.repeat>`_
    for a better understanding of parameter *repeat* and *number*.
    The function returns a duration corresponding to
    *number* times the execution of the main statement.
    """
    # Dry run to surface obvious failures before timing starts.
    try:
        stmt(*x)
    except RuntimeError as e:  # pragma: no cover
        # FIX: `x` is a tuple of inputs and has no `dtype` attribute;
        # the previous message (`x.dtype`) raised an AttributeError that
        # masked the original error. Describe each input instead.
        desc = ", ".join(
            str(getattr(v, 'dtype', type(v))) for v in x)
        raise RuntimeError(
            "Unable to run stmt with inputs ({})".format(desc)) from e

    def fct():
        stmt(*x)

    if first_run:
        # warm-up call excluded from the measurements
        fct()
    tim = Timer(fct)
    res = numpy.array(tim.repeat(repeat=repeat, number=number))
    total = numpy.sum(res)
    if div_by_number:
        res /= number
    mean = numpy.mean(res)
    # standard deviation from E[X^2] - E[X]^2, clipped at 0 to guard
    # against tiny negative values caused by rounding errors
    dev = max(0, numpy.mean(res ** 2) - mean ** 2) ** 0.5
    return dict(average=mean, deviation=dev, min_exec=numpy.min(res),
                max_exec=numpy.max(res), repeat=repeat, number=number,
                total=total)
Loading