Implements numpy functions with onnx (#214)
* Implements numpy functions with onnx
* finalize OnnxNumpyCompiler
* First sketch of easy function onnx numpy
* add operators +, /, *, -
* support constants
* simplify when variables are reused
* add function transformer
* Fix issue with RNN opset 14
* Update requirements.txt
sdpython committed Feb 19, 2021
1 parent 991f12c commit 32dc659
Showing 36 changed files with 1,259 additions and 72 deletions.
1 change: 1 addition & 0 deletions _doc/sphinxdoc/source/api/index.rst
@@ -11,6 +11,7 @@ API
     onnxrt_ops
     onnx_conv
     sklapi
+    npy
     asv
     validation
     testing
76 changes: 76 additions & 0 deletions _doc/sphinxdoc/source/api/npy.rst
@@ -0,0 +1,76 @@

.. _l-numpy-onnxpy:

Numpy revisited with ONNX
=========================

Converting custom code into :epkg:`ONNX` is not necessarily easy.
One big obstacle is that :epkg:`ONNX` does not represent every
:epkg:`numpy` function with a single operator. One possible option
is to provide a :epkg:`numpy` API to :epkg:`ONNX`. That's the
purpose of the wrapper
:class:`onnxnumpy <mlprodict.npy.onnx_numpy_wrapper.onnxnumpy>`.
It takes a function written with operations following the same
signatures as :epkg:`numpy` and provides a way to execute it
with an :epkg:`ONNX` runtime. In the example below, `custom_fct`
creates an :epkg:`ONNX` graph; the wrapper loads it into a runtime
and runs it every time the function is called.

.. runpython::
    :showcode:

    import numpy
    from typing import Any
    from mlprodict.npy import onnxnumpy_default, NDArray
    import mlprodict.npy.numpy_impl as nxnp

    @onnxnumpy_default
    def custom_fct(x: NDArray[Any, numpy.float32],
                   ) -> NDArray[Any, numpy.float32]:
        "onnx numpy abs"
        return nxnp.abs(x) + numpy.float32(1)

    x = numpy.array([[6.1, -5], [3.5, -7.8]], dtype=numpy.float32)
    y = custom_fct(x)
    print(y)

Annotations are mandatory to indicate the types of the inputs
and outputs. As a result, the returned function is strict about
types, as opposed to :epkg:`numpy`. This approach is similar to
what :epkg:`tensorflow` does with `autograph
<https://www.tensorflow.org/api_docs/python/tf/autograph>`_.
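
For instance, the sketch below illustrates that strictness.
It is a minimal sketch only: `onnx_twice` is a hypothetical
function name, and the exact exception raised on a dtype
mismatch may depend on the runtime::

    import numpy
    from typing import Any
    from mlprodict.npy import onnxnumpy_default, NDArray

    @onnxnumpy_default
    def onnx_twice(x: NDArray[Any, numpy.float32],
                   ) -> NDArray[Any, numpy.float32]:
        "doubles its input, declared for float32 only"
        return x + x

    x64 = numpy.array([1., 2.])  # float64 does not match the annotation
    try:
        onnx_twice(x64)
    except Exception as e:
        print(e)  # the wrapper rejects the unexpected dtype
    print(onnx_twice(x64.astype(numpy.float32)))  # works once cast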

.. contents::
:local:

NDArray
+++++++

.. autosignature:: mlprodict.npy.onnx_numpy_compiler.NDArray
    :members:

onnxnumpy
+++++++++

.. autosignature:: mlprodict.npy.onnx_numpy_wrapper.onnxnumpy

.. autosignature:: mlprodict.npy.onnx_numpy_wrapper.onnxnumpy_default

OnnxNumpyCompiler
+++++++++++++++++

.. autosignature:: mlprodict.npy.onnx_numpy_compiler.OnnxNumpyCompiler
    :members:

OnnxVar
+++++++

.. autosignature:: mlprodict.npy.onnx_variable.OnnxVar
    :members:

Available numpy functions
+++++++++++++++++++++++++

.. autosignature:: mlprodict.npy.numpy_impl.abs

.. autosignature:: mlprodict.npy.numpy_impl.sum
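
Both functions compose like their :epkg:`numpy` counterparts
inside a wrapped function. A minimal sketch, assuming `sum`
reduces over every axis by default as :epkg:`numpy` does::

    import numpy
    from typing import Any
    from mlprodict.npy import onnxnumpy_default, NDArray
    import mlprodict.npy.numpy_impl as nxnp

    @onnxnumpy_default
    def onnx_l1_norm(x: NDArray[Any, numpy.float32],
                     ) -> NDArray[Any, numpy.float32]:
        "sum of absolute values, computed by the ONNX runtime"
        return nxnp.sum(nxnp.abs(x))

    x = numpy.array([[1, -2], [-3, 4]], dtype=numpy.float32)
    print(onnx_l1_norm(x))  # expected: 10.0
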
4 changes: 2 additions & 2 deletions _doc/sphinxdoc/source/conf.py
@@ -43,8 +43,8 @@
     ])

 html_context = {
-    'css_files': get_default_stylesheet() + [
-        '_static/my-styles.css', '_static/gallery.css'],
+    'css_files': get_default_stylesheet([
+        '_static/my-styles.css', '_static/gallery.css']),
 }

 html_logo = "phdoc_static/project_ico.png"
2 changes: 1 addition & 1 deletion _unittests/ut__skl2onnx/test_sklearn_adaboost_converter.py
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=4s)
+@brief test tree node (time=90s)
 """
 import unittest
 from onnx.defs import onnx_opset_version
2 changes: 1 addition & 1 deletion _unittests/ut__skl2onnx/test_sklearn_cast_transformer.py
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=2s)
+@brief test tree node (time=15s)
 """
 import unittest
 import math
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=2s)
+@brief test tree node (time=30s)
 """
 import unittest
 import numpy as np
14 changes: 13 additions & 1 deletion _unittests/ut__skl2onnx/test_sklearn_gaussian_process.py
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=2s)
+@brief test tree node (time=20s)
 """
 import unittest
 import inspect
@@ -115,6 +115,7 @@ def check_outputs(self, model, model_onnx, Xtest,
         assert_almost_equal(np.squeeze(exp),
                             np.squeeze(got), decimal=decimal)

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_rbf_unfitted(self):

         se = (C(1.0, (1e-3, 1e3)) *
@@ -181,6 +182,7 @@ def test_gpr_rbf_fitted_true(self):
             basename="SklearnGaussianProcessRBFT",
             check_error="misses a kernel")

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_rbf_fitted_false(self):

         gp = GaussianProcessRegressor(
@@ -195,6 +197,7 @@ def test_gpr_rbf_fitted_false(self):
             verbose=False,
             basename="SklearnGaussianProcessRBF-Dec4")

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_rbf_fitted_return_std_true(self):
         gp = GaussianProcessRegressor(
             alpha=1e-7, n_restarts_optimizer=15, normalize_y=True)
@@ -223,6 +226,7 @@ def test_gpr_rbf_fitted_return_std_true(self):
             check_error="misses a kernel",
             disable_optimisation=True)

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self):

         gp = GaussianProcessRegressor(
@@ -246,6 +250,7 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self):
             predict_attributes=options[GaussianProcessRegressor],
             decimal=4, disable_optimisation=True)

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self):

         gp = GaussianProcessRegressor(
@@ -269,6 +274,7 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self):
                 GaussianProcessRegressor],
             decimal=4)

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_rbf_fitted_return_std_exp_sine_squared_double_true(self):

         gp = GaussianProcessRegressor(
@@ -292,6 +298,7 @@ def test_gpr_rbf_fitted_return_std_exp_sine_squared_double_true(self):
             predict_attributes=options[GaussianProcessRegressor],
             decimal=4, disable_optimisation=True)

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_rbf_fitted_return_std_dot_product_true(self):

         gp = GaussianProcessRegressor(
@@ -315,6 +322,7 @@ def test_gpr_rbf_fitted_return_std_dot_product_true(self):
             predict_attributes=options[GaussianProcessRegressor],
             decimal=3, disable_optimisation=True)

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_rbf_fitted_return_std_rational_quadratic_true(self):

         gp = GaussianProcessRegressor(
@@ -338,6 +346,7 @@ def test_gpr_rbf_fitted_return_std_rational_quadratic_true(self):
             predict_attributes=options[GaussianProcessRegressor],
             disable_optimisation=True)

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_fitted_shapes(self):
         data = load_iris()
         X = data.data.astype(np.float32)
@@ -352,6 +361,7 @@ def test_gpr_fitted_shapes(self):
         self.assertTrue(model_onnx is not None)
         self.check_outputs(gp, model_onnx, X_test, {}, skip_if_float32=True)

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_fitted_partial_float64(self):
         data = load_iris()
         X = data.data
@@ -373,6 +383,7 @@ def test_gpr_fitted_partial_float64(self):
         self.assertTrue(model_onnx is not None)
         self.check_outputs(gp, model_onnx, X_test, {})

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_fitted_partial_float64_operator_cdist_rbf(self):
         data = load_iris()
         X = data.data
@@ -449,6 +460,7 @@ def test_gpr_fitted_partial_float64_operator_cdist_sine(self):
         self.assertTrue(model_onnx is not None)
         self.check_outputs(gp, model_onnx, X_test, {})

+    @ignore_warnings(ConvergenceWarning)
     def test_gpr_fitted_partial_float64_operator_cdist_quad(self):
         data = load_iris()
         X = data.data
@@ -1,5 +1,5 @@
 """
-@brief test log(time=10s)
+@brief test log(time=9s)
 """

 import unittest
2 changes: 1 addition & 1 deletion _unittests/ut__skl2onnx/test_sklearn_isolation_forest.py
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=3s)
+@brief test tree node (time=14s)
 """
 import unittest
 import numpy
2 changes: 1 addition & 1 deletion _unittests/ut__skl2onnx/test_sklearn_k_means_converter.py
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=2s)
+@brief test tree node (time=8s)
 """
 import unittest
 import numpy
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=2s)
+@brief test tree node (time=5s)
 """
 import unittest
 import numpy
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=2s)
+@brief test tree node (time=12s)
 """
 import unittest
 import numpy
2 changes: 1 addition & 1 deletion _unittests/ut__skl2onnx/test_sklearn_pipeline.py
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=2s)
+@brief test tree node (time=10s)
 """
 import unittest
 import warnings
2 changes: 1 addition & 1 deletion _unittests/ut__skl2onnx/test_sklearn_stacking.py
@@ -1,5 +1,5 @@
 """
-@brief test tree node (time=2s)
+@brief test tree node (time=20s)
 """
 import unittest
 from sklearn.linear_model import LinearRegression, LogisticRegression
55 changes: 55 additions & 0 deletions _unittests/ut_npy/test_function_transformer.py
@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
"""
@brief test log(time=3s)
"""
import unittest
import warnings
from logging import getLogger
from typing import Any
import numpy
from sklearn.preprocessing import FunctionTransformer
from pyquickhelper.pycode import ExtTestCase, ignore_warnings
from mlprodict.onnx_conv import register_rewritten_operators, to_onnx
from mlprodict.onnxrt import OnnxInference
from mlprodict.npy import onnxnumpy_default
import mlprodict.npy.numpy_impl as nxnp
from mlprodict.npy import NDArray


@onnxnumpy_default
def custom_fct(x: NDArray[Any, numpy.float32],
               ) -> NDArray[Any, numpy.float32]:
    "onnx custom function"
    return (nxnp.abs(x) + x) / numpy.float32(2)


class TestOnnxFunctionTransformer(ExtTestCase):

    def setUp(self):
        logger = getLogger('skl2onnx')
        logger.disabled = True
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)
            res = register_rewritten_operators()
        self.assertGreater(len(res), 2)
        self.assertIn('SklearnFunctionTransformer', res[0])
        self.assertIn('SklearnFunctionTransformer', res[1])

    @ignore_warnings(DeprecationWarning)
    def test_function_transformer(self):
        x = numpy.array([[6.1, -5], [3.5, -7.8]], dtype=numpy.float32)
        tr = FunctionTransformer(custom_fct)
        tr.fit(x)
        y_exp = tr.transform(x)
        self.assertEqualArray(
            numpy.array([[6.1, 0.], [3.5, 0.]], dtype=numpy.float32),
            y_exp)

        onnx_model = to_onnx(tr, x)
        oinf = OnnxInference(onnx_model)
        y_onx = oinf.run({'X': x})
        self.assertEqualArray(y_exp, y_onx['variable'])


if __name__ == "__main__":
    unittest.main()