From ef25574926f8a3cd228650870ee2cfe8bbbff003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 24 Feb 2021 13:02:23 +0100 Subject: [PATCH 1/4] tutorial on numpy API for ONNX --- _doc/sphinxdoc/source/conf.py | 3 + _doc/sphinxdoc/source/index.rst | 2 + _doc/sphinxdoc/source/onnx.rst | 1 + _doc/sphinxdoc/source/onnx_bench.rst | 2 + _doc/sphinxdoc/source/onnx_conv.rst | 1 + _doc/sphinxdoc/source/onnx_runtime.rst | 9 +- _doc/sphinxdoc/source/tutorial/index.rst | 1 + _doc/sphinxdoc/source/tutorial/onnx.rst | 125 +----------- _doc/sphinxdoc/source/tutorial/onnx_numpy.rst | 191 ++++++++++++++++++ mlprodict/asv_benchmark/create_asv.py | 1 + mlprodict/grammar_sklearn/g_sklearn_main.py | 2 + mlprodict/onnx_conv/convert.py | 1 + .../operator_converters/conv_lightgbm.py | 1 + mlprodict/onnx_conv/validate_scenarios.py | 2 + .../onnx_grammar/node_visitor_translator.py | 1 + mlprodict/onnx_grammar/onnx_translation.py | 3 + mlprodict/onnxrt/doc/doc_helper.py | 2 + mlprodict/onnxrt/onnx2py_helper.py | 2 + mlprodict/onnxrt/onnx_inference.py | 3 + mlprodict/onnxrt/onnx_inference_exports.py | 3 + mlprodict/onnxrt/optim/onnx_helper.py | 1 + mlprodict/onnxrt/optim/sklearn_helper.py | 4 + mlprodict/onnxrt/shape_object.py | 1 + mlprodict/onnxrt/validate/data/__init__.py | 1 + mlprodict/onnxrt/validate/validate.py | 2 + .../onnxrt/validate/validate_benchmark.py | 1 + mlprodict/onnxrt/validate/validate_helper.py | 3 + .../onnxrt/validate/validate_problems.py | 1 + .../onnxrt/validate/validate_scenarios.py | 1 + mlprodict/tools/asv_options_helper.py | 4 + mlprodict/tools/filename_helper.py | 1 + mlprodict/tools/model_info.py | 1 + mlprodict/tools/speed_measure.py | 1 + mlprodict/tools/zoo.py | 1 + 34 files changed, 257 insertions(+), 122 deletions(-) create mode 100644 _doc/sphinxdoc/source/tutorial/onnx_numpy.rst diff --git a/_doc/sphinxdoc/source/conf.py b/_doc/sphinxdoc/source/conf.py index 51db3c7ca..9f47887f6 100644 --- a/_doc/sphinxdoc/source/conf.py +++ b/_doc/sphinxdoc/source/conf.py @@ -58,6 +58,7 @@ epkg_dictionary.update({ '_PredictScorer': 'https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/scorer.py#L168', 'airspeed-velocity': 'https://github.com/airspeed-velocity/asv', + 'ast': 'https://docs.python.org/3/library/ast.html', 'asv': 'https://github.com/airspeed-velocity/asv', 'bench1': 'http://www.xavierdupre.fr/app/mlprodict_bench/helpsphinx/index.html', 'bench2': 'http://www.xavierdupre.fr/app/mlprodict_bench2/helpsphinx/index.html', @@ -73,6 +74,7 @@ 'DOT': 'https://en.wikipedia.org/wiki/DOT_(graph_description_language)', 'einsum': 'https://numpy.org/doc/stable/reference/generated/numpy.einsum.html', 'exec': 'https://docs.python.org/3/library/functions.html#exec', + 'FunctionTransformer': 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.FunctionTransformer.html', 'GaussianProcessRegressor': 'https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html', 'Iris': 'https://en.wikipedia.org/wiki/Iris_flower_data_set', 'IR_VERSION': 'https://github.com/onnx/onnx/blob/master/docs/IR.md#onnx-versioning', @@ -105,6 +107,7 @@ 'run_asv.sh': 'https://github.com/sdpython/mlprodict/blob/master/bin/run_asv.sh', 'Rust': 'https://www.rust-lang.org/', 'sklearn-onnx': 'https://github.com/onnx/sklearn-onnx', + 'sklearn-onnx tutorial': 'http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/tutorial.html', 'tensorflow': 'https://www.tensorflow.org/', 'Tokenizer': 
'https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md#com.microsoft.Tokenizer', 'tqdm': 'https://github.com/tqdm/tqdm', diff --git a/_doc/sphinxdoc/source/index.rst b/_doc/sphinxdoc/source/index.rst index c1aa4aa6f..a2170797c 100644 --- a/_doc/sphinxdoc/source/index.rst +++ b/_doc/sphinxdoc/source/index.rst @@ -95,6 +95,7 @@ does not have any dependency on :epkg:`scikit-learn`. .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.linear_model import LinearRegression @@ -157,6 +158,7 @@ predictions. .. runpython:: :showcode: + :warningout: DeprecationWarning from sklearn.linear_model import LogisticRegression from sklearn.datasets import load_iris diff --git a/_doc/sphinxdoc/source/onnx.rst b/_doc/sphinxdoc/source/onnx.rst index 3670f8512..deb9bc35e 100644 --- a/_doc/sphinxdoc/source/onnx.rst +++ b/_doc/sphinxdoc/source/onnx.rst @@ -31,6 +31,7 @@ of modules below: .. runpython:: :showcode: + :warningout: DeprecationWarning :rst: from mlprodict.onnxrt.validate.validate_helper import modules_list diff --git a/_doc/sphinxdoc/source/onnx_bench.rst b/_doc/sphinxdoc/source/onnx_bench.rst index 3936f63fe..0ecb92bb5 100644 --- a/_doc/sphinxdoc/source/onnx_bench.rst +++ b/_doc/sphinxdoc/source/onnx_bench.rst @@ -46,6 +46,7 @@ of modules below: .. runpython:: :showcode: + :warningout: DeprecationWarning :rst: from mlprodict.onnxrt.validate.validate_helper import modules_list @@ -80,6 +81,7 @@ describes the list of considered problems. .. runpython:: :showcode: + :warningout: DeprecationWarning :rst: from mlprodict.onnxrt.validate.validate import sklearn_operators, find_suitable_problem diff --git a/_doc/sphinxdoc/source/onnx_conv.rst b/_doc/sphinxdoc/source/onnx_conv.rst index fe656cc14..4d963a112 100644 --- a/_doc/sphinxdoc/source/onnx_conv.rst +++ b/_doc/sphinxdoc/source/onnx_conv.rst @@ -13,6 +13,7 @@ can be fully converted. .. runpython:: :rst: + :warningout: DeprecationWarning :showcode: from mlprodict.onnx_conv.register import register_converters diff --git a/_doc/sphinxdoc/source/onnx_runtime.rst b/_doc/sphinxdoc/source/onnx_runtime.rst index 208b92044..027f5d6db 100644 --- a/_doc/sphinxdoc/source/onnx_runtime.rst +++ b/_doc/sphinxdoc/source/onnx_runtime.rst @@ -21,6 +21,7 @@ Main class is :class:`OnnxInference .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.linear_model import LinearRegression @@ -94,7 +95,7 @@ the cause of the error if it does not work. .. runpython:: :showcode: :rst: - :warningout: PendingDeprecationWarning UserWarning RuntimeWarning FutureWarning + :warningout: DeprecationWarning PendingDeprecationWarning UserWarning RuntimeWarning FutureWarning from logging import getLogger from pyquickhelper.loghelper import noLOG @@ -142,7 +143,7 @@ intermediate node anymore. .. runpython:: :showcode: - :warningout: FutureWarning + :warningout: FutureWarning DeprecationWarning import numpy from sklearn.ensemble import AdaBoostRegressor @@ -171,7 +172,7 @@ We create a table similar to :ref:`l-onnx-pyrun-tbl`. .. runpython:: :showcode: :rst: - :warningout: PendingDeprecationWarning UserWarning RuntimeWarning + :warningout: DeprecationWarning PendingDeprecationWarning UserWarning RuntimeWarning from logging import getLogger from pyquickhelper.loghelper import noLOG @@ -240,7 +241,7 @@ object to :epkg:`ONNX`. We create a table similar to .. 
runpython:: :showcode: :rst: - :warningout: PendingDeprecationWarning UserWarning RuntimeWarning + :warningout: DeprecationWarning PendingDeprecationWarning UserWarning RuntimeWarning from logging import getLogger from pyquickhelper.loghelper import noLOG diff --git a/_doc/sphinxdoc/source/tutorial/index.rst b/_doc/sphinxdoc/source/tutorial/index.rst index bfa716143..8bfe9b88a 100644 --- a/_doc/sphinxdoc/source/tutorial/index.rst +++ b/_doc/sphinxdoc/source/tutorial/index.rst @@ -9,5 +9,6 @@ one piece this module can do. More should follow. :maxdepth: 1 onnx + onnx_numpy optim benchmark diff --git a/_doc/sphinxdoc/source/tutorial/onnx.rst b/_doc/sphinxdoc/source/tutorial/onnx.rst index f11da193c..5eeaea3ee 100644 --- a/_doc/sphinxdoc/source/tutorial/onnx.rst +++ b/_doc/sphinxdoc/source/tutorial/onnx.rst @@ -22,6 +22,7 @@ to convert many :epkg:`scikit-learn` models. .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.datasets import load_iris @@ -48,6 +49,7 @@ begin. .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.datasets import load_iris @@ -74,6 +76,7 @@ The verbosity can be increased. .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.datasets import load_iris @@ -103,6 +106,7 @@ can also call :epkg:`onnxruntime` to compute the predictions by using .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.datasets import load_iris @@ -129,6 +133,7 @@ to look into intermediate results. .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.datasets import load_iris @@ -157,6 +162,7 @@ As a consequence, interdiate results cannot be seen anymore. .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.datasets import load_iris @@ -190,6 +196,7 @@ It works exactly the same: .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.datasets import load_iris @@ -209,121 +216,3 @@ It works exactly the same: oinf = OnnxInference(model_def, runtime='python') print(oinf.run({'X': X_test[:5]})) - -From ONNX to Python -+++++++++++++++++++ - -The Python Runtime can be optimized by generating -custom python code and dynamically compile it. -:class:`OnnxInference ` -computes predictions based on an ONNX graph with a -python runtime or :epkg:`onnxruntime`. -Method :meth:`to_python -` -goes further by converting the ONNX graph into a standalone -python code. - -.. _l-numpy2onnx-tutorial: - -From numpy to ONNX -++++++++++++++++++ - -.. index:: algebric function - -*mlprodict* implements function -:func:`translate_fct2onnx -` -which converts the code -of a function written with :epkg:`numpy` and :epkg:`scipy` -into an :epkg:`ONNX` graph. - -The kernel *ExpSineSquared* -is used by :epkg:`sklearn:gaussian_process:GaussianProcessRegressor` -and its conversion is required to convert the model. -The first step is to write a standalone function which -relies on :epkg:`scipy` or :epkg:`numpy` and which produces -the same results. The second step calls this function to -produces the :epkg:`ONNX` graph. - -.. 
runpython:: - :showcode: - :process: - :store_in_file: fct2onnx_expsine.py - - import numpy - from scipy.spatial.distance import squareform, pdist - from sklearn.gaussian_process.kernels import ExpSineSquared - from mlprodict.onnx_grammar import translate_fct2onnx - from mlprodict.onnx_grammar.onnx_translation import squareform_pdist, py_make_float_array - from mlprodict.onnxrt import OnnxInference - - # The function to convert into ONNX. - def kernel_call_ynone(X, length_scale=1.2, periodicity=1.1, pi=3.141592653589793): - - # squareform(pdist(X, ...)) in one function. - dists = squareform_pdist(X, metric='euclidean') - - # Function starting with 'py_' --> must not be converted into ONNX. - t_pi = py_make_float_array(pi) - t_periodicity = py_make_float_array(periodicity) - - # This operator must be converted into ONNX. - arg = dists / t_periodicity * t_pi - sin_of_arg = numpy.sin(arg) - - t_2 = py_make_float_array(2) - t__2 = py_make_float_array(-2) - - t_length_scale = py_make_float_array(length_scale) - - K = numpy.exp((sin_of_arg / t_length_scale) ** t_2 * t__2) - return K - - # This function is equivalent to the following kernel. - kernel = ExpSineSquared(length_scale=1.2, periodicity=1.1) - - x = numpy.array([[1, 2], [3, 4]], dtype=float) - - # Checks that the new function and the kernel are the same. - exp = kernel(x, None) - got = kernel_call_ynone(x) - - print("ExpSineSquared:") - print(exp) - print("numpy function:") - print(got) - - # Converts the numpy function into an ONNX function. - fct_onnx = translate_fct2onnx(kernel_call_ynone, cpl=True, - output_names=['Z']) - - # Calls the ONNX function to produce the ONNX algebric function. - # See below. - onnx_model = fct_onnx('X') - - # Calls the ONNX algebric function to produce the ONNX graph. - inputs = {'X': x.astype(numpy.float32)} - onnx_g = onnx_model.to_onnx(inputs, target_opset=12) - - # Creates a python runtime associated to the ONNX function. - oinf = OnnxInference(onnx_g) - - # Compute the prediction with the python runtime. - res = oinf.run(inputs) - print("ONNX output:") - print(res['Z']) - - # Displays the code of the algebric function. - print('-------------') - print("Function code:") - print('-------------') - print(translate_fct2onnx(kernel_call_ynone, output_names=['Z'])) - -The output of function -:func:`translate_fct2onnx -` -is not an :epkg:`ONNX` graph but the code of a function which -produces an :epkg:`ONNX` graph. That's why the function is called -twice. The first call compiles the code and a returns a new -:epkg:`python` function. The second call starts all over but -returns the code instead of its compiled version. diff --git a/_doc/sphinxdoc/source/tutorial/onnx_numpy.rst b/_doc/sphinxdoc/source/tutorial/onnx_numpy.rst new file mode 100644 index 000000000..d0a97360a --- /dev/null +++ b/_doc/sphinxdoc/source/tutorial/onnx_numpy.rst @@ -0,0 +1,191 @@ + +.. _l-numpy2onnx-tutorial: + +From numpy to ONNX +================== + +Converting a :epkg:`scikit-learn` pipeline is easy when +the pipeline contains only pieces implemented in :epkg:`scikit-learn` +associated to a converter in :epkg:`sklearn-onnx`. Outside this +scenario, the conversion usually requires to write custom code +either directly with :epkg:`onnx` operators, either by writing +a `custom converter +`_. +This tutorial addresses a specific scenario involving an instance of +:epkg:`FunctionTransformer`. + +.. 
contents:: + :local: + +Translation problem ++++++++++++++++++++ + +The following pipeline cannot be converted into :epkg:`ONNX` when using +the first examples of `sklearn-onnx tutorial`. + +.. runpython:: + :showcode: + :warningout: DeprecationWarning + + import numpy + from sklearn.pipeline import make_pipeline + from sklearn.preprocessing import FunctionTransformer, StandardScaler + from skl2onnx import to_onnx + + log_scale_transformer = make_pipeline( + FunctionTransformer(numpy.log, validate=False), + StandardScaler()) + + X = numpy.random.random((5, 2)) + + log_scale_transformer.fit(X) + print(log_scale_transformer.transform(X)) + + # Conversion to ONNX + try: + onx = to_onnx(log_scale_transformer, X) + except RuntimeError as e: + print(e) + +The first step is a `FunctionTransformer` with a custom function +written with :epkg:`numpy` functions. The pipeline can be converted +only if the function given to this object as argument can be converted +into *ONNX*. Even if function :epkg:`numpy:log` does exist in ONNX specifications +(see `ONNX Log `_), +this problem is equivalent to a translation from a language, Python, +to another one, ONNX. + +Translating numpy to ONNX ++++++++++++++++++++++++++ + +.. index:: algebric function + +The first approach was to use module :epkg:`ast` to convert +a function into a syntax tree and then try to convert every node +into ONNX to obtain an equivalent ONNX graph. + +*mlprodict* implements function +:func:`translate_fct2onnx +` +which converts the code +of a function written with :epkg:`numpy` and :epkg:`scipy` +into an :epkg:`ONNX` graph. + +The kernel *ExpSineSquared* +is used by :epkg:`sklearn:gaussian_process:GaussianProcessRegressor` +and its conversion is required to convert the model. +The first step is to write a standalone function which +relies on :epkg:`scipy` or :epkg:`numpy` and which produces +the same results. The second step calls this function to +produces the :epkg:`ONNX` graph. + +.. runpython:: + :showcode: + :warningout: DeprecationWarning + :process: + :store_in_file: fct2onnx_expsine.py + + import numpy + from scipy.spatial.distance import squareform, pdist + from sklearn.gaussian_process.kernels import ExpSineSquared + from mlprodict.onnx_grammar import translate_fct2onnx + from mlprodict.onnx_grammar.onnx_translation import squareform_pdist, py_make_float_array + from mlprodict.onnxrt import OnnxInference + + # The function to convert into ONNX. + def kernel_call_ynone(X, length_scale=1.2, periodicity=1.1, pi=3.141592653589793): + + # squareform(pdist(X, ...)) in one function. + dists = squareform_pdist(X, metric='euclidean') + + # Function starting with 'py_' --> must not be converted into ONNX. + t_pi = py_make_float_array(pi) + t_periodicity = py_make_float_array(periodicity) + + # This operator must be converted into ONNX. + arg = dists / t_periodicity * t_pi + sin_of_arg = numpy.sin(arg) + + t_2 = py_make_float_array(2) + t__2 = py_make_float_array(-2) + + t_length_scale = py_make_float_array(length_scale) + + K = numpy.exp((sin_of_arg / t_length_scale) ** t_2 * t__2) + return K + + # This function is equivalent to the following kernel. + kernel = ExpSineSquared(length_scale=1.2, periodicity=1.1) + + x = numpy.array([[1, 2], [3, 4]], dtype=float) + + # Checks that the new function and the kernel are the same. + exp = kernel(x, None) + got = kernel_call_ynone(x) + + print("ExpSineSquared:") + print(exp) + print("numpy function:") + print(got) + + # Converts the numpy function into an ONNX function. 
+ fct_onnx = translate_fct2onnx(kernel_call_ynone, cpl=True, + output_names=['Z']) + + # Calls the ONNX function to produce the ONNX algebric function. + # See below. + onnx_model = fct_onnx('X') + + # Calls the ONNX algebric function to produce the ONNX graph. + inputs = {'X': x.astype(numpy.float32)} + onnx_g = onnx_model.to_onnx(inputs, target_opset=12) + + # Creates a python runtime associated to the ONNX function. + oinf = OnnxInference(onnx_g) + + # Computes the prediction with the python runtime. + res = oinf.run(inputs) + print("ONNX output:") + print(res['Z']) + + # Displays the code of the algebric function. + print('-------------') + print("Function code:") + print('-------------') + print(translate_fct2onnx(kernel_call_ynone, output_names=['Z'])) + +The output of function +:func:`translate_fct2onnx +` +is not an :epkg:`ONNX` graph but the code of a function which +produces an :epkg:`ONNX` graph. That's why the function is called +twice. The first call compiles the code and returns a new +:epkg:`python` function. The second call starts all over but +returns the code instead of its compiled version. + +This approach has two drawbacks. The first one is that not every function +can be converted into ONNX. That does not mean the algorithm could +not be implemented with ONNX operators. The second drawback is discrepancies. +They should be minimal but could still happen between the numpy and ONNX +implementations. + +Numpy API for ONNX +++++++++++++++++++ + +This approach fixes the two issues mentioned above. The goal is to write +a code using the same function as :epkg:`numpy` offers but +executed by an ONNX runtime. The full API is described at +:ref:`l-numpy-onnxpy` and introduced here. + +From ONNX to Python ++++++++++++++++++++ + +The Python Runtime can be optimized by generating +custom python code and dynamically compiling it. +:class:`OnnxInference ` +computes predictions based on an ONNX graph with a +python runtime or :epkg:`onnxruntime`. +Method :meth:`to_python +` +goes further by converting the ONNX graph into standalone +python code. diff --git a/mlprodict/asv_benchmark/create_asv.py b/mlprodict/asv_benchmark/create_asv.py index 8e381dd6e..9a58af681 100644 --- a/mlprodict/asv_benchmark/create_asv.py +++ b/mlprodict/asv_benchmark/create_asv.py @@ -126,6 +126,7 @@ def create_asv_benchmark( .. runpython:: :showcode: + :warningout: DeprecationWarning import pprint from mlprodict.asv_benchmark.create_asv import default_asv_conf diff --git a/mlprodict/grammar_sklearn/g_sklearn_main.py b/mlprodict/grammar_sklearn/g_sklearn_main.py index 36f32a0e4..8aff36206 100644 --- a/mlprodict/grammar_sklearn/g_sklearn_main.py +++ b/mlprodict/grammar_sklearn/g_sklearn_main.py @@ -28,6 +28,7 @@ def sklearn2graph(model, output_names=None, **kwargs): .. runpython:: :showcode: + :warningout: DeprecationWarning from sklearn.linear_model import LogisticRegression from sklearn.datasets import load_iris @@ -84,6 +85,7 @@ def sklearn2graph(model, output_names=None, **kwargs): .. runpython:: :showcode: + :warningout: DeprecationWarning from sklearn.linear_model import LogisticRegression from sklearn.datasets import load_iris diff --git a/mlprodict/onnx_conv/convert.py b/mlprodict/onnx_conv/convert.py index 0b2649706..de82c0d95 100644 --- a/mlprodict/onnx_conv/convert.py +++ b/mlprodict/onnx_conv/convert.py @@ -292,6 +292,7 @@ def to_onnx(model, X=None, name=None, initial_types=None, ..
runpython:: :showcode: + :warningout: DeprecationWarning from io import StringIO from textwrap import dedent diff --git a/mlprodict/onnx_conv/operator_converters/conv_lightgbm.py b/mlprodict/onnx_conv/operator_converters/conv_lightgbm.py index 694046755..8ba0e380f 100644 --- a/mlprodict/onnx_conv/operator_converters/conv_lightgbm.py +++ b/mlprodict/onnx_conv/operator_converters/conv_lightgbm.py @@ -415,6 +415,7 @@ def modify_tree_for_rule_in_set(gbm, use_float=False): # pylint: disable=R1710 .. runpython:: :showcode: + :warningout: DeprecationWarning import pprint from mlprodict.onnx_conv.operator_converters.conv_lightgbm import modify_tree_for_rule_in_set diff --git a/mlprodict/onnx_conv/validate_scenarios.py b/mlprodict/onnx_conv/validate_scenarios.py index 6d54420f8..9eb7ba3e2 100644 --- a/mlprodict/onnx_conv/validate_scenarios.py +++ b/mlprodict/onnx_conv/validate_scenarios.py @@ -12,6 +12,7 @@ def find_suitable_problem(model): .. runpython:: :showcode: + :warningout: DeprecationWarning :rst: from mlprodict.onnx_conv.validate_scenarios import find_suitable_problem @@ -59,6 +60,7 @@ def build_custom_scenarios(): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnx_conv.validate_scenarios import build_custom_scenarios import pprint diff --git a/mlprodict/onnx_grammar/node_visitor_translator.py b/mlprodict/onnx_grammar/node_visitor_translator.py index 65994a314..5f7c9d6f0 100644 --- a/mlprodict/onnx_grammar/node_visitor_translator.py +++ b/mlprodict/onnx_grammar/node_visitor_translator.py @@ -19,6 +19,7 @@ class CodeNodeVisitor(ast.NodeVisitor): .. runpython:: :showcode: + :warningout: DeprecationWarning :process: :store_in_file: fct2onnx1.py diff --git a/mlprodict/onnx_grammar/onnx_translation.py b/mlprodict/onnx_grammar/onnx_translation.py index f3673b4ba..d1dffa4b2 100644 --- a/mlprodict/onnx_grammar/onnx_translation.py +++ b/mlprodict/onnx_grammar/onnx_translation.py @@ -21,6 +21,7 @@ def py_make_float_array(cst, op_version=None): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnx_grammar.onnx_translation import py_make_float_array print(py_make_float_array(5.5)) @@ -161,6 +162,7 @@ def translate_fct2onnx(fct, context=None, cpl=False, .. runpython:: :showcode: + :warningout: DeprecationWarning :process: :store_in_file: fct2onnx2.py @@ -189,6 +191,7 @@ def trs(x, y): .. runpython:: :showcode: + :warningout: DeprecationWarning :process: :store_in_file: fct2onnx3.py diff --git a/mlprodict/onnxrt/doc/doc_helper.py b/mlprodict/onnxrt/doc/doc_helper.py index 3cb16d2ab..2d8d07aed 100644 --- a/mlprodict/onnxrt/doc/doc_helper.py +++ b/mlprodict/onnxrt/doc/doc_helper.py @@ -17,6 +17,7 @@ def type_mapping(name): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnxrt.doc.doc_helper import type_mapping import pprint @@ -335,6 +336,7 @@ def visual_rst_template(): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnxrt.doc.doc_helper import visual_rst_template print(visual_rst_template()) diff --git a/mlprodict/onnxrt/onnx2py_helper.py b/mlprodict/onnxrt/onnx2py_helper.py index 5ce243028..a82452fde 100644 --- a/mlprodict/onnxrt/onnx2py_helper.py +++ b/mlprodict/onnxrt/onnx2py_helper.py @@ -25,6 +25,7 @@ def to_bytes(val): .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from mlprodict.onnxrt.onnx2py_helper import to_bytes @@ -54,6 +55,7 @@ def from_bytes(b): .. 
runpython:: :showcode: + :warningout: DeprecationWarning import numpy from mlprodict.onnxrt.onnx2py_helper import to_bytes, from_bytes diff --git a/mlprodict/onnxrt/onnx_inference.py b/mlprodict/onnxrt/onnx_inference.py index 59a16487e..215b4389b 100644 --- a/mlprodict/onnxrt/onnx_inference.py +++ b/mlprodict/onnxrt/onnx_inference.py @@ -317,6 +317,7 @@ def to_sequence(self): .. runpython:: :showcode: + :warningout: DeprecationWarning import pprint import numpy @@ -523,6 +524,7 @@ def run(self, inputs, clean_right_away=False, .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from sklearn.linear_model import LinearRegression @@ -1020,6 +1022,7 @@ def _build_compile_run(self, debug=False): .. runpython:: :showcode: + :warningout: DeprecationWarning from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split diff --git a/mlprodict/onnxrt/onnx_inference_exports.py b/mlprodict/onnxrt/onnx_inference_exports.py index d47554f7a..1ab8271bc 100644 --- a/mlprodict/onnxrt/onnx_inference_exports.py +++ b/mlprodict/onnxrt/onnx_inference_exports.py @@ -58,6 +58,7 @@ def to_dot(self, recursive=False, prefix='', add_rt_shapes=False, .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from skl2onnx.algebra.onnx_ops import OnnxLinearRegressor @@ -262,6 +263,7 @@ def to_json(self, indent=2): .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from skl2onnx.algebra.onnx_ops import OnnxLinearRegressor @@ -394,6 +396,7 @@ def to_python(self, prefix="onnx_pyrt_", dest=None, inline=True): .. runpython:: :showcode: + :warningout: DeprecationWarning import numpy from skl2onnx.algebra.onnx_ops import OnnxAdd diff --git a/mlprodict/onnxrt/optim/onnx_helper.py b/mlprodict/onnxrt/optim/onnx_helper.py index b3bb6229d..f47f5d7da 100644 --- a/mlprodict/onnxrt/optim/onnx_helper.py +++ b/mlprodict/onnxrt/optim/onnx_helper.py @@ -23,6 +23,7 @@ def onnx_statistics(onnx_model, recursive=True, optim=True): .. runpython:: :showcode: + :warningout: DeprecationWarning import pprint from sklearn.linear_model import LogisticRegression diff --git a/mlprodict/onnxrt/optim/sklearn_helper.py b/mlprodict/onnxrt/optim/sklearn_helper.py index 287a10f65..ccc3f95a3 100644 --- a/mlprodict/onnxrt/optim/sklearn_helper.py +++ b/mlprodict/onnxrt/optim/sklearn_helper.py @@ -24,6 +24,7 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None): .. runpython:: :showcode: + :warningout: DeprecationWarning from sklearn.datasets import load_iris from sklearn.decomposition import PCA @@ -106,6 +107,7 @@ def enumerate_fitted_arrays(model): .. runpython:: :showcode: + :warningout: DeprecationWarning from sklearn.datasets import load_iris from sklearn.decomposition import PCA @@ -216,6 +218,7 @@ def inspect_sklearn_model(model, recursive=True): .. runpython:: :showcode: + :warningout: DeprecationWarning import pprint from sklearn.ensemble import RandomForestClassifier @@ -292,6 +295,7 @@ def set_n_jobs(model, params, n_jobs=None): .. runpython:: :showcode: + :warningout: DeprecationWarning import multiprocessing print(multiprocessing.cpu_count()) diff --git a/mlprodict/onnxrt/shape_object.py b/mlprodict/onnxrt/shape_object.py index 9c1601668..51d6c59b8 100644 --- a/mlprodict/onnxrt/shape_object.py +++ b/mlprodict/onnxrt/shape_object.py @@ -424,6 +424,7 @@ class ShapeObject(BaseDimensionShape): .. 
runpython:: :showcode: + :warningout: DeprecationWarning import numpy from mlprodict.onnxrt.shape_object import ShapeObject diff --git a/mlprodict/onnxrt/validate/data/__init__.py b/mlprodict/onnxrt/validate/data/__init__.py index 68432ec18..000e68b5f 100644 --- a/mlprodict/onnxrt/validate/data/__init__.py +++ b/mlprodict/onnxrt/validate/data/__init__.py @@ -14,6 +14,7 @@ def load_audit(): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnxrt.validate.data import load_audit df = load_audit() diff --git a/mlprodict/onnxrt/validate/validate.py b/mlprodict/onnxrt/validate/validate.py index ab6ac525a..b798ceabc 100644 --- a/mlprodict/onnxrt/validate/validate.py +++ b/mlprodict/onnxrt/validate/validate.py @@ -239,6 +239,7 @@ def enumerate_compatible_opset(model, opset_min=-1, opset_max=-1, # pylint: dis .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnxrt.validate.validate_helper import default_time_kwargs import pprint @@ -826,6 +827,7 @@ def enumerate_validated_operator_opsets(verbose=0, opset_min=-1, opset_max=-1, .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnxrt.validate.validate_helper import default_time_kwargs import pprint diff --git a/mlprodict/onnxrt/validate/validate_benchmark.py b/mlprodict/onnxrt/validate/validate_benchmark.py index 4f0a6cb33..707589292 100644 --- a/mlprodict/onnxrt/validate/validate_benchmark.py +++ b/mlprodict/onnxrt/validate/validate_benchmark.py @@ -80,6 +80,7 @@ def benchmark_fct(fct, X, time_limit=4, obs=None, node_time=False, .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnxrt.validate.validate_helper import default_time_kwargs import pprint diff --git a/mlprodict/onnxrt/validate/validate_helper.py b/mlprodict/onnxrt/validate/validate_helper.py index 8db8ccde3..ea1bd786d 100644 --- a/mlprodict/onnxrt/validate/validate_helper.py +++ b/mlprodict/onnxrt/validate/validate_helper.py @@ -49,6 +49,7 @@ def modules_list(): .. runpython:: :showcode: :rst: + :warningout: DeprecationWarning from mlprodict.onnxrt.validate.validate_helper import modules_list from pyquickhelper.pandashelper import df2rst @@ -306,6 +307,7 @@ def default_time_kwargs(): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnxrt.validate.validate_helper import default_time_kwargs import pprint @@ -385,6 +387,7 @@ def _multiply_time_kwargs(time_kwargs, time_kwargs_fact, inst): .. runpython:: :showcode: + :warningout: DeprecationWarning from pprint import pprint from sklearn.linear_model import LinearRegression diff --git a/mlprodict/onnxrt/validate/validate_problems.py b/mlprodict/onnxrt/validate/validate_problems.py index bd35c20b2..b24f181fa 100644 --- a/mlprodict/onnxrt/validate/validate_problems.py +++ b/mlprodict/onnxrt/validate/validate_problems.py @@ -624,6 +624,7 @@ def find_suitable_problem(model): .. runpython:: :showcode: + :warningout: DeprecationWarning :rst: from mlprodict.onnxrt.validate.validate import ( diff --git a/mlprodict/onnxrt/validate/validate_scenarios.py b/mlprodict/onnxrt/validate/validate_scenarios.py index 7a5a81f76..d35615f07 100644 --- a/mlprodict/onnxrt/validate/validate_scenarios.py +++ b/mlprodict/onnxrt/validate/validate_scenarios.py @@ -49,6 +49,7 @@ def build_custom_scenarios(): .. 
runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.onnxrt.validate.validate_scenarios import build_custom_scenarios import pprint diff --git a/mlprodict/tools/asv_options_helper.py b/mlprodict/tools/asv_options_helper.py index 083e76b47..178d8867c 100644 --- a/mlprodict/tools/asv_options_helper.py +++ b/mlprodict/tools/asv_options_helper.py @@ -19,6 +19,7 @@ def expand_onnx_options(model, optim): .. runpython:: :showcode: + :warningout: DeprecationWarning from sklearn.linear_model import LogisticRegression from mlprodict.tools.asv_options_helper import expand_onnx_options @@ -68,6 +69,7 @@ def benchmark_version(): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.tools.asv_options_helper import benchmark_version print(benchmark_version()) @@ -82,6 +84,7 @@ def ir_version(): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.tools.asv_options_helper import ir_version print(ir_version()) @@ -138,6 +141,7 @@ def get_ir_version_from_onnx(benchmark=True): .. runpython:: :showcode: + :warningout: DeprecationWarning from sklearn.linear_model import LinearRegression from sklearn.datasets import load_iris diff --git a/mlprodict/tools/filename_helper.py b/mlprodict/tools/filename_helper.py index 4c6fa5069..be54e35ad 100644 --- a/mlprodict/tools/filename_helper.py +++ b/mlprodict/tools/filename_helper.py @@ -13,6 +13,7 @@ def extract_information_from_filename(name): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.tools.filename_helper import extract_information_from_filename diff --git a/mlprodict/tools/model_info.py b/mlprodict/tools/model_info.py index 4afb529a2..89aad5dcf 100644 --- a/mlprodict/tools/model_info.py +++ b/mlprodict/tools/model_info.py @@ -177,6 +177,7 @@ def analyze_model(model, simplify=True): .. runpython:: :showcode: + :warningout: DeprecationWarning import pprint from sklearn.datasets import load_iris diff --git a/mlprodict/tools/speed_measure.py b/mlprodict/tools/speed_measure.py index a26eab29b..70c3e12da 100644 --- a/mlprodict/tools/speed_measure.py +++ b/mlprodict/tools/speed_measure.py @@ -20,6 +20,7 @@ def measure_time(stmt, context, repeat=10, number=50, div_by_number=False): .. runpython:: :showcode: + :warningout: DeprecationWarning from mlprodict.tools import measure_time from math import cos diff --git a/mlprodict/tools/zoo.py b/mlprodict/tools/zoo.py index 3c8eba43e..961f0a851 100644 --- a/mlprodict/tools/zoo.py +++ b/mlprodict/tools/zoo.py @@ -19,6 +19,7 @@ def short_list_zoo_models(): .. 
runpython:: :showcode: + :warningout: DeprecationWarning import pprint from mlprodict.tools.zoo import short_list_zoo_models From 70e7fef9efd1100ddc06c35b8f704f2b2643a2cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 24 Feb 2021 23:14:10 +0100 Subject: [PATCH 2/4] add a notebook --- _doc/notebooks/numpy_api_onnx.ipynb | 565 ++++++++++++++++++ _doc/sphinxdoc/source/tutorial/onnx_numpy.rst | 6 +- _doc/sphinxdoc/source/tutorial/optim.rst | 3 +- .../test_run_notebooks_onnx_numpy.py | 45 ++ 4 files changed, 617 insertions(+), 2 deletions(-) create mode 100644 _doc/notebooks/numpy_api_onnx.ipynb create mode 100644 _unittests/ut_documentation/test_run_notebooks_onnx_numpy.py diff --git a/_doc/notebooks/numpy_api_onnx.ipynb b/_doc/notebooks/numpy_api_onnx.ipynb new file mode 100644 index 000000000..82b74dde8 --- /dev/null +++ b/_doc/notebooks/numpy_api_onnx.ipynb @@ -0,0 +1,565 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Numpy API for ONNX\n", + "\n", + "This notebook shows how to write python functions similar functions as numpy offers and get a function which can be converted into ONNX." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
run previous cell, wait for 2 seconds
\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from jyquickhelper import add_notebook_menu\n", + "add_notebook_menu()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext mlprodict" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## A pipeline with FunctionTransformer" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "from sklearn.model_selection import train_test_split\n", + "data = load_iris()\n", + "X, y = data.data, data.target\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(steps=[('functiontransformer',\n", + " FunctionTransformer(func=)),\n", + " ('standardscaler', StandardScaler()),\n", + " ('logisticregression', LogisticRegression())])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy\n", + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.preprocessing import FunctionTransformer, StandardScaler\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "pipe = make_pipeline(\n", + " FunctionTransformer(numpy.log),\n", + " StandardScaler(),\n", + " LogisticRegression())\n", + "pipe.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's convert it into ONNX." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "FunctionTransformer is not supported unless the transform function is None (= identity). You may raise an issue at https://github.com/onnx/sklearn-onnx/issues.\n" + ] + } + ], + "source": [ + "from mlprodict.onnx_conv import to_onnx\n", + "try:\n", + " onx = to_onnx(pipe, X_train.astype(numpy.float32))\n", + "except RuntimeError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use ONNX instead of numpy\n", + "\n", + "The pipeline cannot be converter because the converter does not know how to convert the function (`numpy.log`) held by `FunctionTransformer` into ONNX. One way to avoid that is to replace it by a function `log` defined with *ONNX* operators and executed with an ONNX runtime." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(steps=[('functiontransformer',\n", + " FunctionTransformer(func=)),\n", + " ('standardscaler', StandardScaler()),\n", + " ('logisticregression', LogisticRegression())])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import mlprodict.npy.numpy_onnx_pyrt as npnxrt\n", + "\n", + "pipe = make_pipeline(\n", + " FunctionTransformer(npnxrt.log),\n", + " StandardScaler(),\n", + " LogisticRegression())\n", + "pipe.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "onx = to_onnx(pipe, X_train.astype(numpy.float32), rewrite_ops=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%onnxview onx" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The operator `Log` is belongs to the graph. There is some overhead by using this function on small matrices. The gap is much less on big matrices." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.9 \u00b5s \u00b1 94 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n" + ] + } + ], + "source": [ + "%timeit numpy.log(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "12.6 \u00b5s \u00b1 449 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n" + ] + } + ], + "source": [ + "%timeit npnxrt.log(X_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## More complex function\n", + "\n", + "What about more complex functions? It is a bit more complicated too. The previous syntax does not work." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(steps=[('functiontransformer',\n", + " FunctionTransformer(func=)),\n", + " ('standardscaler', StandardScaler()),\n", + " ('logisticregression', LogisticRegression())])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def custom_fct(x):\n", + " return npnxrt.log(x + 1)\n", + "\n", + "pipe = make_pipeline(\n", + " FunctionTransformer(custom_fct),\n", + " StandardScaler(),\n", + " LogisticRegression())\n", + "pipe.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "FunctionTransformer is not supported unless the transform function is of type wrapped with onnxnumpy.\n" + ] + } + ], + "source": [ + "try:\n", + " onx = to_onnx(pipe, X_train.astype(numpy.float32), rewrite_ops=True)\n", + "except TypeError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The syntax is different." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(steps=[('functiontransformer',\n", + " FunctionTransformer(func=)),\n", + " ('standardscaler', StandardScaler()),\n", + " ('logisticregression', LogisticRegression())])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from typing import Any\n", + "from mlprodict.npy import onnxnumpy_default, NDArray\n", + "import mlprodict.npy.numpy_onnx_impl as npnx\n", + "\n", + "@onnxnumpy_default\n", + "def custom_fct(x: NDArray[Any, numpy.float64]) -> NDArray[Any, numpy.float64]:\n", + " return npnx.log(x + numpy.float64(1))\n", + "\n", + "pipe = make_pipeline(\n", + " FunctionTransformer(custom_fct),\n", + " StandardScaler(),\n", + " LogisticRegression())\n", + "pipe.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Only 2-D tensor(s) can be input(s) not FloatTensorType(shape=[None]).\n" + ] + } + ], + "source": [ + "try:\n", + " onx = to_onnx(pipe, X_train.astype(numpy.float32), rewrite_ops=True)\n", + "except Exception as e:\n", + " print(e)\n", + "# still need improvment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's compare the time to *numpy*." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "def custom_numpy_fct(x):\n", + " return numpy.log(x + 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5.4 \u00b5s \u00b1 121 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n" + ] + } + ], + "source": [ + "%timeit custom_numpy_fct(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "17.6 \u00b5s \u00b1 715 ns per loop (mean \u00b1 std. dev. of 7 runs, 10000 loops each)\n" + ] + } + ], + "source": [ + "%timeit custom_fct(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/_doc/sphinxdoc/source/tutorial/onnx_numpy.rst b/_doc/sphinxdoc/source/tutorial/onnx_numpy.rst index d0a97360a..d875025ce 100644 --- a/_doc/sphinxdoc/source/tutorial/onnx_numpy.rst +++ b/_doc/sphinxdoc/source/tutorial/onnx_numpy.rst @@ -35,7 +35,7 @@ the first examples of `sklearn-onnx tutorial`. log_scale_transformer = make_pipeline( FunctionTransformer(numpy.log, validate=False), StandardScaler()) - + X = numpy.random.random((5, 2)) log_scale_transformer.fit(X) @@ -177,6 +177,10 @@ a code using the same function as :epkg:`numpy` offers but executed by an ONNX runtime. The full API is described at :ref:`l-numpy-onnxpy` and introduced here. 
+**Notebooks** + +* :ref:`numpyapionnxrst` + From ONNX to Python +++++++++++++++++++ diff --git a/_doc/sphinxdoc/source/tutorial/optim.rst b/_doc/sphinxdoc/source/tutorial/optim.rst index 70b319e4b..679e13a30 100644 --- a/_doc/sphinxdoc/source/tutorial/optim.rst +++ b/_doc/sphinxdoc/source/tutorial/optim.rst @@ -21,7 +21,8 @@ One way to optimize the converted model is to create dedicated operators such as the one for function :epkg:`cdist`. The first example shows how to convert a :epkg:`GaussianProcessRegressor` into -standard :epkg:`ONNX` (see also @see cl CDist). +standard :epkg:`ONNX` (see also :class:`CDist +`). .. gdot:: :script: DOT-SECTION diff --git a/_unittests/ut_documentation/test_run_notebooks_onnx_numpy.py b/_unittests/ut_documentation/test_run_notebooks_onnx_numpy.py new file mode 100644 index 000000000..8de1a7a09 --- /dev/null +++ b/_unittests/ut_documentation/test_run_notebooks_onnx_numpy.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +""" +@brief test log(time=15s) +""" +import os +import unittest +from onnxruntime import __version__ as ort_version +from sklearn.exceptions import ConvergenceWarning +try: + from sklearn.utils._testing import ignore_warnings +except ImportError: + from sklearn.utils.testing import ignore_warnings +from pyquickhelper.loghelper import fLOG +from pyquickhelper.texthelper.version_helper import compare_module_version +from pyquickhelper.ipythonhelper import test_notebook_execution_coverage +from pyquickhelper.pycode import ( + add_missing_development_version, ExtTestCase +) +from skl2onnx import __version__ as skl2onnx_version +import mlprodict + + +class TestNotebookNumpyOnnx(ExtTestCase): + + def setUp(self): + add_missing_development_version(["jyquickhelper"], __file__, hide=True) + + @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning)) + @unittest.skipIf(compare_module_version(ort_version, "0.4.0") <= 0, + reason="Node:Scan1 Field 'shape' of type is required but missing.") + def test_notebook_numpy_onnx(self): + fLOG( + __file__, + self._testMethodName, + OutputPrint=__name__ == "__main__") + + self.assertNotEmpty(mlprodict is not None) + folder = os.path.join(os.path.dirname(__file__), + "..", "..", "_doc", "notebooks") + test_notebook_execution_coverage(__file__, "numpy_api_onnx", folder, + this_module_name="mlprodict", fLOG=fLOG) + + +if __name__ == "__main__": + unittest.main() From 6135ac0e54fc0c09eaebe85f9ae8145aae91664d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 25 Feb 2021 18:46:56 +0100 Subject: [PATCH 3/4] support more scenarios about signatures --- _doc/notebooks/numpy_api_onnx.ipynb | 76 ++++++++++--------- .../ut_npy/test_function_transformer.py | 44 ++++++++++- _unittests/ut_npy/test_onnxpy.py | 22 ++++++ mlprodict/npy/__init__.py | 3 +- mlprodict/npy/onnx_numpy_annotation.py | 12 ++- mlprodict/npy/onnx_numpy_compiler.py | 8 +- .../function_transformer_converters.py | 8 +- 7 files changed, 127 insertions(+), 46 deletions(-) diff --git a/_doc/notebooks/numpy_api_onnx.ipynb b/_doc/notebooks/numpy_api_onnx.ipynb index 82b74dde8..6ca4fdf66 100644 --- a/_doc/notebooks/numpy_api_onnx.ipynb +++ b/_doc/notebooks/numpy_api_onnx.ipynb @@ -236,7 +236,7 @@ "source": [ "from mlprodict.onnx_conv import to_onnx\n", "try:\n", - " onx = to_onnx(pipe, X_train.astype(numpy.float32))\n", + " onx = to_onnx(pipe, X_train.astype(numpy.float64))\n", "except RuntimeError as e:\n", " print(e)" ] @@ -259,7 +259,7 @@ "data": { "text/plain": [ "Pipeline(steps=[('functiontransformer',\n", - " 
FunctionTransformer(func=)),\n", + " FunctionTransformer(func=)),\n", " ('standardscaler', StandardScaler()),\n", " ('logisticregression', LogisticRegression())])" ] @@ -285,7 +285,7 @@ "metadata": {}, "outputs": [], "source": [ - "onx = to_onnx(pipe, X_train.astype(numpy.float32), rewrite_ops=True)" + "onx = to_onnx(pipe, X_train.astype(numpy.float64), rewrite_ops=True)" ] }, { @@ -296,16 +296,16 @@ { "data": { "text/html": [ - "
\n", + "
\n", "" ], "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -333,7 +333,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "3.9 \u00b5s \u00b1 94 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n" + "4.3 \u00b5s \u00b1 295 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n" ] } ], @@ -350,7 +350,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "12.6 \u00b5s \u00b1 449 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n" + "14.4 \u00b5s \u00b1 2.33 \u00b5s per loop (mean \u00b1 std. dev. of 7 runs, 10000 loops each)\n" ] } ], @@ -376,7 +376,7 @@ "data": { "text/plain": [ "Pipeline(steps=[('functiontransformer',\n", - " FunctionTransformer(func=)),\n", + " FunctionTransformer(func=)),\n", " ('standardscaler', StandardScaler()),\n", " ('logisticregression', LogisticRegression())])" ] @@ -412,7 +412,7 @@ ], "source": [ "try:\n", - " onx = to_onnx(pipe, X_train.astype(numpy.float32), rewrite_ops=True)\n", + " onx = to_onnx(pipe, X_train.astype(numpy.float64), rewrite_ops=True)\n", "except TypeError as e:\n", " print(e)" ] @@ -435,7 +435,7 @@ "data": { "text/plain": [ "Pipeline(steps=[('functiontransformer',\n", - " FunctionTransformer(func=)),\n", + " FunctionTransformer(func=)),\n", " ('standardscaler', StandardScaler()),\n", " ('logisticregression', LogisticRegression())])" ] @@ -451,7 +451,7 @@ "import mlprodict.npy.numpy_onnx_impl as npnx\n", "\n", "@onnxnumpy_default\n", - "def custom_fct(x: NDArray[Any, numpy.float64]) -> NDArray[Any, numpy.float64]:\n", + "def custom_fct(x: NDArray[(None, None), numpy.float64]) -> NDArray[(None, None), numpy.float64]:\n", " return npnx.log(x + numpy.float64(1))\n", "\n", "pipe = make_pipeline(\n", @@ -467,19 +467,28 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Only 2-D tensor(s) can be input(s) not FloatTensorType(shape=[None]).\n" - ] + "data": { + "text/html": [ + "
\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "try:\n", - " onx = to_onnx(pipe, X_train.astype(numpy.float32), rewrite_ops=True)\n", - "except Exception as e:\n", - " print(e)\n", - "# still need improvment" + "onx = to_onnx(pipe, X_train.astype(numpy.float64), rewrite_ops=True)\n", + "%onnxview onx" ] }, { @@ -493,39 +502,32 @@ "cell_type": "code", "execution_count": 15, "metadata": {}, - "outputs": [], - "source": [ - "def custom_numpy_fct(x):\n", - " return numpy.log(x + 1)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "5.4 \u00b5s \u00b1 121 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n" + "5.68 \u00b5s \u00b1 156 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n" ] } ], "source": [ + "def custom_numpy_fct(x):\n", + " return numpy.log(x + numpy.float64(1))\n", + "\n", "%timeit custom_numpy_fct(X_train)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "17.6 \u00b5s \u00b1 715 ns per loop (mean \u00b1 std. dev. of 7 runs, 10000 loops each)\n" + "18.3 \u00b5s \u00b1 878 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n" ] } ], @@ -535,7 +537,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [] diff --git a/_unittests/ut_npy/test_function_transformer.py b/_unittests/ut_npy/test_function_transformer.py index caddd373c..653dd54a3 100644 --- a/_unittests/ut_npy/test_function_transformer.py +++ b/_unittests/ut_npy/test_function_transformer.py @@ -22,7 +22,21 @@ def custom_fct(x: NDArray[Any, numpy.float32], ) -> NDArray[Any, numpy.float32]: "onnx custom function" - return (nxnp.abs(x) + x) / numpy.float32(2) + return (nxnp.abs(x) + x) / numpy.float32(2.) 
+ + +@onnxnumpy_default +def custom_log(x: NDArray[(None, None), numpy.float32], + ) -> NDArray[(None, None), numpy.float32]: + "onnx custom log" + return nxnp.log(x) + + +@onnxnumpy_default +def custom_logn(x: NDArray[(None, ...), numpy.float32], + ) -> NDArray[(None, ...), numpy.float32]: + "onnx custom log n" + return nxnp.log(x) class TestOnnxFunctionTransformer(ExtTestCase): @@ -37,7 +51,7 @@ def setUp(self): self.assertIn('SklearnFunctionTransformer', res[0]) self.assertIn('SklearnFunctionTransformer', res[1]) - @ignore_warnings(DeprecationWarning) + @ignore_warnings((DeprecationWarning, RuntimeWarning)) def test_function_transformer(self): x = numpy.array([[6.1, -5], [3.5, -7.8]], dtype=numpy.float32) tr = FunctionTransformer(custom_fct) @@ -52,14 +66,14 @@ def test_function_transformer(self): y_onx = oinf.run({'X': x}) self.assertEqualArray(y_exp, y_onx['variable']) - @ignore_warnings(DeprecationWarning) + @ignore_warnings((DeprecationWarning, RuntimeWarning)) def test_function_transformer_numpy_log(self): x = numpy.array([[6.1, -5], [3.5, -7.8]], dtype=numpy.float32) tr = make_pipeline(FunctionTransformer(numpy.log), StandardScaler()) tr.fit(x) self.assertRaise(lambda: to_onnx(tr, x), TypeError) - @ignore_warnings(DeprecationWarning) + @ignore_warnings((DeprecationWarning, RuntimeWarning)) def test_function_transformer_nxnp_log(self): x = numpy.array([[6.1, 5], [3.5, 7.8]], dtype=numpy.float32) self.assertIsInstance(nxnpy.log(x), numpy.ndarray) @@ -71,6 +85,28 @@ def test_function_transformer_nxnp_log(self): y_onx = oinf.run({'X': x}) self.assertEqualArray(y_exp, y_onx['variable'], decimal=5) + @ignore_warnings((DeprecationWarning, RuntimeWarning)) + def test_function_transformer_custom_log(self): + x = numpy.array([[6.1, 5], [3.5, 7.8]], dtype=numpy.float32) + tr = make_pipeline(FunctionTransformer(custom_log), StandardScaler()) + tr.fit(x) + y_exp = tr.transform(x) + onnx_model = to_onnx(tr, x) + oinf = OnnxInference(onnx_model) + y_onx = oinf.run({'X': x}) + self.assertEqualArray(y_exp, y_onx['variable'], decimal=5) + + @ignore_warnings((DeprecationWarning, RuntimeWarning)) + def test_function_transformer_custom_logn(self): + x = numpy.array([[6.1, 5], [3.5, 7.8]], dtype=numpy.float32) + tr = make_pipeline(FunctionTransformer(custom_logn), StandardScaler()) + tr.fit(x) + y_exp = tr.transform(x) + onnx_model = to_onnx(tr, x) + oinf = OnnxInference(onnx_model) + y_onx = oinf.run({'X': x}) + self.assertEqualArray(y_exp, y_onx['variable'], decimal=5) + if __name__ == "__main__": unittest.main() diff --git a/_unittests/ut_npy/test_onnxpy.py b/_unittests/ut_npy/test_onnxpy.py index 59680dee9..84ee87139 100644 --- a/_unittests/ut_npy/test_onnxpy.py +++ b/_unittests/ut_npy/test_onnxpy.py @@ -19,6 +19,11 @@ def onnx_abs(x: NDArray[Any, numpy.float32], op_version=None) -> NDArray[Any, numpy.float32]: return OnnxAbs(x, op_version=op_version) + @staticmethod + def onnx_abs_shape(x: NDArray[(Any, Any), numpy.float32], + op_version=None) -> NDArray[(Any, Any), numpy.float32]: + return OnnxAbs(x, op_version=op_version) + def test_annotation(self): cl = ONC(TestOnnxPy.onnx_abs, op_version=12) ann = cl._parse_annotation(None, None) # pylint: disable=W0212 @@ -36,6 +41,23 @@ def test_annotation(self): self.assertIsInstance(inputs[0][1], FloatTensorType) self.assertIsInstance(outputs[0][1], FloatTensorType) + def test_annotation_shape(self): + cl = ONC(TestOnnxPy.onnx_abs_shape, op_version=12) + ann = cl._parse_annotation(None, None) # pylint: disable=W0212 + inputs, outputs, _ = ann + 
self.assertIsInstance(inputs, list) + self.assertIsInstance(outputs, list) + self.assertEqual(len(inputs), 1) + self.assertEqual(len(outputs), 1) + self.assertIsInstance(inputs[0], tuple) + self.assertIsInstance(outputs[0], tuple) + self.assertEqual(len(inputs[0]), 2) + self.assertEqual(len(outputs[0]), 2) + self.assertEqual(inputs[0][0], 'x') + self.assertEqual(outputs[0][0], 'y') + self.assertIsInstance(inputs[0][1], FloatTensorType) + self.assertIsInstance(outputs[0][1], FloatTensorType) + def test_wrong_runtime(self): self.assertRaise( lambda: ONC(TestOnnxPy.onnx_abs, op_version=12, diff --git a/mlprodict/npy/__init__.py b/mlprodict/npy/__init__.py index 7cac3a2a6..14acc19e4 100644 --- a/mlprodict/npy/__init__.py +++ b/mlprodict/npy/__init__.py @@ -6,6 +6,7 @@ .. versionadded:: 0.6 """ from .onnx_numpy_annotation import ( - NDArray, NDArraySameType, NDArraySameTypeSameShape) + NDArray, NDArraySameType, NDArraySameTypeSameShape, + Shape, DType) from .onnx_numpy_compiler import OnnxNumpyCompiler from .onnx_numpy_wrapper import onnxnumpy, onnxnumpy_default, onnxnumpy_np diff --git a/mlprodict/npy/onnx_numpy_annotation.py b/mlprodict/npy/onnx_numpy_annotation.py index c9e7c1b5e..f8339dbf6 100644 --- a/mlprodict/npy/onnx_numpy_annotation.py +++ b/mlprodict/npy/onnx_numpy_annotation.py @@ -40,7 +40,17 @@ class NDArray(numpy.ndarray, Generic[Shape, DType]): .. versionadded:: 0.6 """ - pass + class ShapeType: + "Stores shape information." + + def __init__(self, params): + self.__args__ = params + + def __class_getitem__(cls, params): + "Overwrites this method." + if not isinstance(params, tuple): + params = (params,) + return NDArray.ShapeType(params) class _NDArrayAlias: diff --git a/mlprodict/npy/onnx_numpy_compiler.py b/mlprodict/npy/onnx_numpy_compiler.py index e10d22898..eb39ced74 100644 --- a/mlprodict/npy/onnx_numpy_compiler.py +++ b/mlprodict/npy/onnx_numpy_compiler.py @@ -130,8 +130,14 @@ def __repr__(self): "fct_ and onnx_ are empty.") def _to_onnx_shape(self, shape): - if shape is Any: + if shape is Any or shape is Ellipsis: shape = None + elif isinstance(shape, tuple): + shape = [None if s is Any or s is Ellipsis else s + for s in shape] + else: + raise RuntimeError( + "Unexpected annotated shape %r." 
% shape) return shape def _to_onnx_dtype(self, dtype, shape): diff --git a/mlprodict/onnx_conv/sklconv/function_transformer_converters.py b/mlprodict/onnx_conv/sklconv/function_transformer_converters.py index 9b8d42eee..1afc3ebd4 100644 --- a/mlprodict/onnx_conv/sklconv/function_transformer_converters.py +++ b/mlprodict/onnx_conv/sklconv/function_transformer_converters.py @@ -42,8 +42,12 @@ def new_calculate_sklearn_function_transformer_output_shapes(operator): N = operator.inputs[0].type.shape[0] dims = [N] out = outputs[0] - if hasattr(out, 'dims'): - dims.extend(out.dims[1:]) + try: + extra_dims = out.type.tensor_type.shape.dim + except AttributeError: + extra_dims = None + if extra_dims is not None: + dims.extend(extra_dims[1:]) operator.outputs[0].type = input_type(dims) return From 80f9ccf723d8bbc3896d0209198d5fffd3c6e9e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 26 Feb 2021 00:09:37 +0100 Subject: [PATCH 4/4] Update test_onnxrt_python_runtime_.py --- _unittests/ut_onnxrt/test_onnxrt_python_runtime_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py index 14f062fab..010158138 100644 --- a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py +++ b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py @@ -1724,7 +1724,7 @@ def test_onnxt_runtime_max_pool_1d_default_64(self): {'X': X}, target_opset=get_opset_number_from_onnx()) oinf = OnnxInference(model_def) got = oinf.run({'X': X}) - self.assertEqualArray(exp, got['Y']) + self.assertEqualArray(exp, got['Y'], decimal=5) self.assertEqual(got['Y'].dtype, X.dtype) self.assertEqual(got['Y'].dtype, numpy.float64)
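A minimal, self-contained sketch of the workflow the new *Numpy API for ONNX* tutorial section and the notebook describe, assuming only pieces added in this series (`onnxnumpy_default`, `NDArray`, `mlprodict.npy.numpy_onnx_impl`, and `to_onnx` with `rewrite_ops=True`); the function name `custom_log1` and the random input are illustrative, not part of the patch::

    import numpy
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import FunctionTransformer, StandardScaler
    from mlprodict.npy import onnxnumpy_default, NDArray
    import mlprodict.npy.numpy_onnx_impl as npnx
    from mlprodict.onnx_conv import to_onnx
    from mlprodict.onnxrt import OnnxInference


    @onnxnumpy_default
    def custom_log1(x: NDArray[(None, None), numpy.float64]
                    ) -> NDArray[(None, None), numpy.float64]:
        "log(x + 1) written with the numpy API for ONNX"
        return npnx.log(x + numpy.float64(1))


    # The decorated function still behaves like a numpy function.
    X = numpy.random.random((5, 2))
    print(custom_log1(X))

    # FunctionTransformer stays convertible because the wrapper carries
    # an ONNX graph equivalent to the python code it replaces.
    pipe = make_pipeline(FunctionTransformer(custom_log1), StandardScaler())
    pipe.fit(X)
    onx = to_onnx(pipe, X, rewrite_ops=True)
    print(OnnxInference(onx).run({'X': X}))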