From 25ba562aa46c428ea01801dea2c5513077c39de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 24 Aug 2021 01:34:53 +0200 Subject: [PATCH 01/13] Implements a estimator speeding up the inference using ONNX --- _doc/sphinxdoc/source/api/sklapi.rst | 6 + .../test_onnx_speedup_transformer.py | 42 +++++ mlprodict/sklapi/__init__.py | 5 +- mlprodict/sklapi/onnx_speed_up.py | 150 ++++++++++++++++++ 4 files changed, 201 insertions(+), 2 deletions(-) create mode 100644 _unittests/ut_sklapi/test_onnx_speedup_transformer.py create mode 100644 mlprodict/sklapi/onnx_speed_up.py diff --git a/_doc/sphinxdoc/source/api/sklapi.rst b/_doc/sphinxdoc/source/api/sklapi.rst index 8ce2480a2..17531e8c8 100644 --- a/_doc/sphinxdoc/source/api/sklapi.rst +++ b/_doc/sphinxdoc/source/api/sklapi.rst @@ -9,6 +9,12 @@ pipeline. .. contents:: :local: +OnnxSpeedUpTransformer +++++++++++++++++++++++ + +.. autosignature:: mlprodict.sklapi.onnx_transformer.OnnxSpeedUpTransformer + :members: + OnnxTransformer +++++++++++++++ diff --git a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py new file mode 100644 index 000000000..c255a4d29 --- /dev/null +++ b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py @@ -0,0 +1,42 @@ +""" +@brief test log(time=4s) +""" +import unittest +from logging import getLogger +import numpy as np +import pandas +from sklearn.pipeline import make_pipeline +from sklearn.decomposition import PCA +from sklearn.datasets import load_iris +from pyquickhelper.pycode import ExtTestCase +from mlprodict.sklapi import OnnxSpeedUpTransformer +from mlprodict.tools import get_opset_number_from_onnx + + +class TestOnnxSpeedUpTransformer(ExtTestCase): + + def setUp(self): + logger = getLogger('skl2onnx') + logger.disabled = True + + def opset(self): + return get_opset_number_from_onnx() + + def test_speedp_transform32(self): + data = load_iris() + X, _ = data.data, data.target + spd = 
OnnxSpeedUpTransformer(PCA(), target_opset=self.opset()) + spd.fit(X) + spd.assert_almost_equal(X, decimal=5) + + def test_speedp_transform64(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X) + spd.assert_almost_equal(X) + + +if __name__ == '__main__': + unittest.main() diff --git a/mlprodict/sklapi/__init__.py b/mlprodict/sklapi/__init__.py index c94311d5c..6b3564b36 100644 --- a/mlprodict/sklapi/__init__.py +++ b/mlprodict/sklapi/__init__.py @@ -1,7 +1,8 @@ # -*- encoding: utf-8 -*- """ @file -@brief Shortcut to *onnxrt*. +@brief Shortcut to *sklapi*. """ -from .onnx_transformer import OnnxTransformer from .onnx_pipeline import OnnxPipeline +from .onnx_transformer import OnnxTransformer +from .onnx_speed_up import OnnxSpeedUpTransformer diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py new file mode 100644 index 000000000..0d9cf6423 --- /dev/null +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -0,0 +1,150 @@ +# coding: utf-8 +""" +@file +@brief Speeding up :epkg:`scikit-learn` with :epkg:`onnx`. + +.. versionadded:: 0.7 +""" +import numpy +from numpy.testing import assert_almost_equal +from sklearn.base import BaseEstimator, TransformerMixin, clone +from ..onnx_conv import to_onnx +from .onnx_transformer import OnnxTransformer + + +class _OnnxPipelineStepSpeedUp: + """ + Speeds up inference by replacing methods *transform* or + *predict* by a runtime for :epkg:`ONNX`. + + :param estimator: estimator to train + :param enforce_float32: boolean + :epkg:`onnxruntime` only supports *float32*, + :epkg:`scikit-learn` usually uses double floats, this parameter + ensures that every array of double floats is converted into + single floats + :param runtime: string, defined the runtime to use + as described in @see cl OnnxInference. 
+ :param target_opset: targetted ONNX opset + :param conv_options: options for covnersions, see @see fn to_onnx + + .. versionadded:: 0.7 + """ + + def __init__(self, estimator, runtime='python', enforce_float32=True, + target_opset=None, conv_options=None): + self.estimator = estimator + self.runtime = runtime + self.enforce_float32 = enforce_float32 + self.target_opset = target_opset + self.conv_options = conv_options + + def _to_onnx(self, fitted_estimator, inputs): + """ + Converts an estimator inference into :epkg:`ONNX`. + + :param estimator: any estimator following :epkg:`scikit-learn` API + :param inputs: example of inputs + :return: ONNX + """ + opts = self.conv_options or {} + return to_onnx( + self.estimator_, inputs, target_opset=self.target_opset, + **opts) + + def _build_onnx_runtime(self, onx): + """ + Returns an instance of @see cl OnnxTransformer which + executes the ONNX graph. + + :param onx: ONNX graph + :param runtime: runtime type (see @see cl OnnxInference) + :return: instance of @see cl OnnxInference + """ + tr = OnnxTransformer( + onx, runtime=self.runtime, + enforce_float32=self.enforce_float32) + tr.fit() + return tr + + def fit(self, X, *args, **kwargs): + """ + Fits the estimator, converts to ONNX. + + :param X: features + :param args: other arguments + :param kwargs: fitting options + """ + if not hasattr(self, 'estimator_'): + self.estimator_ = clone(self.estimator) + self.estimator_.fit(X, *args, **kwargs) + if self.enforce_float32: + X = X.astype(numpy.float32) + self.onnx_ = self._to_onnx(self.estimator_, X).SerializeToString() + self.rt_ = self._build_onnx_runtime(self.onnx_) + return self + + +class OnnxSpeedUpTransformer(BaseEstimator, TransformerMixin, + _OnnxPipelineStepSpeedUp): + """ + Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`. 
+ + :param estimator: estimator to train + :param enforce_float32: boolean + :epkg:`onnxruntime` only supports *float32*, + :epkg:`scikit-learn` usually uses double floats, this parameter + ensures that every array of double floats is converted into + single floats + :param runtime: string, defined the runtime to use + as described in @see cl OnnxInference. + :param target_opset: targetted ONNX opset + :param conv_options: conversion options, see @see fn to_onnx + + .. versionadded:: 0.7 + """ + + def __init__(self, estimator, runtime='python', enforce_float32=True, + target_opset=None, conv_options=None): + BaseEstimator.__init__(self) + _OnnxPipelineStepSpeedUp.__init__( + self, estimator, runtime=runtime, enforce_float32=enforce_float32, + target_opset=target_opset, conv_options=conv_options) + + def fit(self, X, y=None, sample_weight=None): + """ + Trains based estimator. + """ + if sample_weight is None: + _OnnxPipelineStepSpeedUp.fit(self, X, y) + else: + _OnnxPipelineStepSpeedUp.fit( + self, X, y, sample_weight=sample_weight) + return self + + def transform(self, X): + """ + Transforms with *ONNX*. + + :param X: features + :return: transformed features + """ + return self.rt_.transform(X) + + def raw_transform(self, X): + """ + Transforms with *scikit-learn*. + + :param X: features + :return: transformed features + """ + return self.estimator_.transform(X) + + def assert_almost_equal(self, X, **kwargs): + """ + Checks that ONNX and scikit-learn produces the same + outputs. 
+ """ + expected = self.raw_transform(X) + got = self.transform(X) + assert_almost_equal(expected, got, **kwargs) From 0bd6338430b963435f07a39b5259cb65cc81620e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 25 Aug 2021 01:52:34 +0200 Subject: [PATCH 02/13] checks pickle and onnx --- .../test_onnx_speedup_transformer.py | 54 +++++++++++++++++-- mlprodict/onnx_conv/convert.py | 4 +- mlprodict/onnx_tools/onnx2py_helper.py | 13 +++++ mlprodict/sklapi/onnx_speed_up.py | 54 +++++++++++++++++-- mlprodict/sklapi/onnx_transformer.py | 13 +++-- 5 files changed, 122 insertions(+), 16 deletions(-) diff --git a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py index c255a4d29..b7308393f 100644 --- a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py +++ b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py @@ -1,16 +1,20 @@ """ @brief test log(time=4s) """ +from io import BytesIO +import pickle import unittest from logging import getLogger -import numpy as np -import pandas -from sklearn.pipeline import make_pipeline +# import numpy as np +# import pandas +# from sklearn.pipeline import make_pipeline from sklearn.decomposition import PCA from sklearn.datasets import load_iris from pyquickhelper.pycode import ExtTestCase from mlprodict.sklapi import OnnxSpeedUpTransformer from mlprodict.tools import get_opset_number_from_onnx +from mlprodict.onnx_conv import to_onnx +from mlprodict.onnxrt import OnnxInference class TestOnnxSpeedUpTransformer(ExtTestCase): @@ -22,14 +26,14 @@ def setUp(self): def opset(self): return get_opset_number_from_onnx() - def test_speedp_transform32(self): + def test_speedup_transform32(self): data = load_iris() X, _ = data.data, data.target spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset()) spd.fit(X) spd.assert_almost_equal(X, decimal=5) - def test_speedp_transform64(self): + def test_speedup_transform64(self): data = load_iris() X, _ = data.data, 
data.target spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), @@ -37,6 +41,46 @@ def test_speedp_transform64(self): spd.fit(X) spd.assert_almost_equal(X) + def test_speedup_transform64_op_version(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X) + opset = spd.op_version + self.assertGreater(self.opset(), opset['']) + + def test_speedup_transform64_pickle(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.transform(X) + got = spd2.transform(X) + self.assertEqualArray(expected, got) + expected = spd.raw_transform(X) + got = spd2.raw_transform(X) + self.assertEqualArray(expected, got) + + def test__speedup_transform64_onnx(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X) + expected = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + if __name__ == '__main__': unittest.main() diff --git a/mlprodict/onnx_conv/convert.py b/mlprodict/onnx_conv/convert.py index ec7416231..9aa9ca40d 100644 --- a/mlprodict/onnx_conv/convert.py +++ b/mlprodict/onnx_conv/convert.py @@ -356,8 +356,8 @@ def to_onnx(model, X=None, name=None, initial_types=None, type(model))) return model.to_onnx( X=X, name=name, options=options, black_op=black_op, - white_op=white_op, final_types=final_types, - verbose=verbose) + white_op=white_op, final_types=final_types) + # verbose=verbose) if rewrite_ops: old_values, old_shapes = register_rewritten_operators() diff --git a/mlprodict/onnx_tools/onnx2py_helper.py 
b/mlprodict/onnx_tools/onnx2py_helper.py index c6576c866..4c023254e 100644 --- a/mlprodict/onnx_tools/onnx2py_helper.py +++ b/mlprodict/onnx_tools/onnx2py_helper.py @@ -380,6 +380,19 @@ def _var_as_dict(var): "Unable to guess which object it is.\n{}\n---".format(var)) +def onnx_model_opsets(onnx_model): + """ + Extracts opsets in a dictionary. + + :param onnx_model: ONNX graph + :return: dictionary `{domain: version}` + """ + res = {} + for oimp in onnx_model.opset_import: + res[oimp.domain] = oimp.version + return res + + def _type_to_string(dtype): """ Converts a type into a readable string. diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py index 0d9cf6423..729241670 100644 --- a/mlprodict/sklapi/onnx_speed_up.py +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -8,11 +8,12 @@ import numpy from numpy.testing import assert_almost_equal from sklearn.base import BaseEstimator, TransformerMixin, clone +from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin from ..onnx_conv import to_onnx from .onnx_transformer import OnnxTransformer -class _OnnxPipelineStepSpeedUp: +class _OnnxPipelineStepSpeedUp(OnnxOperatorMixin): """ Speeds up inference by replacing methods *transform* or *predict* by a runtime for :epkg:`ONNX`. @@ -39,6 +40,10 @@ def __init__(self, estimator, runtime='python', enforce_float32=True, self.target_opset = target_opset self.conv_options = conv_options + def _check_fitted_(self): + if not hasattr(self, 'onnxrt_'): + raise AttributeError("Object must be be fit.") + def _to_onnx(self, fitted_estimator, inputs): """ Converts an estimator inference into :epkg:`ONNX`. 
@@ -81,9 +86,50 @@ def fit(self, X, *args, **kwargs): if self.enforce_float32: X = X.astype(numpy.float32) self.onnx_ = self._to_onnx(self.estimator_, X).SerializeToString() - self.rt_ = self._build_onnx_runtime(self.onnx_) + self.onnxrt_ = self._build_onnx_runtime(self.onnx_) return self + @property + def op_version(self): + """ + Returns the opset version. + """ + self._check_fitted_() + return self.onnxrt_.op_version + + def onnx_parser(self, scope=None, inputs=None): + """ + Returns a parser for this model. + """ + self._check_fitted_() + return self.onnxrt_.onnx_parser(scope, inputs) + + def onnx_shape_calculator(self): + """ + Returns a shape calculator for this transform. + """ + self._check_fitted_() + calc = self.onnxrt_.onnx_shape_calculator() + + def shape_calculator(operator): + return calc(operator) + + return shape_calculator + + def onnx_converter(self): + """ + Returns a converter for this transform. + """ + self._check_fitted_() + conv = self.onnxrt_.onnx_converter() + + def converter(scope, operator, container): + op = operator.raw_operator + onnx_model = op.onnxrt_.onnxrt_.obj + conv(scope, operator, container, onnx_model=onnx_model) + + return converter + class OnnxSpeedUpTransformer(BaseEstimator, TransformerMixin, _OnnxPipelineStepSpeedUp): @@ -111,7 +157,7 @@ def __init__(self, estimator, runtime='python', enforce_float32=True, self, estimator, runtime=runtime, enforce_float32=enforce_float32, target_opset=target_opset, conv_options=conv_options) - def fit(self, X, y=None, sample_weight=None): + def fit(self, X, y=None, sample_weight=None): # pylint: disable=W0221 """ Trains based estimator. 
""" @@ -129,7 +175,7 @@ def transform(self, X): :param X: features :return: transformed features """ - return self.rt_.transform(X) + return self.onnxrt_.transform(X) def raw_transform(self, X): """ diff --git a/mlprodict/sklapi/onnx_transformer.py b/mlprodict/sklapi/onnx_transformer.py index c84d0f255..359723e10 100644 --- a/mlprodict/sklapi/onnx_transformer.py +++ b/mlprodict/sklapi/onnx_transformer.py @@ -11,12 +11,13 @@ from sklearn.base import BaseEstimator, TransformerMixin from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin from skl2onnx.proto import TensorProto -from skl2onnx.helpers.onnx_helper import load_onnx_model, enumerate_model_node_outputs +from skl2onnx.helpers.onnx_helper import ( + load_onnx_model, enumerate_model_node_outputs) from skl2onnx.helpers.onnx_helper import select_model_inputs_outputs from skl2onnx.common.data_types import ( FloatTensorType, DoubleTensorType, Int64TensorType) -from ..onnx_tools.onnx2py_helper import _var_as_dict +from ..onnx_tools.onnx2py_helper import _var_as_dict, onnx_model_opsets from ..onnxrt import OnnxInference @@ -83,6 +84,7 @@ def fit(self, X=None, y=None, **fit_params): """ from ..onnx_tools.optim.onnx_helper import change_input_first_dimension onx = onnx.load(BytesIO(self.onnx_bytes)) + self.op_version = onnx_model_opsets(onx) output_names = set( o.name for o in onx.graph.output) # pylint: disable=E1101 @@ -299,10 +301,11 @@ def clean_operator_name(name, scope): def clean_initializer_name(name, scope): return scope.get_unique_variable_name(name) - def converter(scope, operator, container): + def converter(scope, operator, container, onnx_model=None): op = operator.raw_operator - graph = op.onnxrt_.obj.graph + onx = onnx_model or op.onnxrt_.obj + graph = onx.graph name_mapping = {} node_mapping = {} for node in graph.node: @@ -350,7 +353,7 @@ def converter(scope, operator, container): container.initializers.append(tensor) # opset - for oimp in op.onnxrt_.obj.opset_import: + for oimp in 
onx.opset_import: container.node_domain_version_pair_sets.add( (oimp.domain, oimp.version)) From f5dc7ff4be42feb6c0002911573521679c2670e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 27 Aug 2021 12:27:43 +0200 Subject: [PATCH 03/13] refactoring --- .gitignore | 5 + _doc/examples/plot_time_tree_ensemble.py | 434 +++++++++--------- ...on.py => test_onnx_inference_to_python.py} | 0 .../test_rt_valid_model_grid_search_cv.py | 3 - _unittests/ut_tools/data/debug.onnx | Bin 0 -> 12938 bytes _unittests/ut_tools/test_export_onnx.py | 10 + mlprodict/onnx_tools/onnx_export.py | 4 + mlprodict/onnxrt/onnx_inference.py | 1 + mlprodict/onnxrt/onnx_inference_exports.py | 10 + 9 files changed, 248 insertions(+), 219 deletions(-) rename _unittests/ut_onnxrt/{test_to_python.py => test_onnx_inference_to_python.py} (100%) create mode 100644 _unittests/ut_tools/data/debug.onnx diff --git a/.gitignore b/.gitignore index c12278227..1659a0de0 100644 --- a/.gitignore +++ b/.gitignore @@ -314,3 +314,8 @@ _unittests/ut_tools/**/*.pb _unittests/ut_onnxrt/onnxruntime_profile*.json _doc/notebooks/onnxruntime_profile*.json _doc/sphinxdoc/source/phdoc_static/embed*.js +cache-*.pickle +*/*/*.pb +onnxruntime*.json +*net*.tar* +_unittests/unittests.out diff --git a/_doc/examples/plot_time_tree_ensemble.py b/_doc/examples/plot_time_tree_ensemble.py index ad75edb32..728317282 100644 --- a/_doc/examples/plot_time_tree_ensemble.py +++ b/_doc/examples/plot_time_tree_ensemble.py @@ -1,216 +1,218 @@ -""" -.. _l-example-tree-ensemble: - -Benchmark Random Forests, Tree Ensemble -======================================= - -The following script benchmarks different libraries -implementing random forests and boosting trees. 
-This benchmark can be replicated by installing the -following packages: - -:: - - python -m virtualenv env - cd env - pip install -i https://test.pypi.org/simple/ ort-nightly - pip install git+https://github.com/microsoft/onnxconverter-common.git@jenkins - pip install git+https://https://github.com/xadupre/sklearn-onnx.git@jenkins - pip install mlprodict matplotlib scikit-learn pandas threadpoolctl - pip install mlprodict lightgbm xgboost jinja2 - -.. contents:: - :local: - -Import -++++++ -""" -import os -import pickle -from pprint import pprint -import numpy -import pandas -import matplotlib.pyplot as plt -from xgboost import XGBClassifier -from lightgbm import LGBMClassifier -from onnxruntime import InferenceSession -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.ensemble import RandomForestClassifier -from sklearn.datasets import make_classification -from skl2onnx import to_onnx -from mlprodict.onnx_conv import register_converters -from mlprodict.onnxrt.validate.validate_helper import measure_time -from mlprodict.onnxrt import OnnxInference - -############################# -# Registers new converters for :epkg:`sklearn-onnx`. 
-register_converters() - -######################################### -# Problem -# +++++++ - -max_depth = 7 -n_classes = 5 -n_estimators = 100 -n_features = 10 -REPEAT = 3 -NUMBER = 1 -train, test = 2000, 10000 - -print('dataset') -X_, y_ = make_classification(n_samples=train + test, n_features=n_features, - n_classes=n_classes, n_informative=n_features - 3) -X_ = X_.astype(numpy.float32) -y_ = y_.astype(numpy.int64) -X_train, X_test = X_[:train], X_[train:] -y_train, y_test = y_[:train], y_[train:] - -compilation = [] - - -def train_cache(model, X_train, y_train, max_depth, n_estimators, n_classes): - name = "cache-{}-N{}-f{}-d{}-e{}-cl{}.pkl".format( - model.__class__.__name__, X_train.shape[0], X_train.shape[1], - max_depth, n_estimators, n_classes) - if os.path.exists(name): - with open(name, 'rb') as f: - return pickle.load(f) - else: - model.fit(X_train, y_train) - with open(name, 'wb') as f: - pickle.dump(model, f) - return model - - -######################################## -# RandomForestClassifier -# ++++++++++++++++++++++ - -rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth) -print('train') -rf = train_cache(rf, X_train, y_train, max_depth, n_estimators, n_classes) - -res = measure_time(rf.predict_proba, X_test[:10], - repeat=REPEAT, number=NUMBER, - div_by_number=True, first_run=True) -res['model'], res['runtime'] = rf.__class__.__name__, 'INNER' -pprint(res) - -######################################## -# ONNX -# ++++ - - -def measure_onnx_runtime(model, xt, repeat=REPEAT, number=NUMBER, - verbose=True): - if verbose: - print(model.__class__.__name__) - - res = measure_time(model.predict_proba, xt, - repeat=repeat, number=number, - div_by_number=True, first_run=True) - res['model'], res['runtime'] = model.__class__.__name__, 'INNER' - res['N'] = X_test.shape[0] - res["max_depth"] = max_depth - res["n_estimators"] = n_estimators - res["n_features"] = n_features - if verbose: - pprint(res) - yield res - - onx = to_onnx(model, 
X_train[:1], options={id(model): {'zipmap': False}}) - - oinf = OnnxInference(onx) - res = measure_time(lambda x: oinf.run({'X': x}), xt, - repeat=repeat, number=number, - div_by_number=True, first_run=True) - res['model'], res['runtime'] = model.__class__.__name__, 'NPY/C++' - res['N'] = X_test.shape[0] - res['size'] = len(onx.SerializeToString()) - res["max_depth"] = max_depth - res["n_estimators"] = n_estimators - res["n_features"] = n_features - if verbose: - pprint(res) - yield res - - sess = InferenceSession(onx.SerializeToString()) - res = measure_time(lambda x: sess.run(None, {'X': x}), xt, - repeat=repeat, number=number, - div_by_number=True, first_run=True) - res['model'], res['runtime'] = model.__class__.__name__, 'ORT' - res['N'] = X_test.shape[0] - res['size'] = len(onx.SerializeToString()) - res["max_depth"] = max_depth - res["n_estimators"] = n_estimators - res["n_features"] = n_features - if verbose: - pprint(res) - yield res - - -compilation.extend(list(measure_onnx_runtime(rf, X_test))) - - -######################################## -# HistGradientBoostingClassifier -# ++++++++++++++++++++++++++++++ - -hist = HistGradientBoostingClassifier( - max_iter=n_estimators, max_depth=max_depth) -print('train') -hist = train_cache(hist, X_train, y_train, max_depth, n_estimators, n_classes) - -compilation.extend(list(measure_onnx_runtime(hist, X_test))) - -######################################## -# LightGBM -# ++++++++ - -lgb = LGBMClassifier(n_estimators=n_estimators, max_depth=max_depth) -print('train') -lgb = train_cache(lgb, X_train, y_train, max_depth, n_estimators, n_classes) - -compilation.extend(list(measure_onnx_runtime(lgb, X_test))) - -######################################## -# XGBoost -# +++++++ - -xgb = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth) -print('train') -xgb = train_cache(xgb, X_train, y_train, max_depth, n_estimators, n_classes) - -compilation.extend(list(measure_onnx_runtime(xgb, X_test))) - 
-############################################## -# Summary -# +++++++ -# -# All data -name = 'plot_time_tree_ensemble' -df = pandas.DataFrame(compilation) -df.to_csv('%s.csv' % name, index=False) -df.to_excel('%s.xlsx' % name, index=False) -df - -######################################### -# Time per model and runtime. -piv = df.pivot("model", "runtime", "average") -piv - -########################################### -# Graphs. -ax = piv.T.plot(kind="bar") -ax.set_title("Computation time ratio for %d observations and %d features\n" - "lower is better for onnx runtimes" % X_test.shape) -plt.savefig('%s.png' % name) - -########################################### -# Available optimisation on this machine: - -from mlprodict.testing.experimental_c import code_optimisation -print(code_optimisation()) - -plt.show() +""" +.. _l-example-tree-ensemble: + +Benchmark Random Forests, Tree Ensemble +======================================= + +The following script benchmarks different libraries +implementing random forests and boosting trees. +This benchmark can be replicated by installing the +following packages: + +:: + + python -m virtualenv env + cd env + pip install -i https://test.pypi.org/simple/ ort-nightly + pip install git+https://github.com/microsoft/onnxconverter-common.git@jenkins + pip install git+https://https://github.com/xadupre/sklearn-onnx.git@jenkins + pip install mlprodict matplotlib scikit-learn pandas threadpoolctl + pip install mlprodict lightgbm xgboost jinja2 + +.. 
contents:: + :local: + +Import +++++++ +""" +import os +import pickle +from pprint import pprint +import numpy +import pandas +import matplotlib.pyplot as plt +from xgboost import XGBClassifier +from lightgbm import LGBMClassifier +from onnxruntime import InferenceSession +from sklearn.experimental import enable_hist_gradient_boosting +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.ensemble import RandomForestClassifier +from sklearn.datasets import make_classification +from skl2onnx import to_onnx +from mlprodict.onnx_conv import register_converters +from mlprodict.onnxrt.validate.validate_helper import measure_time +from mlprodict.onnxrt import OnnxInference + +############################# +# Registers new converters for :epkg:`sklearn-onnx`. +register_converters() + +######################################### +# Problem +# +++++++ + +max_depth = 7 +n_classes = 20 +n_estimators = 500 +n_features = 100 +REPEAT = 3 +NUMBER = 1 +train, test = 1000, 10000 + +print('dataset') +X_, y_ = make_classification(n_samples=train + test, n_features=n_features, + n_classes=n_classes, n_informative=n_features - 3) +X_ = X_.astype(numpy.float32) +y_ = y_.astype(numpy.int64) +X_train, X_test = X_[:train], X_[train:] +y_train, y_test = y_[:train], y_[train:] + +compilation = [] + + +def train_cache(model, X_train, y_train, max_depth, n_estimators, n_classes): + name = "cache-{}-N{}-f{}-d{}-e{}-cl{}.pkl".format( + model.__class__.__name__, X_train.shape[0], X_train.shape[1], + max_depth, n_estimators, n_classes) + if os.path.exists(name): + with open(name, 'rb') as f: + return pickle.load(f) + else: + model.fit(X_train, y_train) + with open(name, 'wb') as f: + pickle.dump(model, f) + return model + + +######################################## +# RandomForestClassifier +# ++++++++++++++++++++++ + +rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth) +print('train') +rf = train_cache(rf, X_train, y_train, max_depth, n_estimators, 
n_classes) + +res = measure_time(rf.predict_proba, X_test[:10], + repeat=REPEAT, number=NUMBER, + div_by_number=True, first_run=True) +res['model'], res['runtime'] = rf.__class__.__name__, 'INNER' +pprint(res) + +######################################## +# ONNX +# ++++ + + +def measure_onnx_runtime(model, xt, repeat=REPEAT, number=NUMBER, + verbose=True): + if verbose: + print(model.__class__.__name__) + + res = measure_time(model.predict_proba, xt, + repeat=repeat, number=number, + div_by_number=True, first_run=True) + res['model'], res['runtime'] = model.__class__.__name__, 'INNER' + res['N'] = X_test.shape[0] + res["max_depth"] = max_depth + res["n_estimators"] = n_estimators + res["n_features"] = n_features + if verbose: + pprint(res) + yield res + + onx = to_onnx(model, X_train[:1], options={id(model): {'zipmap': False}}) + + oinf = OnnxInference(onx) + res = measure_time(lambda x: oinf.run({'X': x}), xt, + repeat=repeat, number=number, + div_by_number=True, first_run=True) + res['model'], res['runtime'] = model.__class__.__name__, 'NPY/C++' + res['N'] = X_test.shape[0] + res['size'] = len(onx.SerializeToString()) + res["max_depth"] = max_depth + res["n_estimators"] = n_estimators + res["n_features"] = n_features + if verbose: + pprint(res) + yield res + + sess = InferenceSession(onx.SerializeToString()) + res = measure_time(lambda x: sess.run(None, {'X': x}), xt, + repeat=repeat, number=number, + div_by_number=True, first_run=True) + res['model'], res['runtime'] = model.__class__.__name__, 'ORT' + res['N'] = X_test.shape[0] + res['size'] = len(onx.SerializeToString()) + res["max_depth"] = max_depth + res["n_estimators"] = n_estimators + res["n_features"] = n_features + if verbose: + pprint(res) + yield res + + +compilation.extend(list(measure_onnx_runtime(rf, X_test))) + + +######################################## +# HistGradientBoostingClassifier +# ++++++++++++++++++++++++++++++ + +hist = HistGradientBoostingClassifier( + max_iter=n_estimators, 
max_depth=max_depth) +print('train') +hist = train_cache(hist, X_train, y_train, max_depth, n_estimators, n_classes) + +compilation.extend(list(measure_onnx_runtime(hist, X_test))) + +######################################## +# LightGBM +# ++++++++ + +lgb = LGBMClassifier(n_estimators=n_estimators, + max_depth=max_depth, pred_early_stop=False) +print('train') +lgb = train_cache(lgb, X_train, y_train, max_depth, n_estimators, n_classes) + +compilation.extend(list(measure_onnx_runtime(lgb, X_test))) + +######################################## +# XGBoost +# +++++++ + +xgb = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth) +print('train') +xgb = train_cache(xgb, X_train, y_train, max_depth, n_estimators, n_classes) + +compilation.extend(list(measure_onnx_runtime(xgb, X_test))) + +############################################## +# Summary +# +++++++ +# +# All data +name = 'plot_time_tree_ensemble' +df = pandas.DataFrame(compilation) +df.to_csv('%s.csv' % name, index=False) +df.to_excel('%s.xlsx' % name, index=False) +df + +######################################### +# Time per model and runtime. +piv = df.pivot("model", "runtime", "average") +piv + +########################################### +# Graphs. 
+ax = piv.T.plot(kind="bar") +ax.set_title("Computation time ratio for %d observations and %d features\n" + "lower is better for onnx runtimes" % X_test.shape) +plt.savefig('%s.png' % name) + +########################################### +# Available optimisation on this machine: + +from mlprodict.testing.experimental_c import code_optimisation +print(code_optimisation()) + +plt.show() diff --git a/_unittests/ut_onnxrt/test_to_python.py b/_unittests/ut_onnxrt/test_onnx_inference_to_python.py similarity index 100% rename from _unittests/ut_onnxrt/test_to_python.py rename to _unittests/ut_onnxrt/test_onnx_inference_to_python.py diff --git a/_unittests/ut_onnxrt/test_rt_valid_model_grid_search_cv.py b/_unittests/ut_onnxrt/test_rt_valid_model_grid_search_cv.py index 325f4a809..61e6b2ee3 100644 --- a/_unittests/ut_onnxrt/test_rt_valid_model_grid_search_cv.py +++ b/_unittests/ut_onnxrt/test_rt_valid_model_grid_search_cv.py @@ -2,7 +2,6 @@ @brief test log(time=9s) """ import unittest -from logging import getLogger from pyquickhelper.loghelper import fLOG from pyquickhelper.pycode import ExtTestCase from sklearn.exceptions import ConvergenceWarning @@ -19,8 +18,6 @@ class TestRtValidateGridSearchCV(ExtTestCase): @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning)) def test_rt_grid_search_cv(self): fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__") - logger = getLogger('skl2onnx') - logger.disabled = True verbose = 1 if __name__ == "__main__" else 0 buffer = [] diff --git a/_unittests/ut_tools/data/debug.onnx b/_unittests/ut_tools/data/debug.onnx new file mode 100644 index 0000000000000000000000000000000000000000..04e69a909788095004eeedd1741463cb513661ec GIT binary patch literal 12938 zcmeHOZ)jWB6_*^_l5gz9z3!j%ZQX(>rB&P1?>#C1x+bpNX0%DWtxnn{tW{*`DHV|= zSCTt>Ulvv<8}vh=qma-Kfk8*Nd?*YCg?6$%av3WGui8-u`L-vawI=ziy(d*A;T zEBicyE!}s|`JLZ6_uTW&yON6qSsY&MI{jX6)0(jl968_~XnAjz3XRSIy7q9+$sN7X 
z5`Ui%Q=Q)W#z^d5Y1UVojb3v&vgOp7PRm~87uzc8DZ4aD66Ie{h{@G{Ys>lNgjf`X zp0_!wcY3m@-t;#{*4}2TGaR+*&$YbXs59Cs70tV&Iq{&cAtp&fPPRJjwl%%7QD50u zE$zY|NSzVtGQ5DrI)TcUy!uOCYoqC{Y^<%kvC$ZKrMlbP@?KHA z=y~g{&f0Ka?rMJGXim=l0L~jS=kD{V$XOwZc{wd|Vxl;CZq`u4D8D){eAncpxng+n zm8+CX@Lxrar}kepJoNCg)4CC>|uBE;!_{d9l*BzCCWjrT7% zx*Ohb(U!9qJlAV=n%>ZcF)apTBwgMk3f;!wf;SwgO#s)YhyB6GYpLu6m!Al>9Wg-0 zv;k$z;9CU^KQ8(q5uqpNF7|p`w4qUSXo=MDM7J>-c|E&S(CHW&x@Hx zx7+{e&%8lj*GiON!!ZfGM_KZL^?GQu+rvJi(R^j_UR@vbSL@HBsCPlzku!W@d9iBE zhUTiJDJ6sKSWcqs2}zRkK8PCuaRZPeNf6vVI>SJ<5>c%Ls>>o4g4;OZL_nMXWGMj> z_n~M4VeJli7a$x?AjBN7Bev~8^GOlJM{2WrSGva7a$`7hOOy0co)HtHz9|_Y~G4pj?e9SHsHHH08s>4~d-PkZ9JH(6IvBG|doaCuY_On{{?jwqwe6SQ$k(8Ld}D zEWXsFAj@>x4^&grTC7;}?1A6ZR7x}c`$#uKeFZi_9JrDWTulI1?-00}4ty{HeDDr| zopfL)0qpDmd|E`aLF4Mcn-E(%8;yxz(*D~PK=^skaKePk=^PXTKcZ@Oy|wgF8o1eHUq-32+N{ojP5L(@>t#353`~k zBYj9NBuU%m-U7n5p?yK5xF;qq+ENA&A@x?f-t2d6>)zA`(itb*S%#bxLr#XF$~cBv zsUGDo%DWN{S?LY&I94u*vW{u~ZdiYe}vNrX`tC zSxX&3q%SD&?VEZT+MsCNbzSryi-fNj?I^TnC(E_8oTys&r6!-fIF4^mi{$EyVzb*> zudj7_=3Qe`3c9xLwUF}GV?1bVl3iWi^ zDDF-uE~8K@oQ+c138lg))Dttxk3=L}#2)qBrVFL*$Y~^phuXnXV?JCZLtWj8Se1!U zzs~lQ@9IO^%OYZ4nNwm*tFWE?}rxf2{Wf#Vt+_fBvs37m?-soV)pHGxw# zIMpm1FS@7jX+?CIvAqQEbMqgWd@7K=bEpH z=&Y3dJguD5r=r&Mxz73vjrEd#EkA&?q0?LS=s!gNJUUnzF6Hn=g$eqiLihvcr{ef? 
zjlqSr#-{#oo!jM%V|6$+3XWZ)it;-R`tZEo=1K;Ld~QOMahx>PVdg7h2lT{Q z!(P)981w!EZsd3XpCZQu<&+8{my_rxk@^_z3Be+ARh<9k6pQZ|8a#HJs${*{{DC6W6(H$=FEort%XImY&I|f z15Q}WfWuIV0bg|LiQj&Ye%Zj8r0AAYo-5|r2lXjxd4c1$v{VaLl%%>*tfUM=regoRm>Z}PR+Nnd%rcoHt1B-^P!`K+Ibte@KL zzXntO(@gPXaS}25cqW&-x`eb`!V(>)JfQKF%V+g?`&hgEA?DBH_qv*=G0nT~AN2Tr z$e!}`uUx*o)NY^f$2YdGDS6gsJlB0o<842c%Y6*_w|%@D+aD->V~_X{K5#X^j};&A zP-B*FYyIt~Zfw7U`6XcRyzBlZWS{*be`p?<@b_KlF8E{8CwoSp<8xm5L(5Eft~%f%7N0{upGo$cDnB zG5NthTm#M$ei@(bomFF+CqA^^?e_b)b{%+MKzD&`sTjfsrB7>4F(p2^+PLAV%@!`g2Xm6E5*3d&IXE`WkI{fo}#=wa~XC%cb~DaHG!~$)_`j$ z?cH|U)GWI9UW3oH|Gt9!wvsnB7VG5KWTVe}8rCDIF|^hkQ?Bte4)|_A zMSFq9l>e}g@h2Gbek1<0mSmId)7_EzvVX*va+Bjhd`X^in*5cXQ#fR@9 zImmKNHaSf-iTPZI{48N>-bntzJ}&z z^6gV#OhcDymda<0sYY>4V7*z=QSuaLl4ldCiqtLeEz?-yQ4S& literal 0 HcmV?d00001 diff --git a/_unittests/ut_tools/test_export_onnx.py b/_unittests/ut_tools/test_export_onnx.py index b652ca018..0db8551ee 100644 --- a/_unittests/ut_tools/test_export_onnx.py +++ b/_unittests/ut_tools/test_export_onnx.py @@ -672,6 +672,9 @@ def test_export_onnx(self): self.assertEqualArray(y['y'], y1['y']) self.assertEqualArray(y['y'], y2['y']) + code2 = oinf.to_onnx_code() + self.assertEqual(new_onnx, code2) + def verify_tf(self, content): try: left, __ = verify_code(content, exc=False) @@ -1099,6 +1102,13 @@ def onnx_rfft_2d_any_test(x, fft_length): self.assertNotIn("numpy.", code) # print(code) + def test_sub_graph(self): + data = os.path.abspath(os.path.dirname(__file__)) + debug = os.path.join(data, "data", "debug.onnx") + self.assertRaise(lambda: export2onnx(debug), NotImplementedError) + # new_onnx = export2onnx(debug) + # _, loc = self.verify(new_onnx) + if __name__ == "__main__": # TestExportOnnx().test_simple_configuration() diff --git a/mlprodict/onnx_tools/onnx_export.py b/mlprodict/onnx_tools/onnx_export.py index 5afd6729c..815a7e323 100644 --- a/mlprodict/onnx_tools/onnx_export.py +++ b/mlprodict/onnx_tools/onnx_export.py @@ -414,6 +414,10 @@ def rename_name(name): for at in 
node.attribute: temp = _var_as_dict(at) value = temp['value'] + if node.op_type in {'Scan', 'Loop', 'If'}: + raise NotImplementedError( + "Subgraph are not yet implemented (operator=%r)." + "" % node.op_type) if use_onnx_tensor: if node.op_type == 'Cast' and at.name == 'to': attributes.append( diff --git a/mlprodict/onnxrt/onnx_inference.py b/mlprodict/onnxrt/onnx_inference.py index 763e3a430..47e3a16d2 100644 --- a/mlprodict/onnxrt/onnx_inference.py +++ b/mlprodict/onnxrt/onnx_inference.py @@ -221,6 +221,7 @@ def _init(self): self.to_dot = self.exporters_.to_dot self.to_python = self.exporters_.to_python self.to_text = self.exporters_.to_text + self.to_onnx_code = self.exporters_.to_onnx_code if self.runtime in ('python_compiled', 'python_compiled_debug'): # switch the inference method to the compiled one diff --git a/mlprodict/onnxrt/onnx_inference_exports.py b/mlprodict/onnxrt/onnx_inference_exports.py index 91ead76b5..76ebe7173 100644 --- a/mlprodict/onnxrt/onnx_inference_exports.py +++ b/mlprodict/onnxrt/onnx_inference_exports.py @@ -11,6 +11,7 @@ from onnx import numpy_helper from ..onnx_tools.onnx2py_helper import _var_as_dict, _type_to_string from ..tools.graphs import onnx2bigraph +from ..onnx_tools.onnx_export import export2onnx class OnnxInferenceExport: @@ -599,3 +600,12 @@ def to_text(self, recursive=False, grid=5, distance=5): bigraph = onnx2bigraph(self.oinf.obj, recursive=recursive) graph = bigraph.display_structure(grid=grid, distance=distance) return graph.to_text() + + def to_onnx_code(self): + """ + Exports the ONNX graph into an :epkg:`onnx` code + which replicates it. 
+ + :return: string + """ + return export2onnx(self.oinf.obj) From f4ae028a6c28c3f14ad5c2d51468d6a4d0a84d21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sat, 28 Aug 2021 13:26:12 +0200 Subject: [PATCH 04/13] documentation --- mlprodict/onnx_tools/onnx_export.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlprodict/onnx_tools/onnx_export.py b/mlprodict/onnx_tools/onnx_export.py index 815a7e323..c721ee72a 100644 --- a/mlprodict/onnx_tools/onnx_export.py +++ b/mlprodict/onnx_tools/onnx_export.py @@ -517,6 +517,7 @@ def export2onnx(model_onnx, opset=None, verbose=True, name=None, rename=False, .. runpython:: :showcode: + :process: import numpy from sklearn.cluster import KMeans @@ -558,6 +559,7 @@ def export2tf2onnx(model_onnx, opset=None, verbose=True, name=None, .. runpython:: :showcode: + :process: import numpy from sklearn.cluster import KMeans @@ -601,6 +603,7 @@ def export2numpy(model_onnx, opset=None, verbose=True, name=None, .. runpython:: :showcode: + :process: import numpy from sklearn.cluster import KMeans @@ -621,6 +624,7 @@ def export2numpy(model_onnx, opset=None, verbose=True, name=None, .. 
runpython:: :showcode: + :process: import numpy from mlprodict.testing.einsum import decompose_einsum_equation From 8bf9a35ac5e8de672e7dff977aed6855d098c307 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 29 Aug 2021 01:40:16 +0200 Subject: [PATCH 05/13] convert to nump --- .../test_onnx_speedup_transformer.py | 51 +++++++++++ mlprodict/onnx_tools/onnx_export.py | 1 + mlprodict/sklapi/onnx_speed_up.py | 87 ++++++++++++++++++- 3 files changed, 136 insertions(+), 3 deletions(-) diff --git a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py index b7308393f..4bc2035cf 100644 --- a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py +++ b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py @@ -33,6 +33,24 @@ def test_speedup_transform32(self): spd.fit(X) spd.assert_almost_equal(X, decimal=5) + def test_speedup_transform32_onnxruntime(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer( + PCA(), target_opset=self.opset(), + runtime="onnxruntime1") + spd.fit(X) + spd.assert_almost_equal(X, decimal=5) + + def test_speedup_transform32_numpy(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer( + PCA(), target_opset=self.opset(), + runtime="numpy") + spd.fit(X) + spd.assert_almost_equal(X, decimal=5) + def test_speedup_transform64(self): data = load_iris() X, _ = data.data, data.target @@ -69,6 +87,26 @@ def test_speedup_transform64_pickle(self): got = spd2.raw_transform(X) self.assertEqualArray(expected, got) + def test_speedup_transform64_numpy_pickle(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False, + runtime="numpy") + spd.fit(X) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.transform(X) + got = spd2.transform(X) + 
self.assertEqualArray(expected, got) + expected = spd.raw_transform(X) + got = spd2.raw_transform(X) + self.assertEqualArray(expected, got) + def test__speedup_transform64_onnx(self): data = load_iris() X, _ = data.data, data.target @@ -81,6 +119,19 @@ def test__speedup_transform64_onnx(self): got = oinf.run({'X': X})['variable'] self.assertEqualArray(expected, got) + def test__speedup_transform64_onnx_numpy(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False, + runtime='numpy') + spd.fit(X) + expected = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + if __name__ == '__main__': unittest.main() diff --git a/mlprodict/onnx_tools/onnx_export.py b/mlprodict/onnx_tools/onnx_export.py index c721ee72a..316216482 100644 --- a/mlprodict/onnx_tools/onnx_export.py +++ b/mlprodict/onnx_tools/onnx_export.py @@ -451,6 +451,7 @@ def rename_name(name): # graph context['name'] = name or model_onnx.graph.name + context['name'] = context['name'].replace("(", "_").replace(")", "") context['ir_version'] = model_onnx.ir_version context['producer_name'] = model_onnx.producer_name context['domain'] = model_onnx.domain diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py index 729241670..313df5330 100644 --- a/mlprodict/sklapi/onnx_speed_up.py +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -5,15 +5,27 @@ .. 
versionadded:: 0.7 """ +import collections +import inspect +import io +from contextlib import redirect_stdout, redirect_stderr import numpy from numpy.testing import assert_almost_equal +from onnx import numpy_helper, helper, load from sklearn.base import BaseEstimator, TransformerMixin, clone +from sklearn.preprocessing import FunctionTransformer from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin +from ..tools.code_helper import print_code from ..onnx_conv import to_onnx +from ..onnx_tools.onnx_export import export2numpy +from ..onnx_tools.onnx2py_helper import onnx_model_opsets +from ..onnx_tools.exports.numpy_helper import ( + argmin_use_numpy_select_last_index, + make_slice) from .onnx_transformer import OnnxTransformer -class _OnnxPipelineStepSpeedUp(OnnxOperatorMixin): +class _OnnxPipelineStepSpeedUp(BaseEstimator, OnnxOperatorMixin): """ Speeds up inference by replacing methods *transform* or *predict* by a runtime for :epkg:`ONNX`. @@ -34,6 +46,7 @@ class _OnnxPipelineStepSpeedUp(OnnxOperatorMixin): def __init__(self, estimator, runtime='python', enforce_float32=True, target_opset=None, conv_options=None): + BaseEstimator.__init__(self) self.estimator = estimator self.runtime = runtime self.enforce_float32 = enforce_float32 @@ -66,12 +79,79 @@ def _build_onnx_runtime(self, onx): :param runtime: runtime type (see @see cl OnnxInference) :return: instance of @see cl OnnxInference """ + if self.runtime == 'numpy': + return self._build_onnx_runtime_numpy(onx) tr = OnnxTransformer( onx, runtime=self.runtime, enforce_float32=self.enforce_float32) tr.fit() return tr + def _build_onnx_runtime_numpy(self, onx): + """ + Builds a runtime based on numpy. 
+ """ + st = io.BytesIO(onx) + model_onnx = load(st) + self.numpy_code_ = export2numpy(model_onnx, rename=True) + opsets = onnx_model_opsets(model_onnx) + return self._build_onnx_runtime_numpy_compile(opsets) + + def _build_onnx_runtime_numpy_compile(self, opsets): + try: + compiled_code = compile( + self.numpy_code_, '', 'exec') + except SyntaxError as e: + raise AssertionError( + "Unable to compile a script due to %r. " + "\n--CODE--\n%s" + "" % (e, print_code(self.numpy_code_))) from e + + glo = globals().copy() + loc = { + 'numpy': numpy, 'dict': dict, 'list': list, + 'print': print, 'sorted': sorted, + 'collections': collections, 'inspect': inspect, + 'helper': helper, + 'argmin_use_numpy_select_last_index': + argmin_use_numpy_select_last_index, + 'make_slice': make_slice} + out = io.StringIO() + err = io.StringIO() + with redirect_stdout(out): + with redirect_stderr(err): + try: + exec(compiled_code, glo, loc) # pylint: disable=W0122 + except Exception as e: + raise AssertionError( + "Unable to execute a script due to %r. " + "\n--OUT--\n%s\n--ERR--\n%s\n--CODE--\n%s" + "" % (e, out.getvalue(), err.getvalue(), + print_code(self.numpy_code_))) from e + names = [k for k in loc if k.startswith('numpy_')] + if len(names) != 1: + raise RuntimeError( + "Unable to guess which function is the one, names=%r." + "" % list(sorted(names))) + fct = loc[names[0]] + cl = FunctionTransformer(fct, accept_sparse=True) + cl.op_version = opsets[''] + return cl + + def __getstate__(self): + state = BaseEstimator.__getstate__(self) + if 'numpy_code_' in state: + del state['onnxrt_'] + return state + + def __setstate__(self, state): + BaseEstimator.__setstate__(self, state) + if 'numpy_code_' in state: + st = io.BytesIO(state['onnx_']) + model_onnx = load(st) + opsets = onnx_model_opsets(model_onnx) + self.onnxrt_ = self._build_onnx_runtime_numpy_compile(opsets) + def fit(self, X, *args, **kwargs): """ Fits the estimator, converts to ONNX. 
@@ -102,6 +182,8 @@ def onnx_parser(self, scope=None, inputs=None): Returns a parser for this model. """ self._check_fitted_() + if isinstance(self.onnxrt_, FunctionTransformer): + raise NotImplementedError() return self.onnxrt_.onnx_parser(scope, inputs) def onnx_shape_calculator(self): @@ -131,7 +213,7 @@ def converter(scope, operator, container): return converter -class OnnxSpeedUpTransformer(BaseEstimator, TransformerMixin, +class OnnxSpeedUpTransformer(TransformerMixin, _OnnxPipelineStepSpeedUp): """ Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`. @@ -152,7 +234,6 @@ class OnnxSpeedUpTransformer(BaseEstimator, TransformerMixin, def __init__(self, estimator, runtime='python', enforce_float32=True, target_opset=None, conv_options=None): - BaseEstimator.__init__(self) _OnnxPipelineStepSpeedUp.__init__( self, estimator, runtime=runtime, enforce_float32=enforce_float32, target_opset=target_opset, conv_options=conv_options) From b665641b77da5a72f43abbc4ab5940c8989c577e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 29 Aug 2021 12:43:43 +0200 Subject: [PATCH 06/13] add numpy runtime --- _doc/sphinxdoc/source/api/tools.rst | 108 ++++++++++++++++++ .../test_onnx_speedup_transformer.py | 4 +- _unittests/ut_tools/test_onnx2py_helper.py | 17 +++ .../onnx_tools/exports/skl2onnx_helper.py | 88 ++++++++++++++ .../onnx_tools/exports/tf2onnx_helper.py | 21 ++-- mlprodict/onnx_tools/onnx2py_helper.py | 18 ++- mlprodict/sklapi/onnx_speed_up.py | 74 ++++++++++-- mlprodict/sklapi/onnx_transformer.py | 71 +----------- 8 files changed, 311 insertions(+), 90 deletions(-) create mode 100644 _unittests/ut_tools/test_onnx2py_helper.py create mode 100644 mlprodict/onnx_tools/exports/skl2onnx_helper.py diff --git a/_doc/sphinxdoc/source/api/tools.rst b/_doc/sphinxdoc/source/api/tools.rst index 3222eab1f..ea4b12ae4 100644 --- a/_doc/sphinxdoc/source/api/tools.rst +++ b/_doc/sphinxdoc/source/api/tools.rst @@ -138,3 +138,111 @@ Versions .. 
autosignature:: mlprodict.tools.asv_options_helper.get_ir_version_from_onnx .. autosignature:: mlprodict.tools.asv_options_helper.get_opset_number_from_onnx + +Type conversion +=============== + +.. autosignature:: mlprodict.onnx_conv.convert.guess_initial_types + +.. autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_numpy_type_from_string + +.. autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_numpy_type_from_dtype + +.. autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_proto_dtype + +.. autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_proto_dtype_name + +.. autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_dtype + +In :epkg:`sklearn-onnx`: + +* `skl2onnx.algebra.type_helper.guess_initial_types` +* `skl2onnx.common.data_types.guess_data_type` +* `skl2onnx.common.data_types.guess_numpy_type` +* `skl2onnx.common.data_types.guess_proto_type` +* `skl2onnx.common.data_types.guess_tensor_type` +* `skl2onnx.common.data_types._guess_type_proto` +* `skl2onnx.common.data_types._guess_numpy_type` + +The last example summarizes all the possibilities. + +.. 
runpython:: + :showcode: + :process: + + import numpy + from onnx import TensorProto + + from skl2onnx.algebra.type_helper import guess_initial_types + from skl2onnx.common.data_types import guess_data_type + from skl2onnx.common.data_types import guess_numpy_type + from skl2onnx.common.data_types import guess_proto_type + from skl2onnx.common.data_types import guess_tensor_type + from skl2onnx.common.data_types import _guess_type_proto + from skl2onnx.common.data_types import _guess_numpy_type + from skl2onnx.common.data_types import DoubleTensorType + + from mlprodict.onnx_conv.convert import guess_initial_types as guess_initial_types_mlprodict + from mlprodict.onnx_tools.onnx2py_helper import guess_numpy_type_from_string + from mlprodict.onnx_tools.onnx2py_helper import guess_numpy_type_from_dtype + from mlprodict.onnx_tools.onnx2py_helper import guess_proto_dtype + from mlprodict.onnx_tools.onnx2py_helper import guess_proto_dtype_name + from mlprodict.onnx_tools.onnx2py_helper import guess_dtype + + + def guess_initial_types0(t): + return guess_initial_types(numpy.array([[0, 1]], dtype=t), None) + + + def guess_initial_types1(t): + return guess_initial_types(None, [('X', t)]) + + + def guess_initial_types_mlprodict0(t): + return guess_initial_types_mlprodict(numpy.array([[0, 1]], dtype=t), None) + + + def guess_initial_types_mlprodict1(t): + return guess_initial_types_mlprodict(None, [('X', t)]) + + + def _guess_type_proto1(t): + return _guess_type_proto(t, [None, 4]) + + + def _guess_numpy_type1(t): + return _guess_numpy_type(t, [None, 4]) + + + fcts = [guess_initial_types0, guess_initial_types1, + guess_data_type, guess_numpy_type, + guess_proto_type, guess_tensor_type, + _guess_type_proto1, + _guess_numpy_type1, + guess_initial_types_mlprodict0, + guess_initial_types_mlprodict1, + guess_numpy_type_from_string, + guess_numpy_type_from_dtype, + guess_proto_dtype_name, guess_dtype] + + values = [numpy.float64, float, 'double', 'tensor(double)', + 
DoubleTensorType([None, 4]), + TensorProto.DOUBLE] + + print("---SUCCESS------------") + errors = [] + for f in fcts: + print("") + for v in values: + try: + r = f(v) + print("%s(%r) -> %r" % (f.__name__, v, r)) + except Exception as e: + errors.append("%s(%r) -> %r" % (f.__name__, v, e)) + errors.append("") + + print() + print('---ERRORS-------------') + print() + for e in errors: + print(e) diff --git a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py index 4bc2035cf..db7e5491d 100644 --- a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py +++ b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py @@ -107,7 +107,7 @@ def test_speedup_transform64_numpy_pickle(self): got = spd2.raw_transform(X) self.assertEqualArray(expected, got) - def test__speedup_transform64_onnx(self): + def test_speedup_transform64_onnx(self): data = load_iris() X, _ = data.data, data.target spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), @@ -119,7 +119,7 @@ def test__speedup_transform64_onnx(self): got = oinf.run({'X': X})['variable'] self.assertEqualArray(expected, got) - def test__speedup_transform64_onnx_numpy(self): + def test_speedup_transform64_onnx_numpy(self): data = load_iris() X, _ = data.data, data.target spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), diff --git a/_unittests/ut_tools/test_onnx2py_helper.py b/_unittests/ut_tools/test_onnx2py_helper.py new file mode 100644 index 000000000..5de32f65e --- /dev/null +++ b/_unittests/ut_tools/test_onnx2py_helper.py @@ -0,0 +1,17 @@ +""" +@brief test log(time=2s) +""" +import unittest +from pyquickhelper.pycode import ExtTestCase +from mlprodict.onnx_tools.onnx2py_helper import to_skl2onnx_type + + +class TestOnnx2PyHelper(ExtTestCase): + + def test_to_skl2onnx_type(self): + r = to_skl2onnx_type('NA', 'double', (0, 15)) + self.assertEqual(repr(r), "('NA', DoubleTensorType(shape=[None, 15]))") + + +if __name__ == "__main__": + unittest.main() 
diff --git a/mlprodict/onnx_tools/exports/skl2onnx_helper.py b/mlprodict/onnx_tools/exports/skl2onnx_helper.py new file mode 100644 index 000000000..74b1c1742 --- /dev/null +++ b/mlprodict/onnx_tools/exports/skl2onnx_helper.py @@ -0,0 +1,88 @@ +""" +@file +@brief Helpers to run examples created with :epkg:`sklearn-onnx`. +""" +import numpy +from onnx import helper, TensorProto + + +def _copy_inout(inout, scope, new_name): + shape = [s.dim_value for s in inout.type.tensor_type.shape.dim] + value_info = helper.make_tensor_value_info( + new_name, inout.type.tensor_type.elem_type, shape) + return value_info + + +def _clean_variable_name(name, scope): + return scope.get_unique_variable_name(name) + + +def _clean_operator_name(name, scope): + return scope.get_unique_operator_name(name) + + +def _clean_initializer_name(name, scope): + return scope.get_unique_variable_name(name) + + +def add_onnx_graph(scope, operator, container, onx): + """ + Adds a whole ONNX graph to an existing one following + :epkg:`skl2onnx` API. 
+ + :param scope: scope (to get unique names) + :param operator: operator + :param container: container + :param onx: ONNX graph + """ + graph = onx.graph + name_mapping = {} + node_mapping = {} + for node in graph.node: + name = node.name + if name is not None: + node_mapping[node.name] = _clean_initializer_name( + node.name, scope) + for o in node.input: + name_mapping[o] = _clean_variable_name(o, scope) + for o in node.output: + name_mapping[o] = _clean_variable_name(o, scope) + for o in graph.initializer: + name_mapping[o.name] = _clean_operator_name(o.name, scope) + + inputs = [_copy_inout(o, scope, name_mapping[o.name]) + for o in graph.input] + outputs = [_copy_inout(o, scope, name_mapping[o.name]) + for o in graph.output] + + for inp, to in zip(operator.inputs, inputs): + n = helper.make_node('Identity', [inp.onnx_name], [to.name], + name=_clean_operator_name('Identity', scope)) + container.nodes.append(n) + + for inp, to in zip(outputs, operator.outputs): + n = helper.make_node('Identity', [inp.name], [to.onnx_name], + name=_clean_operator_name('Identity', scope)) + container.nodes.append(n) + + for node in graph.node: + n = helper.make_node( + node.op_type, + [name_mapping[o] for o in node.input], + [name_mapping[o] for o in node.output], + name=node_mapping[node.name] if node.name else None, + domain=node.domain if node.domain else None) + n.attribute.extend(node.attribute) # pylint: disable=E1101 + container.nodes.append(n) + + for o in graph.initializer: + as_str = o.SerializeToString() + tensor = TensorProto() + tensor.ParseFromString(as_str) + tensor.name = name_mapping[o.name] + container.initializers.append(tensor) + + # opset + for oimp in onx.opset_import: + container.node_domain_version_pair_sets.add( + (oimp.domain, oimp.version)) diff --git a/mlprodict/onnx_tools/exports/tf2onnx_helper.py b/mlprodict/onnx_tools/exports/tf2onnx_helper.py index 1a4b272d0..c4b51dc7f 100644 --- a/mlprodict/onnx_tools/exports/tf2onnx_helper.py +++ 
b/mlprodict/onnx_tools/exports/tf2onnx_helper.py @@ -446,13 +446,15 @@ def make_slice(self, kwargs, name=None, shapes=None, dtypes=None, return_node=Fa make_sure(dtype == self.graph.get_dtype( input_data), "dtype should be same") - node = self.graph.make_node(op_type="Slice", inputs=inputs, attr=attr, name=name, - outputs=outputs, shapes=shapes, dtypes=dtypes) + node = self.graph.make_node(op_type="Slice", inputs=inputs, attr=attr, + name=name, outputs=outputs, shapes=shapes, + dtypes=dtypes) if return_node: return node raise NotImplementedError("return_node must be True") - def make_squeeze(self, kwargs, name=None, shapes=None, dtypes=None, return_node=False, op_name_scope=None): + def make_squeeze(self, kwargs, name=None, shapes=None, dtypes=None, + return_node=False, op_name_scope=None): """ Squeeze changes its schema at opset 13: it treats axes as a dynamic input kwargs: key could be ["data", "axes"]. @@ -487,13 +489,15 @@ def make_squeeze(self, kwargs, name=None, shapes=None, dtypes=None, return_node= while inputs[-1] == "": inputs = inputs[:-1] - node = self.graph.make_node(op_type="Squeeze", inputs=inputs, attr=attr, name=name, - outputs=outputs) + node = self.graph.make_node( + op_type="Squeeze", inputs=inputs, attr=attr, name=name, + outputs=outputs) if return_node: return node raise NotImplementedError("return_node must be True") - def make_unsqueeze(self, kwargs, name=None, shapes=None, dtypes=None, return_node=False, op_name_scope=None): + def make_unsqueeze(self, kwargs, name=None, shapes=None, dtypes=None, + return_node=False, op_name_scope=None): """ Unsqueeze changes its schema at opset 13: it treats axes as a dynamic input kwargs: key could be ["data", "axes"]. 
@@ -528,8 +532,9 @@ def make_unsqueeze(self, kwargs, name=None, shapes=None, dtypes=None, return_nod while inputs[-1] == "": inputs = inputs[:-1] - node = self.graph.make_node(op_type="Unsqueeze", inputs=inputs, attr=attr, name=name, - outputs=outputs) + node = self.graph.make_node( + op_type="Unsqueeze", inputs=inputs, attr=attr, name=name, + outputs=outputs) if return_node: return node raise NotImplementedError("return_node must be True") diff --git a/mlprodict/onnx_tools/onnx2py_helper.py b/mlprodict/onnx_tools/onnx2py_helper.py index 4c023254e..e4d3164bb 100644 --- a/mlprodict/onnx_tools/onnx2py_helper.py +++ b/mlprodict/onnx_tools/onnx2py_helper.py @@ -9,6 +9,7 @@ from scipy.sparse import coo_matrix from onnx import onnx_pb as onnx_proto, TensorProto from onnx.numpy_helper import to_array, from_array +from skl2onnx.common.data_types import _guess_numpy_type def to_bytes(val): @@ -158,7 +159,7 @@ def guess_numpy_type_from_dtype(dt): if dt == numpy.dtype('float32'): return numpy.float32 if dt == numpy.dtype('float64'): - return numpy.floa64 + return numpy.float64 if dt == numpy.dtype('int64'): return numpy.int64 if dt == numpy.dtype('int8'): @@ -565,3 +566,18 @@ def guess_dtype(proto_type): raise ValueError( "Unable to convert proto_type {} to numpy type.".format( proto_type)) + + +def to_skl2onnx_type(name, elem_type, shape): + """ + Converts *name*, *elem_type*, *shape* into a + :epkg:`sklearn-onnx` type. 
+ + :param name: string + :param elem_type: tensor of elements of this type + :param shape: expected shape + :return: data type + """ + elem = guess_numpy_type_from_string(elem_type) + shape = list(None if d == 0 else d for d in shape) + return (name, _guess_numpy_type(elem, shape)) diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py index 313df5330..5c49ddd89 100644 --- a/mlprodict/sklapi/onnx_speed_up.py +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -11,17 +11,18 @@ from contextlib import redirect_stdout, redirect_stderr import numpy from numpy.testing import assert_almost_equal -from onnx import numpy_helper, helper, load +from onnx import helper, load from sklearn.base import BaseEstimator, TransformerMixin, clone from sklearn.preprocessing import FunctionTransformer from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin from ..tools.code_helper import print_code -from ..onnx_conv import to_onnx from ..onnx_tools.onnx_export import export2numpy -from ..onnx_tools.onnx2py_helper import onnx_model_opsets +from ..onnx_tools.onnx2py_helper import ( + onnx_model_opsets, _var_as_dict, to_skl2onnx_type) from ..onnx_tools.exports.numpy_helper import ( - argmin_use_numpy_select_last_index, - make_slice) + argmin_use_numpy_select_last_index, make_slice) +from ..onnx_tools.exports.skl2onnx_helper import add_onnx_graph +from ..onnx_conv import to_onnx from .onnx_transformer import OnnxTransformer @@ -90,14 +91,32 @@ def _build_onnx_runtime(self, onx): def _build_onnx_runtime_numpy(self, onx): """ Builds a runtime based on numpy. + Exports the ONNX graph into python code + based on numpy and then dynamically compiles + it with method @see me _build_onnx_runtime_numpy_compile. 
""" - st = io.BytesIO(onx) - model_onnx = load(st) + model_onnx = load(io.BytesIO(onx)) + self.onnx_io_names_ = {'inputs': [], 'outputs': []} + for inp in model_onnx.graph.input: # pylint: disable=E1101 + d = _var_as_dict(inp) + self.onnx_io_names_['inputs'].append((d['name'], d['type'])) + for inp in model_onnx.graph.output: # pylint: disable=E1101 + d = _var_as_dict(inp) + self.onnx_io_names_['outputs'].append((d['name'], d['type'])) + self.onnx_io_names_['skl2onnx_inputs'] = [ + to_skl2onnx_type(d[0], d[1]['elem'], d[1]['shape']) + for d in self.onnx_io_names_['inputs']] + self.onnx_io_names_['skl2onnx_outputs'] = [ + to_skl2onnx_type(d[0], d[1]['elem'], d[1]['shape']) + for d in self.onnx_io_names_['outputs']] self.numpy_code_ = export2numpy(model_onnx, rename=True) opsets = onnx_model_opsets(model_onnx) return self._build_onnx_runtime_numpy_compile(opsets) def _build_onnx_runtime_numpy_compile(self, opsets): + """ + Second part of @see me _build_onnx_runtime_numpy. + """ try: compiled_code = compile( self.numpy_code_, '', 'exec') @@ -139,16 +158,25 @@ def _build_onnx_runtime_numpy_compile(self, opsets): return cl def __getstate__(self): + """ + :epkg:`pickle` does not support functions. + This method removes any link to function + when the runtime is `'numpy'`. + """ state = BaseEstimator.__getstate__(self) if 'numpy_code_' in state: del state['onnxrt_'] return state def __setstate__(self, state): + """ + :epkg:`pickle` does not support functions. + This method restores the function created when + the runtime is `'numpy'`. 
+ """ BaseEstimator.__setstate__(self, state) if 'numpy_code_' in state: - st = io.BytesIO(state['onnx_']) - model_onnx = load(st) + model_onnx = load(io.BytesIO(state['onnx_'])) opsets = onnx_model_opsets(model_onnx) self.onnxrt_ = self._build_onnx_runtime_numpy_compile(opsets) @@ -183,7 +211,10 @@ def onnx_parser(self, scope=None, inputs=None): """ self._check_fitted_() if isinstance(self.onnxrt_, FunctionTransformer): - raise NotImplementedError() + def parser(): + # Types should be included as well. + return [r[0] for r in self.onnx_io_names_['skl2onnx_outputs']] + return parser return self.onnxrt_.onnx_parser(scope, inputs) def onnx_shape_calculator(self): @@ -191,6 +222,19 @@ def onnx_shape_calculator(self): Returns a shape calculator for this transform. """ self._check_fitted_() + + if isinstance(self.onnxrt_, FunctionTransformer): + def fct_shape_calculator(operator): + # Types should be included as well. + outputs = self.onnx_io_names_['skl2onnx_outputs'] + if len(operator.outputs) != len(outputs): + raise RuntimeError( # pragma: no cover + "Mismatch between parser and shape calculator, " + "%r != %r." % (outputs, operator.outputs)) + for a, b in zip(operator.outputs, outputs): + a.type = b[1] + return fct_shape_calculator + calc = self.onnxrt_.onnx_shape_calculator() def shape_calculator(operator): @@ -203,6 +247,16 @@ def onnx_converter(self): Returns a converter for this transform. 
""" self._check_fitted_() + + if isinstance(self.onnxrt_, FunctionTransformer): + + def fct_converter(scope, operator, container): + op = operator.raw_operator + onnx_model = load(io.BytesIO(op.onnx_)) + add_onnx_graph(scope, operator, container, onnx_model) + + return fct_converter + conv = self.onnxrt_.onnx_converter() def converter(scope, operator, container): diff --git a/mlprodict/sklapi/onnx_transformer.py b/mlprodict/sklapi/onnx_transformer.py index 359723e10..eb6586918 100644 --- a/mlprodict/sklapi/onnx_transformer.py +++ b/mlprodict/sklapi/onnx_transformer.py @@ -7,10 +7,8 @@ import numpy import pandas import onnx -from onnx import helper from sklearn.base import BaseEstimator, TransformerMixin from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin -from skl2onnx.proto import TensorProto from skl2onnx.helpers.onnx_helper import ( load_onnx_model, enumerate_model_node_outputs) from skl2onnx.helpers.onnx_helper import select_model_inputs_outputs @@ -18,6 +16,7 @@ FloatTensorType, DoubleTensorType, Int64TensorType) from ..onnx_tools.onnx2py_helper import _var_as_dict, onnx_model_opsets +from ..onnx_tools.exports.skl2onnx_helper import add_onnx_graph from ..onnxrt import OnnxInference @@ -286,76 +285,10 @@ def onnx_converter(self): mapped to the first *scikit-learn* parent it can find. 
""" - def copy_inout(inout, scope, new_name): - shape = [s.dim_value for s in inout.type.tensor_type.shape.dim] - value_info = helper.make_tensor_value_info( - new_name, inout.type.tensor_type.elem_type, shape) - return value_info - - def clean_variable_name(name, scope): - return scope.get_unique_variable_name(name) - - def clean_operator_name(name, scope): - return scope.get_unique_operator_name(name) - - def clean_initializer_name(name, scope): - return scope.get_unique_variable_name(name) - def converter(scope, operator, container, onnx_model=None): op = operator.raw_operator - onx = onnx_model or op.onnxrt_.obj - graph = onx.graph - name_mapping = {} - node_mapping = {} - for node in graph.node: - name = node.name - if name is not None: - node_mapping[node.name] = clean_initializer_name( - node.name, scope) - for o in node.input: - name_mapping[o] = clean_variable_name(o, scope) - for o in node.output: - name_mapping[o] = clean_variable_name(o, scope) - for o in graph.initializer: - name_mapping[o.name] = clean_operator_name(o.name, scope) - - inputs = [copy_inout(o, scope, name_mapping[o.name]) - for o in graph.input] - outputs = [copy_inout(o, scope, name_mapping[o.name]) - for o in graph.output] - - for inp, to in zip(operator.inputs, inputs): - n = helper.make_node('Identity', [inp.onnx_name], [to.name], - name=clean_operator_name('Identity', scope)) - container.nodes.append(n) - - for inp, to in zip(outputs, operator.outputs): - n = helper.make_node('Identity', [inp.name], [to.onnx_name], - name=clean_operator_name('Identity', scope)) - container.nodes.append(n) - - for node in graph.node: - n = helper.make_node( - node.op_type, - [name_mapping[o] for o in node.input], - [name_mapping[o] for o in node.output], - name=node_mapping[node.name] if node.name else None, - domain=node.domain if node.domain else None) - n.attribute.extend(node.attribute) # pylint: disable=E1101 - container.nodes.append(n) - - for o in graph.initializer: - as_str = 
o.SerializeToString() - tensor = TensorProto() - tensor.ParseFromString(as_str) - tensor.name = name_mapping[o.name] - container.initializers.append(tensor) - - # opset - for oimp in onx.opset_import: - container.node_domain_version_pair_sets.add( - (oimp.domain, oimp.version)) + add_onnx_graph(scope, operator, container, onx) return converter From 03c550c2c30d493acdb5272efa88f12d052e362b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 29 Aug 2021 16:50:49 +0200 Subject: [PATCH 07/13] add runtime numba --- _doc/examples/plot_speedup_pca.py | 128 ++++++++++++++++++ _doc/sphinxdoc/source/api/tools.rst | 14 +- .../test_onnx_speedup_transformer.py | 46 ++++++- .../onnx_tools/exports/skl2onnx_helper.py | 5 +- mlprodict/sklapi/onnx_speed_up.py | 16 ++- mlprodict/sklapi/onnx_transformer.py | 13 +- 6 files changed, 204 insertions(+), 18 deletions(-) create mode 100644 _doc/examples/plot_speedup_pca.py diff --git a/_doc/examples/plot_speedup_pca.py b/_doc/examples/plot_speedup_pca.py new file mode 100644 index 000000000..35c800f6b --- /dev/null +++ b/_doc/examples/plot_speedup_pca.py @@ -0,0 +1,128 @@ +""" +.. _l-speedup-pca: + +Speed up scikit-learn inference with ONNX +========================================= + +Is it possible to make :epkg:`scikit-learn` faster with ONNX? +That's question this example tries to answer. The scenario is +is the following: + +* a model is trained +* it is converted into ONNX for inference +* it selects a runtime to compute the prediction + +The following runtime are tested: + +* `python`: python runtime for ONNX +* `onnxruntime1`: :epkg:`onnxruntime` +* `numpy`: the ONNX graph is converted into numpy code +* `numba`: the numpy code is accelerated with :epkg:`numba`. + +.. contents:: + :local: + +PCA ++++ + +Let's look at a very simple model, a PCA. 
+""" + +import numpy +from pandas import DataFrame +import matplotlib.pyplot as plt +from sklearn.datasets import make_regression +from sklearn.decomposition import PCA +from pyquickhelper.pycode.profiling import profile +from mlprodict.sklapi import OnnxSpeedUpTransformer +from mlprodict.tools.speed_measure import measure_time +from tqdm import tqdm + +################################ +# Data and models to test. + +data, _ = make_regression(1000, n_features=20) +data = data.astype(numpy.float32) +models = [ + ('sklearn', PCA(n_components=10)), + ('python', OnnxSpeedUpTransformer( + PCA(n_components=10), runtime='python')), + ('onnxruntime1', OnnxSpeedUpTransformer( + PCA(n_components=10), runtime='onnxruntime1')), + ('numpy', OnnxSpeedUpTransformer( + PCA(n_components=10), runtime='numpy')), + ('numba', OnnxSpeedUpTransformer( + PCA(n_components=10), runtime='numba'))] + +################################# +# Training. + +for name, model in tqdm(models): + model.fit(data) + +################################# +# Profiling of runtime `onnxruntime1`. + + +def fct(): + for i in range(1000): + models[2][1].transform(data) + + +res = profile(fct, pyinst_format="text") +print(res[1]) + + +################################# +# Profiling of runtime `numpy`. + +def fct(): + for i in range(1000): + models[3][1].transform(data) + + +res = profile(fct, pyinst_format="text") +print(res[1]) + +################################# +# The class *OnnxSpeedUpTransformer* converts the PCA +# into ONNX and then converts it into a python code using +# *numpy*. The code is the following. + +print(models[3][1].numpy_code_) + +################################# +# Benchmark. + +bench = [] +for name, model in tqdm(models): + for size in (1, 10, 100, 1000, 10000, 100000, 200000): + data, _ = make_regression(size, n_features=20) + data = data.astype(numpy.float32) + + # We run it a first time (numba compiles + # the function during the first execution). 
+ model.transform(data) + res = measure_time( + "model.transform(data)", div_by_number=True, + context={'data': data, 'model': model}) + res['name'] = name + res['size'] = size + bench.append(res) + +df = DataFrame(bench) +piv = df.pivot("size", "name", "average") +piv + +###################################### +# Graph. +fig, ax = plt.subplots(1, 2, figsize=(10, 4)) +piv.plot(title="Speedup PCA with ONNX (lower better)", + logx=True, logy=True, ax=ax[0]) +piv2 = piv.copy() +for c in piv2.columns: + piv2[c] /= piv['sklearn'] +print(piv2) +piv2.plot(title="baseline=scikit-learn (lower better)", + logx=True, logy=True, ax=ax[1]) +plt.show() diff --git a/_doc/sphinxdoc/source/api/tools.rst b/_doc/sphinxdoc/source/api/tools.rst index ea4b12ae4..9ec143f5c 100644 --- a/_doc/sphinxdoc/source/api/tools.rst +++ b/_doc/sphinxdoc/source/api/tools.rst @@ -189,31 +189,24 @@ The last example summarizes all the possibilities. from mlprodict.onnx_tools.onnx2py_helper import guess_proto_dtype_name from mlprodict.onnx_tools.onnx2py_helper import guess_dtype - def guess_initial_types0(t): return guess_initial_types(numpy.array([[0, 1]], dtype=t), None) - def guess_initial_types1(t): return guess_initial_types(None, [('X', t)]) - def guess_initial_types_mlprodict0(t): return guess_initial_types_mlprodict(numpy.array([[0, 1]], dtype=t), None) - def guess_initial_types_mlprodict1(t): return guess_initial_types_mlprodict(None, [('X', t)]) - def _guess_type_proto1(t): return _guess_type_proto(t, [None, 4]) - def _guess_numpy_type1(t): return _guess_numpy_type(t, [None, 4]) - fcts = [guess_initial_types0, guess_initial_types1, guess_data_type, guess_numpy_type, guess_proto_type, guess_tensor_type, @@ -221,7 +214,7 @@ The last example summarizes all the possibilities. 
_guess_numpy_type1, guess_initial_types_mlprodict0, guess_initial_types_mlprodict1, - guess_numpy_type_from_string, + guess_numpy_type_from_string, guess_numpy_type_from_dtype, guess_proto_dtype_name, guess_dtype] @@ -246,3 +239,8 @@ The last example summarizes all the possibilities. print() for e in errors: print(e) + +skl2onnx +======== + +.. autosignature:: mlprodict.onnx_tools.exports.skl2onnx_helper.add_onnx_graph diff --git a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py index db7e5491d..7a2a93c72 100644 --- a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py +++ b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py @@ -5,7 +5,7 @@ import pickle import unittest from logging import getLogger -# import numpy as np +import numpy # import pandas # from sklearn.pipeline import make_pipeline from sklearn.decomposition import PCA @@ -51,6 +51,17 @@ def test_speedup_transform32_numpy(self): spd.fit(X) spd.assert_almost_equal(X, decimal=5) + def test_speedup_transform32_numba(self): + data = load_iris() + X, _ = data.data, data.target + X = X.astype(numpy.float32) + spd = OnnxSpeedUpTransformer( + PCA(), target_opset=self.opset(), + runtime="numba") + spd.fit(X) + spd.assert_almost_equal(X, decimal=5) + self.assertIn("CPUDispatch", str(spd.onnxrt_.func)) + def test_speedup_transform64(self): data = load_iris() X, _ = data.data, data.target @@ -107,6 +118,26 @@ def test_speedup_transform64_numpy_pickle(self): got = spd2.raw_transform(X) self.assertEqualArray(expected, got) + def test_speedup_transform64_numba_pickle(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False, + runtime="numba") + spd.fit(X) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.transform(X) + got = spd2.transform(X) + self.assertEqualArray(expected, got) + expected = 
spd.raw_transform(X) + got = spd2.raw_transform(X) + self.assertEqualArray(expected, got) + def test_speedup_transform64_onnx(self): data = load_iris() X, _ = data.data, data.target @@ -132,6 +163,19 @@ def test_speedup_transform64_onnx_numpy(self): got = oinf.run({'X': X})['variable'] self.assertEqualArray(expected, got) + def test_speedup_transform64_onnx_numba(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False, + runtime='numba') + spd.fit(X) + expected = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + if __name__ == '__main__': unittest.main() diff --git a/mlprodict/onnx_tools/exports/skl2onnx_helper.py b/mlprodict/onnx_tools/exports/skl2onnx_helper.py index 74b1c1742..41aafde1c 100644 --- a/mlprodict/onnx_tools/exports/skl2onnx_helper.py +++ b/mlprodict/onnx_tools/exports/skl2onnx_helper.py @@ -2,7 +2,6 @@ @file @brief Helpers to run examples created with :epkg:`sklearn-onnx`. """ -import numpy from onnx import helper, TensorProto @@ -28,7 +27,9 @@ def _clean_initializer_name(name, scope): def add_onnx_graph(scope, operator, container, onx): """ Adds a whole ONNX graph to an existing one following - :epkg:`skl2onnx` API. + :epkg:`skl2onnx` API assuming this ONNX graph implements + an `operator `_. 
:param scope: scope (to get unique names) :param operator: operator diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py index 5c49ddd89..c864acb35 100644 --- a/mlprodict/sklapi/onnx_speed_up.py +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -42,6 +42,16 @@ class _OnnxPipelineStepSpeedUp(BaseEstimator, OnnxOperatorMixin): :param target_opset: targetted ONNX opset :param conv_options: options for covnersions, see @see fn to_onnx + Attributes created by method *fit*: + + * `estimator_`: cloned and trained version of *estimator* + * `onnxrt_`: objet of type @see cl OnnxInference, + :epkg:`sklearn:preprocessing:FunctionTransformer` + * `numpy_code_`: python code equivalent to the inference + method if the runtime is `'numpy'` or `'numba'` + * `onnx_io_names_`: dictionary, additional information + if the runtime is `'numpy'` or `'numba'` + .. versionadded:: 0.7 """ @@ -80,7 +90,7 @@ def _build_onnx_runtime(self, onx): :param runtime: runtime type (see @see cl OnnxInference) :return: instance of @see cl OnnxInference """ - if self.runtime == 'numpy': + if self.runtime in ('numpy', 'numba'): return self._build_onnx_runtime_numpy(onx) tr = OnnxTransformer( onx, runtime=self.runtime, @@ -153,6 +163,10 @@ def _build_onnx_runtime_numpy_compile(self, opsets): "Unable to guess which function is the one, names=%r." 
"" % list(sorted(names))) fct = loc[names[0]] + if self.runtime == 'numba': + from numba import jit + jitter = jit(nopython=True) + fct = jitter(fct) cl = FunctionTransformer(fct, accept_sparse=True) cl.op_version = opsets[''] return cl diff --git a/mlprodict/sklapi/onnx_transformer.py b/mlprodict/sklapi/onnx_transformer.py index eb6586918..9a969022c 100644 --- a/mlprodict/sklapi/onnx_transformer.py +++ b/mlprodict/sklapi/onnx_transformer.py @@ -104,6 +104,7 @@ def fit(self, X=None, y=None, **fit_params): onx.SerializeToString() if updated else self.onnx_bytes) self.onnxrt_ = OnnxInference(onnx_bytes, runtime=self.runtime) self.inputs_ = self.onnxrt_.input_names + self.inputs_shape_types_ = self.onnxrt_.input_names_shapes_types return self def _check_arrays(self, inputs): @@ -111,8 +112,8 @@ def _check_arrays(self, inputs): Ensures that double floats are converted into single floats if *enforce_float32* is True or raises an exception. """ - sht = self.onnxrt_.input_names_shapes_types if hasattr( - self, "onnxrt_") else None + has = hasattr(self, "onnxrt_") + sht = self.inputs_shape_types_ if has else None if sht is not None and len(sht) < len(inputs): raise RuntimeError( # pragma: no cover "Unexpected number of inputs {} > {} (expected).".format( @@ -123,7 +124,7 @@ def _check_arrays(self, inputs): if v.dtype == numpy.float64 and self.enforce_float32: inputs[k] = v.astype(numpy.float32) continue - if not hasattr(self, "onnxrt_"): + if not has: continue exp = sht[i] if exp[1] != ('?', ) and exp[1][1:] != v.shape[1:]: @@ -158,11 +159,11 @@ def transform(self, X, y=None, **inputs): raise AttributeError( # pragma: no cover "Transform OnnxTransformer must be fit first.") rt_inputs = {} - if isinstance(X, pandas.DataFrame): + if isinstance(X, numpy.ndarray): + rt_inputs[self.inputs_[0]] = X + elif isinstance(X, pandas.DataFrame): for c in X.columns: rt_inputs[c] = X[c] - elif isinstance(X, numpy.ndarray): - rt_inputs[self.inputs_[0]] = X elif isinstance(X, dict) and 
len(inputs) == 0: for k, v in X.items(): rt_inputs[k] = v From 0510974cda7a7beaf68195105c57c0814ce8eb66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sun, 29 Aug 2021 19:27:09 +0200 Subject: [PATCH 08/13] speed up regressor --- _doc/examples/plot_time_tree_ensemble.py | 2 +- .../source/_exts/generate_automated_pages.py | 2 +- _doc/sphinxdoc/source/conf.py | 2 +- .../ut_sklapi/test_onnx_speedup_regressor.py | 201 ++++++++++ .../_onnx_export_templates_numpy.tmpl | 2 +- mlprodict/onnx_tools/onnx_export.py | 370 +++++++++++------- mlprodict/sklapi/__init__.py | 2 +- mlprodict/sklapi/onnx_speed_up.py | 106 ++++- 8 files changed, 539 insertions(+), 148 deletions(-) create mode 100644 _unittests/ut_sklapi/test_onnx_speedup_regressor.py diff --git a/_doc/examples/plot_time_tree_ensemble.py b/_doc/examples/plot_time_tree_ensemble.py index 728317282..77d55d57c 100644 --- a/_doc/examples/plot_time_tree_ensemble.py +++ b/_doc/examples/plot_time_tree_ensemble.py @@ -34,7 +34,7 @@ from xgboost import XGBClassifier from lightgbm import LGBMClassifier from onnxruntime import InferenceSession -from sklearn.experimental import enable_hist_gradient_boosting +from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=W0611 from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble import RandomForestClassifier from sklearn.datasets import make_classification diff --git a/_doc/sphinxdoc/source/_exts/generate_automated_pages.py b/_doc/sphinxdoc/source/_exts/generate_automated_pages.py index 704860655..c57620407 100644 --- a/_doc/sphinxdoc/source/_exts/generate_automated_pages.py +++ b/_doc/sphinxdoc/source/_exts/generate_automated_pages.py @@ -7,7 +7,7 @@ from pandas import DataFrame, read_excel, read_csv, concat, Series from sklearn.exceptions import ConvergenceWarning from sklearn.utils._testing import ignore_warnings -from sklearn.experimental import enable_hist_gradient_boosting +from sklearn.experimental import 
enable_hist_gradient_boosting # pylint: disable=W0611 from sklearn.ensemble import AdaBoostRegressor, HistGradientBoostingRegressor from sklearn.gaussian_process import GaussianProcessClassifier import sphinx diff --git a/_doc/sphinxdoc/source/conf.py b/_doc/sphinxdoc/source/conf.py index a3e010876..0755113b8 100644 --- a/_doc/sphinxdoc/source/conf.py +++ b/_doc/sphinxdoc/source/conf.py @@ -3,7 +3,7 @@ import os import alabaster from pyquickhelper.helpgen.default_conf import set_sphinx_variables -from sklearn.experimental import enable_hist_gradient_boosting +from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=W0611 try: from mlprodict.onnx_conv import register_converters, register_rewritten_operators except ImportError as e: diff --git a/_unittests/ut_sklapi/test_onnx_speedup_regressor.py b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py new file mode 100644 index 000000000..12bf008aa --- /dev/null +++ b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py @@ -0,0 +1,201 @@ +""" +@brief test log(time=4s) +""" +from io import BytesIO +import pickle +import unittest +from logging import getLogger +import numpy +# import pandas +# from sklearn.pipeline import make_pipeline +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import LinearRegression +from sklearn.datasets import load_iris +from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlprodict.sklapi import OnnxSpeedUpRegressor +from mlprodict.tools import get_opset_number_from_onnx +from mlprodict.onnx_conv import to_onnx +from mlprodict.onnxrt import OnnxInference + + +class TestOnnxSpeedUpRegressor(ExtTestCase): + + def setUp(self): + logger = getLogger('skl2onnx') + logger.disabled = True + + def opset(self): + return get_opset_number_from_onnx() + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor32(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), 
target_opset=self.opset()) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor32_onnxruntime(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + runtime="onnxruntime1") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor32_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + runtime="numpy") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor32_numba(self): + data = load_iris() + X, y = data.data, data.target + X = X.astype(numpy.float32) + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + runtime="numba") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + self.assertIn("CPUDispatch", str(spd.onnxrt_.func)) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + spd.assert_almost_equal(X) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_op_version(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + opset = spd.op_version + self.assertGreater(self.opset(), opset['']) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = 
BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_numpy_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime="numpy") + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_numba_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime="numba") + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_onnx(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + expected = spd.predict(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_onnx_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + 
LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numpy') + spd.fit(X, y) + expected = spd.predict(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_onnx_numba(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numba') + spd.fit(X, y) + # print(spd.numpy_code_) + expected = spd.predict(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + +if __name__ == '__main__': + # TestOnnxSpeedUpRegressor().test_speedup_regressor64_onnx_numba() + unittest.main() diff --git a/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl b/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl index 4f178fbb3..e40ff659e 100644 --- a/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl +++ b/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl @@ -30,6 +30,6 @@ def numpy_{{name}}({{ inputs[0][0] }}{% for i in inputs[1:]: %}, {{ i[0] }}{% en # nodes {% for node in nodes: %} - {{ make_numpy_code(target_opset, **node) }}{% endfor %} + {{ make_numpy_code(target_opset, indent=" ", **node) }}{% endfor %} return {{ outputs[0][0] }}{% for o in outputs[1:]: %}, {{ o[0] }}{% endfor %} diff --git a/mlprodict/onnx_tools/onnx_export.py b/mlprodict/onnx_tools/onnx_export.py index 316216482..b0c3c53dd 100644 --- a/mlprodict/onnx_tools/onnx_export.py +++ b/mlprodict/onnx_tools/onnx_export.py @@ -96,10 +96,7 @@ def simplify(name, kind, force=True): return "\n".join(rows) -def make_numpy_code(opset, name=None, op_type=None, domain='', - inputs=None, outputs=None, attributes=None, - used=None, context=None, mark_inits=None, - **unused): +class NumpyCode: """ Converts an ONNX operators into :epkg:`numpy` code. 
@@ -114,48 +111,67 @@ def make_numpy_code(opset, name=None, op_type=None, domain='', list of nodes taking *k* as input :param context: whole context :param mark_inits: marks initializer as replaced + :param indent: indentation of the second line and following :return: code as str """ - def make_sure_inputs(n, m=None): + + def __init__(self, opset, name=None, op_type=None, domain='', + inputs=None, outputs=None, attributes=None, + used=None, context=None, mark_inits=None, + indent="", **unused): + self.opset = opset + self.name = name + self.op_type = op_type + self.domain = domain + self.inputs = inputs + self.outputs = outputs + self.attributes = attributes + self.used = used + self.context = context + self.mark_inits = mark_inits + self.unused = unused + self.indent = indent + + def _make_sure_inputs(self, n, m=None): if m is None: m = n - if len(inputs) < n: + if len(self.inputs) < n: raise RuntimeError( # pragma: no cover "Expecting at least %d inputs for operator %r not %r." % ( - n, op_type, inputs)) - if len(inputs) > m: + n, self.op_type, self.inputs)) + if len(self.inputs) > m: raise RuntimeError( # pragma: no cover "Expecting at most %d inputs for operator %r not %r." % ( - m, op_type, inputs)) + m, self.op_type, self.inputs)) - def make_sure_opsets(mi, ma=None): - if mi is not None and opset < mi: + def _make_sure_opsets(self, mi, ma=None): + if mi is not None and self.opset < mi: raise RuntimeError( # pragma: no cover "Cannot convert operator type %d, opset %d < %d." % ( - op_type, opset, mi)) - if ma is not None and opset > ma: + self.op_type, self.opset, mi)) + if ma is not None and self.opset > ma: raise RuntimeError( # pragma: no cover "Cannot convert operator type %d, opset %d > %d." 
% ( - op_type, opset, mi)) + self.op_type, self.opset, mi)) - def getat(name, defval=None): - for n, val in attributes: + def _getat(self, name, defval=None): + for n, val in self.attributes: if name == n: return val return defval - def simplify(name, kind): + def _simplify(self, name, kind): value = None - if (used is not None and name in used and - len(used[name]) == 1 and context is not None): - inits = context['initializers_dict'] + if (self.used is not None and name in self.used and + len(self.used[name]) == 1 and self.context is not None): + inits = self.context['initializers_dict'] if name in inits: v = inits[name] if v.dtype == numpy.int64 and v.size < 10: value = v - if name not in mark_inits: - mark_inits[name] = [] - mark_inits[name].append(v) + if name not in self.mark_inits: + self.mark_inits[name] = [] + self.mark_inits[name].append(v) if kind == 'tuple': if value is None: @@ -172,7 +188,8 @@ def simplify(name, kind): raise NotImplementedError( "Unknown scenario to simplify (%r)." % kind) - def make_tuple(val): + @staticmethod + def _make_tuple(val): if isinstance(val, tuple): return val if isinstance(val, list): @@ -184,124 +201,199 @@ def make_tuple(val): raise NotImplementedError( "Unable to convert %r into tuple." % val) - if domain != '': + def make_numpy_code(self): + + if self.domain == '': + return self._make_numpy_code_onnx() + + if self.domain == 'ai.onnx.ml': + return self._make_numpy_code_onnxml() + raise NotImplementedError( - "Unable to convert any operator from domain %r." 
% domain) - - binary_ops = dict(Add='+', Sub='-', Div='/', Mul='*', MatMul='@', - Pow='**') - unary_ops = dict(Neg='-') - unary_ops_ = dict(Sqrt='** 0.5') - - outs = ", ".join(outputs) - - if op_type in binary_ops: - make_sure_inputs(2) - return "%s = %s %s %s" % (outs, inputs[0], binary_ops[op_type], inputs[1]) - - if op_type in unary_ops: - make_sure_inputs(1) - return "%s = %s %s" % (outs, unary_ops[op_type], inputs[0]) - - if op_type in unary_ops_: - make_sure_inputs(1) - return "%s = %s %s" % (outs, inputs[0], unary_ops_[op_type]) - - if op_type == 'ArgMin': - make_sure_opsets(12) - make_sure_inputs(1) - axis = getat('axis', 0) - keepdims = getat('keepdims', 1) - select_last_index = getat('keepdims', 0) - return ( - "%s = argmin_use_numpy_select_last_index(" - "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( - outs, inputs[0], axis, keepdims, select_last_index)) - - if op_type == 'Concat': - axis = getat('axis', 0) - return "%s = numpy.concatenate([%s], %s)" % (outs, ", ".join(inputs), axis) - - if op_type == 'Max': - return "%s = numpy.maximum(%s)" % (outs, ", ".join(inputs)) - - if op_type == 'Gather': - make_sure_opsets(11) - make_sure_inputs(2) - axis = getat('axis', 0) - return "%s = numpy.take(%s, %s, axis=%s)" % ( - outs, inputs[0], simplify(inputs[1], 'list'), axis) - - if op_type == 'Gemm': - make_sure_inputs(2, 3) - alpha = getat('alpha', 0.) - transA = getat('transA', 0) - transB = getat('transB', 0) - ta = ".T" if transA in ('1', 1, True) else "" - tb = ".T" if transB in ('1', 1, True) else "" - if len(inputs) == 2: - return "%s = %s%s @ %s%s * %s" % ( - outs, inputs[0], ta, inputs[1], tb, alpha) - beta = getat('beta', 0.) 
- return "%s = %s%s @ %s%s * %s + %s * %s" % ( - outs, inputs[0], ta, inputs[1], tb, alpha, inputs[2], beta) - - if op_type == 'Identity': - return "%s = %s" % (outs, inputs[0]) - - if op_type == 'ReduceProd': - make_sure_inputs(1) - axes = getat('axes', "[0]") - keepdims = getat('keepdims', 0) - return "%s = %s.prod(axis=tuple(%s), keepdims=%s)" % ( - outs, inputs[0], axes, keepdims) - - if op_type == 'ReduceSum': - make_sure_opsets(11) - make_sure_inputs(2) - keepdims = getat('keepdims', 0) - return "%s = %s.sum(axis=%s, keepdims=%s)" % ( - outs, inputs[0], simplify(inputs[1], 'tuple'), keepdims) - - if op_type == 'ReduceSumSquare': - make_sure_inputs(1) - axes = getat('axes', "[0]") - keepdims = getat('keepdims', 0) - return "%s = (%s ** 2).sum(axis=tuple(%s), keepdims=%s)" % ( - outs, inputs[0], axes, keepdims) - - if op_type == 'Reshape': - make_sure_inputs(2) - return "%s = %s.reshape(%s)" % ( - outs, inputs[0], simplify(inputs[1], 'tuple')) - - if op_type == 'Shape': - make_sure_inputs(1) - return "%s = numpy.array(%s.shape, dtype=numpy.int64)" % (outs, inputs[0]) - - if op_type == 'Slice': - return "%s = make_slice(%s)" % (outs, ", ".join(inputs)) - - if op_type == 'Squeeze': - make_sure_opsets(13) - make_sure_inputs(2) - return "%s = numpy.squeeze(%s, axis=%s)" % ( - outs, inputs[0], simplify(inputs[1], 'tuple')) - - if op_type == 'Transpose': - make_sure_inputs(1) - perm = getat('perm', None) - return "%s = numpy.transpose(%s, axes=%s)" % ( - outs, inputs[0], make_tuple(perm)) + "Unable to convert any operator from domain %r." 
% self.domain) + + def _make_numpy_code_onnx(self): + + binary_ops = dict(Add='+', Sub='-', Div='/', Mul='*', MatMul='@', + Pow='**') + unary_ops = dict(Neg='-') + unary_ops_ = dict(Sqrt='** 0.5') + + outs = ", ".join(self.outputs) + + if self.op_type in binary_ops: + self._make_sure_inputs(2) + return "%s = %s %s %s" % ( + outs, self.inputs[0], binary_ops[self.op_type], + self.inputs[1]) + + if self.op_type in unary_ops: + self._make_sure_inputs(1) + return "%s = %s %s" % ( + outs, unary_ops[self.op_type], self.inputs[0]) + + if self.op_type in unary_ops_: + self._make_sure_inputs(1) + return "%s = %s %s" % ( + outs, self.inputs[0], unary_ops_[self.op_type]) + + if self.op_type == 'ArgMin': + self._make_sure_opsets(12) + self._make_sure_inputs(1) + axis = self._getat('axis', 0) + keepdims = self._getat('keepdims', 1) + select_last_index = self._getat('keepdims', 0) + return ( + "%s = argmin_use_numpy_select_last_index(" + "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( + outs, self.inputs[0], axis, keepdims, select_last_index)) + + if self.op_type == 'Concat': + axis = self._getat('axis', 0) + return "%s = numpy.concatenate([%s], %s)" % ( + outs, ", ".join(self.inputs), axis) + + if self.op_type == 'Max': + return "%s = numpy.maximum(%s)" % (outs, ", ".join(self.inputs)) + + if self.op_type == 'Gather': + self._make_sure_opsets(11) + self._make_sure_inputs(2) + axis = self._getat('axis', 0) + return "%s = numpy.take(%s, %s, axis=%s)" % ( + outs, self.inputs[0], + self._simplify(self.inputs[1], 'list'), axis) + + if self.op_type == 'Gemm': + self._make_sure_inputs(2, 3) + alpha = self._getat('alpha', 0.) + transA = self._getat('transA', 0) + transB = self._getat('transB', 0) + ta = ".T" if transA in ('1', 1, True) else "" + tb = ".T" if transB in ('1', 1, True) else "" + if len(self.inputs) == 2: + return "%s = %s%s @ %s%s * %s" % ( + outs, self.inputs[0], ta, self.inputs[1], tb, alpha) + beta = self._getat('beta', 0.) 
+ return "%s = %s%s @ %s%s * %s + %s * %s" % ( + outs, self.inputs[0], ta, self.inputs[1], tb, alpha, + self.inputs[2], beta) + + if self.op_type == 'Identity': + return "%s = %s" % (outs, self.inputs[0]) + + if self.op_type == 'ReduceProd': + self._make_sure_inputs(1) + axes = self._getat('axes', "[0]") + keepdims = self._getat('keepdims', 0) + return "%s = %s.prod(axis=tuple(%s), keepdims=%s)" % ( + outs, self.inputs[0], axes, keepdims) + + if self.op_type == 'ReduceSum': + self._make_sure_opsets(11) + self._make_sure_inputs(2) + keepdims = self._getat('keepdims', 0) + return "%s = %s.sum(axis=%s, keepdims=%s)" % ( + outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'), + keepdims) + + if self.op_type == 'ReduceSumSquare': + self._make_sure_inputs(1) + axes = self._getat('axes', "[0]") + keepdims = self._getat('keepdims', 0) + return "%s = (%s ** 2).sum(axis=tuple(%s), keepdims=%s)" % ( + outs, self.inputs[0], axes, keepdims) + + if self.op_type == 'Reshape': + self._make_sure_inputs(2) + return "%s = %s.reshape(%s)" % ( + outs, self.inputs[0], self.inputs[1]) + + if self.op_type == 'Shape': + self._make_sure_inputs(1) + return "%s = numpy.array(%s.shape, dtype=numpy.int64)" % ( + outs, self.inputs[0]) + + if self.op_type == 'Slice': + return "%s = make_slice(%s)" % (outs, ", ".join(self.inputs)) + + if self.op_type == 'Squeeze': + self._make_sure_opsets(13) + self._make_sure_inputs(2) + return "%s = numpy.squeeze(%s, axis=%s)" % ( + outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple')) + + if self.op_type == 'Transpose': + self._make_sure_inputs(1) + perm = self._getat('perm', None) + return "%s = numpy.transpose(%s, axes=%s)" % ( + outs, self.inputs[0], self._make_tuple(perm)) + + if self.op_type == 'Unsqueeze': + self._make_sure_opsets(13) + self._make_sure_inputs(2) + return "%s = numpy.expand_dims(%s, axis=%s)" % ( + outs, self.inputs[0], + self._simplify(self.inputs[1], 'tuple')) + + raise NotImplementedError( + "Unable to convert operator 
type %r name=%r." % ( + self.op_type, name)) + + def _make_numpy_code_onnxml(self): + outs = ", ".join(self.outputs) + + if self.op_type == 'LinearRegressor': + self._make_sure_inputs(1) + coefficients = self._getat('coefficients', None) + intercepts = self._getat('intercepts', None) + post_transform = self._getat('post_transform', 'NONE') + targets = self._getat('targets', 1) + if post_transform != "NONE": + raise NotImplementedError( + "Conversion of operator %r with post_transform %r " + "is not implemented." % (self.op_type, post_transform)) + rows = [ + "coefs = numpy.array(%s, dtype=numpy.float32)." + "reshape((-1, %d))" % (coefficients, targets), + "%sinter = numpy.array(%s, dtype=numpy.float32)." + "reshape((-1, %d))" % (self.indent, intercepts, targets), + "%s%s = %s @ coefs + inter" % ( + self.indent, outs, self.inputs[0])] + return "\n".join(rows) + + raise NotImplementedError( + "Unable to convert operator type %r name=%r (onnxml)." % ( + self.op_type, name)) - if op_type == 'Unsqueeze': - make_sure_opsets(13) - make_sure_inputs(2) - return "%s = numpy.expand_dims(%s, axis=%s)" % ( - outs, inputs[0], simplify(inputs[1], 'tuple')) - raise NotImplementedError( - "Unable to convert operator type %r name=%r." % (op_type, name)) +def make_numpy_code(opset, name=None, op_type=None, domain='', + inputs=None, outputs=None, attributes=None, + used=None, context=None, mark_inits=None, + indent="", **unused): + """ + Converts an ONNX operators into :epkg:`numpy` code. 
+ + :param opset: target opset for the conversion (usually unused) + :param name: node name + :param op_type: operator type + :param domain: domain + :param inputs: inputs + :param outputs: outputs + :param attributes: attributes + :param used: dictionary `{k: v}`, + list of nodes taking *k* as input + :param context: whole context + :param mark_inits: marks initializer as replaced + :param indent: indentation of the second line and following + :return: code as str + """ + cl = NumpyCode( + opset=opset, name=name, op_type=op_type, domain=domain, + inputs=inputs, outputs=outputs, attributes=attributes, + used=used, context=context, mark_inits=mark_inits, + indent=indent, **unused) + return cl.make_numpy_code() def export_template(model_onnx, templates, opset=None, verbose=True, name=None, diff --git a/mlprodict/sklapi/__init__.py b/mlprodict/sklapi/__init__.py index 6b3564b36..c771d6130 100644 --- a/mlprodict/sklapi/__init__.py +++ b/mlprodict/sklapi/__init__.py @@ -5,4 +5,4 @@ """ from .onnx_pipeline import OnnxPipeline from .onnx_transformer import OnnxTransformer -from .onnx_speed_up import OnnxSpeedUpTransformer +from .onnx_speed_up import OnnxSpeedUpTransformer, OnnxSpeedUpRegressor diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py index c864acb35..c32bb9b86 100644 --- a/mlprodict/sklapi/onnx_speed_up.py +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -12,10 +12,13 @@ import numpy from numpy.testing import assert_almost_equal from onnx import helper, load -from sklearn.base import BaseEstimator, TransformerMixin, clone +from sklearn.base import ( + BaseEstimator, clone, + TransformerMixin, RegressorMixin) from sklearn.preprocessing import FunctionTransformer from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin from ..tools.code_helper import print_code +from ..tools.asv_options_helper import get_opset_number_from_onnx from ..onnx_tools.onnx_export import export2numpy from ..onnx_tools.onnx2py_helper import ( 
onnx_model_opsets, _var_as_dict, to_skl2onnx_type) @@ -168,7 +171,7 @@ def _build_onnx_runtime_numpy_compile(self, opsets): jitter = jit(nopython=True) fct = jitter(fct) cl = FunctionTransformer(fct, accept_sparse=True) - cl.op_version = opsets[''] + cl.op_version = opsets.get('', get_opset_number_from_onnx()) return cl def __getstate__(self): @@ -194,7 +197,7 @@ def __setstate__(self, state): opsets = onnx_model_opsets(model_onnx) self.onnxrt_ = self._build_onnx_runtime_numpy_compile(opsets) - def fit(self, X, *args, **kwargs): + def fit(self, X, y=None, sample_weight=None, **kwargs): """ Fits the estimator, converts to ONNX. @@ -204,7 +207,18 @@ def fit(self, X, *args, **kwargs): """ if not hasattr(self, 'estimator_'): self.estimator_ = clone(self.estimator) - self.estimator_.fit(X, *args, **kwargs) + if y is None: + if sample_weight is None: + self.estimator_.fit(X, **kwargs) + else: + self.estimator_.fit(X, sample_weight=sample_weight, **kwargs) + else: + if sample_weight is None: + self.estimator_.fit(X, y, **kwargs) + else: + self.estimator_.fit( + X, y, sample_weight=sample_weight, **kwargs) + if self.enforce_float32: X = X.astype(numpy.float32) self.onnx_ = self._to_onnx(self.estimator_, X).SerializeToString() @@ -297,6 +311,16 @@ class OnnxSpeedUpTransformer(TransformerMixin, :param target_opset: targetted ONNX opset :param conv_options: conversion options, see @see fn to_onnx + Attributes created by method *fit*: + + * `estimator_`: cloned and trained version of *estimator* + * `onnxrt_`: objet of type @see cl OnnxInference, + :epkg:`sklearn:preprocessing:FunctionTransformer` + * `numpy_code_`: python code equivalent to the inference + method if the runtime is `'numpy'` or `'numba'` + * `onnx_io_names_`: dictionary, additional information + if the runtime is `'numpy'` or `'numba'` + .. 
versionadded:: 0.7 """ @@ -343,3 +367,77 @@ def assert_almost_equal(self, X, **kwargs): expected = self.raw_transform(X) got = self.transform(X) assert_almost_equal(expected, got, **kwargs) + + +class OnnxSpeedUpRegressor(RegressorMixin, + _OnnxPipelineStepSpeedUp): + """ + Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`. + + :param estimator: estimator to train + :param enforce_float32: boolean + :epkg:`onnxruntime` only supports *float32*, + :epkg:`scikit-learn` usually uses double floats, this parameter + ensures that every array of double floats is converted into + single floats + :param runtime: string, defined the runtime to use + as described in @see cl OnnxInference. + :param target_opset: targetted ONNX opset + :param conv_options: conversion options, see @see fn to_onnx + + Attributes created by method *fit*: + + * `estimator_`: cloned and trained version of *estimator* + * `onnxrt_`: objet of type @see cl OnnxInference, + :epkg:`sklearn:preprocessing:FunctionTransformer` + * `numpy_code_`: python code equivalent to the inference + method if the runtime is `'numpy'` or `'numba'` + * `onnx_io_names_`: dictionary, additional information + if the runtime is `'numpy'` or `'numba'` + + .. versionadded:: 0.7 + """ + + def __init__(self, estimator, runtime='python', enforce_float32=True, + target_opset=None, conv_options=None): + _OnnxPipelineStepSpeedUp.__init__( + self, estimator, runtime=runtime, enforce_float32=enforce_float32, + target_opset=target_opset, conv_options=conv_options) + + def fit(self, X, y, sample_weight=None): # pylint: disable=W0221 + """ + Trains based estimator. + """ + if sample_weight is None: + _OnnxPipelineStepSpeedUp.fit(self, X, y) + else: + _OnnxPipelineStepSpeedUp.fit( + self, X, y, sample_weight=sample_weight) + return self + + def predict(self, X): + """ + Transforms with *ONNX*. 
+ + :param X: features + :return: transformed features + """ + return self.onnxrt_.transform(X) + + def raw_predict(self, X): + """ + Transforms with *scikit-learn*. + + :param X: features + :return: transformed features + """ + return self.estimator_.predict(X) + + def assert_almost_equal(self, X, **kwargs): + """ + Checks that ONNX and scikit-learn produces the same + outputs. + """ + expected = numpy.squeeze(self.raw_predict(X)) + got = numpy.squeeze(self.predict(X)) + assert_almost_equal(expected, got, **kwargs) From 3fe46c008839ea93ad17526ad773922e2a1380ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Mon, 30 Aug 2021 12:27:38 +0200 Subject: [PATCH 09/13] add parameter nopython --- .../ut_sklapi/test_onnx_speedup_regressor.py | 15 + mlprodict/onnx_tools/exports/numpy_helper.py | 304 ++++++++++++++ .../onnx_tools/exports/tf2onnx_helper.py | 80 ++++ mlprodict/onnx_tools/onnx_export.py | 389 +----------------- mlprodict/sklapi/onnx_speed_up.py | 20 +- 5 files changed, 419 insertions(+), 389 deletions(-) diff --git a/_unittests/ut_sklapi/test_onnx_speedup_regressor.py b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py index 12bf008aa..34409ca40 100644 --- a/_unittests/ut_sklapi/test_onnx_speedup_regressor.py +++ b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py @@ -195,6 +195,21 @@ def test_speedup_regressor64_onnx_numba(self): got = oinf.run({'X': X})['variable'] self.assertEqualArray(expected, got) + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_onnx_numba_python(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + expected = spd.predict(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + if __name__ == '__main__': # 
TestOnnxSpeedUpRegressor().test_speedup_regressor64_onnx_numba() diff --git a/mlprodict/onnx_tools/exports/numpy_helper.py b/mlprodict/onnx_tools/exports/numpy_helper.py index 51df0f028..1d22e81ed 100644 --- a/mlprodict/onnx_tools/exports/numpy_helper.py +++ b/mlprodict/onnx_tools/exports/numpy_helper.py @@ -44,3 +44,307 @@ def argmin_use_numpy_select_last_index( if keepdims: result = numpy.expand_dims(result, axis) return result.astype(numpy.int64) + + +class NumpyCode: + """ + Converts an ONNX operators into :epkg:`numpy` code. + + :param opset: target opset for the conversion (usually unused) + :param name: node name + :param op_type: operator type + :param domain: domain + :param inputs: inputs + :param outputs: outputs + :param attributes: attributes + :param used: dictionary `{k: v}`, + list of nodes taking *k* as input + :param context: whole context + :param mark_inits: marks initializer as replaced + :param indent: indentation of the second line and following + :return: code as str + """ + + def __init__(self, opset, name=None, op_type=None, domain='', + inputs=None, outputs=None, attributes=None, + used=None, context=None, mark_inits=None, + indent="", **unused): + self.opset = opset + self.name = name + self.op_type = op_type + self.domain = domain + self.inputs = inputs + self.outputs = outputs + self.attributes = attributes + self.used = used + self.context = context + self.mark_inits = mark_inits + self.unused = unused + self.indent = indent + + def _make_sure_inputs(self, n, m=None): + if m is None: + m = n + if len(self.inputs) < n: + raise RuntimeError( # pragma: no cover + "Expecting at least %d inputs for operator %r not %r." % ( + n, self.op_type, self.inputs)) + if len(self.inputs) > m: + raise RuntimeError( # pragma: no cover + "Expecting at most %d inputs for operator %r not %r." 
% ( + m, self.op_type, self.inputs)) + + def _make_sure_opsets(self, mi, ma=None): + if mi is not None and self.opset < mi: + raise RuntimeError( # pragma: no cover + "Cannot convert operator type %d, opset %d < %d." % ( + self.op_type, self.opset, mi)) + if ma is not None and self.opset > ma: + raise RuntimeError( # pragma: no cover + "Cannot convert operator type %d, opset %d > %d." % ( + self.op_type, self.opset, mi)) + + def _getat(self, name, defval=None): + for n, val in self.attributes: + if name == n: + return val + return defval + + def _simplify(self, name, kind): + value = None + if (self.used is not None and name in self.used and + len(self.used[name]) == 1 and self.context is not None): + inits = self.context['initializers_dict'] + if name in inits: + v = inits[name] + if v.dtype == numpy.int64 and v.size < 10: + value = v + if name not in self.mark_inits: + self.mark_inits[name] = [] + self.mark_inits[name].append(v) + + if kind == 'tuple': + if value is None: + return "tuple(%s)" % name + if value.size == 1: + return str(tuple(value)[0]) + return str(tuple(value)) + elif kind == 'list': + if value is None: + return name + if len(value.shape) == 0: + return str(value) + return str(list(value)) + raise NotImplementedError( + "Unknown scenario to simplify (%r)." % kind) + + @staticmethod + def _make_tuple(val): + if isinstance(val, tuple): + return val + if isinstance(val, list): + return tuple(val) + if isinstance(val, int): + return val + if isinstance(val, str): + return tuple(map(int, val.strip('()[]').replace(" ", "").split(","))) + raise NotImplementedError( + "Unable to convert %r into tuple." % val) + + def make_numpy_code(self): + """ + Main method, returns the python code for a given + operator. + """ + if self.domain == '': + return self._make_numpy_code_onnx() + + if self.domain == 'ai.onnx.ml': + return self._make_numpy_code_onnxml() + + raise NotImplementedError( + "Unable to convert any operator from domain %r." 
% self.domain) + + def _make_numpy_code_onnx(self): + + binary_ops = dict(Add='+', Sub='-', Div='/', Mul='*', MatMul='@', + Pow='**') + unary_ops = dict(Neg='-') + unary_ops_ = dict(Sqrt='** 0.5') + + outs = ", ".join(self.outputs) + + if self.op_type in binary_ops: + self._make_sure_inputs(2) + return "%s = %s %s %s" % ( + outs, self.inputs[0], binary_ops[self.op_type], + self.inputs[1]) + + if self.op_type in unary_ops: + self._make_sure_inputs(1) + return "%s = %s %s" % ( + outs, unary_ops[self.op_type], self.inputs[0]) + + if self.op_type in unary_ops_: + self._make_sure_inputs(1) + return "%s = %s %s" % ( + outs, self.inputs[0], unary_ops_[self.op_type]) + + if self.op_type == 'ArgMin': + self._make_sure_opsets(12) + self._make_sure_inputs(1) + axis = self._getat('axis', 0) + keepdims = self._getat('keepdims', 1) + select_last_index = self._getat('keepdims', 0) + return ( + "%s = argmin_use_numpy_select_last_index(" + "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( + outs, self.inputs[0], axis, keepdims, select_last_index)) + + if self.op_type == 'Concat': + axis = self._getat('axis', 0) + return "%s = numpy.concatenate([%s], %s)" % ( + outs, ", ".join(self.inputs), axis) + + if self.op_type == 'Max': + return "%s = numpy.maximum(%s)" % (outs, ", ".join(self.inputs)) + + if self.op_type == 'Gather': + self._make_sure_opsets(11) + self._make_sure_inputs(2) + axis = self._getat('axis', 0) + return "%s = numpy.take(%s, %s, axis=%s)" % ( + outs, self.inputs[0], + self._simplify(self.inputs[1], 'list'), axis) + + if self.op_type == 'Gemm': + self._make_sure_inputs(2, 3) + alpha = self._getat('alpha', 0.) + transA = self._getat('transA', 0) + transB = self._getat('transB', 0) + ta = ".T" if transA in ('1', 1, True) else "" + tb = ".T" if transB in ('1', 1, True) else "" + if len(self.inputs) == 2: + return "%s = %s%s @ %s%s * %s" % ( + outs, self.inputs[0], ta, self.inputs[1], tb, alpha) + beta = self._getat('beta', 0.) 
+ return "%s = %s%s @ %s%s * %s + %s * %s" % ( + outs, self.inputs[0], ta, self.inputs[1], tb, alpha, + self.inputs[2], beta) + + if self.op_type == 'Identity': + return "%s = %s" % (outs, self.inputs[0]) + + if self.op_type == 'ReduceProd': + self._make_sure_inputs(1) + axes = self._getat('axes', "[0]") + keepdims = self._getat('keepdims', 0) + return "%s = %s.prod(axis=tuple(%s), keepdims=%s)" % ( + outs, self.inputs[0], axes, keepdims) + + if self.op_type == 'ReduceSum': + self._make_sure_opsets(11) + self._make_sure_inputs(2) + keepdims = self._getat('keepdims', 0) + return "%s = %s.sum(axis=%s, keepdims=%s)" % ( + outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'), + keepdims) + + if self.op_type == 'ReduceSumSquare': + self._make_sure_inputs(1) + axes = self._getat('axes', "[0]") + keepdims = self._getat('keepdims', 0) + return "%s = (%s ** 2).sum(axis=tuple(%s), keepdims=%s)" % ( + outs, self.inputs[0], axes, keepdims) + + if self.op_type == 'Reshape': + self._make_sure_inputs(2) + simp = self._simplify(self.inputs[1], 'tuple') + return "%s = %s.reshape(%s)" % ( + outs, self.inputs[0], simp) + + if self.op_type == 'Shape': + self._make_sure_inputs(1) + return "%s = numpy.array(%s.shape, dtype=numpy.int64)" % ( + outs, self.inputs[0]) + + if self.op_type == 'Slice': + return "%s = make_slice(%s)" % (outs, ", ".join(self.inputs)) + + if self.op_type == 'Squeeze': + self._make_sure_opsets(13) + self._make_sure_inputs(2) + return "%s = numpy.squeeze(%s, axis=%s)" % ( + outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple')) + + if self.op_type == 'Transpose': + self._make_sure_inputs(1) + perm = self._getat('perm', None) + return "%s = numpy.transpose(%s, axes=%s)" % ( + outs, self.inputs[0], self._make_tuple(perm)) + + if self.op_type == 'Unsqueeze': + self._make_sure_opsets(13) + self._make_sure_inputs(2) + return "%s = numpy.expand_dims(%s, axis=%s)" % ( + outs, self.inputs[0], + self._simplify(self.inputs[1], 'tuple')) + + raise 
NotImplementedError( # pragma: no cover + "Unable to convert operator type %r name=%r." % ( + self.op_type, self.name)) + + def _make_numpy_code_onnxml(self): + outs = ", ".join(self.outputs) + + if self.op_type == 'LinearRegressor': + self._make_sure_inputs(1) + coefficients = self._getat('coefficients', None) + intercepts = self._getat('intercepts', None) + post_transform = self._getat('post_transform', 'NONE') + targets = self._getat('targets', 1) + if post_transform != "NONE": + raise NotImplementedError( + "Conversion of operator %r with post_transform %r " + "is not implemented." % (self.op_type, post_transform)) + rows = [ + "coefs = numpy.array(%s, dtype=numpy.float32)." + "reshape((-1, %d))" % (coefficients, targets), + "%sinter = numpy.array(%s, dtype=numpy.float32)." + "reshape((-1, %d))" % (self.indent, intercepts, targets), + "%s%s = %s @ coefs + inter" % ( + self.indent, outs, self.inputs[0])] + return "\n".join(rows) + + raise NotImplementedError( # pragma: no cover + "Unable to convert operator type %r name=%r (onnxml)." % ( + self.op_type, self.name)) + + +def make_numpy_code(opset, name=None, op_type=None, domain='', + inputs=None, outputs=None, attributes=None, + used=None, context=None, mark_inits=None, + indent="", **unused): + """ + Converts an ONNX operators into :epkg:`numpy` code. 
+ + :param opset: target opset for the conversion (usually unused) + :param name: node name + :param op_type: operator type + :param domain: domain + :param inputs: inputs + :param outputs: outputs + :param attributes: attributes + :param used: dictionary `{k: v}`, + list of nodes taking *k* as input + :param context: whole context + :param mark_inits: marks initializer as replaced + :param indent: indentation of the second line and following + :return: code as str + """ + cl = NumpyCode( + opset=opset, name=name, op_type=op_type, domain=domain, + inputs=inputs, outputs=outputs, attributes=attributes, + used=used, context=context, mark_inits=mark_inits, + indent=indent, **unused) + return cl.make_numpy_code() diff --git a/mlprodict/onnx_tools/exports/tf2onnx_helper.py b/mlprodict/onnx_tools/exports/tf2onnx_helper.py index c4b51dc7f..9d912c816 100644 --- a/mlprodict/onnx_tools/exports/tf2onnx_helper.py +++ b/mlprodict/onnx_tools/exports/tf2onnx_helper.py @@ -17,6 +17,86 @@ _make_name_id = 0 +def make_tf2onnx_code(opset, name=None, op_type=None, domain='', + inputs=None, outputs=None, attributes=None, + used=None, context=None, mark_inits=None, indent=8, + **unused): + """ + Converts an ONNX operators into :epkg:`tf2onnx` code. 
+ + :param opset: target opset for the conversion (usually unused) + :param name: node name + :param op_type: operator type + :param domain: domain + :param inputs: inputs + :param outputs: outputs + :param attributes: attributes + :param used: dictionary `{k: v}`, + list of nodes taking *k* as input + :param context: whole context + :param mark_inits: marks initializer as replaced + :param indent: number of spaces to add on the second + and following rows + :return: code as str + """ + def simplify(name, kind, force=True): + value = None + if (used is not None and name in used and + len(used[name]) == 1 and context is not None): + inits = context['initializers_dict'] + if name in inits: + v = inits[name] + if v.dtype == numpy.int64 and v.size < 10: + value = v + if name not in mark_inits: + mark_inits[name] = [] + mark_inits[name].append(v) + + if value is None and force: + inits = context['initializers_dict'] + value = inits[name] + if kind == 'list': + if value is None: + return name + if len(value.shape) == 0: + return str(value) + return str(list(value)) + raise NotImplementedError( + "Unknown scenario to simplify (%r)." % kind) + + rows = [] + if op_type == 'Unsqueeze': + if len(inputs) == 2: + rows.append( + "node = GraphBuilder(ctx).make_unsqueeze(" + "{'data': varx[%r], 'axes': %s}, return_node=True)" + "" % (inputs[0], simplify(inputs[1], 'list'))) + else: + raise NotImplementedError( # pragma: no cover + "Unable to create code for operator %r (opset <= 12)" + "." 
% op_type) + else: + if len(attributes) > 0: + attributes_str = ", ".join("%s=%s" % (k, v) for k, v in attributes) + attr = ", attr=dict(%s)" % attributes_str + else: + attr = "" + rows.append( + "inputs = [%s]" % ", ".join("varx[%r]" % n for n in inputs)) + sdomain = '' if domain == '' else ("domain=%r, " % domain) + rows.append( + "node = ctx.make_node(%r, inputs=inputs%s, %s" + "name=make_name(%r))" % ( + op_type, attr, sdomain, name)) + for i, n in enumerate(outputs): + rows.append("varx[%r] = node.output[%d]" % (n, i)) + if indent > 0: + sind = " " * indent + for i in range(1, len(rows)): + rows[i] = sind + rows[i] + return "\n".join(rows) + + def make_name(name): "Creates a unique name." global _make_name_id # pylint: disable=W0603 diff --git a/mlprodict/onnx_tools/onnx_export.py b/mlprodict/onnx_tools/onnx_export.py index b0c3c53dd..2e44de6cb 100644 --- a/mlprodict/onnx_tools/onnx_export.py +++ b/mlprodict/onnx_tools/onnx_export.py @@ -14,386 +14,8 @@ _var_as_dict, guess_proto_dtype, guess_proto_dtype_name) from .onnx_export_templates import ( get_onnx_template, get_tf2onnx_template, get_numpy_template) - - -def make_tf2onnx_code(opset, name=None, op_type=None, domain='', - inputs=None, outputs=None, attributes=None, - used=None, context=None, mark_inits=None, indent=8, - **unused): - """ - Converts an ONNX operators into :epkg:`tf2onnx` code. 
- - :param opset: target opset for the conversion (usually unused) - :param name: node name - :param op_type: operator type - :param domain: domain - :param inputs: inputs - :param outputs: outputs - :param attributes: attributes - :param used: dictionary `{k: v}`, - list of nodes taking *k* as input - :param context: whole context - :param mark_inits: marks initializer as replaced - :param indent: number of spaces to add on the second - and following rows - :return: code as str - """ - def simplify(name, kind, force=True): - value = None - if (used is not None and name in used and - len(used[name]) == 1 and context is not None): - inits = context['initializers_dict'] - if name in inits: - v = inits[name] - if v.dtype == numpy.int64 and v.size < 10: - value = v - if name not in mark_inits: - mark_inits[name] = [] - mark_inits[name].append(v) - - if value is None and force: - inits = context['initializers_dict'] - value = inits[name] - if kind == 'list': - if value is None: - return name - if len(value.shape) == 0: - return str(value) - return str(list(value)) - raise NotImplementedError( - "Unknown scenario to simplify (%r)." % kind) - - rows = [] - if op_type == 'Unsqueeze': - if len(inputs) == 2: - rows.append( - "node = GraphBuilder(ctx).make_unsqueeze(" - "{'data': varx[%r], 'axes': %s}, return_node=True)" - "" % (inputs[0], simplify(inputs[1], 'list'))) - else: - raise NotImplementedError( # pragma: no cover - "Unable to create code for operator %r (opset <= 12)" - "." 
% op_type) - else: - if len(attributes) > 0: - attributes_str = ", ".join("%s=%s" % (k, v) for k, v in attributes) - attr = ", attr=dict(%s)" % attributes_str - else: - attr = "" - rows.append( - "inputs = [%s]" % ", ".join("varx[%r]" % n for n in inputs)) - sdomain = '' if domain == '' else ("domain=%r, " % domain) - rows.append( - "node = ctx.make_node(%r, inputs=inputs%s, %s" - "name=make_name(%r))" % ( - op_type, attr, sdomain, name)) - for i, n in enumerate(outputs): - rows.append("varx[%r] = node.output[%d]" % (n, i)) - if indent > 0: - sind = " " * indent - for i in range(1, len(rows)): - rows[i] = sind + rows[i] - return "\n".join(rows) - - -class NumpyCode: - """ - Converts an ONNX operators into :epkg:`numpy` code. - - :param opset: target opset for the conversion (usually unused) - :param name: node name - :param op_type: operator type - :param domain: domain - :param inputs: inputs - :param outputs: outputs - :param attributes: attributes - :param used: dictionary `{k: v}`, - list of nodes taking *k* as input - :param context: whole context - :param mark_inits: marks initializer as replaced - :param indent: indentation of the second line and following - :return: code as str - """ - - def __init__(self, opset, name=None, op_type=None, domain='', - inputs=None, outputs=None, attributes=None, - used=None, context=None, mark_inits=None, - indent="", **unused): - self.opset = opset - self.name = name - self.op_type = op_type - self.domain = domain - self.inputs = inputs - self.outputs = outputs - self.attributes = attributes - self.used = used - self.context = context - self.mark_inits = mark_inits - self.unused = unused - self.indent = indent - - def _make_sure_inputs(self, n, m=None): - if m is None: - m = n - if len(self.inputs) < n: - raise RuntimeError( # pragma: no cover - "Expecting at least %d inputs for operator %r not %r." 
% ( - n, self.op_type, self.inputs)) - if len(self.inputs) > m: - raise RuntimeError( # pragma: no cover - "Expecting at most %d inputs for operator %r not %r." % ( - m, self.op_type, self.inputs)) - - def _make_sure_opsets(self, mi, ma=None): - if mi is not None and self.opset < mi: - raise RuntimeError( # pragma: no cover - "Cannot convert operator type %d, opset %d < %d." % ( - self.op_type, self.opset, mi)) - if ma is not None and self.opset > ma: - raise RuntimeError( # pragma: no cover - "Cannot convert operator type %d, opset %d > %d." % ( - self.op_type, self.opset, mi)) - - def _getat(self, name, defval=None): - for n, val in self.attributes: - if name == n: - return val - return defval - - def _simplify(self, name, kind): - value = None - if (self.used is not None and name in self.used and - len(self.used[name]) == 1 and self.context is not None): - inits = self.context['initializers_dict'] - if name in inits: - v = inits[name] - if v.dtype == numpy.int64 and v.size < 10: - value = v - if name not in self.mark_inits: - self.mark_inits[name] = [] - self.mark_inits[name].append(v) - - if kind == 'tuple': - if value is None: - return "tuple(%s)" % name - if value.size == 1: - return str(tuple(value)[0]) - return str(tuple(value)) - elif kind == 'list': - if value is None: - return name - if len(value.shape) == 0: - return str(value) - return str(list(value)) - raise NotImplementedError( - "Unknown scenario to simplify (%r)." % kind) - - @staticmethod - def _make_tuple(val): - if isinstance(val, tuple): - return val - if isinstance(val, list): - return tuple(val) - if isinstance(val, int): - return val - if isinstance(val, str): - return tuple(map(int, val.strip('()[]').replace(" ", "").split(","))) - raise NotImplementedError( - "Unable to convert %r into tuple." 
% val) - - def make_numpy_code(self): - - if self.domain == '': - return self._make_numpy_code_onnx() - - if self.domain == 'ai.onnx.ml': - return self._make_numpy_code_onnxml() - - raise NotImplementedError( - "Unable to convert any operator from domain %r." % self.domain) - - def _make_numpy_code_onnx(self): - - binary_ops = dict(Add='+', Sub='-', Div='/', Mul='*', MatMul='@', - Pow='**') - unary_ops = dict(Neg='-') - unary_ops_ = dict(Sqrt='** 0.5') - - outs = ", ".join(self.outputs) - - if self.op_type in binary_ops: - self._make_sure_inputs(2) - return "%s = %s %s %s" % ( - outs, self.inputs[0], binary_ops[self.op_type], - self.inputs[1]) - - if self.op_type in unary_ops: - self._make_sure_inputs(1) - return "%s = %s %s" % ( - outs, unary_ops[self.op_type], self.inputs[0]) - - if self.op_type in unary_ops_: - self._make_sure_inputs(1) - return "%s = %s %s" % ( - outs, self.inputs[0], unary_ops_[self.op_type]) - - if self.op_type == 'ArgMin': - self._make_sure_opsets(12) - self._make_sure_inputs(1) - axis = self._getat('axis', 0) - keepdims = self._getat('keepdims', 1) - select_last_index = self._getat('keepdims', 0) - return ( - "%s = argmin_use_numpy_select_last_index(" - "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( - outs, self.inputs[0], axis, keepdims, select_last_index)) - - if self.op_type == 'Concat': - axis = self._getat('axis', 0) - return "%s = numpy.concatenate([%s], %s)" % ( - outs, ", ".join(self.inputs), axis) - - if self.op_type == 'Max': - return "%s = numpy.maximum(%s)" % (outs, ", ".join(self.inputs)) - - if self.op_type == 'Gather': - self._make_sure_opsets(11) - self._make_sure_inputs(2) - axis = self._getat('axis', 0) - return "%s = numpy.take(%s, %s, axis=%s)" % ( - outs, self.inputs[0], - self._simplify(self.inputs[1], 'list'), axis) - - if self.op_type == 'Gemm': - self._make_sure_inputs(2, 3) - alpha = self._getat('alpha', 0.) 
- transA = self._getat('transA', 0) - transB = self._getat('transB', 0) - ta = ".T" if transA in ('1', 1, True) else "" - tb = ".T" if transB in ('1', 1, True) else "" - if len(self.inputs) == 2: - return "%s = %s%s @ %s%s * %s" % ( - outs, self.inputs[0], ta, self.inputs[1], tb, alpha) - beta = self._getat('beta', 0.) - return "%s = %s%s @ %s%s * %s + %s * %s" % ( - outs, self.inputs[0], ta, self.inputs[1], tb, alpha, - self.inputs[2], beta) - - if self.op_type == 'Identity': - return "%s = %s" % (outs, self.inputs[0]) - - if self.op_type == 'ReduceProd': - self._make_sure_inputs(1) - axes = self._getat('axes', "[0]") - keepdims = self._getat('keepdims', 0) - return "%s = %s.prod(axis=tuple(%s), keepdims=%s)" % ( - outs, self.inputs[0], axes, keepdims) - - if self.op_type == 'ReduceSum': - self._make_sure_opsets(11) - self._make_sure_inputs(2) - keepdims = self._getat('keepdims', 0) - return "%s = %s.sum(axis=%s, keepdims=%s)" % ( - outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'), - keepdims) - - if self.op_type == 'ReduceSumSquare': - self._make_sure_inputs(1) - axes = self._getat('axes', "[0]") - keepdims = self._getat('keepdims', 0) - return "%s = (%s ** 2).sum(axis=tuple(%s), keepdims=%s)" % ( - outs, self.inputs[0], axes, keepdims) - - if self.op_type == 'Reshape': - self._make_sure_inputs(2) - return "%s = %s.reshape(%s)" % ( - outs, self.inputs[0], self.inputs[1]) - - if self.op_type == 'Shape': - self._make_sure_inputs(1) - return "%s = numpy.array(%s.shape, dtype=numpy.int64)" % ( - outs, self.inputs[0]) - - if self.op_type == 'Slice': - return "%s = make_slice(%s)" % (outs, ", ".join(self.inputs)) - - if self.op_type == 'Squeeze': - self._make_sure_opsets(13) - self._make_sure_inputs(2) - return "%s = numpy.squeeze(%s, axis=%s)" % ( - outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple')) - - if self.op_type == 'Transpose': - self._make_sure_inputs(1) - perm = self._getat('perm', None) - return "%s = numpy.transpose(%s, axes=%s)" % 
( - outs, self.inputs[0], self._make_tuple(perm)) - - if self.op_type == 'Unsqueeze': - self._make_sure_opsets(13) - self._make_sure_inputs(2) - return "%s = numpy.expand_dims(%s, axis=%s)" % ( - outs, self.inputs[0], - self._simplify(self.inputs[1], 'tuple')) - - raise NotImplementedError( - "Unable to convert operator type %r name=%r." % ( - self.op_type, name)) - - def _make_numpy_code_onnxml(self): - outs = ", ".join(self.outputs) - - if self.op_type == 'LinearRegressor': - self._make_sure_inputs(1) - coefficients = self._getat('coefficients', None) - intercepts = self._getat('intercepts', None) - post_transform = self._getat('post_transform', 'NONE') - targets = self._getat('targets', 1) - if post_transform != "NONE": - raise NotImplementedError( - "Conversion of operator %r with post_transform %r " - "is not implemented." % (self.op_type, post_transform)) - rows = [ - "coefs = numpy.array(%s, dtype=numpy.float32)." - "reshape((-1, %d))" % (coefficients, targets), - "%sinter = numpy.array(%s, dtype=numpy.float32)." - "reshape((-1, %d))" % (self.indent, intercepts, targets), - "%s%s = %s @ coefs + inter" % ( - self.indent, outs, self.inputs[0])] - return "\n".join(rows) - - raise NotImplementedError( - "Unable to convert operator type %r name=%r (onnxml)." % ( - self.op_type, name)) - - -def make_numpy_code(opset, name=None, op_type=None, domain='', - inputs=None, outputs=None, attributes=None, - used=None, context=None, mark_inits=None, - indent="", **unused): - """ - Converts an ONNX operators into :epkg:`numpy` code. 
- - :param opset: target opset for the conversion (usually unused) - :param name: node name - :param op_type: operator type - :param domain: domain - :param inputs: inputs - :param outputs: outputs - :param attributes: attributes - :param used: dictionary `{k: v}`, - list of nodes taking *k* as input - :param context: whole context - :param mark_inits: marks initializer as replaced - :param indent: indentation of the second line and following - :return: code as str - """ - cl = NumpyCode( - opset=opset, name=name, op_type=op_type, domain=domain, - inputs=inputs, outputs=outputs, attributes=attributes, - used=used, context=context, mark_inits=mark_inits, - indent=indent, **unused) - return cl.make_numpy_code() +from .exports.numpy_helper import make_numpy_code +from .exports.tf2onnx_helper import make_tf2onnx_code def export_template(model_onnx, templates, opset=None, verbose=True, name=None, @@ -498,7 +120,8 @@ def rename_name(name): # node nodes = [] for node in model_onnx.graph.node: - for i in node.input: + for i_raw_name in node.input: + i = rename_name(i_raw_name) if i not in used: used[i] = [] used[i].append(node) @@ -553,7 +176,7 @@ def rename_name(name): context['skip_inits'] = {} mark_inits = {} - # final + # First rendering to detect any unused or replaced initializer. template = Template(templates) final = template.render( enumerate=enumerate, sorted=sorted, len=len, @@ -572,6 +195,8 @@ def rename_name(name): skip_inits.add(k) if len(skip_inits) > 0: + # Second rendering if needed when an initializer was replaced + # or removed. context['skip_inits'] = skip_inits # Again with skip_inits. 
final = template.render( diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py index c32bb9b86..050b704f1 100644 --- a/mlprodict/sklapi/onnx_speed_up.py +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -43,7 +43,8 @@ class _OnnxPipelineStepSpeedUp(BaseEstimator, OnnxOperatorMixin): :param runtime: string, defined the runtime to use as described in @see cl OnnxInference. :param target_opset: targetted ONNX opset - :param conv_options: options for covnersions, see @see fn to_onnx + :param conv_options: options for conversions, see @see fn to_onnx + :param nopython: used by :epkg:`numba` jitter Attributes created by method *fit*: @@ -59,13 +60,14 @@ class _OnnxPipelineStepSpeedUp(BaseEstimator, OnnxOperatorMixin): """ def __init__(self, estimator, runtime='python', enforce_float32=True, - target_opset=None, conv_options=None): + target_opset=None, conv_options=None, nopython=True): BaseEstimator.__init__(self) self.estimator = estimator self.runtime = runtime self.enforce_float32 = enforce_float32 self.target_opset = target_opset self.conv_options = conv_options + self.nopython = nopython def _check_fitted_(self): if not hasattr(self, 'onnxrt_'): @@ -168,7 +170,7 @@ def _build_onnx_runtime_numpy_compile(self, opsets): fct = loc[names[0]] if self.runtime == 'numba': from numba import jit - jitter = jit(nopython=True) + jitter = jit(nopython=self.nopython) fct = jitter(fct) cl = FunctionTransformer(fct, accept_sparse=True) cl.op_version = opsets.get('', get_opset_number_from_onnx()) @@ -310,6 +312,7 @@ class OnnxSpeedUpTransformer(TransformerMixin, as described in @see cl OnnxInference. 
:param target_opset: targetted ONNX opset :param conv_options: conversion options, see @see fn to_onnx + :param nopython: used by :epkg:`numba` jitter Attributes created by method *fit*: @@ -325,10 +328,11 @@ class OnnxSpeedUpTransformer(TransformerMixin, """ def __init__(self, estimator, runtime='python', enforce_float32=True, - target_opset=None, conv_options=None): + target_opset=None, conv_options=None, nopython=True): _OnnxPipelineStepSpeedUp.__init__( self, estimator, runtime=runtime, enforce_float32=enforce_float32, - target_opset=target_opset, conv_options=conv_options) + target_opset=target_opset, conv_options=conv_options, + nopython=nopython) def fit(self, X, y=None, sample_weight=None): # pylint: disable=W0221 """ @@ -384,6 +388,7 @@ class OnnxSpeedUpRegressor(RegressorMixin, as described in @see cl OnnxInference. :param target_opset: targetted ONNX opset :param conv_options: conversion options, see @see fn to_onnx + :param nopython: used by :epkg:`numba` jitter Attributes created by method *fit*: @@ -399,10 +404,11 @@ class OnnxSpeedUpRegressor(RegressorMixin, """ def __init__(self, estimator, runtime='python', enforce_float32=True, - target_opset=None, conv_options=None): + target_opset=None, conv_options=None, nopython=True): _OnnxPipelineStepSpeedUp.__init__( self, estimator, runtime=runtime, enforce_float32=enforce_float32, - target_opset=target_opset, conv_options=conv_options) + target_opset=target_opset, conv_options=conv_options, + nopython=nopython) def fit(self, X, y, sample_weight=None): # pylint: disable=W0221 """ From 1634a36ceda233a667b1b2c0b053d526ac88da98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Mon, 30 Aug 2021 17:39:25 +0200 Subject: [PATCH 10/13] support classifiers --- _doc/sphinxdoc/source/api/sklapi.rst | 23 +- _doc/sphinxdoc/source/api/tools.rst | 4 +- .../ut_sklapi/test_onnx_speedup_classifier.py | 225 ++++++++++++++++++ .../_onnx_export_templates_numpy.tmpl | 3 + 
mlprodict/onnx_tools/exports/numpy_helper.py | 176 +++++++++++++- mlprodict/sklapi/__init__.py | 5 +- mlprodict/sklapi/onnx_speed_up.py | 121 +++++++++- mlprodict/sklapi/onnx_transformer.py | 15 +- 8 files changed, 547 insertions(+), 25 deletions(-) create mode 100644 _unittests/ut_sklapi/test_onnx_speedup_classifier.py diff --git a/_doc/sphinxdoc/source/api/sklapi.rst b/_doc/sphinxdoc/source/api/sklapi.rst index 17531e8c8..8df5467cd 100644 --- a/_doc/sphinxdoc/source/api/sklapi.rst +++ b/_doc/sphinxdoc/source/api/sklapi.rst @@ -9,10 +9,10 @@ pipeline. .. contents:: :local: -OnnxSpeedUpTransformer -++++++++++++++++++++++ +OnnxPipeline +++++++++++++ -.. autosignature:: mlprodict.sklapi.onnx_transformer.OnnxSpeedUpTransformer +.. autosignature:: mlprodict.sklapi.onnx_pipeline.OnnxPipeline :members: OnnxTransformer @@ -21,8 +21,19 @@ OnnxTransformer .. autosignature:: mlprodict.sklapi.onnx_transformer.OnnxTransformer :members: -OnnxPipeline -++++++++++++ +SpeedUp scikit-learn pipeline with ONNX ++++++++++++++++++++++++++++++++++++++++ -.. autosignature:: mlprodict.sklapi.onnx_pipeline.OnnxPipeline +These classes wraps an existing pipeline from *scikit-learn* +and replaces the inference (*transform*, *predict*, *predict_proba*) +by another runtime built after the model was converted into ONNX. +See example :ref:`l-b-numpy-numba-ort` for further details. + +.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedUpClassifier + :members: + +.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedUpRegressor + :members: + +.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedUpTransformer :members: diff --git a/_doc/sphinxdoc/source/api/tools.rst b/_doc/sphinxdoc/source/api/tools.rst index 9ec143f5c..277d8310e 100644 --- a/_doc/sphinxdoc/source/api/tools.rst +++ b/_doc/sphinxdoc/source/api/tools.rst @@ -50,8 +50,8 @@ Functions to help understand models or modify them. .. 
autosignature:: mlprodict.testing.script_testing.verify_script -Optimisation -++++++++++++ +Onnx Optimisation ++++++++++++++++++ The following functions reduce the number of ONNX operators in a graph while keeping the same results. The optimized graph diff --git a/_unittests/ut_sklapi/test_onnx_speedup_classifier.py b/_unittests/ut_sklapi/test_onnx_speedup_classifier.py new file mode 100644 index 000000000..d458ddf2d --- /dev/null +++ b/_unittests/ut_sklapi/test_onnx_speedup_classifier.py @@ -0,0 +1,225 @@ +""" +@brief test log(time=5s) +""" +from io import BytesIO +import pickle +import unittest +from logging import getLogger +import numpy +from numba import NumbaWarning +# import pandas +# from sklearn.pipeline import make_pipeline +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import LogisticRegression +from sklearn.datasets import load_iris +from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlprodict.sklapi import OnnxSpeedUpClassifier +from mlprodict.tools import get_opset_number_from_onnx +from mlprodict.onnx_conv import to_onnx +from mlprodict.onnxrt import OnnxInference + + +class TestOnnxSpeedUpClassifier(ExtTestCase): + + def setUp(self): + logger = getLogger('skl2onnx') + logger.disabled = True + + def opset(self): + return get_opset_number_from_onnx() + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier32(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset()) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier32_onnxruntime(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + runtime="onnxruntime1") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier32_numpy(self): 
+ data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + runtime="numpy") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_classifier32_numba(self): + data = load_iris() + X, y = data.data, data.target + X = X.astype(numpy.float32) + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + runtime="numba", nopython=False) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + self.assertIn("CPUDispatch", str(spd.onnxrt_.func)) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + spd.assert_almost_equal(X) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_op_version(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + opset = spd.op_version + self.assertGreater(self.opset(), opset['']) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_numpy_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), 
target_opset=self.opset(), + enforce_float32=False, runtime="numpy") + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_classifier64_numba_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False, runtime="numba", nopython=False) + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_onnx(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + expected_label = spd.predict(X) + expected_proba = spd.predict_proba(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_proba, got['probabilities']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_onnx_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numpy') + spd.fit(X, y) + expected_label = spd.predict(X) + expected_proba = spd.predict_proba(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_proba, 
got['probabilities']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_classifier64_onnx_numba(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + expected_label = spd.predict(X) + expected_proba = spd.predict_proba(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_proba, got['probabilities']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_classifier64_onnx_numba_python(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedUpClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + expected_label = spd.predict(X) + expected_proba = spd.predict_proba(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_proba, got['probabilities']) + self.assertEqualArray(expected_label, got['label']) + + +if __name__ == '__main__': + # TestOnnxSpeedUpClassifier().test_speedup_classifier64_numba_pickle() + unittest.main() diff --git a/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl b/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl index e40ff659e..b16110611 100644 --- a/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl +++ b/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl @@ -1,6 +1,9 @@ import numpy +import scipy.special as scipy_special from mlprodict.onnx_tools.exports.numpy_helper import ( + argmax_use_numpy_select_last_index, argmin_use_numpy_select_last_index, + array_feature_extrator, make_slice) def numpy_{{name}}({{ inputs[0][0] }}{% for 
i in inputs[1:]: %}, {{ i[0] }}{% endfor %}): diff --git a/mlprodict/onnx_tools/exports/numpy_helper.py b/mlprodict/onnx_tools/exports/numpy_helper.py index 1d22e81ed..ae076699f 100644 --- a/mlprodict/onnx_tools/exports/numpy_helper.py +++ b/mlprodict/onnx_tools/exports/numpy_helper.py @@ -27,12 +27,31 @@ def make_slice(data, starts, ends, axes=None, steps=None): return data[slices] +def argmax_use_numpy_select_last_index( + data, axis=0, keepdims=True, select_last_index=False): + """ + Needed or operator `ArgMax`. + """ + if not select_last_index: + result = numpy.argmax(data, axis=axis) + if keepdims and len(result.shape) < len(data.shape): + result = numpy.expand_dims(result, axis) + return result.astype(numpy.int64) + + data = numpy.flip(data, axis) + result = numpy.argmax(data, axis=axis) + result = data.shape[axis] - result - 1 + if keepdims: + result = numpy.expand_dims(result, axis) + return result.astype(numpy.int64) + + def argmin_use_numpy_select_last_index( data, axis=0, keepdims=True, select_last_index=False): """ Needed or operator `ArgMin`. """ - if select_last_index: + if not select_last_index: result = numpy.argmin(data, axis=axis) if keepdims and len(result.shape) < len(data.shape): result = numpy.expand_dims(result, axis) @@ -46,6 +65,31 @@ def argmin_use_numpy_select_last_index( return result.astype(numpy.int64) +def array_feature_extrator(data, indices): + """ + Implementation of operator *ArrayFeatureExtractor* + with :epkg:`numpy`. 
+ """ + if len(indices.shape) == 2 and indices.shape[0] == 1: + index = indices.ravel().tolist() + add = len(index) + elif len(indices.shape) == 1: + index = indices.tolist() + add = len(index) + else: + add = 1 + for s in indices.shape: + add *= s + index = indices.ravel().tolist() + if len(data.shape) == 1: + new_shape = (1, add) + else: + new_shape = list(data.shape[:-1]) + [add] + tem = data[..., index] + res = tem.reshape(new_shape) + return res + + class NumpyCode: """ Converts an ONNX operators into :epkg:`numpy` code. @@ -104,10 +148,20 @@ def _make_sure_opsets(self, mi, ma=None): "Cannot convert operator type %d, opset %d > %d." % ( self.op_type, self.opset, mi)) - def _getat(self, name, defval=None): + def _getat(self, name, defval=None, format=None): + + def f(v): + if format is None: + return v + if format == 'listint' and isinstance(v, str): + return list( + map(int, v.strip('[]').replace(' ', '').split(','))) + raise ValueError( + "Unable to convert %r with format=%r." % (v, format)) + for n, val in self.attributes: if name == n: - return val + return f(val) return defval def _simplify(self, name, kind): @@ -190,16 +244,47 @@ def _make_numpy_code_onnx(self): return "%s = %s %s" % ( outs, self.inputs[0], unary_ops_[self.op_type]) + if self.op_type == 'ArgMax': + self._make_sure_opsets(12) + self._make_sure_inputs(1) + axis = self._getat('axis', 0) + keepdims = self._getat('keepdims', 1) + select_last_index = self._getat('keepdims', 0) + if select_last_index: + return ( + "%s = argmax_use_numpy_select_last_index(" + "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( + outs, self.inputs[0], axis, keepdims, select_last_index)) + if keepdims: + return "%s = numpy.expand_dims(numpy.argmax(%s, axis=%s), -1)" % ( + outs, self.inputs[0], axis) + return "%s = numpy.argmax(%s, axis=%s)" % ( + outs, self.inputs[0], axis) + if self.op_type == 'ArgMin': self._make_sure_opsets(12) self._make_sure_inputs(1) axis = self._getat('axis', 0) keepdims = 
self._getat('keepdims', 1) select_last_index = self._getat('keepdims', 0) - return ( - "%s = argmin_use_numpy_select_last_index(" - "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( - outs, self.inputs[0], axis, keepdims, select_last_index)) + if select_last_index: + return ( + "%s = argmin_use_numpy_select_last_index(" + "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( + outs, self.inputs[0], axis, keepdims, select_last_index)) + if keepdims: + return "%s = numpy.expand_dims(numpy.argmin(%s, axis=%s), -1)" % ( + outs, self.inputs[0], axis) + return "%s = numpy.argmin(%s, axis=%s)" % ( + outs, self.inputs[0], axis) + + if self.op_type == 'Cast': + from ..onnx2py_helper import _elem_type_as_str + self._make_sure_inputs(1) + to = int(self._getat('to', 1)) + dtype = _elem_type_as_str(to) + dtype = {'double': 'float64', 'float': 'float32'}.get(dtype, dtype) + return "%s = %s.astype(numpy.%s)" % (outs, self.inputs[0], dtype) if self.op_type == 'Concat': axis = self._getat('axis', 0) @@ -271,6 +356,12 @@ def _make_numpy_code_onnx(self): if self.op_type == 'Slice': return "%s = make_slice(%s)" % (outs, ", ".join(self.inputs)) + if self.op_type == 'Softmax': + self._make_sure_inputs(1) + axis = self._getat('axis', -1) + return "%s = scipy_special.softmax(%s, axis=%s)" % ( + outs, self.inputs[0], axis) + if self.op_type == 'Squeeze': self._make_sure_opsets(13) self._make_sure_inputs(2) @@ -297,25 +388,90 @@ def _make_numpy_code_onnx(self): def _make_numpy_code_onnxml(self): outs = ", ".join(self.outputs) + if self.op_type == 'ArrayFeatureExtractor': + self._make_sure_inputs(2) + return "%s = array_feature_extrator(%s, %s)" % ( + outs, self.inputs[0], self.inputs[1]) + + if self.op_type == 'LinearClassifier': + multi_class = self._getat('targets', 0) + if multi_class != 0: + raise NotImplementedError( + "Conversion of operator %r with multi_class=%r " + "is not implemented." 
% (self.op_type, multi_class)) + self._make_sure_inputs(1) + coefficients = self._getat('coefficients', None) + intercepts = self._getat('intercepts', None) + post_transform = self._getat( + 'post_transform', 'NONE').strip('"\'b') + classlabels_strings = self._getat('classlabels_strings', None) + if classlabels_strings is not None: + raise NotImplementedError( + "Conversion of operator %r with classlabels_strings=%r " + "is not implemented." % (self.op_type, classlabels_strings)) + classlabels_ints = self._getat( + 'classlabels_ints', None, format="listint") + if classlabels_ints != list(range(len(classlabels_ints))): + raise NotImplementedError( + "Conversion of operator %r with classlabels_ints=%r!=%r " + "is not implemented." % ( + self.op_type, classlabels_ints, + list(range(len(classlabels_ints))))) + targets = len(classlabels_ints) + rows = [ + "coefs = numpy.array(%s, dtype=numpy.float32)." + "reshape((%d, -1)).T" % (coefficients, targets), + "%sinter = numpy.array(%s, dtype=numpy.float32)." + "reshape((-1, %d))" % (self.indent, intercepts, targets)] + + if post_transform == "SOFTMAX": + rows.append( + "%s%s = scipy_special.softmax" + "(%s @ coefs + inter, axis=1)" % ( + self.indent, self.outputs[1], self.inputs[0])) + elif post_transform == 'NONE': + rows.append( + "%s%s = %s @ coefs + inter" % ( + self.indent, self.outputs[1], self.inputs[0])) + elif post_transform != "NONE": + raise NotImplementedError( + "Conversion of operator %r with post_transform=%r " + "is not implemented." 
% (self.op_type, post_transform)) + rows.append("%s%s = numpy.argmax(%s, axis=1)" % ( + self.indent, self.outputs[0], self.outputs[1])) + return "\n".join(rows) + if self.op_type == 'LinearRegressor': self._make_sure_inputs(1) coefficients = self._getat('coefficients', None) intercepts = self._getat('intercepts', None) - post_transform = self._getat('post_transform', 'NONE') + post_transform = self._getat( + 'post_transform', 'NONE').strip('"\'b') targets = self._getat('targets', 1) if post_transform != "NONE": raise NotImplementedError( - "Conversion of operator %r with post_transform %r " + "Conversion of operator %r with post_transform=%r " "is not implemented." % (self.op_type, post_transform)) rows = [ "coefs = numpy.array(%s, dtype=numpy.float32)." - "reshape((-1, %d))" % (coefficients, targets), + "reshape((%d, -1)).T" % (coefficients, targets), "%sinter = numpy.array(%s, dtype=numpy.float32)." "reshape((-1, %d))" % (self.indent, intercepts, targets), "%s%s = %s @ coefs + inter" % ( self.indent, outs, self.inputs[0])] return "\n".join(rows) + if self.op_type == 'Normalizer': + self._make_sure_inputs(1) + post_transform = self._getat('norm', 'MAX').strip('"\'b') + if post_transform == 'L2': + return "%s = %s / (%s ** 2).sum(axis=1) ** 0.5" % ( + outs, self.inputs[0], self.inputs[0]) + if post_transform == 'L1': + post_transform = 'sum' + return "%s = %s / %s.%s(axis=1, keepdims=1)" % ( + outs, self.inputs[0], self.inputs[0], post_transform.lower()) + raise NotImplementedError( # pragma: no cover "Unable to convert operator type %r name=%r (onnxml)." 
% ( self.op_type, self.name)) diff --git a/mlprodict/sklapi/__init__.py b/mlprodict/sklapi/__init__.py index c771d6130..28ae9365f 100644 --- a/mlprodict/sklapi/__init__.py +++ b/mlprodict/sklapi/__init__.py @@ -5,4 +5,7 @@ """ from .onnx_pipeline import OnnxPipeline from .onnx_transformer import OnnxTransformer -from .onnx_speed_up import OnnxSpeedUpTransformer, OnnxSpeedUpRegressor +from .onnx_speed_up import ( + OnnxSpeedUpTransformer, + OnnxSpeedUpRegressor, + OnnxSpeedUpClassifier) diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py index 050b704f1..878a1711c 100644 --- a/mlprodict/sklapi/onnx_speed_up.py +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -11,10 +11,11 @@ from contextlib import redirect_stdout, redirect_stderr import numpy from numpy.testing import assert_almost_equal +import scipy.special as scipy_special from onnx import helper, load from sklearn.base import ( BaseEstimator, clone, - TransformerMixin, RegressorMixin) + TransformerMixin, RegressorMixin, ClassifierMixin) from sklearn.preprocessing import FunctionTransformer from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin from ..tools.code_helper import print_code @@ -23,7 +24,10 @@ from ..onnx_tools.onnx2py_helper import ( onnx_model_opsets, _var_as_dict, to_skl2onnx_type) from ..onnx_tools.exports.numpy_helper import ( - argmin_use_numpy_select_last_index, make_slice) + array_feature_extrator, + argmax_use_numpy_select_last_index, + argmin_use_numpy_select_last_index, + make_slice) from ..onnx_tools.exports.skl2onnx_helper import add_onnx_graph from ..onnx_conv import to_onnx from .onnx_transformer import OnnxTransformer @@ -81,10 +85,9 @@ def _to_onnx(self, fitted_estimator, inputs): :param inputs: example of inputs :return: ONNX """ - opts = self.conv_options or {} return to_onnx( self.estimator_, inputs, target_opset=self.target_opset, - **opts) + options=self.conv_options) def _build_onnx_runtime(self, onx): """ @@ -146,9 +149,12 @@ def 
_build_onnx_runtime_numpy_compile(self, opsets): 'numpy': numpy, 'dict': dict, 'list': list, 'print': print, 'sorted': sorted, 'collections': collections, 'inspect': inspect, - 'helper': helper, + 'helper': helper, 'scipy_special': scipy_special, + 'array_feature_extrator': array_feature_extrator, 'argmin_use_numpy_select_last_index': argmin_use_numpy_select_last_index, + 'argmax_use_numpy_select_last_index': + argmax_use_numpy_select_last_index, 'make_slice': make_slice} out = io.StringIO() err = io.StringIO() @@ -447,3 +453,108 @@ def assert_almost_equal(self, X, **kwargs): expected = numpy.squeeze(self.raw_predict(X)) got = numpy.squeeze(self.predict(X)) assert_almost_equal(expected, got, **kwargs) + + +class OnnxSpeedUpClassifier(ClassifierMixin, + _OnnxPipelineStepSpeedUp): + """ + Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`. + + :param estimator: estimator to train + :param enforce_float32: boolean + :epkg:`onnxruntime` only supports *float32*, + :epkg:`scikit-learn` usually uses double floats, this parameter + ensures that every array of double floats is converted into + single floats + :param runtime: string, defined the runtime to use + as described in @see cl OnnxInference. + :param target_opset: targetted ONNX opset + :param conv_options: conversion options, see @see fn to_onnx + :param nopython: used by :epkg:`numba` jitter + + Attributes created by method *fit*: + + * `estimator_`: cloned and trained version of *estimator* + * `onnxrt_`: objet of type @see cl OnnxInference, + :epkg:`sklearn:preprocessing:FunctionTransformer` + * `numpy_code_`: python code equivalent to the inference + method if the runtime is `'numpy'` or `'numba'` + * `onnx_io_names_`: dictionary, additional information + if the runtime is `'numpy'` or `'numba'` + + .. 
versionadded:: 0.7 + """ + + def __init__(self, estimator, runtime='python', enforce_float32=True, + target_opset=None, conv_options=None, nopython=True): + if conv_options is None: + conv_options = {'zipmap': False} + _OnnxPipelineStepSpeedUp.__init__( + self, estimator, runtime=runtime, enforce_float32=enforce_float32, + target_opset=target_opset, conv_options=conv_options, + nopython=nopython) + + def fit(self, X, y, sample_weight=None): # pylint: disable=W0221 + """ + Trains based estimator. + """ + if sample_weight is None: + _OnnxPipelineStepSpeedUp.fit(self, X, y) + else: + _OnnxPipelineStepSpeedUp.fit( + self, X, y, sample_weight=sample_weight) + return self + + def predict(self, X): + """ + Transforms with *ONNX*. + + :param X: features + :return: transformed features + """ + pred = self.onnxrt_.transform(X) + if isinstance(pred, tuple): + return pred[0] + return pred.iloc[:, 0].values + + def predict_proba(self, X): + """ + Transforms with *ONNX*. + + :param X: features + :return: transformed features + """ + pred = self.onnxrt_.transform(X) + if isinstance(pred, tuple): + return pred[1] + return pred.iloc[:, 1:].values + + def raw_predict(self, X): + """ + Transforms with *scikit-learn*. + + :param X: features + :return: transformed features + """ + return self.estimator_.predict(X) + + def raw_predict_proba(self, X): + """ + Transforms with *scikit-learn*. + + :param X: features + :return: transformed features + """ + return self.estimator_.predict_proba(X) + + def assert_almost_equal(self, X, **kwargs): + """ + Checks that ONNX and scikit-learn produces the same + outputs. 
+ """ + expected = numpy.squeeze(self.raw_predict_proba(X)) + got = numpy.squeeze(self.predict_proba(X)) + assert_almost_equal(expected, got, **kwargs) + expected = numpy.squeeze(self.raw_predict(X)) + got = numpy.squeeze(self.predict(X)) + assert_almost_equal(expected, got, **kwargs) diff --git a/mlprodict/sklapi/onnx_transformer.py b/mlprodict/sklapi/onnx_transformer.py index 9a969022c..475e1ba2d 100644 --- a/mlprodict/sklapi/onnx_transformer.py +++ b/mlprodict/sklapi/onnx_transformer.py @@ -194,7 +194,20 @@ def transform(self, X, y=None, **inputs): names = self.output_name if self.output_name else [ o for o in self.onnxrt_.output_names] - return pandas.DataFrame({k: v for k, v in zip(names, outputs)}) + concat = [] + colnames = [] + for k, v in zip(names, outputs): + if len(v.shape) == 1: + v = v.reshape((-1, 1)) + colnames.append(k) + elif len(v.shape) == 2: + colnames.extend("%s%d" % (k, i) for i in range(v.shape[1])) + else: + raise RuntimeError( # pragma: no cover + "Unexpected shape for results %r: %r." 
% (k, v.shape)) + concat.append(v) + res = numpy.hstack(concat) + return pandas.DataFrame(res, columns=colnames) def fit_transform(self, X, y=None, **inputs): """ From 12e592693604e7fc91c64de62fe10beeb42fa42b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Mon, 30 Aug 2021 18:06:46 +0200 Subject: [PATCH 11/13] fix zipmap issue --- mlprodict/sklapi/onnx_transformer.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/mlprodict/sklapi/onnx_transformer.py b/mlprodict/sklapi/onnx_transformer.py index 475e1ba2d..b57c044ee 100644 --- a/mlprodict/sklapi/onnx_transformer.py +++ b/mlprodict/sklapi/onnx_transformer.py @@ -197,14 +197,27 @@ def transform(self, X, y=None, **inputs): concat = [] colnames = [] for k, v in zip(names, outputs): - if len(v.shape) == 1: - v = v.reshape((-1, 1)) - colnames.append(k) - elif len(v.shape) == 2: + if isinstance(v, numpy.ndarray): + if len(v.shape) == 1: + v = v.reshape((-1, 1)) + colnames.append(k) + elif len(v.shape) == 2: + colnames.extend("%s%d" % (k, i) for i in range(v.shape[1])) + else: + raise RuntimeError( # pragma: no cover + "Unexpected shape for results %r: %r." % (k, v.shape)) + if isinstance(v, list): + if len(v) == 0: + raise RuntimeError( # pragma: no cover + "Output %r is empty." % k) + if not isinstance(v[0], dict): + raise RuntimeError( # pragma: no cover + "Unexpected type for output %r - value=%r." + "" % (k, v[0])) + df = pandas.DataFrame(v) + cols = list(sorted(df.columns)) + v = df[cols].copy().values colnames.extend("%s%d" % (k, i) for i in range(v.shape[1])) - else: - raise RuntimeError( # pragma: no cover - "Unexpected shape for results %r: %r." 
% (k, v.shape)) concat.append(v) res = numpy.hstack(concat) return pandas.DataFrame(res, columns=colnames) From f2be6a53b1b6ebf4f9e5dbc8b6c4d9c44962eb3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 31 Aug 2021 02:28:37 +0200 Subject: [PATCH 12/13] add cluster --- _doc/examples/plot_speedup_pca.py | 14 +- _doc/sphinxdoc/source/api/sklapi.rst | 8 +- .../ut_sklapi/test_onnx_speedup_classifier.py | 32 +-- .../ut_sklapi/test_onnx_speedup_cluster.py | 225 ++++++++++++++++++ .../ut_sklapi/test_onnx_speedup_regressor.py | 70 ++++-- .../test_onnx_speedup_transformer.py | 28 +-- .../_onnx_export_templates_numpy.tmpl | 1 + mlprodict/onnx_tools/exports/numpy_helper.py | 31 +++ mlprodict/sklapi/__init__.py | 7 +- mlprodict/sklapi/onnx_speed_up.py | 140 +++++++++-- 10 files changed, 478 insertions(+), 78 deletions(-) create mode 100644 _unittests/ut_sklapi/test_onnx_speedup_cluster.py diff --git a/_doc/examples/plot_speedup_pca.py b/_doc/examples/plot_speedup_pca.py index 35c800f6b..04dc9289c 100644 --- a/_doc/examples/plot_speedup_pca.py +++ b/_doc/examples/plot_speedup_pca.py @@ -1,5 +1,5 @@ """ -.. _l-speedup-pca: +.. 
_l-speedup-pca: Speed up scikit-learn inference with ONNX ========================================= @@ -34,7 +34,7 @@ from sklearn.datasets import make_regression from sklearn.decomposition import PCA from pyquickhelper.pycode.profiling import profile -from mlprodict.sklapi import OnnxSpeedUpTransformer +from mlprodict.sklapi import OnnxSpeedupTransformer from mlprodict.tools.speed_measure import measure_time from tqdm import tqdm @@ -45,13 +45,13 @@ data = data.astype(numpy.float32) models = [ ('sklearn', PCA(n_components=10)), - ('python', OnnxSpeedUpTransformer( + ('python', OnnxSpeedupTransformer( PCA(n_components=10), runtime='python')), - ('onnxruntime1', OnnxSpeedUpTransformer( + ('onnxruntime1', OnnxSpeedupTransformer( PCA(n_components=10), runtime='onnxruntime1')), - ('numpy', OnnxSpeedUpTransformer( + ('numpy', OnnxSpeedupTransformer( PCA(n_components=10), runtime='numpy')), - ('numba', OnnxSpeedUpTransformer( + ('numba', OnnxSpeedupTransformer( PCA(n_components=10), runtime='numba'))] ################################# @@ -85,7 +85,7 @@ def fct(): print(res[1]) ################################# -# The class *OnnxSpeedUpTransformer* converts the PCA +# The class *OnnxSpeedupTransformer* converts the PCA # into ONNX and then converts it into a python code using # *numpy*. The code is the following. diff --git a/_doc/sphinxdoc/source/api/sklapi.rst b/_doc/sphinxdoc/source/api/sklapi.rst index 8df5467cd..63a9e324f 100644 --- a/_doc/sphinxdoc/source/api/sklapi.rst +++ b/_doc/sphinxdoc/source/api/sklapi.rst @@ -21,7 +21,7 @@ OnnxTransformer .. autosignature:: mlprodict.sklapi.onnx_transformer.OnnxTransformer :members: -SpeedUp scikit-learn pipeline with ONNX +Speedup scikit-learn pipeline with ONNX +++++++++++++++++++++++++++++++++++++++ These classes wraps an existing pipeline from *scikit-learn* @@ -29,11 +29,11 @@ and replaces the inference (*transform*, *predict*, *predict_proba*) by another runtime built after the model was converted into ONNX.
See example :ref:`l-b-numpy-numba-ort` for further details. -.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedUpClassifier +.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedupClassifier :members: -.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedUpRegressor +.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedupRegressor :members: -.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedUpTransformer +.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedupTransformer :members: diff --git a/_unittests/ut_sklapi/test_onnx_speedup_classifier.py b/_unittests/ut_sklapi/test_onnx_speedup_classifier.py index d458ddf2d..1df6940d7 100644 --- a/_unittests/ut_sklapi/test_onnx_speedup_classifier.py +++ b/_unittests/ut_sklapi/test_onnx_speedup_classifier.py @@ -13,13 +13,13 @@ from sklearn.linear_model import LogisticRegression from sklearn.datasets import load_iris from pyquickhelper.pycode import ExtTestCase, ignore_warnings -from mlprodict.sklapi import OnnxSpeedUpClassifier +from mlprodict.sklapi import OnnxSpeedupClassifier from mlprodict.tools import get_opset_number_from_onnx from mlprodict.onnx_conv import to_onnx from mlprodict.onnxrt import OnnxInference -class TestOnnxSpeedUpClassifier(ExtTestCase): +class TestOnnxSpeedupClassifier(ExtTestCase): def setUp(self): logger = getLogger('skl2onnx') @@ -32,7 +32,7 @@ def opset(self): def test_speedup_classifier32(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset()) spd.fit(X, y) spd.assert_almost_equal(X, decimal=5) @@ -41,7 +41,7 @@ def test_speedup_classifier32(self): def test_speedup_classifier32_onnxruntime(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), runtime="onnxruntime1") spd.fit(X, y) @@ -51,7 +51,7 @@ def 
test_speedup_classifier32_onnxruntime(self): def test_speedup_classifier32_numpy(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), runtime="numpy") spd.fit(X, y) @@ -62,7 +62,7 @@ def test_speedup_classifier32_numba(self): data = load_iris() X, y = data.data, data.target X = X.astype(numpy.float32) - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), runtime="numba", nopython=False) spd.fit(X, y) @@ -73,7 +73,7 @@ def test_speedup_classifier32_numba(self): def test_speedup_classifier64(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), enforce_float32=False) spd.fit(X, y) @@ -83,7 +83,7 @@ def test_speedup_classifier64(self): def test_speedup_classifier64_op_version(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), enforce_float32=False) spd.fit(X, y) @@ -94,7 +94,7 @@ def test_speedup_classifier64_op_version(self): def test_speedup_classifier64_pickle(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), enforce_float32=False) spd.fit(X, y) @@ -115,7 +115,7 @@ def test_speedup_classifier64_pickle(self): def test_speedup_classifier64_numpy_pickle(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), enforce_float32=False, runtime="numpy") spd.fit(X, y) @@ -136,7 +136,7 @@ def test_speedup_classifier64_numpy_pickle(self): def test_speedup_classifier64_numba_pickle(self): data = load_iris() X, y = data.data, data.target - spd = 
OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), enforce_float32=False, runtime="numba", nopython=False) spd.fit(X, y) @@ -157,7 +157,7 @@ def test_speedup_classifier64_numba_pickle(self): def test_speedup_classifier64_onnx(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), enforce_float32=False) spd.fit(X, y) @@ -173,7 +173,7 @@ def test_speedup_classifier64_onnx(self): def test_speedup_classifier64_onnx_numpy(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), enforce_float32=False, runtime='numpy') spd.fit(X, y) @@ -189,7 +189,7 @@ def test_speedup_classifier64_onnx_numpy(self): def test_speedup_classifier64_onnx_numba(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), enforce_float32=False, runtime='numba', nopython=False) spd.fit(X, y) @@ -206,7 +206,7 @@ def test_speedup_classifier64_onnx_numba(self): def test_speedup_classifier64_onnx_numba_python(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpClassifier( + spd = OnnxSpeedupClassifier( LogisticRegression(), target_opset=self.opset(), enforce_float32=False, runtime='numba', nopython=False) spd.fit(X, y) @@ -221,5 +221,5 @@ def test_speedup_classifier64_onnx_numba_python(self): if __name__ == '__main__': - # TestOnnxSpeedUpClassifier().test_speedup_classifier64_numba_pickle() + # TestOnnxSpeedupClassifier().test_speedup_classifier64_numba_pickle() unittest.main() diff --git a/_unittests/ut_sklapi/test_onnx_speedup_cluster.py b/_unittests/ut_sklapi/test_onnx_speedup_cluster.py new file mode 100644 index 000000000..413937787 --- /dev/null +++ 
b/_unittests/ut_sklapi/test_onnx_speedup_cluster.py @@ -0,0 +1,225 @@ +""" +@brief test log(time=5s) +""" +from io import BytesIO +import pickle +import unittest +from logging import getLogger +import numpy +from numba import NumbaWarning +# import pandas +# from sklearn.pipeline import make_pipeline +from sklearn.exceptions import ConvergenceWarning +from sklearn.cluster import KMeans +from sklearn.datasets import load_iris +from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlprodict.sklapi import OnnxSpeedupCluster +from mlprodict.tools import get_opset_number_from_onnx +from mlprodict.onnx_conv import to_onnx +from mlprodict.onnxrt import OnnxInference + + +class TestOnnxSpeedupCluster(ExtTestCase): + + def setUp(self): + logger = getLogger('skl2onnx') + logger.disabled = True + + def opset(self): + return get_opset_number_from_onnx() + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans32(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset()) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=4) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans32_onnxruntime(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + runtime="onnxruntime1") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=4) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans32_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + runtime="numpy") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=4) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_kmeans32_numba(self): + data = load_iris() + X, y = data.data, data.target + X = X.astype(numpy.float32) + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + 
runtime="numba", nopython=False) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=4) + self.assertIn("CPUDispatch", str(spd.onnxrt_.func)) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + spd.assert_almost_equal(X) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_op_version(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + opset = spd.op_version + self.assertGreater(self.opset(), opset['']) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_numpy_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime="numpy") + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_kmeans64_numba_pickle(self): + data = load_iris() + X, 
y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime="numba", nopython=False) + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_onnx(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + expected_label = spd.predict(X) + expected_score = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_score, got['scores']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_onnx_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime='numpy') + spd.fit(X, y) + expected_label = spd.predict(X) + expected_score = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_score, got['scores']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_kmeans64_onnx_numba(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + expected_label = spd.predict(X) + expected_score = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = 
oinf.run({'X': X}) + self.assertEqualArray(expected_score, got['scores']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_kmeans64_onnx_numba_python(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + expected_label = spd.predict(X) + expected_score = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_score, got['scores']) + self.assertEqualArray(expected_label, got['label']) + + +if __name__ == '__main__': + # TestOnnxSpeedupCluster().test_speedup_kmeans32() + unittest.main() diff --git a/_unittests/ut_sklapi/test_onnx_speedup_regressor.py b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py index 34409ca40..dc508752f 100644 --- a/_unittests/ut_sklapi/test_onnx_speedup_regressor.py +++ b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py @@ -6,19 +6,21 @@ import unittest from logging import getLogger import numpy +from numba import NumbaWarning # import pandas # from sklearn.pipeline import make_pipeline from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import LinearRegression -from sklearn.datasets import load_iris +from sklearn.datasets import load_iris, make_regression +from sklearn.gaussian_process import GaussianProcessRegressor from pyquickhelper.pycode import ExtTestCase, ignore_warnings -from mlprodict.sklapi import OnnxSpeedUpRegressor +from mlprodict.sklapi import OnnxSpeedupRegressor from mlprodict.tools import get_opset_number_from_onnx from mlprodict.onnx_conv import to_onnx from mlprodict.onnxrt import OnnxInference -class TestOnnxSpeedUpRegressor(ExtTestCase): +class TestOnnxSpeedupRegressor(ExtTestCase): def setUp(self): logger = getLogger('skl2onnx') @@ -31,7 +33,7 @@ def 
opset(self): def test_speedup_regressor32(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset()) spd.fit(X, y) spd.assert_almost_equal(X, decimal=5) @@ -40,7 +42,7 @@ def test_speedup_regressor32(self): def test_speedup_regressor32_onnxruntime(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), runtime="onnxruntime1") spd.fit(X, y) @@ -50,7 +52,7 @@ def test_speedup_regressor32_onnxruntime(self): def test_speedup_regressor32_numpy(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), runtime="numpy") spd.fit(X, y) @@ -61,7 +63,7 @@ def test_speedup_regressor32_numba(self): data = load_iris() X, y = data.data, data.target X = X.astype(numpy.float32) - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), runtime="numba") spd.fit(X, y) @@ -72,7 +74,7 @@ def test_speedup_regressor32_numba(self): def test_speedup_regressor64(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), enforce_float32=False) spd.fit(X, y) @@ -82,7 +84,7 @@ def test_speedup_regressor64(self): def test_speedup_regressor64_op_version(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), enforce_float32=False) spd.fit(X, y) @@ -93,7 +95,7 @@ def test_speedup_regressor64_op_version(self): def test_speedup_regressor64_pickle(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), enforce_float32=False) 
spd.fit(X, y) @@ -114,7 +116,7 @@ def test_speedup_regressor64_pickle(self): def test_speedup_regressor64_numpy_pickle(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), enforce_float32=False, runtime="numpy") spd.fit(X, y) @@ -135,7 +137,7 @@ def test_speedup_regressor64_numpy_pickle(self): def test_speedup_regressor64_numba_pickle(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), enforce_float32=False, runtime="numba") spd.fit(X, y) @@ -156,7 +158,7 @@ def test_speedup_regressor64_numba_pickle(self): def test_speedup_regressor64_onnx(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), enforce_float32=False) spd.fit(X, y) @@ -170,7 +172,7 @@ def test_speedup_regressor64_onnx(self): def test_speedup_regressor64_onnx_numpy(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), enforce_float32=False, runtime='numpy') spd.fit(X, y) @@ -184,7 +186,7 @@ def test_speedup_regressor64_onnx_numpy(self): def test_speedup_regressor64_onnx_numba(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), enforce_float32=False, runtime='numba') spd.fit(X, y) @@ -199,7 +201,7 @@ def test_speedup_regressor64_onnx_numba(self): def test_speedup_regressor64_onnx_numba_python(self): data = load_iris() X, y = data.data, data.target - spd = OnnxSpeedUpRegressor( + spd = OnnxSpeedupRegressor( LinearRegression(), target_opset=self.opset(), enforce_float32=False, runtime='numba', nopython=False) spd.fit(X, y) @@ -210,7 +212,41 @@ def 
test_speedup_regressor64_onnx_numba_python(self): got = oinf.run({'X': X})['variable'] self.assertEqualArray(expected, got) + @ignore_warnings((ConvergenceWarning, NumbaWarning, DeprecationWarning)) + def test_speedup_gaussian_regressor64_onnx_numpy_python(self): + X, y = make_regression( + n_features=2, n_samples=100, n_targets=1, random_state=42) + model = GaussianProcessRegressor( + alpha=1e-5, n_restarts_optimizer=25, normalize_y=True) + model.fit(X, y) + expected_t = model.predict(X) + onx = to_onnx(model, X[:1], target_opset=self.opset(), + options={'optim': 'cdist'}) + + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['GPmean'] + self.assertEqualArray(expected_t.squeeze(), got.squeeze()) + spd = OnnxSpeedupRegressor( + model, target_opset=self.opset(), + enforce_float32=False, runtime='numpy', nopython=False, + conv_options={'optim': 'cdist'}) + spd.fit(X, y) + expected_r = spd.raw_predict(X) + self.assertEqualArray(expected_t.squeeze(), expected_r.squeeze()) + + oinf = OnnxInference(spd.onnx_) + got = oinf.run({'X': X})['GPmean'] + self.assertEqualArray(expected_r.squeeze(), got.squeeze()) + + onx = to_onnx(spd, X[:1]) + self.assertIn('CDist', str(onx)) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['GPmean'] + self.assertEqualArray(expected_r.squeeze(), got.squeeze()) + + expected = spd.predict(X) + self.assertEqualArray(expected_r.squeeze(), expected.squeeze()) + if __name__ == '__main__': - # TestOnnxSpeedUpRegressor().test_speedup_regressor64_onnx_numba() unittest.main() diff --git a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py index 7a2a93c72..bedb1581c 100644 --- a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py +++ b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py @@ -11,13 +11,13 @@ from sklearn.decomposition import PCA from sklearn.datasets import load_iris from pyquickhelper.pycode import ExtTestCase -from mlprodict.sklapi import OnnxSpeedUpTransformer +from 
mlprodict.sklapi import OnnxSpeedupTransformer from mlprodict.tools import get_opset_number_from_onnx from mlprodict.onnx_conv import to_onnx from mlprodict.onnxrt import OnnxInference -class TestOnnxSpeedUpTransformer(ExtTestCase): +class TestOnnxSpeedupTransformer(ExtTestCase): def setUp(self): logger = getLogger('skl2onnx') @@ -29,14 +29,14 @@ def opset(self): def test_speedup_transform32(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset()) + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset()) spd.fit(X) spd.assert_almost_equal(X, decimal=5) def test_speedup_transform32_onnxruntime(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer( + spd = OnnxSpeedupTransformer( PCA(), target_opset=self.opset(), runtime="onnxruntime1") spd.fit(X) @@ -45,7 +45,7 @@ def test_speedup_transform32_onnxruntime(self): def test_speedup_transform32_numpy(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer( + spd = OnnxSpeedupTransformer( PCA(), target_opset=self.opset(), runtime="numpy") spd.fit(X) @@ -55,7 +55,7 @@ def test_speedup_transform32_numba(self): data = load_iris() X, _ = data.data, data.target X = X.astype(numpy.float32) - spd = OnnxSpeedUpTransformer( + spd = OnnxSpeedupTransformer( PCA(), target_opset=self.opset(), runtime="numba") spd.fit(X) @@ -65,7 +65,7 @@ def test_speedup_transform32_numba(self): def test_speedup_transform64(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), enforce_float32=False) spd.fit(X) spd.assert_almost_equal(X) @@ -73,7 +73,7 @@ def test_speedup_transform64(self): def test_speedup_transform64_op_version(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + spd = OnnxSpeedupTransformer(PCA(), 
target_opset=self.opset(), enforce_float32=False) spd.fit(X) opset = spd.op_version @@ -82,7 +82,7 @@ def test_speedup_transform64_op_version(self): def test_speedup_transform64_pickle(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), enforce_float32=False) spd.fit(X) @@ -101,7 +101,7 @@ def test_speedup_transform64_pickle(self): def test_speedup_transform64_numpy_pickle(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), enforce_float32=False, runtime="numpy") spd.fit(X) @@ -121,7 +121,7 @@ def test_speedup_transform64_numpy_pickle(self): def test_speedup_transform64_numba_pickle(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), enforce_float32=False, runtime="numba") spd.fit(X) @@ -141,7 +141,7 @@ def test_speedup_transform64_numba_pickle(self): def test_speedup_transform64_onnx(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), enforce_float32=False) spd.fit(X) expected = spd.transform(X) @@ -153,7 +153,7 @@ def test_speedup_transform64_onnx(self): def test_speedup_transform64_onnx_numpy(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(), + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), enforce_float32=False, runtime='numpy') spd.fit(X) @@ -166,7 +166,7 @@ def test_speedup_transform64_onnx_numpy(self): def test_speedup_transform64_onnx_numba(self): data = load_iris() X, _ = data.data, data.target - spd = OnnxSpeedUpTransformer(PCA(), 
target_opset=self.opset(), + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), enforce_float32=False, runtime='numba') spd.fit(X) diff --git a/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl b/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl index b16110611..1a25d24f4 100644 --- a/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl +++ b/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl @@ -1,5 +1,6 @@ import numpy import scipy.special as scipy_special +import scipy.spatial.distance as scipy_distance from mlprodict.onnx_tools.exports.numpy_helper import ( argmax_use_numpy_select_last_index, argmin_use_numpy_select_last_index, diff --git a/mlprodict/onnx_tools/exports/numpy_helper.py b/mlprodict/onnx_tools/exports/numpy_helper.py index ae076699f..aa0df0e66 100644 --- a/mlprodict/onnx_tools/exports/numpy_helper.py +++ b/mlprodict/onnx_tools/exports/numpy_helper.py @@ -156,6 +156,9 @@ def f(v): if format == 'listint' and isinstance(v, str): return list( map(int, v.strip('[]').replace(' ', '').split(','))) + if format == 'listfloat' and isinstance(v, str): + return list( + map(float, v.strip('[]').replace(' ', '').split(','))) raise ValueError( "Unable to convert %r with format=%r." % (v, format)) @@ -216,6 +219,9 @@ def make_numpy_code(self): if self.domain == 'ai.onnx.ml': return self._make_numpy_code_onnxml() + if self.domain == 'com.microsoft': + return self._make_numpy_code_others() + raise NotImplementedError( "Unable to convert any operator from domain %r." 
% self.domain) @@ -291,6 +297,17 @@ def _make_numpy_code_onnx(self): return "%s = numpy.concatenate([%s], %s)" % ( outs, ", ".join(self.inputs), axis) + if self.op_type == 'ConstantOfShape': + self._make_sure_opsets(9) + self._make_sure_inputs(1) + value = self._getat('value', 0, format='listfloat') + shape = self._simplify(self.inputs[0], kind='tuple') + return "%s = numpy.full(%s, %s)" % ( + outs, shape, value) + + if self.op_type == 'Exp': + return "%s = numpy.exp(%s)" % (outs, self.inputs[0]) + if self.op_type == 'Max': return "%s = numpy.maximum(%s)" % (outs, ", ".join(self.inputs)) @@ -476,6 +493,20 @@ def _make_numpy_code_onnxml(self): "Unable to convert operator type %r name=%r (onnxml)." % ( self.op_type, self.name)) + def _make_numpy_code_others(self): + outs = ", ".join(self.outputs) + + if self.op_type == 'CDist': + self._make_sure_inputs(2) + metric = self._getat('metric', 'euclidean').strip("'b") + return "%s = scipy_distance.cdist(%s, %s, metric=%r)" % ( + outs, self.inputs[0], self.inputs[1], metric) + + raise NotImplementedError( # pragma: no cover + "Unable to convert operator type %r (domain=%r) " + "name=%r (onnxml)." 
% ( + self.op_type, self.domain, self.name)) + def make_numpy_code(opset, name=None, op_type=None, domain='', inputs=None, outputs=None, attributes=None, diff --git a/mlprodict/sklapi/__init__.py b/mlprodict/sklapi/__init__.py index 28ae9365f..5a11bd8a5 100644 --- a/mlprodict/sklapi/__init__.py +++ b/mlprodict/sklapi/__init__.py @@ -6,6 +6,7 @@ from .onnx_pipeline import OnnxPipeline from .onnx_transformer import OnnxTransformer from .onnx_speed_up import ( - OnnxSpeedUpTransformer, - OnnxSpeedUpRegressor, - OnnxSpeedUpClassifier) + OnnxSpeedupClassifier, + OnnxSpeedupCluster, + OnnxSpeedupRegressor, + OnnxSpeedupTransformer) diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py index 878a1711c..87ee1e8f4 100644 --- a/mlprodict/sklapi/onnx_speed_up.py +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -12,10 +12,12 @@ import numpy from numpy.testing import assert_almost_equal import scipy.special as scipy_special +import scipy.spatial.distance as scipy_distance from onnx import helper, load from sklearn.base import ( BaseEstimator, clone, - TransformerMixin, RegressorMixin, ClassifierMixin) + TransformerMixin, RegressorMixin, ClassifierMixin, + ClusterMixin) from sklearn.preprocessing import FunctionTransformer from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin from ..tools.code_helper import print_code @@ -33,7 +35,7 @@ from .onnx_transformer import OnnxTransformer -class _OnnxPipelineStepSpeedUp(BaseEstimator, OnnxOperatorMixin): +class _OnnxPipelineStepSpeedup(BaseEstimator, OnnxOperatorMixin): """ Speeds up inference by replacing methods *transform* or *predict* by a runtime for :epkg:`ONNX`. 
@@ -150,6 +152,7 @@ def _build_onnx_runtime_numpy_compile(self, opsets): 'print': print, 'sorted': sorted, 'collections': collections, 'inspect': inspect, 'helper': helper, 'scipy_special': scipy_special, + 'scipy_distance': scipy_distance, 'array_feature_extrator': array_feature_extrator, 'argmin_use_numpy_select_last_index': argmin_use_numpy_select_last_index, @@ -303,8 +306,8 @@ def converter(scope, operator, container): return converter -class OnnxSpeedUpTransformer(TransformerMixin, - _OnnxPipelineStepSpeedUp): +class OnnxSpeedupTransformer(TransformerMixin, + _OnnxPipelineStepSpeedup): """ Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`. @@ -335,7 +338,7 @@ class OnnxSpeedUpTransformer(TransformerMixin, def __init__(self, estimator, runtime='python', enforce_float32=True, target_opset=None, conv_options=None, nopython=True): - _OnnxPipelineStepSpeedUp.__init__( + _OnnxPipelineStepSpeedup.__init__( self, estimator, runtime=runtime, enforce_float32=enforce_float32, target_opset=target_opset, conv_options=conv_options, nopython=nopython) @@ -345,9 +348,9 @@ def fit(self, X, y=None, sample_weight=None): # pylint: disable=W0221 Trains based estimator. """ if sample_weight is None: - _OnnxPipelineStepSpeedUp.fit(self, X, y) + _OnnxPipelineStepSpeedup.fit(self, X, y) else: - _OnnxPipelineStepSpeedUp.fit( + _OnnxPipelineStepSpeedup.fit( self, X, y, sample_weight=sample_weight) return self @@ -379,8 +382,8 @@ def assert_almost_equal(self, X, **kwargs): assert_almost_equal(expected, got, **kwargs) -class OnnxSpeedUpRegressor(RegressorMixin, - _OnnxPipelineStepSpeedUp): +class OnnxSpeedupRegressor(RegressorMixin, + _OnnxPipelineStepSpeedup): """ Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`. 
@@ -411,7 +414,7 @@ class OnnxSpeedUpRegressor(RegressorMixin, def __init__(self, estimator, runtime='python', enforce_float32=True, target_opset=None, conv_options=None, nopython=True): - _OnnxPipelineStepSpeedUp.__init__( + _OnnxPipelineStepSpeedup.__init__( self, estimator, runtime=runtime, enforce_float32=enforce_float32, target_opset=target_opset, conv_options=conv_options, nopython=nopython) @@ -421,9 +424,9 @@ def fit(self, X, y, sample_weight=None): # pylint: disable=W0221 Trains based estimator. """ if sample_weight is None: - _OnnxPipelineStepSpeedUp.fit(self, X, y) + _OnnxPipelineStepSpeedup.fit(self, X, y) else: - _OnnxPipelineStepSpeedUp.fit( + _OnnxPipelineStepSpeedup.fit( self, X, y, sample_weight=sample_weight) return self @@ -455,8 +458,8 @@ def assert_almost_equal(self, X, **kwargs): assert_almost_equal(expected, got, **kwargs) -class OnnxSpeedUpClassifier(ClassifierMixin, - _OnnxPipelineStepSpeedUp): +class OnnxSpeedupClassifier(ClassifierMixin, + _OnnxPipelineStepSpeedup): """ Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`. @@ -489,7 +492,7 @@ def __init__(self, estimator, runtime='python', enforce_float32=True, target_opset=None, conv_options=None, nopython=True): if conv_options is None: conv_options = {'zipmap': False} - _OnnxPipelineStepSpeedUp.__init__( + _OnnxPipelineStepSpeedup.__init__( self, estimator, runtime=runtime, enforce_float32=enforce_float32, target_opset=target_opset, conv_options=conv_options, nopython=nopython) @@ -499,9 +502,9 @@ def fit(self, X, y, sample_weight=None): # pylint: disable=W0221 Trains based estimator. 
""" if sample_weight is None: - _OnnxPipelineStepSpeedUp.fit(self, X, y) + _OnnxPipelineStepSpeedup.fit(self, X, y) else: - _OnnxPipelineStepSpeedUp.fit( + _OnnxPipelineStepSpeedup.fit( self, X, y, sample_weight=sample_weight) return self @@ -558,3 +561,106 @@ def assert_almost_equal(self, X, **kwargs): expected = numpy.squeeze(self.raw_predict(X)) got = numpy.squeeze(self.predict(X)) assert_almost_equal(expected, got, **kwargs) + + +class OnnxSpeedupCluster(ClusterMixin, + _OnnxPipelineStepSpeedup): + """ + Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`. + + :param estimator: estimator to train + :param enforce_float32: boolean + :epkg:`onnxruntime` only supports *float32*, + :epkg:`scikit-learn` usually uses double floats, this parameter + ensures that every array of double floats is converted into + single floats + :param runtime: string, defined the runtime to use + as described in @see cl OnnxInference. + :param target_opset: targetted ONNX opset + :param conv_options: conversion options, see @see fn to_onnx + :param nopython: used by :epkg:`numba` jitter + + Attributes created by method *fit*: + + * `estimator_`: cloned and trained version of *estimator* + * `onnxrt_`: objet of type @see cl OnnxInference, + :epkg:`sklearn:preprocessing:FunctionTransformer` + * `numpy_code_`: python code equivalent to the inference + method if the runtime is `'numpy'` or `'numba'` + * `onnx_io_names_`: dictionary, additional information + if the runtime is `'numpy'` or `'numba'` + + .. versionadded:: 0.7 + """ + + def __init__(self, estimator, runtime='python', enforce_float32=True, + target_opset=None, conv_options=None, nopython=True): + _OnnxPipelineStepSpeedup.__init__( + self, estimator, runtime=runtime, enforce_float32=enforce_float32, + target_opset=target_opset, conv_options=conv_options, + nopython=nopython) + + def fit(self, X, y, sample_weight=None): # pylint: disable=W0221 + """ + Trains based estimator. 
+ """ + if sample_weight is None: + _OnnxPipelineStepSpeedup.fit(self, X, y) + else: + _OnnxPipelineStepSpeedup.fit( + self, X, y, sample_weight=sample_weight) + return self + + def predict(self, X): + """ + Transforms with *ONNX*. + + :param X: features + :return: transformed features + """ + pred = self.onnxrt_.transform(X) + if isinstance(pred, tuple): + return pred[0] + return pred.iloc[:, 0].values + + def transform(self, X): + """ + Transforms with *ONNX*. + + :param X: features + :return: transformed features + """ + pred = self.onnxrt_.transform(X) + if isinstance(pred, tuple): + return pred[1] + return pred.iloc[:, 1:].values + + def raw_predict(self, X): + """ + Transforms with *scikit-learn*. + + :param X: features + :return: transformed features + """ + return self.estimator_.predict(X) + + def raw_transform(self, X): + """ + Transforms with *scikit-learn*. + + :param X: features + :return: transformed features + """ + return self.estimator_.transform(X) + + def assert_almost_equal(self, X, **kwargs): + """ + Checks that ONNX and scikit-learn produces the same + outputs. 
+ """ + expected = numpy.squeeze(self.raw_transform(X)) + got = numpy.squeeze(self.transform(X)) + assert_almost_equal(expected, got, **kwargs) + expected = numpy.squeeze(self.raw_predict(X)) + got = numpy.squeeze(self.predict(X)) + assert_almost_equal(expected, got, **kwargs) From 7a9961809f2efed9fc0c29e9609b3795d5cf12c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 31 Aug 2021 20:04:27 +0200 Subject: [PATCH 13/13] lint --- _unittests/ut_sklapi/test_onnx_speedup_regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_unittests/ut_sklapi/test_onnx_speedup_regressor.py b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py index dc508752f..a8d922635 100644 --- a/_unittests/ut_sklapi/test_onnx_speedup_regressor.py +++ b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py @@ -214,7 +214,7 @@ def test_speedup_regressor64_onnx_numba_python(self): @ignore_warnings((ConvergenceWarning, NumbaWarning, DeprecationWarning)) def test_speedup_gaussian_regressor64_onnx_numpy_python(self): - X, y = make_regression( + X, y = make_regression( # pylint: disable=W0632 n_features=2, n_samples=100, n_targets=1, random_state=42) model = GaussianProcessRegressor( alpha=1e-5, n_restarts_optimizer=25, normalize_y=True)