diff --git a/.gitignore b/.gitignore index c12278227..1659a0de0 100644 --- a/.gitignore +++ b/.gitignore @@ -314,3 +314,8 @@ _unittests/ut_tools/**/*.pb _unittests/ut_onnxrt/onnxruntime_profile*.json _doc/notebooks/onnxruntime_profile*.json _doc/sphinxdoc/source/phdoc_static/embed*.js +cache-*.pickle +*/*/*.pb +onnxruntime*.json +*net*.tar* +_unittests/unittests.out diff --git a/_doc/examples/plot_speedup_pca.py b/_doc/examples/plot_speedup_pca.py new file mode 100644 index 000000000..04dc9289c --- /dev/null +++ b/_doc/examples/plot_speedup_pca.py @@ -0,0 +1,128 @@ +""" +.. _l-Speedup-pca: + +Speed up scikit-learn inference with ONNX +========================================= + +Is it possible to make :epkg:`scikit-learn` faster with ONNX? +That's question this example tries to answer. The scenario is +is the following: + +* a model is trained +* it is converted into ONNX for inference +* it selects a runtime to compute the prediction + +The following runtime are tested: + +* `python`: python runtime for ONNX +* `onnxruntime1`: :epkg:`onnxruntime` +* `numpy`: the ONNX graph is converted into numpy code +* `numba`: the numpy code is accelerated with :epkg:`numba`. + +.. contents:: + :local: + +PCA ++++ + +Let's look at a very simple model, a PCA. +""" + +import numpy +from pandas import DataFrame +import matplotlib.pyplot as plt +from sklearn.datasets import make_regression +from sklearn.decomposition import PCA +from pyquickhelper.pycode.profiling import profile +from mlprodict.sklapi import OnnxSpeedupTransformer +from mlprodict.tools.speed_measure import measure_time +from tqdm import tqdm + +################################ +# Data and models to test. 
+ +data, _ = make_regression(1000, n_features=20) +data = data.astype(numpy.float32) +models = [ + ('sklearn', PCA(n_components=10)), + ('python', OnnxSpeedupTransformer( + PCA(n_components=10), runtime='python')), + ('onnxruntime1', OnnxSpeedupTransformer( + PCA(n_components=10), runtime='onnxruntime1')), + ('numpy', OnnxSpeedupTransformer( + PCA(n_components=10), runtime='numpy')), + ('numba', OnnxSpeedupTransformer( + PCA(n_components=10), runtime='numba'))] + +################################# +# Training. + +for name, model in tqdm(models): + model.fit(data) + +################################# +# Profiling of runtime `onnxruntime1`. + + +def fct(): + for i in range(1000): + models[2][1].transform(data) + + +res = profile(fct, pyinst_format="text") +print(res[1]) + + +################################# +# Profiling of runtime `numpy`. + +def fct(): + for i in range(1000): + models[3][1].transform(data) + + +res = profile(fct, pyinst_format="text") +print(res[1]) + +################################# +# The class *OnnxSpeedupTransformer* converts the PCA +# into ONNX and then converts it into a python code using +# *numpy*. The code is the following. + +print(models[3][1].numpy_code_) + +################################# +# Benchmark. + +bench = [] +for name, model in tqdm(models): + for size in (1, 10, 100, 1000, 10000, 100000, 200000): + data, _ = make_regression(size, n_features=20) + data = data.astype(numpy.float32) + + # We run it a first time (numba compiles + # the function during the first execution). + model.transform(data) + res = measure_time( + "model.transform(data)", div_by_number=True, + context={'data': data, 'model': model}) + res['name'] = name + res['size'] = size + bench.append(res) + +df = DataFrame(bench) +piv = df.pivot("size", "name", "average") +piv + +###################################### +# Graph. 
+fig, ax = plt.subplots(1, 2, figsize=(10, 4)) +piv.plot(title="Speedup PCA with ONNX (lower better)", + logx=True, logy=True, ax=ax[0]) +piv2 = piv.copy() +for c in piv2.columns: + piv2[c] /= piv['sklearn'] +print(piv2) +piv2.plot(title="baseline=scikit-learn (lower better)", + logx=True, logy=True, ax=ax[1]) +plt.show() diff --git a/_doc/examples/plot_time_tree_ensemble.py b/_doc/examples/plot_time_tree_ensemble.py index ad75edb32..77d55d57c 100644 --- a/_doc/examples/plot_time_tree_ensemble.py +++ b/_doc/examples/plot_time_tree_ensemble.py @@ -1,216 +1,218 @@ -""" -.. _l-example-tree-ensemble: - -Benchmark Random Forests, Tree Ensemble -======================================= - -The following script benchmarks different libraries -implementing random forests and boosting trees. -This benchmark can be replicated by installing the -following packages: - -:: - - python -m virtualenv env - cd env - pip install -i https://test.pypi.org/simple/ ort-nightly - pip install git+https://github.com/microsoft/onnxconverter-common.git@jenkins - pip install git+https://https://github.com/xadupre/sklearn-onnx.git@jenkins - pip install mlprodict matplotlib scikit-learn pandas threadpoolctl - pip install mlprodict lightgbm xgboost jinja2 - -.. contents:: - :local: - -Import -++++++ -""" -import os -import pickle -from pprint import pprint -import numpy -import pandas -import matplotlib.pyplot as plt -from xgboost import XGBClassifier -from lightgbm import LGBMClassifier -from onnxruntime import InferenceSession -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.ensemble import RandomForestClassifier -from sklearn.datasets import make_classification -from skl2onnx import to_onnx -from mlprodict.onnx_conv import register_converters -from mlprodict.onnxrt.validate.validate_helper import measure_time -from mlprodict.onnxrt import OnnxInference - -############################# -# Registers new converters for :epkg:`sklearn-onnx`. 
-register_converters() - -######################################### -# Problem -# +++++++ - -max_depth = 7 -n_classes = 5 -n_estimators = 100 -n_features = 10 -REPEAT = 3 -NUMBER = 1 -train, test = 2000, 10000 - -print('dataset') -X_, y_ = make_classification(n_samples=train + test, n_features=n_features, - n_classes=n_classes, n_informative=n_features - 3) -X_ = X_.astype(numpy.float32) -y_ = y_.astype(numpy.int64) -X_train, X_test = X_[:train], X_[train:] -y_train, y_test = y_[:train], y_[train:] - -compilation = [] - - -def train_cache(model, X_train, y_train, max_depth, n_estimators, n_classes): - name = "cache-{}-N{}-f{}-d{}-e{}-cl{}.pkl".format( - model.__class__.__name__, X_train.shape[0], X_train.shape[1], - max_depth, n_estimators, n_classes) - if os.path.exists(name): - with open(name, 'rb') as f: - return pickle.load(f) - else: - model.fit(X_train, y_train) - with open(name, 'wb') as f: - pickle.dump(model, f) - return model - - -######################################## -# RandomForestClassifier -# ++++++++++++++++++++++ - -rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth) -print('train') -rf = train_cache(rf, X_train, y_train, max_depth, n_estimators, n_classes) - -res = measure_time(rf.predict_proba, X_test[:10], - repeat=REPEAT, number=NUMBER, - div_by_number=True, first_run=True) -res['model'], res['runtime'] = rf.__class__.__name__, 'INNER' -pprint(res) - -######################################## -# ONNX -# ++++ - - -def measure_onnx_runtime(model, xt, repeat=REPEAT, number=NUMBER, - verbose=True): - if verbose: - print(model.__class__.__name__) - - res = measure_time(model.predict_proba, xt, - repeat=repeat, number=number, - div_by_number=True, first_run=True) - res['model'], res['runtime'] = model.__class__.__name__, 'INNER' - res['N'] = X_test.shape[0] - res["max_depth"] = max_depth - res["n_estimators"] = n_estimators - res["n_features"] = n_features - if verbose: - pprint(res) - yield res - - onx = to_onnx(model, 
X_train[:1], options={id(model): {'zipmap': False}}) - - oinf = OnnxInference(onx) - res = measure_time(lambda x: oinf.run({'X': x}), xt, - repeat=repeat, number=number, - div_by_number=True, first_run=True) - res['model'], res['runtime'] = model.__class__.__name__, 'NPY/C++' - res['N'] = X_test.shape[0] - res['size'] = len(onx.SerializeToString()) - res["max_depth"] = max_depth - res["n_estimators"] = n_estimators - res["n_features"] = n_features - if verbose: - pprint(res) - yield res - - sess = InferenceSession(onx.SerializeToString()) - res = measure_time(lambda x: sess.run(None, {'X': x}), xt, - repeat=repeat, number=number, - div_by_number=True, first_run=True) - res['model'], res['runtime'] = model.__class__.__name__, 'ORT' - res['N'] = X_test.shape[0] - res['size'] = len(onx.SerializeToString()) - res["max_depth"] = max_depth - res["n_estimators"] = n_estimators - res["n_features"] = n_features - if verbose: - pprint(res) - yield res - - -compilation.extend(list(measure_onnx_runtime(rf, X_test))) - - -######################################## -# HistGradientBoostingClassifier -# ++++++++++++++++++++++++++++++ - -hist = HistGradientBoostingClassifier( - max_iter=n_estimators, max_depth=max_depth) -print('train') -hist = train_cache(hist, X_train, y_train, max_depth, n_estimators, n_classes) - -compilation.extend(list(measure_onnx_runtime(hist, X_test))) - -######################################## -# LightGBM -# ++++++++ - -lgb = LGBMClassifier(n_estimators=n_estimators, max_depth=max_depth) -print('train') -lgb = train_cache(lgb, X_train, y_train, max_depth, n_estimators, n_classes) - -compilation.extend(list(measure_onnx_runtime(lgb, X_test))) - -######################################## -# XGBoost -# +++++++ - -xgb = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth) -print('train') -xgb = train_cache(xgb, X_train, y_train, max_depth, n_estimators, n_classes) - -compilation.extend(list(measure_onnx_runtime(xgb, X_test))) - 
-############################################## -# Summary -# +++++++ -# -# All data -name = 'plot_time_tree_ensemble' -df = pandas.DataFrame(compilation) -df.to_csv('%s.csv' % name, index=False) -df.to_excel('%s.xlsx' % name, index=False) -df - -######################################### -# Time per model and runtime. -piv = df.pivot("model", "runtime", "average") -piv - -########################################### -# Graphs. -ax = piv.T.plot(kind="bar") -ax.set_title("Computation time ratio for %d observations and %d features\n" - "lower is better for onnx runtimes" % X_test.shape) -plt.savefig('%s.png' % name) - -########################################### -# Available optimisation on this machine: - -from mlprodict.testing.experimental_c import code_optimisation -print(code_optimisation()) - -plt.show() +""" +.. _l-example-tree-ensemble: + +Benchmark Random Forests, Tree Ensemble +======================================= + +The following script benchmarks different libraries +implementing random forests and boosting trees. +This benchmark can be replicated by installing the +following packages: + +:: + + python -m virtualenv env + cd env + pip install -i https://test.pypi.org/simple/ ort-nightly + pip install git+https://github.com/microsoft/onnxconverter-common.git@jenkins + pip install git+https://https://github.com/xadupre/sklearn-onnx.git@jenkins + pip install mlprodict matplotlib scikit-learn pandas threadpoolctl + pip install mlprodict lightgbm xgboost jinja2 + +.. 
contents:: + :local: + +Import +++++++ +""" +import os +import pickle +from pprint import pprint +import numpy +import pandas +import matplotlib.pyplot as plt +from xgboost import XGBClassifier +from lightgbm import LGBMClassifier +from onnxruntime import InferenceSession +from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=W0611 +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.ensemble import RandomForestClassifier +from sklearn.datasets import make_classification +from skl2onnx import to_onnx +from mlprodict.onnx_conv import register_converters +from mlprodict.onnxrt.validate.validate_helper import measure_time +from mlprodict.onnxrt import OnnxInference + +############################# +# Registers new converters for :epkg:`sklearn-onnx`. +register_converters() + +######################################### +# Problem +# +++++++ + +max_depth = 7 +n_classes = 20 +n_estimators = 500 +n_features = 100 +REPEAT = 3 +NUMBER = 1 +train, test = 1000, 10000 + +print('dataset') +X_, y_ = make_classification(n_samples=train + test, n_features=n_features, + n_classes=n_classes, n_informative=n_features - 3) +X_ = X_.astype(numpy.float32) +y_ = y_.astype(numpy.int64) +X_train, X_test = X_[:train], X_[train:] +y_train, y_test = y_[:train], y_[train:] + +compilation = [] + + +def train_cache(model, X_train, y_train, max_depth, n_estimators, n_classes): + name = "cache-{}-N{}-f{}-d{}-e{}-cl{}.pkl".format( + model.__class__.__name__, X_train.shape[0], X_train.shape[1], + max_depth, n_estimators, n_classes) + if os.path.exists(name): + with open(name, 'rb') as f: + return pickle.load(f) + else: + model.fit(X_train, y_train) + with open(name, 'wb') as f: + pickle.dump(model, f) + return model + + +######################################## +# RandomForestClassifier +# ++++++++++++++++++++++ + +rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth) +print('train') +rf = train_cache(rf, X_train, y_train, 
max_depth, n_estimators, n_classes) + +res = measure_time(rf.predict_proba, X_test[:10], + repeat=REPEAT, number=NUMBER, + div_by_number=True, first_run=True) +res['model'], res['runtime'] = rf.__class__.__name__, 'INNER' +pprint(res) + +######################################## +# ONNX +# ++++ + + +def measure_onnx_runtime(model, xt, repeat=REPEAT, number=NUMBER, + verbose=True): + if verbose: + print(model.__class__.__name__) + + res = measure_time(model.predict_proba, xt, + repeat=repeat, number=number, + div_by_number=True, first_run=True) + res['model'], res['runtime'] = model.__class__.__name__, 'INNER' + res['N'] = X_test.shape[0] + res["max_depth"] = max_depth + res["n_estimators"] = n_estimators + res["n_features"] = n_features + if verbose: + pprint(res) + yield res + + onx = to_onnx(model, X_train[:1], options={id(model): {'zipmap': False}}) + + oinf = OnnxInference(onx) + res = measure_time(lambda x: oinf.run({'X': x}), xt, + repeat=repeat, number=number, + div_by_number=True, first_run=True) + res['model'], res['runtime'] = model.__class__.__name__, 'NPY/C++' + res['N'] = X_test.shape[0] + res['size'] = len(onx.SerializeToString()) + res["max_depth"] = max_depth + res["n_estimators"] = n_estimators + res["n_features"] = n_features + if verbose: + pprint(res) + yield res + + sess = InferenceSession(onx.SerializeToString()) + res = measure_time(lambda x: sess.run(None, {'X': x}), xt, + repeat=repeat, number=number, + div_by_number=True, first_run=True) + res['model'], res['runtime'] = model.__class__.__name__, 'ORT' + res['N'] = X_test.shape[0] + res['size'] = len(onx.SerializeToString()) + res["max_depth"] = max_depth + res["n_estimators"] = n_estimators + res["n_features"] = n_features + if verbose: + pprint(res) + yield res + + +compilation.extend(list(measure_onnx_runtime(rf, X_test))) + + +######################################## +# HistGradientBoostingClassifier +# ++++++++++++++++++++++++++++++ + +hist = HistGradientBoostingClassifier( + 
max_iter=n_estimators, max_depth=max_depth) +print('train') +hist = train_cache(hist, X_train, y_train, max_depth, n_estimators, n_classes) + +compilation.extend(list(measure_onnx_runtime(hist, X_test))) + +######################################## +# LightGBM +# ++++++++ + +lgb = LGBMClassifier(n_estimators=n_estimators, + max_depth=max_depth, pred_early_stop=False) +print('train') +lgb = train_cache(lgb, X_train, y_train, max_depth, n_estimators, n_classes) + +compilation.extend(list(measure_onnx_runtime(lgb, X_test))) + +######################################## +# XGBoost +# +++++++ + +xgb = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth) +print('train') +xgb = train_cache(xgb, X_train, y_train, max_depth, n_estimators, n_classes) + +compilation.extend(list(measure_onnx_runtime(xgb, X_test))) + +############################################## +# Summary +# +++++++ +# +# All data +name = 'plot_time_tree_ensemble' +df = pandas.DataFrame(compilation) +df.to_csv('%s.csv' % name, index=False) +df.to_excel('%s.xlsx' % name, index=False) +df + +######################################### +# Time per model and runtime. +piv = df.pivot("model", "runtime", "average") +piv + +########################################### +# Graphs. 
+ax = piv.T.plot(kind="bar") +ax.set_title("Computation time ratio for %d observations and %d features\n" + "lower is better for onnx runtimes" % X_test.shape) +plt.savefig('%s.png' % name) + +########################################### +# Available optimisation on this machine: + +from mlprodict.testing.experimental_c import code_optimisation +print(code_optimisation()) + +plt.show() diff --git a/_doc/sphinxdoc/source/_exts/generate_automated_pages.py b/_doc/sphinxdoc/source/_exts/generate_automated_pages.py index 704860655..c57620407 100644 --- a/_doc/sphinxdoc/source/_exts/generate_automated_pages.py +++ b/_doc/sphinxdoc/source/_exts/generate_automated_pages.py @@ -7,7 +7,7 @@ from pandas import DataFrame, read_excel, read_csv, concat, Series from sklearn.exceptions import ConvergenceWarning from sklearn.utils._testing import ignore_warnings -from sklearn.experimental import enable_hist_gradient_boosting +from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=W0611 from sklearn.ensemble import AdaBoostRegressor, HistGradientBoostingRegressor from sklearn.gaussian_process import GaussianProcessClassifier import sphinx diff --git a/_doc/sphinxdoc/source/api/sklapi.rst b/_doc/sphinxdoc/source/api/sklapi.rst index 8ce2480a2..63a9e324f 100644 --- a/_doc/sphinxdoc/source/api/sklapi.rst +++ b/_doc/sphinxdoc/source/api/sklapi.rst @@ -9,14 +9,31 @@ pipeline. .. contents:: :local: +OnnxPipeline +++++++++++++ + +.. autosignature:: mlprodict.sklapi.onnx_pipeline.OnnxPipeline + :members: + OnnxTransformer +++++++++++++++ .. autosignature:: mlprodict.sklapi.onnx_transformer.OnnxTransformer :members: -OnnxPipeline -++++++++++++ +Speedup scikit-learn pipeline with ONNX ++++++++++++++++++++++++++++++++++++++++ -.. 
autosignature:: mlprodict.sklapi.onnx_pipeline.OnnxPipeline +These classes wraps an existing pipeline from *scikit-learn* +and replaces the inference (*transform*, *predict*, *predict_proba*) +by another runtime built after the model was converted into ONNX. +See example :ref:`l-b-numpy-numba-ort` for further details. + +.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedupClassifier + :members: + +.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedupRegressor + :members: + +.. autosignature:: mlprodict.sklapi.onnx_speed_up.OnnxSpeedupTransformer :members: diff --git a/_doc/sphinxdoc/source/api/tools.rst b/_doc/sphinxdoc/source/api/tools.rst index 3222eab1f..277d8310e 100644 --- a/_doc/sphinxdoc/source/api/tools.rst +++ b/_doc/sphinxdoc/source/api/tools.rst @@ -50,8 +50,8 @@ Functions to help understand models or modify them. .. autosignature:: mlprodict.testing.script_testing.verify_script -Optimisation -++++++++++++ +Onnx Optimisation ++++++++++++++++++ The following functions reduce the number of ONNX operators in a graph while keeping the same results. The optimized graph @@ -138,3 +138,109 @@ Versions .. autosignature:: mlprodict.tools.asv_options_helper.get_ir_version_from_onnx .. autosignature:: mlprodict.tools.asv_options_helper.get_opset_number_from_onnx + +Type conversion +=============== + +.. autosignature:: mlprodict.onnx_conv.convert.guess_initial_types + +.. autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_numpy_type_from_string + +.. autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_numpy_type_from_dtype + +.. autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_proto_dtype + +.. autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_proto_dtype_name + +.. 
autosignature:: mlprodict.onnx_tools.onnx2py_helper.guess_dtype + +In :epkg:`sklearn-onnx`: + +* `skl2onnx.algebra.type_helper.guess_initial_types` +* `skl2onnx.common.data_types.guess_data_type` +* `skl2onnx.common.data_types.guess_numpy_type` +* `skl2onnx.common.data_types.guess_proto_type` +* `skl2onnx.common.data_types.guess_tensor_type` +* `skl2onnx.common.data_types._guess_type_proto` +* `skl2onnx.common.data_types._guess_numpy_type` + +The last example summarizes all the possibilities. + +.. runpython:: + :showcode: + :process: + + import numpy + from onnx import TensorProto + + from skl2onnx.algebra.type_helper import guess_initial_types + from skl2onnx.common.data_types import guess_data_type + from skl2onnx.common.data_types import guess_numpy_type + from skl2onnx.common.data_types import guess_proto_type + from skl2onnx.common.data_types import guess_tensor_type + from skl2onnx.common.data_types import _guess_type_proto + from skl2onnx.common.data_types import _guess_numpy_type + from skl2onnx.common.data_types import DoubleTensorType + + from mlprodict.onnx_conv.convert import guess_initial_types as guess_initial_types_mlprodict + from mlprodict.onnx_tools.onnx2py_helper import guess_numpy_type_from_string + from mlprodict.onnx_tools.onnx2py_helper import guess_numpy_type_from_dtype + from mlprodict.onnx_tools.onnx2py_helper import guess_proto_dtype + from mlprodict.onnx_tools.onnx2py_helper import guess_proto_dtype_name + from mlprodict.onnx_tools.onnx2py_helper import guess_dtype + + def guess_initial_types0(t): + return guess_initial_types(numpy.array([[0, 1]], dtype=t), None) + + def guess_initial_types1(t): + return guess_initial_types(None, [('X', t)]) + + def guess_initial_types_mlprodict0(t): + return guess_initial_types_mlprodict(numpy.array([[0, 1]], dtype=t), None) + + def guess_initial_types_mlprodict1(t): + return guess_initial_types_mlprodict(None, [('X', t)]) + + def _guess_type_proto1(t): + return _guess_type_proto(t, [None, 4]) + + def 
_guess_numpy_type1(t): + return _guess_numpy_type(t, [None, 4]) + + fcts = [guess_initial_types0, guess_initial_types1, + guess_data_type, guess_numpy_type, + guess_proto_type, guess_tensor_type, + _guess_type_proto1, + _guess_numpy_type1, + guess_initial_types_mlprodict0, + guess_initial_types_mlprodict1, + guess_numpy_type_from_string, + guess_numpy_type_from_dtype, + guess_proto_dtype_name, guess_dtype] + + values = [numpy.float64, float, 'double', 'tensor(double)', + DoubleTensorType([None, 4]), + TensorProto.DOUBLE] + + print("---SUCCESS------------") + errors = [] + for f in fcts: + print("") + for v in values: + try: + r = f(v) + print("%s(%r) -> %r" % (f.__name__, v, r)) + except Exception as e: + errors.append("%s(%r) -> %r" % (f.__name__, v, e)) + errors.append("") + + print() + print('---ERRORS-------------') + print() + for e in errors: + print(e) + +skl2onnx +======== + +.. autosignature:: mlprodict.onnx_tools.exports.skl2onnx_helper.add_onnx_graph diff --git a/_doc/sphinxdoc/source/conf.py b/_doc/sphinxdoc/source/conf.py index a3e010876..0755113b8 100644 --- a/_doc/sphinxdoc/source/conf.py +++ b/_doc/sphinxdoc/source/conf.py @@ -3,7 +3,7 @@ import os import alabaster from pyquickhelper.helpgen.default_conf import set_sphinx_variables -from sklearn.experimental import enable_hist_gradient_boosting +from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=W0611 try: from mlprodict.onnx_conv import register_converters, register_rewritten_operators except ImportError as e: diff --git a/_unittests/ut_onnxrt/test_to_python.py b/_unittests/ut_onnxrt/test_onnx_inference_to_python.py similarity index 100% rename from _unittests/ut_onnxrt/test_to_python.py rename to _unittests/ut_onnxrt/test_onnx_inference_to_python.py diff --git a/_unittests/ut_onnxrt/test_rt_valid_model_grid_search_cv.py b/_unittests/ut_onnxrt/test_rt_valid_model_grid_search_cv.py index 325f4a809..61e6b2ee3 100644 --- 
a/_unittests/ut_onnxrt/test_rt_valid_model_grid_search_cv.py +++ b/_unittests/ut_onnxrt/test_rt_valid_model_grid_search_cv.py @@ -2,7 +2,6 @@ @brief test log(time=9s) """ import unittest -from logging import getLogger from pyquickhelper.loghelper import fLOG from pyquickhelper.pycode import ExtTestCase from sklearn.exceptions import ConvergenceWarning @@ -19,8 +18,6 @@ class TestRtValidateGridSearchCV(ExtTestCase): @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning)) def test_rt_grid_search_cv(self): fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__") - logger = getLogger('skl2onnx') - logger.disabled = True verbose = 1 if __name__ == "__main__" else 0 buffer = [] diff --git a/_unittests/ut_sklapi/test_onnx_speedup_classifier.py b/_unittests/ut_sklapi/test_onnx_speedup_classifier.py new file mode 100644 index 000000000..1df6940d7 --- /dev/null +++ b/_unittests/ut_sklapi/test_onnx_speedup_classifier.py @@ -0,0 +1,225 @@ +""" +@brief test log(time=5s) +""" +from io import BytesIO +import pickle +import unittest +from logging import getLogger +import numpy +from numba import NumbaWarning +# import pandas +# from sklearn.pipeline import make_pipeline +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import LogisticRegression +from sklearn.datasets import load_iris +from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlprodict.sklapi import OnnxSpeedupClassifier +from mlprodict.tools import get_opset_number_from_onnx +from mlprodict.onnx_conv import to_onnx +from mlprodict.onnxrt import OnnxInference + + +class TestOnnxSpeedupClassifier(ExtTestCase): + + def setUp(self): + logger = getLogger('skl2onnx') + logger.disabled = True + + def opset(self): + return get_opset_number_from_onnx() + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier32(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), 
target_opset=self.opset()) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier32_onnxruntime(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + runtime="onnxruntime1") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier32_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + runtime="numpy") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_classifier32_numba(self): + data = load_iris() + X, y = data.data, data.target + X = X.astype(numpy.float32) + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + runtime="numba", nopython=False) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + self.assertIn("CPUDispatch", str(spd.onnxrt_.func)) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + spd.assert_almost_equal(X) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_op_version(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + opset = spd.op_version + self.assertGreater(self.opset(), opset['']) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + + st = 
BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_numpy_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False, runtime="numpy") + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_classifier64_numba_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False, runtime="numba", nopython=False) + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_onnx(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + expected_label = spd.predict(X) + expected_proba = spd.predict_proba(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_proba, got['probabilities']) + self.assertEqualArray(expected_label, 
got['label']) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_classifier64_onnx_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numpy') + spd.fit(X, y) + expected_label = spd.predict(X) + expected_proba = spd.predict_proba(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_proba, got['probabilities']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_classifier64_onnx_numba(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + expected_label = spd.predict(X) + expected_proba = spd.predict_proba(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_proba, got['probabilities']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_classifier64_onnx_numba_python(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupClassifier( + LogisticRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + expected_label = spd.predict(X) + expected_proba = spd.predict_proba(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_proba, got['probabilities']) + self.assertEqualArray(expected_label, got['label']) + + +if __name__ == '__main__': + # TestOnnxSpeedupClassifier().test_speedup_classifier64_numba_pickle() + unittest.main() diff --git a/_unittests/ut_sklapi/test_onnx_speedup_cluster.py 
b/_unittests/ut_sklapi/test_onnx_speedup_cluster.py new file mode 100644 index 000000000..413937787 --- /dev/null +++ b/_unittests/ut_sklapi/test_onnx_speedup_cluster.py @@ -0,0 +1,225 @@ +""" +@brief test log(time=5s) +""" +from io import BytesIO +import pickle +import unittest +from logging import getLogger +import numpy +from numba import NumbaWarning +# import pandas +# from sklearn.pipeline import make_pipeline +from sklearn.exceptions import ConvergenceWarning +from sklearn.cluster import KMeans +from sklearn.datasets import load_iris +from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlprodict.sklapi import OnnxSpeedupCluster +from mlprodict.tools import get_opset_number_from_onnx +from mlprodict.onnx_conv import to_onnx +from mlprodict.onnxrt import OnnxInference + + +class TestOnnxSpeedupCluster(ExtTestCase): + + def setUp(self): + logger = getLogger('skl2onnx') + logger.disabled = True + + def opset(self): + return get_opset_number_from_onnx() + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans32(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset()) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=4) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans32_onnxruntime(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + runtime="onnxruntime1") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=4) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans32_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + runtime="numpy") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=4) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_kmeans32_numba(self): + data = load_iris() + X, y = data.data, data.target + X = 
X.astype(numpy.float32) + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + runtime="numba", nopython=False) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=4) + self.assertIn("CPUDispatch", str(spd.onnxrt_.func)) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + spd.assert_almost_equal(X) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_op_version(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + opset = spd.op_version + self.assertGreater(self.opset(), opset['']) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_numpy_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime="numpy") + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + 
@ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_kmeans64_numba_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime="numba", nopython=False) + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_onnx(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + expected_label = spd.predict(X) + expected_score = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_score, got['scores']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_kmeans64_onnx_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime='numpy') + spd.fit(X, y) + expected_label = spd.predict(X) + expected_score = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_score, got['scores']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_kmeans64_onnx_numba(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + 
expected_label = spd.predict(X) + expected_score = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_score, got['scores']) + self.assertEqualArray(expected_label, got['label']) + + @ignore_warnings((ConvergenceWarning, NumbaWarning)) + def test_speedup_kmeans64_onnx_numba_python(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupCluster( + KMeans(n_clusters=3), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + expected_label = spd.predict(X) + expected_score = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X}) + self.assertEqualArray(expected_score, got['scores']) + self.assertEqualArray(expected_label, got['label']) + + +if __name__ == '__main__': + # TestOnnxSpeedupCluster().test_speedup_kmeans32() + unittest.main() diff --git a/_unittests/ut_sklapi/test_onnx_speedup_regressor.py b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py new file mode 100644 index 000000000..a8d922635 --- /dev/null +++ b/_unittests/ut_sklapi/test_onnx_speedup_regressor.py @@ -0,0 +1,252 @@ +""" +@brief test log(time=4s) +""" +from io import BytesIO +import pickle +import unittest +from logging import getLogger +import numpy +from numba import NumbaWarning +# import pandas +# from sklearn.pipeline import make_pipeline +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import LinearRegression +from sklearn.datasets import load_iris, make_regression +from sklearn.gaussian_process import GaussianProcessRegressor +from pyquickhelper.pycode import ExtTestCase, ignore_warnings +from mlprodict.sklapi import OnnxSpeedupRegressor +from mlprodict.tools import get_opset_number_from_onnx +from mlprodict.onnx_conv import to_onnx +from mlprodict.onnxrt import OnnxInference + + +class TestOnnxSpeedupRegressor(ExtTestCase): + + def 
setUp(self): + logger = getLogger('skl2onnx') + logger.disabled = True + + def opset(self): + return get_opset_number_from_onnx() + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor32(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset()) + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor32_onnxruntime(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + runtime="onnxruntime1") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor32_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + runtime="numpy") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor32_numba(self): + data = load_iris() + X, y = data.data, data.target + X = X.astype(numpy.float32) + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + runtime="numba") + spd.fit(X, y) + spd.assert_almost_equal(X, decimal=5) + self.assertIn("CPUDispatch", str(spd.onnxrt_.func)) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + spd.assert_almost_equal(X) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_op_version(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + opset = spd.op_version + self.assertGreater(self.opset(), opset['']) + + 
@ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_numpy_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime="numpy") + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_numba_pickle(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime="numba") + spd.fit(X, y) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.predict(X) + got = spd2.predict(X) + self.assertEqualArray(expected, got) + expected = spd.raw_predict(X) + got = spd2.raw_predict(X) + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_onnx(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X, y) + expected = spd.predict(X) + onx = to_onnx(spd, 
X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_onnx_numpy(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numpy') + spd.fit(X, y) + expected = spd.predict(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_onnx_numba(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numba') + spd.fit(X, y) + # print(spd.numpy_code_) + expected = spd.predict(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + @ignore_warnings(ConvergenceWarning) + def test_speedup_regressor64_onnx_numba_python(self): + data = load_iris() + X, y = data.data, data.target + spd = OnnxSpeedupRegressor( + LinearRegression(), target_opset=self.opset(), + enforce_float32=False, runtime='numba', nopython=False) + spd.fit(X, y) + # print(spd.numpy_code_) + expected = spd.predict(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + @ignore_warnings((ConvergenceWarning, NumbaWarning, DeprecationWarning)) + def test_speedup_gaussian_regressor64_onnx_numpy_python(self): + X, y = make_regression( # pylint: disable=W0632 + n_features=2, n_samples=100, n_targets=1, random_state=42) + model = GaussianProcessRegressor( + alpha=1e-5, n_restarts_optimizer=25, normalize_y=True) + model.fit(X, y) + expected_t = model.predict(X) + onx = to_onnx(model, X[:1], target_opset=self.opset(), + options={'optim': 'cdist'}) 
+ + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['GPmean'] + self.assertEqualArray(expected_t.squeeze(), got.squeeze()) + spd = OnnxSpeedupRegressor( + model, target_opset=self.opset(), + enforce_float32=False, runtime='numpy', nopython=False, + conv_options={'optim': 'cdist'}) + spd.fit(X, y) + expected_r = spd.raw_predict(X) + self.assertEqualArray(expected_t.squeeze(), expected_r.squeeze()) + + oinf = OnnxInference(spd.onnx_) + got = oinf.run({'X': X})['GPmean'] + self.assertEqualArray(expected_r.squeeze(), got.squeeze()) + + onx = to_onnx(spd, X[:1]) + self.assertIn('CDist', str(onx)) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['GPmean'] + self.assertEqualArray(expected_r.squeeze(), got.squeeze()) + + expected = spd.predict(X) + self.assertEqualArray(expected_r.squeeze(), expected.squeeze()) + + +if __name__ == '__main__': + unittest.main() diff --git a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py new file mode 100644 index 000000000..bedb1581c --- /dev/null +++ b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py @@ -0,0 +1,181 @@ +""" +@brief test log(time=4s) +""" +from io import BytesIO +import pickle +import unittest +from logging import getLogger +import numpy +# import pandas +# from sklearn.pipeline import make_pipeline +from sklearn.decomposition import PCA +from sklearn.datasets import load_iris +from pyquickhelper.pycode import ExtTestCase +from mlprodict.sklapi import OnnxSpeedupTransformer +from mlprodict.tools import get_opset_number_from_onnx +from mlprodict.onnx_conv import to_onnx +from mlprodict.onnxrt import OnnxInference + + +class TestOnnxSpeedupTransformer(ExtTestCase): + + def setUp(self): + logger = getLogger('skl2onnx') + logger.disabled = True + + def opset(self): + return get_opset_number_from_onnx() + + def test_speedup_transform32(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer(PCA(), 
target_opset=self.opset()) + spd.fit(X) + spd.assert_almost_equal(X, decimal=5) + + def test_speedup_transform32_onnxruntime(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer( + PCA(), target_opset=self.opset(), + runtime="onnxruntime1") + spd.fit(X) + spd.assert_almost_equal(X, decimal=5) + + def test_speedup_transform32_numpy(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer( + PCA(), target_opset=self.opset(), + runtime="numpy") + spd.fit(X) + spd.assert_almost_equal(X, decimal=5) + + def test_speedup_transform32_numba(self): + data = load_iris() + X, _ = data.data, data.target + X = X.astype(numpy.float32) + spd = OnnxSpeedupTransformer( + PCA(), target_opset=self.opset(), + runtime="numba") + spd.fit(X) + spd.assert_almost_equal(X, decimal=5) + self.assertIn("CPUDispatch", str(spd.onnxrt_.func)) + + def test_speedup_transform64(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X) + spd.assert_almost_equal(X) + + def test_speedup_transform64_op_version(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X) + opset = spd.op_version + self.assertGreater(self.opset(), opset['']) + + def test_speedup_transform64_pickle(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.transform(X) + got = spd2.transform(X) + self.assertEqualArray(expected, got) + expected = spd.raw_transform(X) + got = spd2.raw_transform(X) + self.assertEqualArray(expected, got) + + def test_speedup_transform64_numpy_pickle(self): + data = load_iris() + X, _ = data.data, 
data.target + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False, + runtime="numpy") + spd.fit(X) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.transform(X) + got = spd2.transform(X) + self.assertEqualArray(expected, got) + expected = spd.raw_transform(X) + got = spd2.raw_transform(X) + self.assertEqualArray(expected, got) + + def test_speedup_transform64_numba_pickle(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False, + runtime="numba") + spd.fit(X) + + st = BytesIO() + pickle.dump(spd, st) + st2 = BytesIO(st.getvalue()) + spd2 = pickle.load(st2) + + expected = spd.transform(X) + got = spd2.transform(X) + self.assertEqualArray(expected, got) + expected = spd.raw_transform(X) + got = spd2.raw_transform(X) + self.assertEqualArray(expected, got) + + def test_speedup_transform64_onnx(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False) + spd.fit(X) + expected = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + def test_speedup_transform64_onnx_numpy(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False, + runtime='numpy') + spd.fit(X) + expected = spd.transform(X) + onx = to_onnx(spd, X[:1]) + oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + def test_speedup_transform64_onnx_numba(self): + data = load_iris() + X, _ = data.data, data.target + spd = OnnxSpeedupTransformer(PCA(), target_opset=self.opset(), + enforce_float32=False, + runtime='numba') + spd.fit(X) + expected = spd.transform(X) + onx = to_onnx(spd, X[:1]) 
+ oinf = OnnxInference(onx) + got = oinf.run({'X': X})['variable'] + self.assertEqualArray(expected, got) + + +if __name__ == '__main__': + unittest.main() diff --git a/_unittests/ut_tools/data/debug.onnx b/_unittests/ut_tools/data/debug.onnx new file mode 100644 index 000000000..04e69a909 Binary files /dev/null and b/_unittests/ut_tools/data/debug.onnx differ diff --git a/_unittests/ut_tools/test_export_onnx.py b/_unittests/ut_tools/test_export_onnx.py index b652ca018..0db8551ee 100644 --- a/_unittests/ut_tools/test_export_onnx.py +++ b/_unittests/ut_tools/test_export_onnx.py @@ -672,6 +672,9 @@ def test_export_onnx(self): self.assertEqualArray(y['y'], y1['y']) self.assertEqualArray(y['y'], y2['y']) + code2 = oinf.to_onnx_code() + self.assertEqual(new_onnx, code2) + def verify_tf(self, content): try: left, __ = verify_code(content, exc=False) @@ -1099,6 +1102,13 @@ def onnx_rfft_2d_any_test(x, fft_length): self.assertNotIn("numpy.", code) # print(code) + def test_sub_graph(self): + data = os.path.abspath(os.path.dirname(__file__)) + debug = os.path.join(data, "data", "debug.onnx") + self.assertRaise(lambda: export2onnx(debug), NotImplementedError) + # new_onnx = export2onnx(debug) + # _, loc = self.verify(new_onnx) + if __name__ == "__main__": # TestExportOnnx().test_simple_configuration() diff --git a/_unittests/ut_tools/test_onnx2py_helper.py b/_unittests/ut_tools/test_onnx2py_helper.py new file mode 100644 index 000000000..5de32f65e --- /dev/null +++ b/_unittests/ut_tools/test_onnx2py_helper.py @@ -0,0 +1,17 @@ +""" +@brief test log(time=2s) +""" +import unittest +from pyquickhelper.pycode import ExtTestCase +from mlprodict.onnx_tools.onnx2py_helper import to_skl2onnx_type + + +class TestOnnx2PyHelper(ExtTestCase): + + def test_to_skl2onnx_type(self): + r = to_skl2onnx_type('NA', 'double', (0, 15)) + self.assertEqual(repr(r), "('NA', DoubleTensorType(shape=[None, 15]))") + + +if __name__ == "__main__": + unittest.main() diff --git 
a/mlprodict/onnx_conv/convert.py b/mlprodict/onnx_conv/convert.py index ec7416231..9aa9ca40d 100644 --- a/mlprodict/onnx_conv/convert.py +++ b/mlprodict/onnx_conv/convert.py @@ -356,8 +356,8 @@ def to_onnx(model, X=None, name=None, initial_types=None, type(model))) return model.to_onnx( X=X, name=name, options=options, black_op=black_op, - white_op=white_op, final_types=final_types, - verbose=verbose) + white_op=white_op, final_types=final_types) + # verbose=verbose) if rewrite_ops: old_values, old_shapes = register_rewritten_operators() diff --git a/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl b/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl index 4f178fbb3..1a25d24f4 100644 --- a/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl +++ b/mlprodict/onnx_tools/_onnx_export_templates_numpy.tmpl @@ -1,6 +1,10 @@ import numpy +import scipy.special as scipy_special +import scipy.spatial.distance as scipy_distance from mlprodict.onnx_tools.exports.numpy_helper import ( + argmax_use_numpy_select_last_index, argmin_use_numpy_select_last_index, + array_feature_extrator, make_slice) def numpy_{{name}}({{ inputs[0][0] }}{% for i in inputs[1:]: %}, {{ i[0] }}{% endfor %}): @@ -30,6 +34,6 @@ def numpy_{{name}}({{ inputs[0][0] }}{% for i in inputs[1:]: %}, {{ i[0] }}{% en # nodes {% for node in nodes: %} - {{ make_numpy_code(target_opset, **node) }}{% endfor %} + {{ make_numpy_code(target_opset, indent=" ", **node) }}{% endfor %} return {{ outputs[0][0] }}{% for o in outputs[1:]: %}, {{ o[0] }}{% endfor %} diff --git a/mlprodict/onnx_tools/exports/numpy_helper.py b/mlprodict/onnx_tools/exports/numpy_helper.py index 51df0f028..aa0df0e66 100644 --- a/mlprodict/onnx_tools/exports/numpy_helper.py +++ b/mlprodict/onnx_tools/exports/numpy_helper.py @@ -27,12 +27,31 @@ def make_slice(data, starts, ends, axes=None, steps=None): return data[slices] +def argmax_use_numpy_select_last_index( + data, axis=0, keepdims=True, select_last_index=False): + """ + Needed or 
operator `ArgMax`. + """ + if not select_last_index: + result = numpy.argmax(data, axis=axis) + if keepdims and len(result.shape) < len(data.shape): + result = numpy.expand_dims(result, axis) + return result.astype(numpy.int64) + + data = numpy.flip(data, axis) + result = numpy.argmax(data, axis=axis) + result = data.shape[axis] - result - 1 + if keepdims: + result = numpy.expand_dims(result, axis) + return result.astype(numpy.int64) + + def argmin_use_numpy_select_last_index( data, axis=0, keepdims=True, select_last_index=False): """ Needed or operator `ArgMin`. """ - if select_last_index: + if not select_last_index: result = numpy.argmin(data, axis=axis) if keepdims and len(result.shape) < len(data.shape): result = numpy.expand_dims(result, axis) @@ -44,3 +63,475 @@ def argmin_use_numpy_select_last_index( if keepdims: result = numpy.expand_dims(result, axis) return result.astype(numpy.int64) + + +def array_feature_extrator(data, indices): + """ + Implementation of operator *ArrayFeatureExtractor* + with :epkg:`numpy`. + """ + if len(indices.shape) == 2 and indices.shape[0] == 1: + index = indices.ravel().tolist() + add = len(index) + elif len(indices.shape) == 1: + index = indices.tolist() + add = len(index) + else: + add = 1 + for s in indices.shape: + add *= s + index = indices.ravel().tolist() + if len(data.shape) == 1: + new_shape = (1, add) + else: + new_shape = list(data.shape[:-1]) + [add] + tem = data[..., index] + res = tem.reshape(new_shape) + return res + + +class NumpyCode: + """ + Converts an ONNX operators into :epkg:`numpy` code. 
+ + :param opset: target opset for the conversion (usually unused) + :param name: node name + :param op_type: operator type + :param domain: domain + :param inputs: inputs + :param outputs: outputs + :param attributes: attributes + :param used: dictionary `{k: v}`, + list of nodes taking *k* as input + :param context: whole context + :param mark_inits: marks initializer as replaced + :param indent: indentation of the second line and following + :return: code as str + """ + + def __init__(self, opset, name=None, op_type=None, domain='', + inputs=None, outputs=None, attributes=None, + used=None, context=None, mark_inits=None, + indent="", **unused): + self.opset = opset + self.name = name + self.op_type = op_type + self.domain = domain + self.inputs = inputs + self.outputs = outputs + self.attributes = attributes + self.used = used + self.context = context + self.mark_inits = mark_inits + self.unused = unused + self.indent = indent + + def _make_sure_inputs(self, n, m=None): + if m is None: + m = n + if len(self.inputs) < n: + raise RuntimeError( # pragma: no cover + "Expecting at least %d inputs for operator %r not %r." % ( + n, self.op_type, self.inputs)) + if len(self.inputs) > m: + raise RuntimeError( # pragma: no cover + "Expecting at most %d inputs for operator %r not %r." % ( + m, self.op_type, self.inputs)) + + def _make_sure_opsets(self, mi, ma=None): + if mi is not None and self.opset < mi: + raise RuntimeError( # pragma: no cover + "Cannot convert operator type %d, opset %d < %d." % ( + self.op_type, self.opset, mi)) + if ma is not None and self.opset > ma: + raise RuntimeError( # pragma: no cover + "Cannot convert operator type %d, opset %d > %d." 
% ( + self.op_type, self.opset, mi)) + + def _getat(self, name, defval=None, format=None): + + def f(v): + if format is None: + return v + if format == 'listint' and isinstance(v, str): + return list( + map(int, v.strip('[]').replace(' ', '').split(','))) + if format == 'listfloat' and isinstance(v, str): + return list( + map(float, v.strip('[]').replace(' ', '').split(','))) + raise ValueError( + "Unable to convert %r with format=%r." % (v, format)) + + for n, val in self.attributes: + if name == n: + return f(val) + return defval + + def _simplify(self, name, kind): + value = None + if (self.used is not None and name in self.used and + len(self.used[name]) == 1 and self.context is not None): + inits = self.context['initializers_dict'] + if name in inits: + v = inits[name] + if v.dtype == numpy.int64 and v.size < 10: + value = v + if name not in self.mark_inits: + self.mark_inits[name] = [] + self.mark_inits[name].append(v) + + if kind == 'tuple': + if value is None: + return "tuple(%s)" % name + if value.size == 1: + return str(tuple(value)[0]) + return str(tuple(value)) + elif kind == 'list': + if value is None: + return name + if len(value.shape) == 0: + return str(value) + return str(list(value)) + raise NotImplementedError( + "Unknown scenario to simplify (%r)." % kind) + + @staticmethod + def _make_tuple(val): + if isinstance(val, tuple): + return val + if isinstance(val, list): + return tuple(val) + if isinstance(val, int): + return val + if isinstance(val, str): + return tuple(map(int, val.strip('()[]').replace(" ", "").split(","))) + raise NotImplementedError( + "Unable to convert %r into tuple." % val) + + def make_numpy_code(self): + """ + Main method, returns the python code for a given + operator. 
+ """ + if self.domain == '': + return self._make_numpy_code_onnx() + + if self.domain == 'ai.onnx.ml': + return self._make_numpy_code_onnxml() + + if self.domain == 'com.microsoft': + return self._make_numpy_code_others() + + raise NotImplementedError( + "Unable to convert any operator from domain %r." % self.domain) + + def _make_numpy_code_onnx(self): + + binary_ops = dict(Add='+', Sub='-', Div='/', Mul='*', MatMul='@', + Pow='**') + unary_ops = dict(Neg='-') + unary_ops_ = dict(Sqrt='** 0.5') + + outs = ", ".join(self.outputs) + + if self.op_type in binary_ops: + self._make_sure_inputs(2) + return "%s = %s %s %s" % ( + outs, self.inputs[0], binary_ops[self.op_type], + self.inputs[1]) + + if self.op_type in unary_ops: + self._make_sure_inputs(1) + return "%s = %s %s" % ( + outs, unary_ops[self.op_type], self.inputs[0]) + + if self.op_type in unary_ops_: + self._make_sure_inputs(1) + return "%s = %s %s" % ( + outs, self.inputs[0], unary_ops_[self.op_type]) + + if self.op_type == 'ArgMax': + self._make_sure_opsets(12) + self._make_sure_inputs(1) + axis = self._getat('axis', 0) + keepdims = self._getat('keepdims', 1) + select_last_index = self._getat('keepdims', 0) + if select_last_index: + return ( + "%s = argmax_use_numpy_select_last_index(" + "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( + outs, self.inputs[0], axis, keepdims, select_last_index)) + if keepdims: + return "%s = numpy.expand_dims(numpy.argmax(%s, axis=%s), -1)" % ( + outs, self.inputs[0], axis) + return "%s = numpy.argmax(%s, axis=%s)" % ( + outs, self.inputs[0], axis) + + if self.op_type == 'ArgMin': + self._make_sure_opsets(12) + self._make_sure_inputs(1) + axis = self._getat('axis', 0) + keepdims = self._getat('keepdims', 1) + select_last_index = self._getat('keepdims', 0) + if select_last_index: + return ( + "%s = argmin_use_numpy_select_last_index(" + "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( + outs, self.inputs[0], axis, keepdims, select_last_index)) + if keepdims: + 
return "%s = numpy.expand_dims(numpy.argmin(%s, axis=%s), -1)" % ( + outs, self.inputs[0], axis) + return "%s = numpy.argmin(%s, axis=%s)" % ( + outs, self.inputs[0], axis) + + if self.op_type == 'Cast': + from ..onnx2py_helper import _elem_type_as_str + self._make_sure_inputs(1) + to = int(self._getat('to', 1)) + dtype = _elem_type_as_str(to) + dtype = {'double': 'float64', 'float': 'float32'}.get(dtype, dtype) + return "%s = %s.astype(numpy.%s)" % (outs, self.inputs[0], dtype) + + if self.op_type == 'Concat': + axis = self._getat('axis', 0) + return "%s = numpy.concatenate([%s], %s)" % ( + outs, ", ".join(self.inputs), axis) + + if self.op_type == 'ConstantOfShape': + self._make_sure_opsets(9) + self._make_sure_inputs(1) + value = self._getat('value', 0, format='listfloat') + shape = self._simplify(self.inputs[0], kind='tuple') + return "%s = numpy.full(%s, %s)" % ( + outs, shape, value) + + if self.op_type == 'Exp': + return "%s = numpy.exp(%s)" % (outs, self.inputs[0]) + + if self.op_type == 'Max': + return "%s = numpy.maximum(%s)" % (outs, ", ".join(self.inputs)) + + if self.op_type == 'Gather': + self._make_sure_opsets(11) + self._make_sure_inputs(2) + axis = self._getat('axis', 0) + return "%s = numpy.take(%s, %s, axis=%s)" % ( + outs, self.inputs[0], + self._simplify(self.inputs[1], 'list'), axis) + + if self.op_type == 'Gemm': + self._make_sure_inputs(2, 3) + alpha = self._getat('alpha', 0.) + transA = self._getat('transA', 0) + transB = self._getat('transB', 0) + ta = ".T" if transA in ('1', 1, True) else "" + tb = ".T" if transB in ('1', 1, True) else "" + if len(self.inputs) == 2: + return "%s = %s%s @ %s%s * %s" % ( + outs, self.inputs[0], ta, self.inputs[1], tb, alpha) + beta = self._getat('beta', 0.) 
+ return "%s = %s%s @ %s%s * %s + %s * %s" % ( + outs, self.inputs[0], ta, self.inputs[1], tb, alpha, + self.inputs[2], beta) + + if self.op_type == 'Identity': + return "%s = %s" % (outs, self.inputs[0]) + + if self.op_type == 'ReduceProd': + self._make_sure_inputs(1) + axes = self._getat('axes', "[0]") + keepdims = self._getat('keepdims', 0) + return "%s = %s.prod(axis=tuple(%s), keepdims=%s)" % ( + outs, self.inputs[0], axes, keepdims) + + if self.op_type == 'ReduceSum': + self._make_sure_opsets(11) + self._make_sure_inputs(2) + keepdims = self._getat('keepdims', 0) + return "%s = %s.sum(axis=%s, keepdims=%s)" % ( + outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'), + keepdims) + + if self.op_type == 'ReduceSumSquare': + self._make_sure_inputs(1) + axes = self._getat('axes', "[0]") + keepdims = self._getat('keepdims', 0) + return "%s = (%s ** 2).sum(axis=tuple(%s), keepdims=%s)" % ( + outs, self.inputs[0], axes, keepdims) + + if self.op_type == 'Reshape': + self._make_sure_inputs(2) + simp = self._simplify(self.inputs[1], 'tuple') + return "%s = %s.reshape(%s)" % ( + outs, self.inputs[0], simp) + + if self.op_type == 'Shape': + self._make_sure_inputs(1) + return "%s = numpy.array(%s.shape, dtype=numpy.int64)" % ( + outs, self.inputs[0]) + + if self.op_type == 'Slice': + return "%s = make_slice(%s)" % (outs, ", ".join(self.inputs)) + + if self.op_type == 'Softmax': + self._make_sure_inputs(1) + axis = self._getat('axis', -1) + return "%s = scipy_special.softmax(%s, axis=%s)" % ( + outs, self.inputs[0], axis) + + if self.op_type == 'Squeeze': + self._make_sure_opsets(13) + self._make_sure_inputs(2) + return "%s = numpy.squeeze(%s, axis=%s)" % ( + outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple')) + + if self.op_type == 'Transpose': + self._make_sure_inputs(1) + perm = self._getat('perm', None) + return "%s = numpy.transpose(%s, axes=%s)" % ( + outs, self.inputs[0], self._make_tuple(perm)) + + if self.op_type == 'Unsqueeze': + 
self._make_sure_opsets(13) + self._make_sure_inputs(2) + return "%s = numpy.expand_dims(%s, axis=%s)" % ( + outs, self.inputs[0], + self._simplify(self.inputs[1], 'tuple')) + + raise NotImplementedError( # pragma: no cover + "Unable to convert operator type %r name=%r." % ( + self.op_type, self.name)) + + def _make_numpy_code_onnxml(self): + outs = ", ".join(self.outputs) + + if self.op_type == 'ArrayFeatureExtractor': + self._make_sure_inputs(2) + return "%s = array_feature_extrator(%s, %s)" % ( + outs, self.inputs[0], self.inputs[1]) + + if self.op_type == 'LinearClassifier': + multi_class = self._getat('targets', 0) + if multi_class != 0: + raise NotImplementedError( + "Conversion of operator %r with multi_class=%r " + "is not implemented." % (self.op_type, multi_class)) + self._make_sure_inputs(1) + coefficients = self._getat('coefficients', None) + intercepts = self._getat('intercepts', None) + post_transform = self._getat( + 'post_transform', 'NONE').strip('"\'b') + classlabels_strings = self._getat('classlabels_strings', None) + if classlabels_strings is not None: + raise NotImplementedError( + "Conversion of operator %r with classlabels_strings=%r " + "is not implemented." % (self.op_type, classlabels_strings)) + classlabels_ints = self._getat( + 'classlabels_ints', None, format="listint") + if classlabels_ints != list(range(len(classlabels_ints))): + raise NotImplementedError( + "Conversion of operator %r with classlabels_ints=%r!=%r " + "is not implemented." % ( + self.op_type, classlabels_ints, + list(range(len(classlabels_ints))))) + targets = len(classlabels_ints) + rows = [ + "coefs = numpy.array(%s, dtype=numpy.float32)." + "reshape((%d, -1)).T" % (coefficients, targets), + "%sinter = numpy.array(%s, dtype=numpy.float32)." 
+ "reshape((-1, %d))" % (self.indent, intercepts, targets)] + + if post_transform == "SOFTMAX": + rows.append( + "%s%s = scipy_special.softmax" + "(%s @ coefs + inter, axis=1)" % ( + self.indent, self.outputs[1], self.inputs[0])) + elif post_transform == 'NONE': + rows.append( + "%s%s = %s @ coefs + inter" % ( + self.indent, self.outputs[1], self.inputs[0])) + elif post_transform != "NONE": + raise NotImplementedError( + "Conversion of operator %r with post_transform=%r " + "is not implemented." % (self.op_type, post_transform)) + rows.append("%s%s = numpy.argmax(%s, axis=1)" % ( + self.indent, self.outputs[0], self.outputs[1])) + return "\n".join(rows) + + if self.op_type == 'LinearRegressor': + self._make_sure_inputs(1) + coefficients = self._getat('coefficients', None) + intercepts = self._getat('intercepts', None) + post_transform = self._getat( + 'post_transform', 'NONE').strip('"\'b') + targets = self._getat('targets', 1) + if post_transform != "NONE": + raise NotImplementedError( + "Conversion of operator %r with post_transform=%r " + "is not implemented." % (self.op_type, post_transform)) + rows = [ + "coefs = numpy.array(%s, dtype=numpy.float32)." + "reshape((%d, -1)).T" % (coefficients, targets), + "%sinter = numpy.array(%s, dtype=numpy.float32)." + "reshape((-1, %d))" % (self.indent, intercepts, targets), + "%s%s = %s @ coefs + inter" % ( + self.indent, outs, self.inputs[0])] + return "\n".join(rows) + + if self.op_type == 'Normalizer': + self._make_sure_inputs(1) + post_transform = self._getat('norm', 'MAX').strip('"\'b') + if post_transform == 'L2': + return "%s = %s / (%s ** 2).sum(axis=1) ** 0.5" % ( + outs, self.inputs[0], self.inputs[0]) + if post_transform == 'L1': + post_transform = 'sum' + return "%s = %s / %s.%s(axis=1, keepdims=1)" % ( + outs, self.inputs[0], self.inputs[0], post_transform.lower()) + + raise NotImplementedError( # pragma: no cover + "Unable to convert operator type %r name=%r (onnxml)." 
% ( + self.op_type, self.name)) + + def _make_numpy_code_others(self): + outs = ", ".join(self.outputs) + + if self.op_type == 'CDist': + self._make_sure_inputs(2) + metric = self._getat('metric', 'euclidean').strip("'b") + return "%s = scipy_distance.cdist(%s, %s, metric=%r)" % ( + outs, self.inputs[0], self.inputs[1], metric) + + raise NotImplementedError( # pragma: no cover + "Unable to convert operator type %r (domain=%r) " + "name=%r (onnxml)." % ( + self.op_type, self.domain, self.name)) + + +def make_numpy_code(opset, name=None, op_type=None, domain='', + inputs=None, outputs=None, attributes=None, + used=None, context=None, mark_inits=None, + indent="", **unused): + """ + Converts an ONNX operators into :epkg:`numpy` code. + + :param opset: target opset for the conversion (usually unused) + :param name: node name + :param op_type: operator type + :param domain: domain + :param inputs: inputs + :param outputs: outputs + :param attributes: attributes + :param used: dictionary `{k: v}`, + list of nodes taking *k* as input + :param context: whole context + :param mark_inits: marks initializer as replaced + :param indent: indentation of the second line and following + :return: code as str + """ + cl = NumpyCode( + opset=opset, name=name, op_type=op_type, domain=domain, + inputs=inputs, outputs=outputs, attributes=attributes, + used=used, context=context, mark_inits=mark_inits, + indent=indent, **unused) + return cl.make_numpy_code() diff --git a/mlprodict/onnx_tools/exports/skl2onnx_helper.py b/mlprodict/onnx_tools/exports/skl2onnx_helper.py new file mode 100644 index 000000000..41aafde1c --- /dev/null +++ b/mlprodict/onnx_tools/exports/skl2onnx_helper.py @@ -0,0 +1,89 @@ +""" +@file +@brief Helpers to run examples created with :epkg:`sklearn-onnx`. 
+""" +from onnx import helper, TensorProto + + +def _copy_inout(inout, scope, new_name): + shape = [s.dim_value for s in inout.type.tensor_type.shape.dim] + value_info = helper.make_tensor_value_info( + new_name, inout.type.tensor_type.elem_type, shape) + return value_info + + +def _clean_variable_name(name, scope): + return scope.get_unique_variable_name(name) + + +def _clean_operator_name(name, scope): + return scope.get_unique_operator_name(name) + + +def _clean_initializer_name(name, scope): + return scope.get_unique_variable_name(name) + + +def add_onnx_graph(scope, operator, container, onx): + """ + Adds a whole ONNX graph to an existing one following + :epkg:`skl2onnx` API assuming this ONNX graph implements + an `operator `_. + + :param scope: scope (to get unique names) + :param operator: operator + :param container: container + :param onx: ONNX graph + """ + graph = onx.graph + name_mapping = {} + node_mapping = {} + for node in graph.node: + name = node.name + if name is not None: + node_mapping[node.name] = _clean_initializer_name( + node.name, scope) + for o in node.input: + name_mapping[o] = _clean_variable_name(o, scope) + for o in node.output: + name_mapping[o] = _clean_variable_name(o, scope) + for o in graph.initializer: + name_mapping[o.name] = _clean_operator_name(o.name, scope) + + inputs = [_copy_inout(o, scope, name_mapping[o.name]) + for o in graph.input] + outputs = [_copy_inout(o, scope, name_mapping[o.name]) + for o in graph.output] + + for inp, to in zip(operator.inputs, inputs): + n = helper.make_node('Identity', [inp.onnx_name], [to.name], + name=_clean_operator_name('Identity', scope)) + container.nodes.append(n) + + for inp, to in zip(outputs, operator.outputs): + n = helper.make_node('Identity', [inp.name], [to.onnx_name], + name=_clean_operator_name('Identity', scope)) + container.nodes.append(n) + + for node in graph.node: + n = helper.make_node( + node.op_type, + [name_mapping[o] for o in node.input], + [name_mapping[o] for o in 
node.output], + name=node_mapping[node.name] if node.name else None, + domain=node.domain if node.domain else None) + n.attribute.extend(node.attribute) # pylint: disable=E1101 + container.nodes.append(n) + + for o in graph.initializer: + as_str = o.SerializeToString() + tensor = TensorProto() + tensor.ParseFromString(as_str) + tensor.name = name_mapping[o.name] + container.initializers.append(tensor) + + # opset + for oimp in onx.opset_import: + container.node_domain_version_pair_sets.add( + (oimp.domain, oimp.version)) diff --git a/mlprodict/onnx_tools/exports/tf2onnx_helper.py b/mlprodict/onnx_tools/exports/tf2onnx_helper.py index 1a4b272d0..9d912c816 100644 --- a/mlprodict/onnx_tools/exports/tf2onnx_helper.py +++ b/mlprodict/onnx_tools/exports/tf2onnx_helper.py @@ -17,6 +17,86 @@ _make_name_id = 0 +def make_tf2onnx_code(opset, name=None, op_type=None, domain='', + inputs=None, outputs=None, attributes=None, + used=None, context=None, mark_inits=None, indent=8, + **unused): + """ + Converts an ONNX operators into :epkg:`tf2onnx` code. 
+ + :param opset: target opset for the conversion (usually unused) + :param name: node name + :param op_type: operator type + :param domain: domain + :param inputs: inputs + :param outputs: outputs + :param attributes: attributes + :param used: dictionary `{k: v}`, + list of nodes taking *k* as input + :param context: whole context + :param mark_inits: marks initializer as replaced + :param indent: number of spaces to add on the second + and following rows + :return: code as str + """ + def simplify(name, kind, force=True): + value = None + if (used is not None and name in used and + len(used[name]) == 1 and context is not None): + inits = context['initializers_dict'] + if name in inits: + v = inits[name] + if v.dtype == numpy.int64 and v.size < 10: + value = v + if name not in mark_inits: + mark_inits[name] = [] + mark_inits[name].append(v) + + if value is None and force: + inits = context['initializers_dict'] + value = inits[name] + if kind == 'list': + if value is None: + return name + if len(value.shape) == 0: + return str(value) + return str(list(value)) + raise NotImplementedError( + "Unknown scenario to simplify (%r)." % kind) + + rows = [] + if op_type == 'Unsqueeze': + if len(inputs) == 2: + rows.append( + "node = GraphBuilder(ctx).make_unsqueeze(" + "{'data': varx[%r], 'axes': %s}, return_node=True)" + "" % (inputs[0], simplify(inputs[1], 'list'))) + else: + raise NotImplementedError( # pragma: no cover + "Unable to create code for operator %r (opset <= 12)" + "." 
% op_type) + else: + if len(attributes) > 0: + attributes_str = ", ".join("%s=%s" % (k, v) for k, v in attributes) + attr = ", attr=dict(%s)" % attributes_str + else: + attr = "" + rows.append( + "inputs = [%s]" % ", ".join("varx[%r]" % n for n in inputs)) + sdomain = '' if domain == '' else ("domain=%r, " % domain) + rows.append( + "node = ctx.make_node(%r, inputs=inputs%s, %s" + "name=make_name(%r))" % ( + op_type, attr, sdomain, name)) + for i, n in enumerate(outputs): + rows.append("varx[%r] = node.output[%d]" % (n, i)) + if indent > 0: + sind = " " * indent + for i in range(1, len(rows)): + rows[i] = sind + rows[i] + return "\n".join(rows) + + def make_name(name): "Creates a unique name." global _make_name_id # pylint: disable=W0603 @@ -446,13 +526,15 @@ def make_slice(self, kwargs, name=None, shapes=None, dtypes=None, return_node=Fa make_sure(dtype == self.graph.get_dtype( input_data), "dtype should be same") - node = self.graph.make_node(op_type="Slice", inputs=inputs, attr=attr, name=name, - outputs=outputs, shapes=shapes, dtypes=dtypes) + node = self.graph.make_node(op_type="Slice", inputs=inputs, attr=attr, + name=name, outputs=outputs, shapes=shapes, + dtypes=dtypes) if return_node: return node raise NotImplementedError("return_node must be True") - def make_squeeze(self, kwargs, name=None, shapes=None, dtypes=None, return_node=False, op_name_scope=None): + def make_squeeze(self, kwargs, name=None, shapes=None, dtypes=None, + return_node=False, op_name_scope=None): """ Squeeze changes its schema at opset 13: it treats axes as a dynamic input kwargs: key could be ["data", "axes"]. 
@@ -487,13 +569,15 @@ def make_squeeze(self, kwargs, name=None, shapes=None, dtypes=None, return_node= while inputs[-1] == "": inputs = inputs[:-1] - node = self.graph.make_node(op_type="Squeeze", inputs=inputs, attr=attr, name=name, - outputs=outputs) + node = self.graph.make_node( + op_type="Squeeze", inputs=inputs, attr=attr, name=name, + outputs=outputs) if return_node: return node raise NotImplementedError("return_node must be True") - def make_unsqueeze(self, kwargs, name=None, shapes=None, dtypes=None, return_node=False, op_name_scope=None): + def make_unsqueeze(self, kwargs, name=None, shapes=None, dtypes=None, + return_node=False, op_name_scope=None): """ Unsqueeze changes its schema at opset 13: it treats axes as a dynamic input kwargs: key could be ["data", "axes"]. @@ -528,8 +612,9 @@ def make_unsqueeze(self, kwargs, name=None, shapes=None, dtypes=None, return_nod while inputs[-1] == "": inputs = inputs[:-1] - node = self.graph.make_node(op_type="Unsqueeze", inputs=inputs, attr=attr, name=name, - outputs=outputs) + node = self.graph.make_node( + op_type="Unsqueeze", inputs=inputs, attr=attr, name=name, + outputs=outputs) if return_node: return node raise NotImplementedError("return_node must be True") diff --git a/mlprodict/onnx_tools/onnx2py_helper.py b/mlprodict/onnx_tools/onnx2py_helper.py index c6576c866..e4d3164bb 100644 --- a/mlprodict/onnx_tools/onnx2py_helper.py +++ b/mlprodict/onnx_tools/onnx2py_helper.py @@ -9,6 +9,7 @@ from scipy.sparse import coo_matrix from onnx import onnx_pb as onnx_proto, TensorProto from onnx.numpy_helper import to_array, from_array +from skl2onnx.common.data_types import _guess_numpy_type def to_bytes(val): @@ -158,7 +159,7 @@ def guess_numpy_type_from_dtype(dt): if dt == numpy.dtype('float32'): return numpy.float32 if dt == numpy.dtype('float64'): - return numpy.floa64 + return numpy.float64 if dt == numpy.dtype('int64'): return numpy.int64 if dt == numpy.dtype('int8'): @@ -380,6 +381,19 @@ def _var_as_dict(var): 
"Unable to guess which object it is.\n{}\n---".format(var)) +def onnx_model_opsets(onnx_model): + """ + Extracts opsets in a dictionary. + + :param onnx_model: ONNX graph + :return: dictionary `{domain: version}` + """ + res = {} + for oimp in onnx_model.opset_import: + res[oimp.domain] = oimp.version + return res + + def _type_to_string(dtype): """ Converts a type into a readable string. @@ -552,3 +566,18 @@ def guess_dtype(proto_type): raise ValueError( "Unable to convert proto_type {} to numpy type.".format( proto_type)) + + +def to_skl2onnx_type(name, elem_type, shape): + """ + Converts *name*, *elem_type*, *shape* into a + :epkg:`sklearn-onnx` type. + + :param name: string + :param elem_type: tensor of elements of this type + :param shape: expected shape + :return: data type + """ + elem = guess_numpy_type_from_string(elem_type) + shape = list(None if d == 0 else d for d in shape) + return (name, _guess_numpy_type(elem, shape)) diff --git a/mlprodict/onnx_tools/onnx_export.py b/mlprodict/onnx_tools/onnx_export.py index 5afd6729c..2e44de6cb 100644 --- a/mlprodict/onnx_tools/onnx_export.py +++ b/mlprodict/onnx_tools/onnx_export.py @@ -14,294 +14,8 @@ _var_as_dict, guess_proto_dtype, guess_proto_dtype_name) from .onnx_export_templates import ( get_onnx_template, get_tf2onnx_template, get_numpy_template) - - -def make_tf2onnx_code(opset, name=None, op_type=None, domain='', - inputs=None, outputs=None, attributes=None, - used=None, context=None, mark_inits=None, indent=8, - **unused): - """ - Converts an ONNX operators into :epkg:`tf2onnx` code. 
- - :param opset: target opset for the conversion (usually unused) - :param name: node name - :param op_type: operator type - :param domain: domain - :param inputs: inputs - :param outputs: outputs - :param attributes: attributes - :param used: dictionary `{k: v}`, - list of nodes taking *k* as input - :param context: whole context - :param mark_inits: marks initializer as replaced - :param indent: number of spaces to add on the second - and following rows - :return: code as str - """ - def simplify(name, kind, force=True): - value = None - if (used is not None and name in used and - len(used[name]) == 1 and context is not None): - inits = context['initializers_dict'] - if name in inits: - v = inits[name] - if v.dtype == numpy.int64 and v.size < 10: - value = v - if name not in mark_inits: - mark_inits[name] = [] - mark_inits[name].append(v) - - if value is None and force: - inits = context['initializers_dict'] - value = inits[name] - if kind == 'list': - if value is None: - return name - if len(value.shape) == 0: - return str(value) - return str(list(value)) - raise NotImplementedError( - "Unknown scenario to simplify (%r)." % kind) - - rows = [] - if op_type == 'Unsqueeze': - if len(inputs) == 2: - rows.append( - "node = GraphBuilder(ctx).make_unsqueeze(" - "{'data': varx[%r], 'axes': %s}, return_node=True)" - "" % (inputs[0], simplify(inputs[1], 'list'))) - else: - raise NotImplementedError( # pragma: no cover - "Unable to create code for operator %r (opset <= 12)" - "." 
% op_type) - else: - if len(attributes) > 0: - attributes_str = ", ".join("%s=%s" % (k, v) for k, v in attributes) - attr = ", attr=dict(%s)" % attributes_str - else: - attr = "" - rows.append( - "inputs = [%s]" % ", ".join("varx[%r]" % n for n in inputs)) - sdomain = '' if domain == '' else ("domain=%r, " % domain) - rows.append( - "node = ctx.make_node(%r, inputs=inputs%s, %s" - "name=make_name(%r))" % ( - op_type, attr, sdomain, name)) - for i, n in enumerate(outputs): - rows.append("varx[%r] = node.output[%d]" % (n, i)) - if indent > 0: - sind = " " * indent - for i in range(1, len(rows)): - rows[i] = sind + rows[i] - return "\n".join(rows) - - -def make_numpy_code(opset, name=None, op_type=None, domain='', - inputs=None, outputs=None, attributes=None, - used=None, context=None, mark_inits=None, - **unused): - """ - Converts an ONNX operators into :epkg:`numpy` code. - - :param opset: target opset for the conversion (usually unused) - :param name: node name - :param op_type: operator type - :param domain: domain - :param inputs: inputs - :param outputs: outputs - :param attributes: attributes - :param used: dictionary `{k: v}`, - list of nodes taking *k* as input - :param context: whole context - :param mark_inits: marks initializer as replaced - :return: code as str - """ - def make_sure_inputs(n, m=None): - if m is None: - m = n - if len(inputs) < n: - raise RuntimeError( # pragma: no cover - "Expecting at least %d inputs for operator %r not %r." % ( - n, op_type, inputs)) - if len(inputs) > m: - raise RuntimeError( # pragma: no cover - "Expecting at most %d inputs for operator %r not %r." % ( - m, op_type, inputs)) - - def make_sure_opsets(mi, ma=None): - if mi is not None and opset < mi: - raise RuntimeError( # pragma: no cover - "Cannot convert operator type %d, opset %d < %d." % ( - op_type, opset, mi)) - if ma is not None and opset > ma: - raise RuntimeError( # pragma: no cover - "Cannot convert operator type %d, opset %d > %d." 
% ( - op_type, opset, mi)) - - def getat(name, defval=None): - for n, val in attributes: - if name == n: - return val - return defval - - def simplify(name, kind): - value = None - if (used is not None and name in used and - len(used[name]) == 1 and context is not None): - inits = context['initializers_dict'] - if name in inits: - v = inits[name] - if v.dtype == numpy.int64 and v.size < 10: - value = v - if name not in mark_inits: - mark_inits[name] = [] - mark_inits[name].append(v) - - if kind == 'tuple': - if value is None: - return "tuple(%s)" % name - if value.size == 1: - return str(tuple(value)[0]) - return str(tuple(value)) - elif kind == 'list': - if value is None: - return name - if len(value.shape) == 0: - return str(value) - return str(list(value)) - raise NotImplementedError( - "Unknown scenario to simplify (%r)." % kind) - - def make_tuple(val): - if isinstance(val, tuple): - return val - if isinstance(val, list): - return tuple(val) - if isinstance(val, int): - return val - if isinstance(val, str): - return tuple(map(int, val.strip('()[]').replace(" ", "").split(","))) - raise NotImplementedError( - "Unable to convert %r into tuple." % val) - - if domain != '': - raise NotImplementedError( - "Unable to convert any operator from domain %r." 
% domain) - - binary_ops = dict(Add='+', Sub='-', Div='/', Mul='*', MatMul='@', - Pow='**') - unary_ops = dict(Neg='-') - unary_ops_ = dict(Sqrt='** 0.5') - - outs = ", ".join(outputs) - - if op_type in binary_ops: - make_sure_inputs(2) - return "%s = %s %s %s" % (outs, inputs[0], binary_ops[op_type], inputs[1]) - - if op_type in unary_ops: - make_sure_inputs(1) - return "%s = %s %s" % (outs, unary_ops[op_type], inputs[0]) - - if op_type in unary_ops_: - make_sure_inputs(1) - return "%s = %s %s" % (outs, inputs[0], unary_ops_[op_type]) - - if op_type == 'ArgMin': - make_sure_opsets(12) - make_sure_inputs(1) - axis = getat('axis', 0) - keepdims = getat('keepdims', 1) - select_last_index = getat('keepdims', 0) - return ( - "%s = argmin_use_numpy_select_last_index(" - "%s, axis=%s, keepdims=%s, select_last_index=%s)" % ( - outs, inputs[0], axis, keepdims, select_last_index)) - - if op_type == 'Concat': - axis = getat('axis', 0) - return "%s = numpy.concatenate([%s], %s)" % (outs, ", ".join(inputs), axis) - - if op_type == 'Max': - return "%s = numpy.maximum(%s)" % (outs, ", ".join(inputs)) - - if op_type == 'Gather': - make_sure_opsets(11) - make_sure_inputs(2) - axis = getat('axis', 0) - return "%s = numpy.take(%s, %s, axis=%s)" % ( - outs, inputs[0], simplify(inputs[1], 'list'), axis) - - if op_type == 'Gemm': - make_sure_inputs(2, 3) - alpha = getat('alpha', 0.) - transA = getat('transA', 0) - transB = getat('transB', 0) - ta = ".T" if transA in ('1', 1, True) else "" - tb = ".T" if transB in ('1', 1, True) else "" - if len(inputs) == 2: - return "%s = %s%s @ %s%s * %s" % ( - outs, inputs[0], ta, inputs[1], tb, alpha) - beta = getat('beta', 0.) 
- return "%s = %s%s @ %s%s * %s + %s * %s" % ( - outs, inputs[0], ta, inputs[1], tb, alpha, inputs[2], beta) - - if op_type == 'Identity': - return "%s = %s" % (outs, inputs[0]) - - if op_type == 'ReduceProd': - make_sure_inputs(1) - axes = getat('axes', "[0]") - keepdims = getat('keepdims', 0) - return "%s = %s.prod(axis=tuple(%s), keepdims=%s)" % ( - outs, inputs[0], axes, keepdims) - - if op_type == 'ReduceSum': - make_sure_opsets(11) - make_sure_inputs(2) - keepdims = getat('keepdims', 0) - return "%s = %s.sum(axis=%s, keepdims=%s)" % ( - outs, inputs[0], simplify(inputs[1], 'tuple'), keepdims) - - if op_type == 'ReduceSumSquare': - make_sure_inputs(1) - axes = getat('axes', "[0]") - keepdims = getat('keepdims', 0) - return "%s = (%s ** 2).sum(axis=tuple(%s), keepdims=%s)" % ( - outs, inputs[0], axes, keepdims) - - if op_type == 'Reshape': - make_sure_inputs(2) - return "%s = %s.reshape(%s)" % ( - outs, inputs[0], simplify(inputs[1], 'tuple')) - - if op_type == 'Shape': - make_sure_inputs(1) - return "%s = numpy.array(%s.shape, dtype=numpy.int64)" % (outs, inputs[0]) - - if op_type == 'Slice': - return "%s = make_slice(%s)" % (outs, ", ".join(inputs)) - - if op_type == 'Squeeze': - make_sure_opsets(13) - make_sure_inputs(2) - return "%s = numpy.squeeze(%s, axis=%s)" % ( - outs, inputs[0], simplify(inputs[1], 'tuple')) - - if op_type == 'Transpose': - make_sure_inputs(1) - perm = getat('perm', None) - return "%s = numpy.transpose(%s, axes=%s)" % ( - outs, inputs[0], make_tuple(perm)) - - if op_type == 'Unsqueeze': - make_sure_opsets(13) - make_sure_inputs(2) - return "%s = numpy.expand_dims(%s, axis=%s)" % ( - outs, inputs[0], simplify(inputs[1], 'tuple')) - - raise NotImplementedError( - "Unable to convert operator type %r name=%r." 
% (op_type, name)) +from .exports.numpy_helper import make_numpy_code +from .exports.tf2onnx_helper import make_tf2onnx_code def export_template(model_onnx, templates, opset=None, verbose=True, name=None, @@ -406,7 +120,8 @@ def rename_name(name): # node nodes = [] for node in model_onnx.graph.node: - for i in node.input: + for i_raw_name in node.input: + i = rename_name(i_raw_name) if i not in used: used[i] = [] used[i].append(node) @@ -414,6 +129,10 @@ def rename_name(name): for at in node.attribute: temp = _var_as_dict(at) value = temp['value'] + if node.op_type in {'Scan', 'Loop', 'If'}: + raise NotImplementedError( + "Subgraph are not yet implemented (operator=%r)." + "" % node.op_type) if use_onnx_tensor: if node.op_type == 'Cast' and at.name == 'to': attributes.append( @@ -447,6 +166,7 @@ def rename_name(name): # graph context['name'] = name or model_onnx.graph.name + context['name'] = context['name'].replace("(", "_").replace(")", "") context['ir_version'] = model_onnx.ir_version context['producer_name'] = model_onnx.producer_name context['domain'] = model_onnx.domain @@ -456,7 +176,7 @@ def rename_name(name): context['skip_inits'] = {} mark_inits = {} - # final + # First rendering to detect any unused or replaced initializer. template = Template(templates) final = template.render( enumerate=enumerate, sorted=sorted, len=len, @@ -475,6 +195,8 @@ def rename_name(name): skip_inits.add(k) if len(skip_inits) > 0: + # Second rendering if needed when an initializer was replaced + # or removed. context['skip_inits'] = skip_inits # Again with skip_inits. final = template.render( @@ -513,6 +235,7 @@ def export2onnx(model_onnx, opset=None, verbose=True, name=None, rename=False, .. runpython:: :showcode: + :process: import numpy from sklearn.cluster import KMeans @@ -554,6 +277,7 @@ def export2tf2onnx(model_onnx, opset=None, verbose=True, name=None, .. 
runpython:: :showcode: + :process: import numpy from sklearn.cluster import KMeans @@ -597,6 +321,7 @@ def export2numpy(model_onnx, opset=None, verbose=True, name=None, .. runpython:: :showcode: + :process: import numpy from sklearn.cluster import KMeans @@ -617,6 +342,7 @@ def export2numpy(model_onnx, opset=None, verbose=True, name=None, .. runpython:: :showcode: + :process: import numpy from mlprodict.testing.einsum import decompose_einsum_equation diff --git a/mlprodict/onnxrt/onnx_inference.py b/mlprodict/onnxrt/onnx_inference.py index 763e3a430..47e3a16d2 100644 --- a/mlprodict/onnxrt/onnx_inference.py +++ b/mlprodict/onnxrt/onnx_inference.py @@ -221,6 +221,7 @@ def _init(self): self.to_dot = self.exporters_.to_dot self.to_python = self.exporters_.to_python self.to_text = self.exporters_.to_text + self.to_onnx_code = self.exporters_.to_onnx_code if self.runtime in ('python_compiled', 'python_compiled_debug'): # switch the inference method to the compiled one diff --git a/mlprodict/onnxrt/onnx_inference_exports.py b/mlprodict/onnxrt/onnx_inference_exports.py index 91ead76b5..76ebe7173 100644 --- a/mlprodict/onnxrt/onnx_inference_exports.py +++ b/mlprodict/onnxrt/onnx_inference_exports.py @@ -11,6 +11,7 @@ from onnx import numpy_helper from ..onnx_tools.onnx2py_helper import _var_as_dict, _type_to_string from ..tools.graphs import onnx2bigraph +from ..onnx_tools.onnx_export import export2onnx class OnnxInferenceExport: @@ -599,3 +600,12 @@ def to_text(self, recursive=False, grid=5, distance=5): bigraph = onnx2bigraph(self.oinf.obj, recursive=recursive) graph = bigraph.display_structure(grid=grid, distance=distance) return graph.to_text() + + def to_onnx_code(self): + """ + Exports the ONNX graph into an :epkg:`onnx` code + which replicates it. 
+ + :return: string + """ + return export2onnx(self.oinf.obj) diff --git a/mlprodict/sklapi/__init__.py b/mlprodict/sklapi/__init__.py index c94311d5c..5a11bd8a5 100644 --- a/mlprodict/sklapi/__init__.py +++ b/mlprodict/sklapi/__init__.py @@ -1,7 +1,12 @@ # -*- encoding: utf-8 -*- """ @file -@brief Shortcut to *onnxrt*. +@brief Shortcut to *sklapi*. """ -from .onnx_transformer import OnnxTransformer from .onnx_pipeline import OnnxPipeline +from .onnx_transformer import OnnxTransformer +from .onnx_speed_up import ( + OnnxSpeedupClassifier, + OnnxSpeedupCluster, + OnnxSpeedupRegressor, + OnnxSpeedupTransformer) diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py new file mode 100644 index 000000000..87ee1e8f4 --- /dev/null +++ b/mlprodict/sklapi/onnx_speed_up.py @@ -0,0 +1,666 @@ +# coding: utf-8 +""" +@file +@brief Speeding up :epkg:`scikit-learn` with :epkg:`onnx`. + +.. versionadded:: 0.7 +""" +import collections +import inspect +import io +from contextlib import redirect_stdout, redirect_stderr +import numpy +from numpy.testing import assert_almost_equal +import scipy.special as scipy_special +import scipy.spatial.distance as scipy_distance +from onnx import helper, load +from sklearn.base import ( + BaseEstimator, clone, + TransformerMixin, RegressorMixin, ClassifierMixin, + ClusterMixin) +from sklearn.preprocessing import FunctionTransformer +from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin +from ..tools.code_helper import print_code +from ..tools.asv_options_helper import get_opset_number_from_onnx +from ..onnx_tools.onnx_export import export2numpy +from ..onnx_tools.onnx2py_helper import ( + onnx_model_opsets, _var_as_dict, to_skl2onnx_type) +from ..onnx_tools.exports.numpy_helper import ( + array_feature_extrator, + argmax_use_numpy_select_last_index, + argmin_use_numpy_select_last_index, + make_slice) +from ..onnx_tools.exports.skl2onnx_helper import add_onnx_graph +from ..onnx_conv import to_onnx +from 
.onnx_transformer import OnnxTransformer + + +class _OnnxPipelineStepSpeedup(BaseEstimator, OnnxOperatorMixin): + """ + Speeds up inference by replacing methods *transform* or + *predict* by a runtime for :epkg:`ONNX`. + + :param estimator: estimator to train + :param enforce_float32: boolean + :epkg:`onnxruntime` only supports *float32*, + :epkg:`scikit-learn` usually uses double floats, this parameter + ensures that every array of double floats is converted into + single floats + :param runtime: string, defined the runtime to use + as described in @see cl OnnxInference. + :param target_opset: targetted ONNX opset + :param conv_options: options for conversions, see @see fn to_onnx + :param nopython: used by :epkg:`numba` jitter + + Attributes created by method *fit*: + + * `estimator_`: cloned and trained version of *estimator* + * `onnxrt_`: objet of type @see cl OnnxInference, + :epkg:`sklearn:preprocessing:FunctionTransformer` + * `numpy_code_`: python code equivalent to the inference + method if the runtime is `'numpy'` or `'numba'` + * `onnx_io_names_`: dictionary, additional information + if the runtime is `'numpy'` or `'numba'` + + .. versionadded:: 0.7 + """ + + def __init__(self, estimator, runtime='python', enforce_float32=True, + target_opset=None, conv_options=None, nopython=True): + BaseEstimator.__init__(self) + self.estimator = estimator + self.runtime = runtime + self.enforce_float32 = enforce_float32 + self.target_opset = target_opset + self.conv_options = conv_options + self.nopython = nopython + + def _check_fitted_(self): + if not hasattr(self, 'onnxrt_'): + raise AttributeError("Object must be be fit.") + + def _to_onnx(self, fitted_estimator, inputs): + """ + Converts an estimator inference into :epkg:`ONNX`. 
:param fitted_estimator: any estimator following :epkg:`scikit-learn` API
+ """ + try: + compiled_code = compile( + self.numpy_code_, '', 'exec') + except SyntaxError as e: + raise AssertionError( + "Unable to compile a script due to %r. " + "\n--CODE--\n%s" + "" % (e, print_code(self.numpy_code_))) from e + + glo = globals().copy() + loc = { + 'numpy': numpy, 'dict': dict, 'list': list, + 'print': print, 'sorted': sorted, + 'collections': collections, 'inspect': inspect, + 'helper': helper, 'scipy_special': scipy_special, + 'scipy_distance': scipy_distance, + 'array_feature_extrator': array_feature_extrator, + 'argmin_use_numpy_select_last_index': + argmin_use_numpy_select_last_index, + 'argmax_use_numpy_select_last_index': + argmax_use_numpy_select_last_index, + 'make_slice': make_slice} + out = io.StringIO() + err = io.StringIO() + with redirect_stdout(out): + with redirect_stderr(err): + try: + exec(compiled_code, glo, loc) # pylint: disable=W0122 + except Exception as e: + raise AssertionError( + "Unable to execute a script due to %r. " + "\n--OUT--\n%s\n--ERR--\n%s\n--CODE--\n%s" + "" % (e, out.getvalue(), err.getvalue(), + print_code(self.numpy_code_))) from e + names = [k for k in loc if k.startswith('numpy_')] + if len(names) != 1: + raise RuntimeError( + "Unable to guess which function is the one, names=%r." + "" % list(sorted(names))) + fct = loc[names[0]] + if self.runtime == 'numba': + from numba import jit + jitter = jit(nopython=self.nopython) + fct = jitter(fct) + cl = FunctionTransformer(fct, accept_sparse=True) + cl.op_version = opsets.get('', get_opset_number_from_onnx()) + return cl + + def __getstate__(self): + """ + :epkg:`pickle` does not support functions. + This method removes any link to function + when the runtime is `'numpy'`. + """ + state = BaseEstimator.__getstate__(self) + if 'numpy_code_' in state: + del state['onnxrt_'] + return state + + def __setstate__(self, state): + """ + :epkg:`pickle` does not support functions. + This method restores the function created when + the runtime is `'numpy'`. 
+ """ + BaseEstimator.__setstate__(self, state) + if 'numpy_code_' in state: + model_onnx = load(io.BytesIO(state['onnx_'])) + opsets = onnx_model_opsets(model_onnx) + self.onnxrt_ = self._build_onnx_runtime_numpy_compile(opsets) + + def fit(self, X, y=None, sample_weight=None, **kwargs): + """ + Fits the estimator, converts to ONNX. + + :param X: features + :param args: other arguments + :param kwargs: fitting options + """ + if not hasattr(self, 'estimator_'): + self.estimator_ = clone(self.estimator) + if y is None: + if sample_weight is None: + self.estimator_.fit(X, **kwargs) + else: + self.estimator_.fit(X, sample_weight=sample_weight, **kwargs) + else: + if sample_weight is None: + self.estimator_.fit(X, y, **kwargs) + else: + self.estimator_.fit( + X, y, sample_weight=sample_weight, **kwargs) + + if self.enforce_float32: + X = X.astype(numpy.float32) + self.onnx_ = self._to_onnx(self.estimator_, X).SerializeToString() + self.onnxrt_ = self._build_onnx_runtime(self.onnx_) + return self + + @property + def op_version(self): + """ + Returns the opset version. + """ + self._check_fitted_() + return self.onnxrt_.op_version + + def onnx_parser(self, scope=None, inputs=None): + """ + Returns a parser for this model. + """ + self._check_fitted_() + if isinstance(self.onnxrt_, FunctionTransformer): + def parser(): + # Types should be included as well. + return [r[0] for r in self.onnx_io_names_['skl2onnx_outputs']] + return parser + return self.onnxrt_.onnx_parser(scope, inputs) + + def onnx_shape_calculator(self): + """ + Returns a shape calculator for this transform. + """ + self._check_fitted_() + + if isinstance(self.onnxrt_, FunctionTransformer): + def fct_shape_calculator(operator): + # Types should be included as well. + outputs = self.onnx_io_names_['skl2onnx_outputs'] + if len(operator.outputs) != len(outputs): + raise RuntimeError( # pragma: no cover + "Mismatch between parser and shape calculator, " + "%r != %r." 
% (outputs, operator.outputs)) + for a, b in zip(operator.outputs, outputs): + a.type = b[1] + return fct_shape_calculator + + calc = self.onnxrt_.onnx_shape_calculator() + + def shape_calculator(operator): + return calc(operator) + + return shape_calculator + + def onnx_converter(self): + """ + Returns a converter for this transform. + """ + self._check_fitted_() + + if isinstance(self.onnxrt_, FunctionTransformer): + + def fct_converter(scope, operator, container): + op = operator.raw_operator + onnx_model = load(io.BytesIO(op.onnx_)) + add_onnx_graph(scope, operator, container, onnx_model) + + return fct_converter + + conv = self.onnxrt_.onnx_converter() + + def converter(scope, operator, container): + op = operator.raw_operator + onnx_model = op.onnxrt_.onnxrt_.obj + conv(scope, operator, container, onnx_model=onnx_model) + + return converter + + +class OnnxSpeedupTransformer(TransformerMixin, + _OnnxPipelineStepSpeedup): + """ + Trains with :epkg:`scikit-learn`, transform with :epkg:`ONNX`. + + :param estimator: estimator to train + :param enforce_float32: boolean + :epkg:`onnxruntime` only supports *float32*, + :epkg:`scikit-learn` usually uses double floats, this parameter + ensures that every array of double floats is converted into + single floats + :param runtime: string, defined the runtime to use + as described in @see cl OnnxInference. + :param target_opset: targetted ONNX opset + :param conv_options: conversion options, see @see fn to_onnx + :param nopython: used by :epkg:`numba` jitter + + Attributes created by method *fit*: + + * `estimator_`: cloned and trained version of *estimator* + * `onnxrt_`: objet of type @see cl OnnxInference, + :epkg:`sklearn:preprocessing:FunctionTransformer` + * `numpy_code_`: python code equivalent to the inference + method if the runtime is `'numpy'` or `'numba'` + * `onnx_io_names_`: dictionary, additional information + if the runtime is `'numpy'` or `'numba'` + + .. 
class OnnxSpeedupTransformer(TransformerMixin,
                             _OnnxPipelineStepSpeedup):
    """
    Trains with :epkg:`scikit-learn`, transforms with :epkg:`ONNX`.

    :param estimator: estimator to train
    :param enforce_float32: boolean,
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :param runtime: string, defines the runtime to use
        as described in @see cl OnnxInference
    :param target_opset: targeted ONNX opset
    :param conv_options: conversion options, see @see fn to_onnx
    :param nopython: used by :epkg:`numba` jitter

    Attributes created by method *fit*:

    * `estimator_`: cloned and trained version of *estimator*
    * `onnxrt_`: object of type @see cl OnnxInference,
      :epkg:`sklearn:preprocessing:FunctionTransformer`
    * `numpy_code_`: python code equivalent to the inference
      method if the runtime is `'numpy'` or `'numba'`
    * `onnx_io_names_`: dictionary, additional information
      if the runtime is `'numpy'` or `'numba'`

    .. versionadded:: 0.7
    """

    def __init__(self, estimator, runtime='python', enforce_float32=True,
                 target_opset=None, conv_options=None, nopython=True):
        _OnnxPipelineStepSpeedup.__init__(
            self, estimator, runtime=runtime,
            enforce_float32=enforce_float32, target_opset=target_opset,
            conv_options=conv_options, nopython=nopython)

    def fit(self, X, y=None, sample_weight=None):  # pylint: disable=W0221
        """
        Trains the wrapped estimator and converts it into ONNX.
        """
        extra = {}
        if sample_weight is not None:
            extra['sample_weight'] = sample_weight
        _OnnxPipelineStepSpeedup.fit(self, X, y, **extra)
        return self

    def transform(self, X):
        """
        Transforms with *ONNX*.

        :param X: features
        :return: transformed features
        """
        return self.onnxrt_.transform(X)

    def raw_transform(self, X):
        """
        Transforms with *scikit-learn*.

        :param X: features
        :return: transformed features
        """
        return self.estimator_.transform(X)

    def assert_almost_equal(self, X, **kwargs):
        """
        Checks that ONNX and scikit-learn produce the same outputs.
        """
        assert_almost_equal(
            self.raw_transform(X), self.transform(X), **kwargs)
class OnnxSpeedupRegressor(RegressorMixin,
                           _OnnxPipelineStepSpeedup):
    """
    Trains with :epkg:`scikit-learn`, predicts with :epkg:`ONNX`.

    :param estimator: estimator to train
    :param enforce_float32: boolean,
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :param runtime: string, defines the runtime to use
        as described in @see cl OnnxInference
    :param target_opset: targeted ONNX opset
    :param conv_options: conversion options, see @see fn to_onnx
    :param nopython: used by :epkg:`numba` jitter

    Attributes created by method *fit*:

    * `estimator_`: cloned and trained version of *estimator*
    * `onnxrt_`: object of type @see cl OnnxInference,
      :epkg:`sklearn:preprocessing:FunctionTransformer`
    * `numpy_code_`: python code equivalent to the inference
      method if the runtime is `'numpy'` or `'numba'`
    * `onnx_io_names_`: dictionary, additional information
      if the runtime is `'numpy'` or `'numba'`

    .. versionadded:: 0.7
    """

    def __init__(self, estimator, runtime='python', enforce_float32=True,
                 target_opset=None, conv_options=None, nopython=True):
        _OnnxPipelineStepSpeedup.__init__(
            self, estimator, runtime=runtime,
            enforce_float32=enforce_float32, target_opset=target_opset,
            conv_options=conv_options, nopython=nopython)

    def fit(self, X, y, sample_weight=None):  # pylint: disable=W0221
        """
        Trains the wrapped estimator and converts it into ONNX.
        """
        extra = {}
        if sample_weight is not None:
            extra['sample_weight'] = sample_weight
        _OnnxPipelineStepSpeedup.fit(self, X, y, **extra)
        return self

    def predict(self, X):
        """
        Predicts with *ONNX*.

        :param X: features
        :return: predictions
        """
        return self.onnxrt_.transform(X)

    def raw_predict(self, X):
        """
        Predicts with *scikit-learn*.

        :param X: features
        :return: predictions
        """
        return self.estimator_.predict(X)

    def assert_almost_equal(self, X, **kwargs):
        """
        Checks that ONNX and scikit-learn produce the same outputs.
        """
        assert_almost_equal(
            numpy.squeeze(self.raw_predict(X)),
            numpy.squeeze(self.predict(X)), **kwargs)
class OnnxSpeedupClassifier(ClassifierMixin,
                            _OnnxPipelineStepSpeedup):
    """
    Trains with :epkg:`scikit-learn`, predicts with :epkg:`ONNX`.

    :param estimator: estimator to train
    :param enforce_float32: boolean,
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :param runtime: string, defines the runtime to use
        as described in @see cl OnnxInference
    :param target_opset: targeted ONNX opset
    :param conv_options: conversion options, see @see fn to_onnx
    :param nopython: used by :epkg:`numba` jitter

    Attributes created by method *fit*:

    * `estimator_`: cloned and trained version of *estimator*
    * `onnxrt_`: object of type @see cl OnnxInference,
      :epkg:`sklearn:preprocessing:FunctionTransformer`
    * `numpy_code_`: python code equivalent to the inference
      method if the runtime is `'numpy'` or `'numba'`
    * `onnx_io_names_`: dictionary, additional information
      if the runtime is `'numpy'` or `'numba'`

    .. versionadded:: 0.7
    """

    def __init__(self, estimator, runtime='python', enforce_float32=True,
                 target_opset=None, conv_options=None, nopython=True):
        # Disable zipmap by default so probabilities come back as a
        # matrix instead of a list of dictionaries.
        if conv_options is None:
            conv_options = {'zipmap': False}
        _OnnxPipelineStepSpeedup.__init__(
            self, estimator, runtime=runtime,
            enforce_float32=enforce_float32, target_opset=target_opset,
            conv_options=conv_options, nopython=nopython)

    def fit(self, X, y, sample_weight=None):  # pylint: disable=W0221
        """
        Trains the wrapped estimator and converts it into ONNX.
        """
        extra = {}
        if sample_weight is not None:
            extra['sample_weight'] = sample_weight
        _OnnxPipelineStepSpeedup.fit(self, X, y, **extra)
        return self

    def predict(self, X):
        """
        Predicts labels with *ONNX*.

        :param X: features
        :return: predicted labels (first output of the runtime)
        """
        res = self.onnxrt_.transform(X)
        if isinstance(res, tuple):
            return res[0]
        return res.iloc[:, 0].values

    def predict_proba(self, X):
        """
        Predicts probabilities with *ONNX*.

        :param X: features
        :return: predicted probabilities (second output of the runtime)
        """
        res = self.onnxrt_.transform(X)
        if isinstance(res, tuple):
            return res[1]
        return res.iloc[:, 1:].values

    def raw_predict(self, X):
        """
        Predicts labels with *scikit-learn*.

        :param X: features
        :return: predicted labels
        """
        return self.estimator_.predict(X)

    def raw_predict_proba(self, X):
        """
        Predicts probabilities with *scikit-learn*.

        :param X: features
        :return: predicted probabilities
        """
        return self.estimator_.predict_proba(X)

    def assert_almost_equal(self, X, **kwargs):
        """
        Checks that ONNX and scikit-learn produce the same outputs.
        """
        assert_almost_equal(
            numpy.squeeze(self.raw_predict_proba(X)),
            numpy.squeeze(self.predict_proba(X)), **kwargs)
        assert_almost_equal(
            numpy.squeeze(self.raw_predict(X)),
            numpy.squeeze(self.predict(X)), **kwargs)
class OnnxSpeedupCluster(ClusterMixin,
                         _OnnxPipelineStepSpeedup):
    """
    Trains with :epkg:`scikit-learn`, predicts with :epkg:`ONNX`.

    :param estimator: estimator to train
    :param enforce_float32: boolean,
        :epkg:`onnxruntime` only supports *float32*,
        :epkg:`scikit-learn` usually uses double floats, this parameter
        ensures that every array of double floats is converted into
        single floats
    :param runtime: string, defines the runtime to use
        as described in @see cl OnnxInference
    :param target_opset: targeted ONNX opset
    :param conv_options: conversion options, see @see fn to_onnx
    :param nopython: used by :epkg:`numba` jitter

    Attributes created by method *fit*:

    * `estimator_`: cloned and trained version of *estimator*
    * `onnxrt_`: object of type @see cl OnnxInference,
      :epkg:`sklearn:preprocessing:FunctionTransformer`
    * `numpy_code_`: python code equivalent to the inference
      method if the runtime is `'numpy'` or `'numba'`
    * `onnx_io_names_`: dictionary, additional information
      if the runtime is `'numpy'` or `'numba'`

    .. versionadded:: 0.7
    """

    def __init__(self, estimator, runtime='python', enforce_float32=True,
                 target_opset=None, conv_options=None, nopython=True):
        _OnnxPipelineStepSpeedup.__init__(
            self, estimator, runtime=runtime,
            enforce_float32=enforce_float32, target_opset=target_opset,
            conv_options=conv_options, nopython=nopython)

    def fit(self, X, y, sample_weight=None):  # pylint: disable=W0221
        """
        Trains the wrapped estimator and converts it into ONNX.
        """
        extra = {}
        if sample_weight is not None:
            extra['sample_weight'] = sample_weight
        _OnnxPipelineStepSpeedup.fit(self, X, y, **extra)
        return self

    def predict(self, X):
        """
        Predicts cluster labels with *ONNX*.

        :param X: features
        :return: predicted labels (first output of the runtime)
        """
        res = self.onnxrt_.transform(X)
        if isinstance(res, tuple):
            return res[0]
        return res.iloc[:, 0].values

    def transform(self, X):
        """
        Transforms with *ONNX*.

        :param X: features
        :return: transformed features (second output of the runtime)
        """
        res = self.onnxrt_.transform(X)
        if isinstance(res, tuple):
            return res[1]
        return res.iloc[:, 1:].values

    def raw_predict(self, X):
        """
        Predicts cluster labels with *scikit-learn*.

        :param X: features
        :return: predicted labels
        """
        return self.estimator_.predict(X)

    def raw_transform(self, X):
        """
        Transforms with *scikit-learn*.

        :param X: features
        :return: transformed features
        """
        return self.estimator_.transform(X)

    def assert_almost_equal(self, X, **kwargs):
        """
        Checks that ONNX and scikit-learn produce the same outputs.
        """
        assert_almost_equal(
            numpy.squeeze(self.raw_transform(X)),
            numpy.squeeze(self.transform(X)), **kwargs)
        assert_almost_equal(
            numpy.squeeze(self.raw_predict(X)),
            numpy.squeeze(self.predict(X)), **kwargs)
runtime=self.runtime) self.inputs_ = self.onnxrt_.input_names + self.inputs_shape_types_ = self.onnxrt_.input_names_shapes_types return self def _check_arrays(self, inputs): @@ -110,8 +112,8 @@ def _check_arrays(self, inputs): Ensures that double floats are converted into single floats if *enforce_float32* is True or raises an exception. """ - sht = self.onnxrt_.input_names_shapes_types if hasattr( - self, "onnxrt_") else None + has = hasattr(self, "onnxrt_") + sht = self.inputs_shape_types_ if has else None if sht is not None and len(sht) < len(inputs): raise RuntimeError( # pragma: no cover "Unexpected number of inputs {} > {} (expected).".format( @@ -122,7 +124,7 @@ def _check_arrays(self, inputs): if v.dtype == numpy.float64 and self.enforce_float32: inputs[k] = v.astype(numpy.float32) continue - if not hasattr(self, "onnxrt_"): + if not has: continue exp = sht[i] if exp[1] != ('?', ) and exp[1][1:] != v.shape[1:]: @@ -157,11 +159,11 @@ def transform(self, X, y=None, **inputs): raise AttributeError( # pragma: no cover "Transform OnnxTransformer must be fit first.") rt_inputs = {} - if isinstance(X, pandas.DataFrame): + if isinstance(X, numpy.ndarray): + rt_inputs[self.inputs_[0]] = X + elif isinstance(X, pandas.DataFrame): for c in X.columns: rt_inputs[c] = X[c] - elif isinstance(X, numpy.ndarray): - rt_inputs[self.inputs_[0]] = X elif isinstance(X, dict) and len(inputs) == 0: for k, v in X.items(): rt_inputs[k] = v @@ -192,7 +194,33 @@ def transform(self, X, y=None, **inputs): names = self.output_name if self.output_name else [ o for o in self.onnxrt_.output_names] - return pandas.DataFrame({k: v for k, v in zip(names, outputs)}) + concat = [] + colnames = [] + for k, v in zip(names, outputs): + if isinstance(v, numpy.ndarray): + if len(v.shape) == 1: + v = v.reshape((-1, 1)) + colnames.append(k) + elif len(v.shape) == 2: + colnames.extend("%s%d" % (k, i) for i in range(v.shape[1])) + else: + raise RuntimeError( # pragma: no cover + "Unexpected shape for 
results %r: %r." % (k, v.shape)) + if isinstance(v, list): + if len(v) == 0: + raise RuntimeError( # pragma: no cover + "Output %r is empty." % k) + if not isinstance(v[0], dict): + raise RuntimeError( # pragma: no cover + "Unexpected type for output %r - value=%r." + "" % (k, v[0])) + df = pandas.DataFrame(v) + cols = list(sorted(df.columns)) + v = df[cols].copy().values + colnames.extend("%s%d" % (k, i) for i in range(v.shape[1])) + concat.append(v) + res = numpy.hstack(concat) + return pandas.DataFrame(res, columns=colnames) def fit_transform(self, X, y=None, **inputs): """ @@ -284,75 +312,10 @@ def onnx_converter(self): mapped to the first *scikit-learn* parent it can find. """ - def copy_inout(inout, scope, new_name): - shape = [s.dim_value for s in inout.type.tensor_type.shape.dim] - value_info = helper.make_tensor_value_info( - new_name, inout.type.tensor_type.elem_type, shape) - return value_info - - def clean_variable_name(name, scope): - return scope.get_unique_variable_name(name) - - def clean_operator_name(name, scope): - return scope.get_unique_operator_name(name) - - def clean_initializer_name(name, scope): - return scope.get_unique_variable_name(name) - - def converter(scope, operator, container): + def converter(scope, operator, container, onnx_model=None): op = operator.raw_operator - - graph = op.onnxrt_.obj.graph - name_mapping = {} - node_mapping = {} - for node in graph.node: - name = node.name - if name is not None: - node_mapping[node.name] = clean_initializer_name( - node.name, scope) - for o in node.input: - name_mapping[o] = clean_variable_name(o, scope) - for o in node.output: - name_mapping[o] = clean_variable_name(o, scope) - for o in graph.initializer: - name_mapping[o.name] = clean_operator_name(o.name, scope) - - inputs = [copy_inout(o, scope, name_mapping[o.name]) - for o in graph.input] - outputs = [copy_inout(o, scope, name_mapping[o.name]) - for o in graph.output] - - for inp, to in zip(operator.inputs, inputs): - n = 
helper.make_node('Identity', [inp.onnx_name], [to.name], - name=clean_operator_name('Identity', scope)) - container.nodes.append(n) - - for inp, to in zip(outputs, operator.outputs): - n = helper.make_node('Identity', [inp.name], [to.onnx_name], - name=clean_operator_name('Identity', scope)) - container.nodes.append(n) - - for node in graph.node: - n = helper.make_node( - node.op_type, - [name_mapping[o] for o in node.input], - [name_mapping[o] for o in node.output], - name=node_mapping[node.name] if node.name else None, - domain=node.domain if node.domain else None) - n.attribute.extend(node.attribute) # pylint: disable=E1101 - container.nodes.append(n) - - for o in graph.initializer: - as_str = o.SerializeToString() - tensor = TensorProto() - tensor.ParseFromString(as_str) - tensor.name = name_mapping[o.name] - container.initializers.append(tensor) - - # opset - for oimp in op.onnxrt_.obj.opset_import: - container.node_domain_version_pair_sets.add( - (oimp.domain, oimp.version)) + onx = onnx_model or op.onnxrt_.obj + add_onnx_graph(scope, operator, container, onx) return converter