From 03c550c2c30d493acdb5272efa88f12d052e362b Mon Sep 17 00:00:00 2001
From: xavier dupré
Date: Sun, 29 Aug 2021 16:50:49 +0200
Subject: [PATCH] add runtime numba

---
 _doc/examples/plot_speedup_pca.py             | 128 ++++++++++++++++++
 _doc/sphinxdoc/source/api/tools.rst           |  14 +-
 .../test_onnx_speedup_transformer.py          |  46 ++++++-
 .../onnx_tools/exports/skl2onnx_helper.py     |   5 +-
 mlprodict/sklapi/onnx_speed_up.py             |  16 ++-
 mlprodict/sklapi/onnx_transformer.py          |  13 +-
 6 files changed, 204 insertions(+), 18 deletions(-)
 create mode 100644 _doc/examples/plot_speedup_pca.py

diff --git a/_doc/examples/plot_speedup_pca.py b/_doc/examples/plot_speedup_pca.py
new file mode 100644
index 000000000..35c800f6b
--- /dev/null
+++ b/_doc/examples/plot_speedup_pca.py
@@ -0,0 +1,128 @@
+"""
+.. _l-speedup-pca:
+
+Speed up scikit-learn inference with ONNX
+=========================================
+
+Is it possible to make :epkg:`scikit-learn` faster with ONNX?
+That's the question this example tries to answer. The scenario is
+the following:
+
+* a model is trained
+* it is converted into ONNX for inference
+* it selects a runtime to compute the prediction
+
+The following runtimes are tested:
+
+* `python`: Python runtime for ONNX
+* `onnxruntime1`: :epkg:`onnxruntime`
+* `numpy`: the ONNX graph is converted into numpy code
+* `numba`: the numpy code is accelerated with :epkg:`numba`.
+
+.. contents::
+    :local:
+
+PCA
++++
+
+Let's look at a very simple model, a PCA.
+"""
+
+import numpy
+from pandas import DataFrame
+import matplotlib.pyplot as plt
+from sklearn.datasets import make_regression
+from sklearn.decomposition import PCA
+from pyquickhelper.pycode.profiling import profile
+from mlprodict.sklapi import OnnxSpeedUpTransformer
+from mlprodict.tools.speed_measure import measure_time
+from tqdm import tqdm
+
+################################
+# Data and models to test.
+
+data, _ = make_regression(1000, n_features=20)
+data = data.astype(numpy.float32)
+models = [
+    ('sklearn', PCA(n_components=10)),
+    ('python', OnnxSpeedUpTransformer(
+        PCA(n_components=10), runtime='python')),
+    ('onnxruntime1', OnnxSpeedUpTransformer(
+        PCA(n_components=10), runtime='onnxruntime1')),
+    ('numpy', OnnxSpeedUpTransformer(
+        PCA(n_components=10), runtime='numpy')),
+    ('numba', OnnxSpeedUpTransformer(
+        PCA(n_components=10), runtime='numba'))]
+
+#################################
+# Training.
+
+for name, model in tqdm(models):
+    model.fit(data)
+
+#################################
+# Profiling of runtime `onnxruntime1`.
+
+
+def fct():
+    for i in range(1000):
+        models[2][1].transform(data)
+
+
+res = profile(fct, pyinst_format="text")
+print(res[1])
+
+
+#################################
+# Profiling of runtime `numpy`.
+
+def fct():
+    for i in range(1000):
+        models[3][1].transform(data)
+
+
+res = profile(fct, pyinst_format="text")
+print(res[1])
+
+#################################
+# The class *OnnxSpeedUpTransformer* converts the PCA
+# into ONNX and then converts it into Python code using
+# *numpy*. The code is the following.
+
+print(models[3][1].numpy_code_)
+
+#################################
+# Benchmark.
+
+bench = []
+for name, model in tqdm(models):
+    for size in (1, 10, 100, 1000, 10000, 100000, 200000):
+        data, _ = make_regression(size, n_features=20)
+        data = data.astype(numpy.float32)
+
+        # We run it a first time (numba compiles
+        # the function during the first execution).
+        model.transform(data)
+        res = measure_time(
+            "model.transform(data)", div_by_number=True,
+            context={'data': data, 'model': model})
+        res['name'] = name
+        res['size'] = size
+        bench.append(res)
+
+df = DataFrame(bench)
+piv = df.pivot("size", "name", "average")
+piv
+
+######################################
+# Graph.
+
+fig, ax = plt.subplots(1, 2, figsize=(10, 4))
+piv.plot(title="Speedup PCA with ONNX (lower better)",
+         logx=True, logy=True, ax=ax[0])
+piv2 = piv.copy()
+for c in piv2.columns:
+    piv2[c] /= piv['sklearn']
+print(piv2)
+piv2.plot(title="baseline=scikit-learn (lower better)",
+          logx=True, logy=True, ax=ax[1])
+plt.show()

diff --git a/_doc/sphinxdoc/source/api/tools.rst b/_doc/sphinxdoc/source/api/tools.rst
index ea4b12ae4..9ec143f5c 100644
--- a/_doc/sphinxdoc/source/api/tools.rst
+++ b/_doc/sphinxdoc/source/api/tools.rst
@@ -189,31 +189,24 @@ The last example summarizes all the possibilities.
     from mlprodict.onnx_tools.onnx2py_helper import guess_proto_dtype_name
     from mlprodict.onnx_tools.onnx2py_helper import guess_dtype
 
-
     def guess_initial_types0(t):
         return guess_initial_types(numpy.array([[0, 1]], dtype=t), None)
 
-
     def guess_initial_types1(t):
         return guess_initial_types(None, [('X', t)])
 
-
     def guess_initial_types_mlprodict0(t):
         return guess_initial_types_mlprodict(numpy.array([[0, 1]], dtype=t), None)
 
-
     def guess_initial_types_mlprodict1(t):
         return guess_initial_types_mlprodict(None, [('X', t)])
 
-
     def _guess_type_proto1(t):
         return _guess_type_proto(t, [None, 4])
 
-
     def _guess_numpy_type1(t):
         return _guess_numpy_type(t, [None, 4])
 
-
     fcts = [guess_initial_types0, guess_initial_types1,
             guess_data_type, guess_numpy_type, guess_proto_type, guess_tensor_type,
@@ -221,7 +214,7 @@ The last example summarizes all the possibilities.
             _guess_numpy_type1, guess_initial_types_mlprodict0,
             guess_initial_types_mlprodict1,
-            guess_numpy_type_from_string,
+            guess_numpy_type_from_string, guess_numpy_type_from_dtype,
             guess_proto_dtype_name, guess_dtype]
@@ -246,3 +239,8 @@ The last example summarizes all the possibilities.
     print()
     for e in errors:
         print(e)
+
+skl2onnx
+========
+
+.. autosignature:: mlprodict.onnx_tools.exports.skl2onnx_helper.add_onnx_graph
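
The example above prints `models[3][1].numpy_code_` but the patch does not
include the generated output. As a rough, illustrative sketch (the function
name is hypothetical, not mlprodict's actual generated code), the numpy code
produced for a PCA reduces to centering the input and projecting it on the
principal axes:

import numpy

def pca_transform_sketch(X, mean, components):
    # what PCA.transform computes: center the data,
    # then project it on the principal axes
    return numpy.dot(X - mean, components.T)

# with a fitted scikit-learn PCA `pca`:
# pca_transform_sketch(X, pca.mean_, pca.components_)

The new `numba` runtime simply wraps such a generated function with
`numba.jit(nopython=True)`, as `_build_onnx_runtime_numpy_compile` below shows.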
diff --git a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py
index db7e5491d..7a2a93c72 100644
--- a/_unittests/ut_sklapi/test_onnx_speedup_transformer.py
+++ b/_unittests/ut_sklapi/test_onnx_speedup_transformer.py
@@ -5,7 +5,7 @@
 import pickle
 import unittest
 from logging import getLogger
-# import numpy as np
+import numpy
 # import pandas
 # from sklearn.pipeline import make_pipeline
 from sklearn.decomposition import PCA
@@ -51,6 +51,17 @@ def test_speedup_transform32_numpy(self):
         spd.fit(X)
         spd.assert_almost_equal(X, decimal=5)
 
+    def test_speedup_transform32_numba(self):
+        data = load_iris()
+        X, _ = data.data, data.target
+        X = X.astype(numpy.float32)
+        spd = OnnxSpeedUpTransformer(
+            PCA(), target_opset=self.opset(),
+            runtime="numba")
+        spd.fit(X)
+        spd.assert_almost_equal(X, decimal=5)
+        self.assertIn("CPUDispatch", str(spd.onnxrt_.func))
+
     def test_speedup_transform64(self):
         data = load_iris()
         X, _ = data.data, data.target
@@ -107,6 +118,26 @@ def test_speedup_transform64_numpy_pickle(self):
         got = spd2.raw_transform(X)
         self.assertEqualArray(expected, got)
 
+    def test_speedup_transform64_numba_pickle(self):
+        data = load_iris()
+        X, _ = data.data, data.target
+        spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(),
+                                     enforce_float32=False,
+                                     runtime="numba")
+        spd.fit(X)
+
+        st = BytesIO()
+        pickle.dump(spd, st)
+        st2 = BytesIO(st.getvalue())
+        spd2 = pickle.load(st2)
+
+        expected = spd.transform(X)
+        got = spd2.transform(X)
+        self.assertEqualArray(expected, got)
+        expected = spd.raw_transform(X)
+        got = spd2.raw_transform(X)
+        self.assertEqualArray(expected, got)
+
     def test_speedup_transform64_onnx(self):
         data = load_iris()
         X, _ = data.data, data.target
@@ -132,6 +163,19 @@ def test_speedup_transform64_onnx_numpy(self):
         got = oinf.run({'X': X})['variable']
         self.assertEqualArray(expected, got)
 
+    def test_speedup_transform64_onnx_numba(self):
+        data = load_iris()
+        X, _ = data.data, data.target
+        spd = OnnxSpeedUpTransformer(PCA(), target_opset=self.opset(),
+                                     enforce_float32=False,
+                                     runtime='numba')
+        spd.fit(X)
+        expected = spd.transform(X)
+        onx = to_onnx(spd, X[:1])
+        oinf = OnnxInference(onx)
+        got = oinf.run({'X': X})['variable']
+        self.assertEqualArray(expected, got)
+
 
 if __name__ == '__main__':
     unittest.main()

diff --git a/mlprodict/onnx_tools/exports/skl2onnx_helper.py b/mlprodict/onnx_tools/exports/skl2onnx_helper.py
index 74b1c1742..41aafde1c 100644
--- a/mlprodict/onnx_tools/exports/skl2onnx_helper.py
+++ b/mlprodict/onnx_tools/exports/skl2onnx_helper.py
@@ -2,7 +2,6 @@
 @file
 @brief Helpers to run examples created with :epkg:`sklearn-onnx`.
 """
-import numpy
 from onnx import helper, TensorProto
 
 
@@ -28,7 +27,9 @@ def _clean_initializer_name(name, scope):
 def add_onnx_graph(scope, operator, container, onx):
     """
     Adds a whole ONNX graph to an existing one following
-    :epkg:`skl2onnx` API.
+    :epkg:`skl2onnx` API, assuming this ONNX graph implements
+    an `operator `_.
 
     :param scope: scope (to get unique names)
     :param operator: operator

diff --git a/mlprodict/sklapi/onnx_speed_up.py b/mlprodict/sklapi/onnx_speed_up.py
index 5c49ddd89..c864acb35 100644
--- a/mlprodict/sklapi/onnx_speed_up.py
+++ b/mlprodict/sklapi/onnx_speed_up.py
@@ -42,6 +42,16 @@ class _OnnxPipelineStepSpeedUp(BaseEstimator, OnnxOperatorMixin):
     :param target_opset: targeted ONNX opset
     :param conv_options: options for conversions, see @see fn to_onnx
 
+    Attributes created by method *fit*:
+
+    * `estimator_`: cloned and trained version of *estimator*
+    * `onnxrt_`: object of type @see cl OnnxInference or
+      :epkg:`sklearn:preprocessing:FunctionTransformer`
+    * `numpy_code_`: Python code equivalent to the inference
+      method if the runtime is `'numpy'` or `'numba'`
+    * `onnx_io_names_`: dictionary, additional information
+      if the runtime is `'numpy'` or `'numba'`
+
     .. versionadded:: 0.7
     """
 
@@ -80,7 +90,7 @@ def _build_onnx_runtime(self, onx):
         :param runtime: runtime type (see @see cl OnnxInference)
         :return: instance of @see cl OnnxInference
         """
-        if self.runtime == 'numpy':
+        if self.runtime in ('numpy', 'numba'):
             return self._build_onnx_runtime_numpy(onx)
         tr = OnnxTransformer(
             onx, runtime=self.runtime,
@@ -153,6 +163,10 @@ def _build_onnx_runtime_numpy_compile(self, opsets):
                 "Unable to guess which function is the one, names=%r."
                 "" % list(sorted(names)))
         fct = loc[names[0]]
+        if self.runtime == 'numba':
+            from numba import jit
+            jitter = jit(nopython=True)
+            fct = jitter(fct)
         cl = FunctionTransformer(fct, accept_sparse=True)
         cl.op_version = opsets['']
         return cl

diff --git a/mlprodict/sklapi/onnx_transformer.py b/mlprodict/sklapi/onnx_transformer.py
index eb6586918..9a969022c 100644
--- a/mlprodict/sklapi/onnx_transformer.py
+++ b/mlprodict/sklapi/onnx_transformer.py
@@ -104,6 +104,7 @@ def fit(self, X=None, y=None, **fit_params):
             onx.SerializeToString() if updated else self.onnx_bytes)
         self.onnxrt_ = OnnxInference(onnx_bytes, runtime=self.runtime)
         self.inputs_ = self.onnxrt_.input_names
+        self.inputs_shape_types_ = self.onnxrt_.input_names_shapes_types
         return self
 
     def _check_arrays(self, inputs):
@@ -111,8 +112,8 @@ def _check_arrays(self, inputs):
         Ensures that double floats are converted into single floats
         if *enforce_float32* is True or raises an exception.
         """
-        sht = self.onnxrt_.input_names_shapes_types if hasattr(
-            self, "onnxrt_") else None
+        has = hasattr(self, "onnxrt_")
+        sht = self.inputs_shape_types_ if has else None
         if sht is not None and len(sht) < len(inputs):
             raise RuntimeError(  # pragma: no cover
                 "Unexpected number of inputs {} > {} (expected).".format(
@@ -123,7 +124,7 @@ def _check_arrays(self, inputs):
             if v.dtype == numpy.float64 and self.enforce_float32:
                 inputs[k] = v.astype(numpy.float32)
                 continue
-            if not hasattr(self, "onnxrt_"):
+            if not has:
                 continue
             exp = sht[i]
             if exp[1] != ('?', ) and exp[1][1:] != v.shape[1:]:
@@ -158,11 +159,11 @@ def transform(self, X, y=None, **inputs):
             raise AttributeError(  # pragma: no cover
                 "Transform OnnxTransformer must be fit first.")
         rt_inputs = {}
-        if isinstance(X, pandas.DataFrame):
+        if isinstance(X, numpy.ndarray):
+            rt_inputs[self.inputs_[0]] = X
+        elif isinstance(X, pandas.DataFrame):
             for c in X.columns:
                 rt_inputs[c] = X[c]
-        elif isinstance(X, numpy.ndarray):
-            rt_inputs[self.inputs_[0]] = X
         elif isinstance(X, dict) and len(inputs) == 0:
             for k, v in X.items():
                 rt_inputs[k] = v
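
To summarize, a minimal usage sketch of the runtime added by this patch
(it mirrors `test_speedup_transform32_numba` above and assumes the patch is
applied and :epkg:`numba` is installed):

import numpy
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from mlprodict.sklapi import OnnxSpeedUpTransformer

X = load_iris().data.astype(numpy.float32)
# fit converts the PCA into ONNX, generates numpy code from the
# graph and wraps it with numba.jit(nopython=True)
spd = OnnxSpeedUpTransformer(PCA(n_components=2), runtime='numba')
spd.fit(X)
# the first call compiles the jitted function, later calls run fast
print(spd.transform(X[:5]))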