Skip to content
This repository was archived by the owner on Jan 13, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,548 changes: 1,548 additions & 0 deletions _doc/notebooks/onnx_discrepencies.ipynb

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions _doc/sphinxdoc/source/api/sklapi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,17 @@ This is the main class which makes it easy to insert
to use the prediction from an :epkg:`ONNX` file into a :epkg:`scikit-learn`
pipeline.

.. contents::
    :local:

OnnxTransformer
+++++++++++++++

.. autosignature:: mlprodict.sklapi.onnx_transformer.OnnxTransformer
    :members:

OnnxPipeline
++++++++++++

.. autosignature:: mlprodict.sklapi.onnx_pipeline.OnnxPipeline
    :members:
2 changes: 2 additions & 0 deletions _doc/sphinxdoc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
'lightgbm': 'https://lightgbm.readthedocs.io/en/latest/',
'make_scorer': 'https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html',
'Minkowski distance': 'https://en.wikipedia.org/wiki/Minkowski_distance',
'mlinsights': '',
'mlprodict': 'http://www.xavierdupre.fr/app/mlprodict/helpsphinx/index.html',
'openmp': 'https://www.openmp.org/',
'ONNX': 'https://onnx.ai/',
Expand All @@ -97,5 +98,6 @@
'run_asv.sh': 'https://github.com/sdpython/mlprodict/blob/master/bin/run_asv.sh',
'Rust': 'https://www.rust-lang.org/',
'sklearn-onnx': 'https://github.com/onnx/sklearn-onnx',
'TransferTransformer': 'http://www.xavierdupre.fr/app/mlinsights/helpsphinx/mlinsights/mlmodel/transfer_transformer.html',
'xgboost': "https://xgboost.readthedocs.io/en/latest/",
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""
@brief test log(time=30s)
"""
import os
import unittest
from pyquickhelper.loghelper import fLOG
from pyquickhelper.ipythonhelper import test_notebook_execution_coverage
from pyquickhelper.pycode import (
add_missing_development_version, ExtTestCase
)
import mlprodict


class TestNotebookOnnxDiscrepencies(ExtTestCase):
    """
    Runs notebook ``onnx_discrepencies`` stored in ``_doc/notebooks``
    through ``test_notebook_execution_coverage``.
    """

    def setUp(self):
        # NOTE(review): presumably makes a development version of
        # jyquickhelper importable when it is not installed -- confirm
        # against pyquickhelper's documentation.
        add_missing_development_version(["jyquickhelper"], __file__, hide=True)

    def test_notebook_onnx_discrenpencies(self):
        """
        Logs the test name, checks that *mlprodict* was imported and
        delegates the notebook run to ``test_notebook_execution_coverage``.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        # Assert on the module object itself: an emptiness check applied
        # to the boolean ``mlprodict is not None`` would presumably pass
        # whatever the value is, as a boolean is neither None nor empty.
        self.assertIsNotNone(mlprodict)

        # Notebooks live two levels above this test file.
        folder = os.path.join(os.path.dirname(__file__),
                              "..", "..", "_doc", "notebooks")
        test_notebook_execution_coverage(__file__, "onnx_discrepencies", folder,
                                         this_module_name="mlprodict", fLOG=fLOG)


if __name__ == "__main__":
    unittest.main()
5 changes: 4 additions & 1 deletion _unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy
import pandas
from lightgbm import LGBMClassifier, Dataset, train as lgb_train
from pyquickhelper.pycode import ExtTestCase, skipif_circleci
from pyquickhelper.pycode import ExtTestCase, skipif_circleci, ignore_warnings
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from skl2onnx.common.data_types import (
Expand All @@ -26,6 +26,7 @@ def setUp(self):
register_converters()

@skipif_circleci('stuck')
@ignore_warnings((RuntimeWarning, UserWarning))
def test_onnxrt_python_lightgbm_categorical(self):

X = pandas.DataFrame({"A": numpy.random.permutation(['a', 'b', 'c', 'd'] * 75), # str
Expand Down Expand Up @@ -76,6 +77,7 @@ def test_onnxrt_python_lightgbm_categorical(self):
# self.assertEqualArray(exp, df.values, decimal=6)

@skipif_circleci('stuck')
@ignore_warnings((RuntimeWarning, UserWarning))
def test_onnxrt_python_lightgbm_categorical_iris(self):
iris = load_iris()
X, y = iris.data, iris.target
Expand Down Expand Up @@ -131,6 +133,7 @@ def test_onnxrt_python_lightgbm_categorical_iris(self):
self.assertEqualArray(exp, values[:, 1], decimal=5)

@skipif_circleci('stuck')
@ignore_warnings((RuntimeWarning, UserWarning))
def test_onnxrt_python_lightgbm_categorical_iris_dataframe(self):
iris = load_iris()
X, y = iris.data, iris.target
Expand Down
6 changes: 3 additions & 3 deletions _unittests/ut_onnxrt/test_optim_onnx_identity.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def test_onnx_remove_identities(self):
'input', op_version=get_opset_number_from_onnx())
cdist = onnx_squareform_pdist(
cop, dtype=numpy.float32, op_version=get_opset_number_from_onnx())
cop2 = OnnxIdentity(cdist, output_names=[
'cdist'], op_version=get_opset_number_from_onnx())
cop2 = OnnxIdentity(cdist, output_names=['cdist'],
op_version=get_opset_number_from_onnx())

model_def = cop2.to_onnx(
{'input': FloatTensorType()},
Expand Down Expand Up @@ -143,7 +143,7 @@ def onnx_test_knn_single_regressor(self, dtype, n_targets=1, debug=False,
self.assertIn('subgraphs_optim', stats)

def test_onnx_test_knn_single_regressor32(self):
self.onnx_test_knn_single_regressor(numpy.float32, expected=[2, 1])
self.onnx_test_knn_single_regressor(numpy.float32, expected=[1, 1])


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion _unittests/ut_onnxrt/test_sklearn_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def test_statistics_pipeline_sgd(self):
clr.fit(X_train, y_train)
onx = to_onnx(clr, X_train[:1].astype(numpy.float32))
ostats = onnx_statistics(onx)
for k, v in {'nnodes': 9, 'doc_string': '', 'domain': 'ai.onnx', 'model_version': 0,
for k, v in {'nnodes': 8, 'doc_string': '', 'domain': 'ai.onnx', 'model_version': 0,
'producer_name': 'skl2onnx', 'ai.onnx.ml': 1}.items():
self.assertEqual(ostats[k], v)
self.assertIn('', ostats)
Expand Down
138 changes: 138 additions & 0 deletions _unittests/ut_sklapi/test_onnx_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
@brief test log(time=4s)
"""
import unittest
import numpy
import onnxruntime
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.mixture import GaussianMixture
from pyquickhelper.pycode import ExtTestCase, ignore_warnings
from mlinsights.mlmodel import TransferTransformer
from mlprodict.onnx_conv import to_onnx
from mlprodict.onnx_conv.register import _register_converters_mlinsights
from mlprodict.onnxrt import OnnxInference
from mlprodict.sklapi import OnnxPipeline, OnnxTransformer
from mlprodict.tools import get_opset_number_from_onnx


class TestOnnxPipeline(ExtTestCase):
    """
    Tests ``OnnxPipeline`` and the ONNX conversion of
    ``TransferTransformer``: every test fits a model on the iris
    dataset, converts it with ``to_onnx`` and compares the ONNX outputs
    with the outputs of the fitted model.
    """

    @staticmethod
    def _iris_xy():
        """Returns the features and the labels of the iris dataset."""
        data = load_iris()
        return data.data, data.target

    def _check_fitted_steps(self, pipe, n_steps, n_onnx):
        """
        Checks that a fitted pipeline exposes ``raw_steps_``, that
        ``steps`` and ``raw_steps_`` both hold *n_steps* items and that
        the *n_onnx* first steps are ``OnnxTransformer`` instances.
        """
        self.assertTrue(hasattr(pipe, 'raw_steps_'))
        self.assertEqual(len(pipe.steps), n_steps)
        self.assertEqual(len(pipe.raw_steps_), n_steps)
        for position in range(n_onnx):
            self.assertIsInstance(pipe.steps[position][1], OnnxTransformer)

    @staticmethod
    def _convert_no_zipmap(pipe, X32):
        """
        Converts *pipe* into ONNX from the first row of *X32*,
        using the pipeline opset and option ``zipmap=False``.
        """
        return to_onnx(pipe, X32[:1], target_opset=pipe.op_version,
                       options={id(pipe): {'zipmap': False}})

    def _check_predictions(self, got, pipe, X32):
        """
        Compares ONNX outputs ``label`` and ``probabilities`` with
        ``pipe.predict`` and ``pipe.predict_proba``.
        """
        self.assertEqualArray(got["label"], pipe.predict(X32))
        self.assertEqualArray(got["probabilities"], pipe.predict_proba(X32))

    def _check_transfer(self, estimator, output_name):
        """
        Wraps *estimator* into a trainable ``TransferTransformer``,
        converts it and compares ONNX output *output_name*
        with ``transform`` (5 decimals).
        """
        _register_converters_mlinsights(True)
        features, labels = self._iris_xy()
        model = TransferTransformer(estimator, trainable=True)
        model.fit(features, labels)
        onx = to_onnx(model, features[:1])
        oinf = OnnxInference(onx)
        got = oinf.run({'X': features})
        expected = model.transform(features)
        self.assertEqualArray(expected, got[output_name], decimal=5)

    def test_pipeline_iris(self):
        features, labels = self._iris_xy()
        steps = [
            ('pca', PCA(n_components=2)),
            ('no', StandardScaler()),
            ('lr', LogisticRegression())]
        pipe = OnnxPipeline(
            steps, enforce_float32=True,
            op_version=get_opset_number_from_onnx())
        # The pipeline is fitted twice on purpose.
        for _ in range(2):
            pipe.fit(features, labels)
        self._check_fitted_steps(pipe, n_steps=3, n_onnx=2)

        features32 = features.astype(numpy.float32)
        onx = self._convert_no_zipmap(pipe, features32)
        got = OnnxInference(onx).run({'X': features32})
        self._check_predictions(got, pipe, features32)

    def test_transfer_transformer(self):
        self._check_transfer(StandardScaler(), 'variable')

    def test_transfer_logistic_regression(self):
        self._check_transfer(
            LogisticRegression(solver='liblinear'), 'probabilities')

    def test_pipeline_pickable(self):
        _register_converters_mlinsights(True)
        features, labels = self._iris_xy()
        steps = [
            ('gm', TransferTransformer(StandardScaler(), trainable=True)),
            ('lr', LogisticRegression())]
        pipe = OnnxPipeline(
            steps, enforce_float32=True,
            op_version=get_opset_number_from_onnx(),
            options={'gm__score_samples': True})
        # The pipeline is fitted twice on purpose.
        for _ in range(2):
            pipe.fit(features, labels)
        self._check_fitted_steps(pipe, n_steps=2, n_onnx=1)

        features32 = features.astype(numpy.float32)
        onx = self._convert_no_zipmap(pipe, features32)
        got = OnnxInference(onx).run({'X': features32})
        self.assertEqual(sorted(got), ['label', 'probabilities'])
        self._check_predictions(got, pipe, features32)

    @ignore_warnings(warns=FutureWarning)
    def test_pipeline_pickable_options(self):
        _register_converters_mlinsights(True)
        features, labels = self._iris_xy()
        transfer = TransferTransformer(
            GaussianMixture(n_components=2),
            trainable=True, method='predict_proba')
        pipe = OnnxPipeline(
            [('gm', transfer), ('lr', LogisticRegression())],
            enforce_float32=True,
            op_version=get_opset_number_from_onnx(),
            options={'gm__score_samples': True,
                     'lr__zipmap': False})
        # The pipeline is fitted twice on purpose.
        for _ in range(2):
            pipe.fit(features, labels)
        self._check_fitted_steps(pipe, n_steps=2, n_onnx=1)

        features32 = features.astype(numpy.float32)
        onx = self._convert_no_zipmap(pipe, features32)

        # The text of the compiled python runtime mentions the output.
        compiled = OnnxInference(onx, runtime="python_compiled")
        self.assertIn("'probabilities': probabilities,", str(compiled))

        # onnxruntime returns two outputs for the same model.
        ort_session = onnxruntime.InferenceSession(onx.SerializeToString())
        ort_outputs = ort_session.run(None, {'X': features32})
        self.assertEqual(len(ort_outputs), 2)

        # Default runtime of OnnxInference.
        got = OnnxInference(onx).run({'X': features32})
        self.assertEqual(sorted(got), ['label', 'probabilities'])
        self._check_predictions(got, pipe, features32)


if __name__ == '__main__':
    unittest.main()
2 changes: 1 addition & 1 deletion _unittests/ut_sklapi/test_onnx_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from mlprodict.tools import get_opset_number_from_onnx


class TestInferenceSessionSklearn(ExtTestCase):
class TestOnnxTransformer(ExtTestCase):

def setUp(self):
logger = getLogger('skl2onnx')
Expand Down
53 changes: 29 additions & 24 deletions mlprodict/asv_benchmark/_create_asv_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,13 @@ def _sklearn_subfolder(model):
Returns the list of subfolders for a model.
"""
mod = model.__module__
if mod is not None and mod.startswith('mlinsights'):
return ['mlinsights', model.__name__]
spl = mod.split('.')
pos = spl.index('sklearn')
try:
pos = spl.index('sklearn')
except ValueError: # pragma: no cover
raise ValueError("Unable to find 'sklearn' in '{}'.".format(mod))
res = spl[pos + 1: -1]
if len(res) == 0:
if spl[-1] == 'sklearn':
Expand All @@ -177,31 +182,31 @@ def _handle_init_files(model, flat, location, verbose, location_pyspy, fLOG):
if flat:
return ([], location, ".",
(None if location_pyspy is None else location_pyspy))

created = []
subf = _sklearn_subfolder(model)
subf = [_ for _ in subf if _[0] != '_' or _ == '_externals']
location_model = os.path.join(location, *subf)
prefix_import = "." * (len(subf) + 1)
if not os.path.exists(location_model):
os.makedirs(location_model)
for fold in [location_model, os.path.dirname(location_model),
os.path.dirname(os.path.dirname(location_model))]:
init = os.path.join(fold, '__init__.py')
if not os.path.exists(init):
with open(init, 'w') as _:
pass
created.append(init)
if verbose > 1 and fLOG is not None:
fLOG("[create_asv_benchmark] create '{}'.".format(init))
if location_pyspy is not None:
location_pyspy_model = os.path.join(location_pyspy, *subf)
if not os.path.exists(location_pyspy_model):
os.makedirs(location_pyspy_model)
else:
created = []
subf = _sklearn_subfolder(model)
subf = [_ for _ in subf if _[0] != '_' or _ == '_externals']
location_model = os.path.join(location, *subf)
prefix_import = "." * (len(subf) + 1)
if not os.path.exists(location_model):
os.makedirs(location_model)
for fold in [location_model, os.path.dirname(location_model),
os.path.dirname(os.path.dirname(location_model))]:
init = os.path.join(fold, '__init__.py')
if not os.path.exists(init):
with open(init, 'w') as _:
pass
created.append(init)
if verbose > 1 and fLOG is not None:
fLOG("[create_asv_benchmark] create '{}'.".format(init))
if location_pyspy is not None:
location_pyspy_model = os.path.join(location_pyspy, *subf)
if not os.path.exists(location_pyspy_model):
os.makedirs(location_pyspy_model)
else:
location_pyspy_model = None
location_pyspy_model = None

return created, location_model, prefix_import, location_pyspy_model
return created, location_model, prefix_import, location_pyspy_model


def _asv_class_name(model, scenario, optimisation,
Expand Down
Loading