sdpython · sdpython · Jun 8, 2020 · Jun 7, 2020 · Jun 8, 2020 · Jun 8, 2020
diff --git a/_doc/notebooks/onnx_discrepencies.ipynb b/_doc/notebooks/onnx_discrepencies.ipynb
diff --git a/_doc/sphinxdoc/source/api/sklapi.rst b/_doc/sphinxdoc/source/api/sklapi.rst
@@ -6,5 +6,17 @@ This is the main class which makes it easy to insert
 to use the prediction from an :epkg:`ONNX` files into a :epkg:`scikit-learn`
 pipeline.
 
+.. contents::
+    :local:
+
+OnnxTransformer
++++++++++++++++
+
 .. autosignature:: mlprodict.sklapi.onnx_transformer.OnnxTransformer
     :members:
+
+OnnxPipeline
+++++++++++++
+
+.. autosignature:: mlprodict.sklapi.onnx_pipeline.OnnxPipeline
+    :members:
diff --git a/_doc/sphinxdoc/source/conf.py b/_doc/sphinxdoc/source/conf.py
@@ -78,6 +78,7 @@
     'lightgbm': 'https://lightgbm.readthedocs.io/en/latest/',
     'make_scorer': 'https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html',
     'Minkowski distance': 'https://en.wikipedia.org/wiki/Minkowski_distance',
+    'mlinsights': 'http://www.xavierdupre.fr/app/mlinsights/helpsphinx/index.html',
     'mlprodict': 'http://www.xavierdupre.fr/app/mlprodict/helpsphinx/index.html',
     'openmp': 'https://www.openmp.org/',
     'ONNX': 'https://onnx.ai/',
@@ -97,5 +98,6 @@
     'run_asv.sh': 'https://github.com/sdpython/mlprodict/blob/master/bin/run_asv.sh',
     'Rust': 'https://www.rust-lang.org/',
     'sklearn-onnx': 'https://github.com/onnx/sklearn-onnx',
+    'TransferTransformer': 'http://www.xavierdupre.fr/app/mlinsights/helpsphinx/mlinsights/mlmodel/transfer_transformer.html',
     'xgboost': "https://xgboost.readthedocs.io/en/latest/",
 })
diff --git a/_unittests/ut_documentation/test_run_notebooks_onnx_discrepencies.py b/_unittests/ut_documentation/test_run_notebooks_onnx_discrepencies.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+"""
+@brief      test log(time=30s)
+"""
+import os
+import unittest
+from pyquickhelper.loghelper import fLOG
+from pyquickhelper.ipythonhelper import test_notebook_execution_coverage
+from pyquickhelper.pycode import (
+    add_missing_development_version, ExtTestCase
+)
+import mlprodict
+
+
+class TestNotebookOnnxDiscrepencies(ExtTestCase):
+    """Runs the notebook *onnx_discrepencies* stored in ``_doc/notebooks``."""
+
+    def setUp(self):
+        """Calls ``add_missing_development_version`` for *jyquickhelper*."""
+        add_missing_development_version(["jyquickhelper"], __file__, hide=True)
+
+    def test_notebook_onnx_discrepencies(self):
+        """Executes the notebook with ``test_notebook_execution_coverage``."""
+        fLOG(
+            __file__,
+            self._testMethodName,
+            OutputPrint=__name__ == "__main__")
+
+        # The module itself is checked: the boolean ``mlprodict is not None``
+        # is never None, an assertion on it could not fail.
+        self.assertIsNotNone(mlprodict)
+        folder = os.path.join(os.path.dirname(__file__),
+                              "..", "..", "_doc", "notebooks")
+        test_notebook_execution_coverage(__file__, "onnx_discrepencies", folder,
+                                         this_module_name="mlprodict", fLOG=fLOG)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py b/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py
@@ -6,7 +6,7 @@
 import numpy
 import pandas
 from lightgbm import LGBMClassifier, Dataset, train as lgb_train
-from pyquickhelper.pycode import ExtTestCase, skipif_circleci
+from pyquickhelper.pycode import ExtTestCase, skipif_circleci, ignore_warnings
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
 from skl2onnx.common.data_types import (
@@ -26,6 +26,7 @@ def setUp(self):
         register_converters()
 
     @skipif_circleci('stuck')
+    @ignore_warnings((RuntimeWarning, UserWarning))
     def test_onnxrt_python_lightgbm_categorical(self):
 
         X = pandas.DataFrame({"A": numpy.random.permutation(['a', 'b', 'c', 'd'] * 75),  # str
@@ -76,6 +77,7 @@ def test_onnxrt_python_lightgbm_categorical(self):
         # self.assertEqualArray(exp, df.values, decimal=6)
 
     @skipif_circleci('stuck')
+    @ignore_warnings((RuntimeWarning, UserWarning))
     def test_onnxrt_python_lightgbm_categorical_iris(self):
         iris = load_iris()
         X, y = iris.data, iris.target
@@ -131,6 +133,7 @@ def test_onnxrt_python_lightgbm_categorical_iris(self):
         self.assertEqualArray(exp, values[:, 1], decimal=5)
 
     @skipif_circleci('stuck')
+    @ignore_warnings((RuntimeWarning, UserWarning))
     def test_onnxrt_python_lightgbm_categorical_iris_dataframe(self):
         iris = load_iris()
         X, y = iris.data, iris.target

diff --git a/_unittests/ut_onnxrt/test_optim_onnx_identity.py b/_unittests/ut_onnxrt/test_optim_onnx_identity.py
@@ -30,8 +30,8 @@ def test_onnx_remove_identities(self):
             'input', op_version=get_opset_number_from_onnx())
         cdist = onnx_squareform_pdist(
             cop, dtype=numpy.float32, op_version=get_opset_number_from_onnx())
-        cop2 = OnnxIdentity(cdist, output_names=[
-                            'cdist'], op_version=get_opset_number_from_onnx())
+        cop2 = OnnxIdentity(cdist, output_names=['cdist'],
+                            op_version=get_opset_number_from_onnx())
 
         model_def = cop2.to_onnx(
             {'input': FloatTensorType()},
@@ -143,7 +143,7 @@ def onnx_test_knn_single_regressor(self, dtype, n_targets=1, debug=False,
         self.assertIn('subgraphs_optim', stats)
 
     def test_onnx_test_knn_single_regressor32(self):
-        self.onnx_test_knn_single_regressor(numpy.float32, expected=[2, 1])
+        self.onnx_test_knn_single_regressor(numpy.float32, expected=[1, 1])
 
 
 if __name__ == "__main__":

diff --git a/_unittests/ut_onnxrt/test_sklearn_helper.py b/_unittests/ut_onnxrt/test_sklearn_helper.py
@@ -122,7 +122,7 @@ def test_statistics_pipeline_sgd(self):
         clr.fit(X_train, y_train)
         onx = to_onnx(clr, X_train[:1].astype(numpy.float32))
         ostats = onnx_statistics(onx)
-        for k, v in {'nnodes': 9, 'doc_string': '', 'domain': 'ai.onnx', 'model_version': 0,
+        for k, v in {'nnodes': 8, 'doc_string': '', 'domain': 'ai.onnx', 'model_version': 0,
                      'producer_name': 'skl2onnx', 'ai.onnx.ml': 1}.items():
             self.assertEqual(ostats[k], v)
         self.assertIn('', ostats)

diff --git a/_unittests/ut_sklapi/test_onnx_pipeline.py b/_unittests/ut_sklapi/test_onnx_pipeline.py
@@ -0,0 +1,147 @@
+"""
+@brief      test log(time=4s)
+"""
+import unittest
+import numpy
+import onnxruntime
+from sklearn.decomposition import PCA
+from sklearn.preprocessing import StandardScaler
+from sklearn.datasets import load_iris
+from sklearn.linear_model import LogisticRegression
+from sklearn.mixture import GaussianMixture
+from pyquickhelper.pycode import ExtTestCase, ignore_warnings
+from mlinsights.mlmodel import TransferTransformer
+from mlprodict.onnx_conv import to_onnx
+from mlprodict.onnx_conv.register import _register_converters_mlinsights
+from mlprodict.onnxrt import OnnxInference
+from mlprodict.sklapi import OnnxPipeline, OnnxTransformer
+from mlprodict.tools import get_opset_number_from_onnx
+
+
+class TestOnnxPipeline(ExtTestCase):
+    """Checks OnnxPipeline and the ONNX conversion of TransferTransformer."""
+
+    def test_pipeline_iris(self):
+        """Three steps pipeline on iris, ONNX outputs equal the predictions."""
+        iris = load_iris()
+        X, y = iris.data, iris.target
+        pipe = OnnxPipeline([
+            ('pca', PCA(n_components=2)),
+            ('no', StandardScaler()),
+            ('lr', LogisticRegression())],
+            enforce_float32=True,
+            op_version=get_opset_number_from_onnx())
+        pipe.fit(X, y)
+        # NOTE(review): second fit, presumably checks refitting - confirm
+        pipe.fit(X, y)
+        self.assertTrue(hasattr(pipe, 'raw_steps_'))
+        self.assertEqual(len(pipe.steps), 3)
+        self.assertEqual(len(pipe.raw_steps_), 3)
+        self.assertIsInstance(pipe.steps[0][1], OnnxTransformer)
+        self.assertIsInstance(pipe.steps[1][1], OnnxTransformer)
+
+        X = X.astype(numpy.float32)
+        model_def = to_onnx(pipe, X[:1], target_opset=pipe.op_version,
+                            options={id(pipe): {'zipmap': False}})
+        sess = OnnxInference(model_def)
+        res = sess.run({'X': X})
+        self.assertEqualArray(res["label"], pipe.predict(X))
+        self.assertEqualArray(res["probabilities"], pipe.predict_proba(X))
+
+    def test_transfer_transformer(self):
+        """TransferTransformer(StandardScaler), ONNX output equals transform."""
+        _register_converters_mlinsights(True)
+        iris = load_iris()
+        X, y = iris.data, iris.target
+        pipe = TransferTransformer(StandardScaler(), trainable=True)
+        pipe.fit(X, y)
+        model_def = to_onnx(pipe, X[:1])
+        sess = OnnxInference(model_def)
+        res = sess.run({'X': X})
+        exp = pipe.transform(X)
+        self.assertEqualArray(exp, res['variable'], decimal=5)
+
+    def test_transfer_logistic_regression(self):
+        """TransferTransformer(LogisticRegression), ONNX equals transform."""
+        _register_converters_mlinsights(True)
+        iris = load_iris()
+        X, y = iris.data, iris.target
+        pipe = TransferTransformer(
+            LogisticRegression(solver='liblinear'), trainable=True)
+        pipe.fit(X, y)
+        model_def = to_onnx(pipe, X[:1])
+        sess = OnnxInference(model_def)
+        res = sess.run({'X': X})
+        exp = pipe.transform(X)
+        self.assertEqualArray(exp, res['probabilities'], decimal=5)
+
+    def test_pipeline_pickable(self):
+        """OnnxPipeline with a TransferTransformer step and step options."""
+        _register_converters_mlinsights(True)
+        iris = load_iris()
+        X, y = iris.data, iris.target
+        pipe = OnnxPipeline([
+            ('gm', TransferTransformer(StandardScaler(), trainable=True)),
+            ('lr', LogisticRegression())],
+            enforce_float32=True,
+            op_version=get_opset_number_from_onnx(),
+            options={'gm__score_samples': True})
+        pipe.fit(X, y)
+        # NOTE(review): second fit, presumably checks refitting - confirm
+        pipe.fit(X, y)
+
+        self.assertTrue(hasattr(pipe, 'raw_steps_'))
+        self.assertEqual(len(pipe.steps), 2)
+        self.assertEqual(len(pipe.raw_steps_), 2)
+        self.assertIsInstance(pipe.steps[0][1], OnnxTransformer)
+
+        X = X.astype(numpy.float32)
+        model_def = to_onnx(pipe, X[:1], target_opset=pipe.op_version,
+                            options={id(pipe): {'zipmap': False}})
+        sess = OnnxInference(model_def)
+        res = sess.run({'X': X})
+        self.assertEqual(sorted(res), ['label', 'probabilities'])
+        self.assertEqualArray(res["label"], pipe.predict(X))
+        self.assertEqualArray(res["probabilities"], pipe.predict_proba(X))
+
+    @ignore_warnings(warns=FutureWarning)
+    def test_pipeline_pickable_options(self):
+        """GaussianMixture step, the model is also run with onnxruntime."""
+        _register_converters_mlinsights(True)
+        iris = load_iris()
+        X, y = iris.data, iris.target
+        pipe = OnnxPipeline([
+            ('gm', TransferTransformer(
+                GaussianMixture(n_components=2),
+                trainable=True, method='predict_proba')),
+            ('lr', LogisticRegression())],
+            enforce_float32=True,
+            op_version=get_opset_number_from_onnx(),
+            options={'gm__score_samples': True,
+                     'lr__zipmap': False})
+        pipe.fit(X, y)
+        # NOTE(review): second fit, presumably checks refitting - confirm
+        pipe.fit(X, y)
+
+        self.assertTrue(hasattr(pipe, 'raw_steps_'))
+        self.assertEqual(len(pipe.steps), 2)
+        self.assertEqual(len(pipe.raw_steps_), 2)
+        self.assertIsInstance(pipe.steps[0][1], OnnxTransformer)
+
+        X = X.astype(numpy.float32)
+        model_def = to_onnx(pipe, X[:1], target_opset=pipe.op_version,
+                            options={id(pipe): {'zipmap': False}})
+        sess = OnnxInference(model_def, runtime="python_compiled")
+        self.assertIn("'probabilities': probabilities,", str(sess))
+        sess = onnxruntime.InferenceSession(model_def.SerializeToString())
+        r = sess.run(None, {'X': X})
+        self.assertEqual(len(r), 2)
+        sess = OnnxInference(model_def)
+        res = sess.run({'X': X})
+        self.assertEqual(sorted(res), ['label', 'probabilities'])
+        self.assertEqualArray(res["label"], pipe.predict(X))
+        self.assertEqualArray(res["probabilities"], pipe.predict_proba(X))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/_unittests/ut_sklapi/test_onnx_transformer.py b/_unittests/ut_sklapi/test_onnx_transformer.py
@@ -18,7 +18,7 @@
 from mlprodict.tools import get_opset_number_from_onnx
 
 
-class TestInferenceSessionSklearn(ExtTestCase):
+class TestOnnxTransformer(ExtTestCase):
 
     def setUp(self):
         logger = getLogger('skl2onnx')

diff --git a/mlprodict/asv_benchmark/_create_asv_helper.py b/mlprodict/asv_benchmark/_create_asv_helper.py
@@ -157,8 +157,13 @@ def _sklearn_subfolder(model):
     Returns the list of subfolders for a model.
     """
     mod = model.__module__
+    if mod is not None and mod.startswith('mlinsights'):
+        return ['mlinsights', model.__name__]
     spl = mod.split('.')
-    pos = spl.index('sklearn')
+    try:
+        pos = spl.index('sklearn')
+    except ValueError:  # pragma: no cover
+        raise ValueError("Unable to find 'sklearn' in '{}'.".format(mod))
     res = spl[pos + 1: -1]
     if len(res) == 0:
         if spl[-1] == 'sklearn':
@@ -177,31 +182,31 @@ def _handle_init_files(model, flat, location, verbose, location_pyspy, fLOG):
     if flat:
         return ([], location, ".",
                 (None if location_pyspy is None else location_pyspy))
+
+    created = []
+    subf = _sklearn_subfolder(model)
+    subf = [_ for _ in subf if _[0] != '_' or _ == '_externals']
+    location_model = os.path.join(location, *subf)
+    prefix_import = "." * (len(subf) + 1)
+    if not os.path.exists(location_model):
+        os.makedirs(location_model)
+        for fold in [location_model, os.path.dirname(location_model),
+                     os.path.dirname(os.path.dirname(location_model))]:
+            init = os.path.join(fold, '__init__.py')
+            if not os.path.exists(init):
+                with open(init, 'w') as _:
+                    pass
+                created.append(init)
+                if verbose > 1 and fLOG is not None:
+                    fLOG("[create_asv_benchmark] create '{}'.".format(init))
+    if location_pyspy is not None:
+        location_pyspy_model = os.path.join(location_pyspy, *subf)
+        if not os.path.exists(location_pyspy_model):
+            os.makedirs(location_pyspy_model)
     else:
-        created = []
-        subf = _sklearn_subfolder(model)
-        subf = [_ for _ in subf if _[0] != '_' or _ == '_externals']
-        location_model = os.path.join(location, *subf)
-        prefix_import = "." * (len(subf) + 1)
-        if not os.path.exists(location_model):
-            os.makedirs(location_model)
-            for fold in [location_model, os.path.dirname(location_model),
-                         os.path.dirname(os.path.dirname(location_model))]:
-                init = os.path.join(fold, '__init__.py')
-                if not os.path.exists(init):
-                    with open(init, 'w') as _:
-                        pass
-                    created.append(init)
-                    if verbose > 1 and fLOG is not None:
-                        fLOG("[create_asv_benchmark] create '{}'.".format(init))
-        if location_pyspy is not None:
-            location_pyspy_model = os.path.join(location_pyspy, *subf)
-            if not os.path.exists(location_pyspy_model):
-                os.makedirs(location_pyspy_model)
-        else:
-            location_pyspy_model = None
+        location_pyspy_model = None
 
-        return created, location_model, prefix_import, location_pyspy_model
+    return created, location_model, prefix_import, location_pyspy_model
 
 
 def _asv_class_name(model, scenario, optimisation,