From 8022b72fd213ea3e709dbdaafb77120410a579c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Wed, 26 May 2021 01:38:36 +0200
Subject: [PATCH 01/16] complex scenarios

---
 .gitignore                       |   1 +
 doc/tutorial.rst                 |   1 +
 doc/tutorial_4_complex.rst       |  10 ++
 examples/plot_usparse_xgboost.py | 199 +++++++++++++++++++++++++++++++
 4 files changed, 211 insertions(+)
 create mode 100644 doc/tutorial_4_complex.rst
 create mode 100644 examples/plot_usparse_xgboost.py

diff --git a/.gitignore b/.gitignore
index e3ed7ec6..e64bd609 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ examples/squeezenet1.1-7.onnx
 tests/pipeline*.onnx
 temp_*
 examples/pipeline_lightgbm.onnx
+examples/model.onnx
diff --git a/doc/tutorial.rst b/doc/tutorial.rst
index 9f30cba5..34867134 100644
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -16,6 +16,7 @@ involving operator not actually implemented in
     tutorial_1-5_external
     tutorial_2_new_converter
     tutorial_3_new_operator
+    tutorial_4_complex
 
 The tutorial was tested with following version:
 
diff --git a/doc/tutorial_4_complex.rst b/doc/tutorial_4_complex.rst
new file mode 100644
index 00000000..4d20ed74
--- /dev/null
+++ b/doc/tutorial_4_complex.rst
@@ -0,0 +1,10 @@
+
+Complex Scenarios
+=================
+
+Discrepancies may happen. Let's see some unexpected cases.
+
+.. toctree::
+    :maxdepth: 1
+
+    auto_examples/plot_usparse_xgboost
diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py
new file mode 100644
index 00000000..783a7d84
--- /dev/null
+++ b/examples/plot_usparse_xgboost.py
@@ -0,0 +1,199 @@
+"""
+.. _example-sparse-tfidf:
+
+TfIdf and sparse matrices
+=========================
+
+.. index:: XGBoost, lightgbm, RandomForest
+
+
+.. contents::
+    :local:
+
+Train a RandomForestClassifier after sparse
++++++++++++++++++++++++++++++++++++++++++++
+"""
+from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
+from mlprodict.onnxrt import OnnxInference
+import numpy
+import pandas
+import onnxruntime as rt
+from sklearn.compose import ColumnTransformer
+from sklearn.datasets import load_iris
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
+from sklearn.ensemble import RandomForestClassifier
+from xgboost import XGBClassifier
+from lightgbm import LGBMClassifier
+from skl2onnx.common.data_types import FloatTensorType, StringTensorType
+from skl2onnx import convert_sklearn, update_registered_converter
+from skl2onnx.common.shape_calculator import (
+    calculate_linear_classifier_output_shapes)
+from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
+    convert_xgboost)
+from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
+    convert_lightgbm)
+
+
+update_registered_converter(
+    XGBClassifier, 'XGBoostXGBClassifier',
+    calculate_linear_classifier_output_shapes, convert_xgboost,
+    options={'nocl': [True, False], 'zipmap': [True, False, 'columns']})
+update_registered_converter(
+    LGBMClassifier, 'LightGbmLGBMClassifier',
+    calculate_linear_classifier_output_shapes, convert_lightgbm,
+    options={'nocl': [True, False], 'zipmap': [True, False]})
+
+
+cst = ['class zero', 'class one', 'class two']
+
+data = load_iris()
+X = data.data[:, :2]
+y = data.target
+
+df = pandas.DataFrame(X)
+df["text"] = [cst[i] for i in y]
+
+
+ind = numpy.arange(X.shape[0])
+numpy.random.shuffle(ind)
+X = X[ind, :].copy()
+y = y[ind].copy()
+
+
+pipe = Pipeline([
+    ('union', ColumnTransformer([
+        ('scale1', StandardScaler(), [0, 1]),
+        ('subject',
+         Pipeline([
+             ('count', CountVectorizer()),
+             ('tfidf', TfidfTransformer())
+         ]), "text"),
+    ], sparse_threshold=1.)),
+    ('cls', RandomForestClassifier(n_estimators=5, max_depth=3)),
+])
+
+pipe.fit(df, y)
+
+
+# Convert
+
+model_onnx = convert_sklearn(
+    pipe, 'pipeline_xgboost',
+    [('input', FloatTensorType([None, 2])),
+     ('text', StringTensorType([None, 1]))],
+    target_opset=12,
+    options={RandomForestClassifier: {'zipmap': False}})
+
+
+# Compare the predictions
+
+print("predict", pipe.predict(df[:5]))
+print("predict_proba", pipe.predict_proba(df[:2]))
+
+# Predictions with onnxruntime.
+
+sess = rt.InferenceSession(model_onnx.SerializeToString())
+pred_onx = sess.run(None, {
+    "input": df[[0, 1]].values.astype(numpy.float32),
+    "text": df[["text"]].values})
+print("predict", pred_onx[0][:5])
+print("predict_proba", pred_onx[1][:2])
+
+print("%s differences:" % pipe.steps[-1][-1].__class__.__name__,
+      numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum())
+
+############################################
+# Train a XGBoost after sparse
+# ++++++++++++++++++++++++++++
+
+pipe = Pipeline([
+    ('union', ColumnTransformer([
+        ('scale1', StandardScaler(), [0, 1]),
+        ('subject',
+         Pipeline([
+             ('count', CountVectorizer(ngram_range=(1, 2))),
+             ('tfidf', TfidfTransformer())
+         ]), "text"),
+    ], sparse_threshold=1.)),
+    ('cls', XGBClassifier(n_estimators=5, max_depth=3)),
+])
+
+pipe.fit(df, y)
+
+model_onnx = convert_sklearn(
+    pipe, 'pipeline_xgboost',
+    [('input', FloatTensorType([None, 2])),
+     ('text', StringTensorType([None, 1]))],
+    target_opset=12,
+    options={XGBClassifier: {'zipmap': False}})
+
+print("predict", pipe.predict(df[:5]))
+print("predict_proba", pipe.predict_proba(df[:2]))
+
+with open('model.onnx', 'wb') as f:
+    f.write(model_onnx.SerializeToString())
+
+sess = rt.InferenceSession(model_onnx.SerializeToString())
+pred_onx = sess.run(None, {
+    "input": df[[0, 1]].values.astype(numpy.float32),
+    "text": df[["text"]].values})
+print("predict", pred_onx[0][:5])
+print("predict_proba", pred_onx[1][:2])
+
+print("%s differences:" % pipe.steps[-1][-1].__class__.__name__,
+      numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum())
+
+
+############################################
+# Train a LightGBM after sparse
+# +++++++++++++++++++++++++++++
+
+pipe = Pipeline([
+    ('union', ColumnTransformer([
+        ('scale1', StandardScaler(), [0, 1]),
+        ('subject',
+         Pipeline([
+             ('count', CountVectorizer(ngram_range=(1, 2))),
+             ('tfidf', TfidfTransformer())
+         ]), "text"),
+    ], sparse_threshold=1.)),
+    ('cls', LGBMClassifier(n_estimators=5, max_depth=3)),
+])
+
+pipe.fit(df, y)
+
+model_onnx = convert_sklearn(
+    pipe, 'pipeline_lgb',
+    [('input', FloatTensorType([None, 2])),
+     ('text', StringTensorType([None, 1]))],
+    target_opset=12,
+    options={LGBMClassifier: {'zipmap': False}})
+
+print("predict", pipe.predict(df[:5]))
+print("predict_proba", pipe.predict_proba(df[:2]))
+
+with open('model.onnx', 'wb') as f:
+    f.write(model_onnx.SerializeToString())
+
+sess = rt.InferenceSession(model_onnx.SerializeToString())
+pred_onx = sess.run(None, {
+    "input": df[[0, 1]].values.astype(numpy.float32),
+    "text": df[["text"]].values})
+print("predict", pred_onx[0][:5])
+print("predict_proba", pred_onx[1][:2])
+
+print("%s differences:" % pipe.steps[-1][-1].__class__.__name__,
+      numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum())
+
+
+#############################
+# Final graph
+# +++++++++++
+
+
+oinf = OnnxInference(model_onnx)
+ax = plot_graphviz(oinf.to_dot())
+ax.get_xaxis().set_visible(False)
+ax.get_yaxis().set_visible(False)

From 843bf487cc2e216ff24777bfd1ccb7ff71bbd739 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Wed, 26 May 2021 20:13:00 +0200
Subject: [PATCH 02/16] complete example

---
 examples/plot_usparse_xgboost.py | 385 +++++++++++++++++++------------
 1 file changed, 243 insertions(+), 142 deletions(-)

diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py
index 783a7d84..8449ef38 100644
--- a/examples/plot_usparse_xgboost.py
+++ b/examples/plot_usparse_xgboost.py
@@ -4,30 +4,43 @@
 TfIdf and sparse matrices
 =========================
 
-.. index:: XGBoost, lightgbm, RandomForest
+.. index:: xgboost, lightgbm, sparse, ensemble
 
+`TfidfVectorizer <https://scikit-learn.org/stable/modules/
+generated/sklearn.feature_extraction.text.TfidfVectorizer.html>`_
+usually creates sparse data. If the data is sparse enough, matrices
+usually stay sparse all along the pipeline until the predictor
+is trained. Sparse matrices do not distinguish null and missing values
+as neither is stored in the dataset. Because some predictors
+do make the difference, this ambiguity may introduce discrepancies
+when converted into ONNX. This example looks into several configurations.
 
 .. contents::
     :local:
 
-Train a RandomForestClassifier after sparse
-+++++++++++++++++++++++++++++++++++++++++++
+Imports, setups
++++++++++++++++
+
+All imports. It also registers onnx converters for :epkg:`xgboost`
+and :epkg:`lightgbm`.
 """
-from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
-from mlprodict.onnxrt import OnnxInference
+import warnings
 import numpy
 import pandas
 import onnxruntime as rt
+from tqdm import tqdm
 from sklearn.compose import ColumnTransformer
 from sklearn.datasets import load_iris
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
-from sklearn.ensemble import RandomForestClassifier
+from sklearn.ensemble import (
+    RandomForestClassifier, HistGradientBoostingClassifier)
 from xgboost import XGBClassifier
 from lightgbm import LGBMClassifier
 from skl2onnx.common.data_types import FloatTensorType, StringTensorType
-from skl2onnx import convert_sklearn, update_registered_converter
+from skl2onnx import to_onnx, update_registered_converter
+from skl2onnx.sklapi import CastTransformer, ReplaceTransformer
 from skl2onnx.common.shape_calculator import (
     calculate_linear_classifier_output_shapes)
 from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
@@ -46,6 +59,12 @@
     options={'nocl': [True, False], 'zipmap': [True, False]})
 
 
+##########################################
+# Artificial datasets
+# +++++++++++++++++++++++++++
+#
+# Iris + a text column.
+
 cst = ['class zero', 'class one', 'class two']
 
 data = load_iris()
@@ -62,138 +81,220 @@
 y = y[ind].copy()
 
 
-pipe = Pipeline([
-    ('union', ColumnTransformer([
-        ('scale1', StandardScaler(), [0, 1]),
-        ('subject',
-         Pipeline([
-             ('count', CountVectorizer()),
-             ('tfidf', TfidfTransformer())
-         ]), "text"),
-    ], sparse_threshold=1.)),
-    ('cls', RandomForestClassifier(n_estimators=5, max_depth=3)),
-])
-
-pipe.fit(df, y)
-
-
-# Convert
-
-model_onnx = convert_sklearn(
-    pipe, 'pipeline_xgboost',
-    [('input', FloatTensorType([None, 2])),
-     ('text', StringTensorType([None, 1]))],
-    target_opset=12,
-    options={RandomForestClassifier: {'zipmap': False}})
-
-
-# Compare the predictions
-
-print("predict", pipe.predict(df[:5]))
-print("predict_proba", pipe.predict_proba(df[:2]))
-
-# Predictions with onnxruntime.
-
-sess = rt.InferenceSession(model_onnx.SerializeToString())
-pred_onx = sess.run(None, {
-    "input": df[[0, 1]].values.astype(numpy.float32),
-    "text": df[["text"]].values})
-print("predict", pred_onx[0][:5])
-print("predict_proba", pred_onx[1][:2])
-
-print("%s differences:" % pipe.steps[-1][-1].__class__.__name__,
-      numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum())
-
-############################################
-# Train a XGBoost after sparse
-# ++++++++++++++++++++++++++++
-
-pipe = Pipeline([
-    ('union', ColumnTransformer([
-        ('scale1', StandardScaler(), [0, 1]),
-        ('subject',
-         Pipeline([
-             ('count', CountVectorizer(ngram_range=(1, 2))),
-             ('tfidf', TfidfTransformer())
-         ]), "text"),
-    ], sparse_threshold=1.)),
-    ('cls', XGBClassifier(n_estimators=5, max_depth=3)),
-])
-
-pipe.fit(df, y)
-
-model_onnx = convert_sklearn(
-    pipe, 'pipeline_xgboost',
-    [('input', FloatTensorType([None, 2])),
-     ('text', StringTensorType([None, 1]))],
-    target_opset=12,
-    options={XGBClassifier: {'zipmap': False}})
-
-print("predict", pipe.predict(df[:5]))
-print("predict_proba", pipe.predict_proba(df[:2]))
-
-with open('model.onnx', 'wb') as f:
-    f.write(model_onnx.SerializeToString())
-
-sess = rt.InferenceSession(model_onnx.SerializeToString())
-pred_onx = sess.run(None, {
-    "input": df[[0, 1]].values.astype(numpy.float32),
-    "text": df[["text"]].values})
-print("predict", pred_onx[0][:5])
-print("predict_proba", pred_onx[1][:2])
-
-print("%s differences:" % pipe.steps[-1][-1].__class__.__name__,
-      numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum())
-
-
-############################################
-# Train a LightGBM after sparse
-# +++++++++++++++++++++++++++++
-
-pipe = Pipeline([
-    ('union', ColumnTransformer([
-        ('scale1', StandardScaler(), [0, 1]),
-        ('subject',
-         Pipeline([
-             ('count', CountVectorizer(ngram_range=(1, 2))),
-             ('tfidf', TfidfTransformer())
-         ]), "text"),
-    ], sparse_threshold=1.)),
-    ('cls', LGBMClassifier(n_estimators=5, max_depth=3)),
-])
-
-pipe.fit(df, y)
-
-model_onnx = convert_sklearn(
-    pipe, 'pipeline_lgb',
-    [('input', FloatTensorType([None, 2])),
-     ('text', StringTensorType([None, 1]))],
-    target_opset=12,
-    options={LGBMClassifier: {'zipmap': False}})
-
-print("predict", pipe.predict(df[:5]))
-print("predict_proba", pipe.predict_proba(df[:2]))
-
-with open('model.onnx', 'wb') as f:
-    f.write(model_onnx.SerializeToString())
-
-sess = rt.InferenceSession(model_onnx.SerializeToString())
-pred_onx = sess.run(None, {
-    "input": df[[0, 1]].values.astype(numpy.float32),
-    "text": df[["text"]].values})
-print("predict", pred_onx[0][:5])
-print("predict_proba", pred_onx[1][:2])
-
-print("%s differences:" % pipe.steps[-1][-1].__class__.__name__,
-      numpy.abs(pred_onx[1].ravel() - pipe.predict_proba(df).ravel()).sum())
-
-
-#############################
-# Final graph
-# +++++++++++
-
-
-oinf = OnnxInference(model_onnx)
-ax = plot_graphviz(oinf.to_dot())
-ax.get_xaxis().set_visible(False)
-ax.get_yaxis().set_visible(False)
+##########################################
+# Train ensemble after sparse
+# +++++++++++++++++++++++++++
+#
+# The example uses the Iris dataset with an artificial text column
+# preprocessed with a tf-idf. `sparse_threshold=1.` prevents
+# sparse matrices from being converted into dense matrices.
+
+
+def make_pipelines(df_train, y_train, models=None,
+                   sparse_threshold=1., replace_nan=False,
+                   insert_replace=False, verbose=False):
+
+    if models is None:
+        models = [
+            RandomForestClassifier, HistGradientBoostingClassifier,
+            XGBClassifier, LGBMClassifier]
+
+    pipes = []
+    for model in tqdm(models):
+
+        if model == HistGradientBoostingClassifier:
+            kwargs = dict(max_iter=5)
+        elif model == XGBClassifier:
+            kwargs = dict(n_estimators=5, use_label_encoder=False)
+        else:
+            kwargs = dict(n_estimators=5)
+
+        if insert_replace:
+            pipe = Pipeline([
+                ('union', ColumnTransformer([
+                    ('scale1', StandardScaler(), [0, 1]),
+                    ('subject',
+                     Pipeline([
+                         ('count', CountVectorizer()),
+                         ('tfidf', TfidfTransformer()),
+                         ('repl', ReplaceTransformer()),
+                     ]), "text"),
+                ], sparse_threshold=sparse_threshold)),
+                ('cast', CastTransformer()),
+                ('cls', model(max_depth=3, **kwargs)),
+            ])
+        else:
+            pipe = Pipeline([
+                ('union', ColumnTransformer([
+                    ('scale1', StandardScaler(), [0, 1]),
+                    ('subject',
+                     Pipeline([
+                         ('count', CountVectorizer()),
+                         ('tfidf', TfidfTransformer())
+                     ]), "text"),
+                ], sparse_threshold=sparse_threshold)),
+                ('cast', CastTransformer()),
+                ('cls', model(max_depth=3, **kwargs)),
+            ])
+
+        try:
+            pipe.fit(df_train, y_train)
+        except TypeError as e:
+            obs = dict(model=model.__name__, pipe=pipe, error=e)
+            pipes.append(obs)
+            continue
+
+        options = {model: {'zipmap': False}}
+        if replace_nan:
+            options[TfidfTransformer] = {'nan': True}
+
+        # convert
+        with warnings.catch_warnings(record=False):
+            warnings.simplefilter("ignore", (FutureWarning, UserWarning))
+            model_onnx = to_onnx(
+                pipe,
+                initial_types=[('input', FloatTensorType([None, 2])),
+                               ('text', StringTensorType([None, 1]))],
+                target_opset=12, options=options)
+
+        with open('model.onnx', 'wb') as f:
+            f.write(model_onnx.SerializeToString())
+
+        sess = rt.InferenceSession(model_onnx.SerializeToString())
+        inputs = {"input": df[[0, 1]].values.astype(numpy.float32),
+                  "text": df[["text"]].values}
+        pred_onx = sess.run(None, inputs)
+
+        diff = numpy.abs(
+            pred_onx[1].ravel() -
+            pipe.predict_proba(df).ravel()).sum()
+
+        if verbose:
+            from mlprodict.onnxrt import OnnxInference
+
+            def td(a):
+                if hasattr(a, 'todense'):
+                    b = a.todense()
+                    ind = set(a.indices)
+                    for i in range(b.shape[1]):
+                        if i not in ind:
+                            b[0, i] = numpy.nan
+                    return b
+                return a
+
+            oinf = OnnxInference(model_onnx)
+            pred_onx2 = oinf.run(inputs)
+            diff2 = numpy.abs(
+                pred_onx2['probabilities'].ravel() -
+                pipe.predict_proba(df).ravel()).sum()
+
+        if diff > 0.1:
+            for i, (l1, l2) in enumerate(
+                    zip(pipe.predict_proba(df), pred_onx[1])):
+                d = numpy.abs(l1 - l2).sum()
+                if verbose and d > 0.1:
+                    print("\nDISCREPENCY DETAILS")
+                    print(d, i, l1, l2)
+                    pre = pipe.steps[0][-1].transform(df)
+                    print("idf", pre[i].dtype, td(pre[i]))
+                    pre2 = pipe.steps[1][-1].transform(pre)
+                    print("cas", pre2[i].dtype, td(pre2[i]))
+                    inter = oinf.run(inputs, intermediate=True)
+                    onx = inter['tfidftr_norm']
+                    print("onx", onx.dtype, onx[i])
+                    onx = inter['variable3']
+
+        obs = dict(model=model.__name__,
+                   discrepencies=diff,
+                   model_onnx=model_onnx, pipe=pipe)
+        if verbose:
+            obs['discrepency2'] = diff2
+        pipes.append(obs)
+
+    return pipes
+
+
+data_sparse = make_pipelines(df, y)
+stat = pandas.DataFrame(data_sparse).drop(['model_onnx', 'pipe'], axis=1)
+if 'error' in stat.columns:
+    print(stat.drop('error', axis=1))
+stat
+
+############################
+# Sparse data hurts.
+#
+# Dense data
+# ++++++++++
+#
+# Let's replace sparse data with dense by using `sparse_threshold=0.`
+
+
+data_dense = make_pipelines(df, y, sparse_threshold=0.)
+stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1)
+if 'error' in stat.columns:
+    print(stat.drop('error', axis=1))
+stat
+
+####################################
+# This is much better. Let's compare how the preprocessing
+# applies on the data.
+
+print("sparse")
+print(data_sparse[-1]['pipe'].steps[0][-1].transform(df)[:2])
+print()
+print("dense")
+print(data_dense[-1]['pipe'].steps[0][-1].transform(df)[:2])
+
+####################################
+# This shows `RandomForestClassifier
+# <https://scikit-learn.org/stable/modules/generated/
+# sklearn.ensemble.RandomForestClassifier.html>`_,
+# `XGBClassifier <https://xgboost.readthedocs.io/
+# en/latest/python/python_api.html>`_ do not process
+# sparse and dense matrices the same way,
+# as opposed to `LGBMClassifier
+# <https://lightgbm.readthedocs.io/en/latest/
+# pythonapi/lightgbm.LGBMClassifier.html>`_.
+# And `HistGradientBoostingClassifier
+# <https://scikit-learn.org/stable/modules/generated/
+# sklearn.ensemble.HistGradientBoostingClassifier.html>`_
+# fails.
+#
+# Dense data with nan
+# +++++++++++++++++++
+#
+# Let's keep sparse data in the scikit-learn pipeline but
+# replace null values by nan in the onnx graph.
+
+data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=True)
+stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1)
+if 'error' in stat.columns:
+    print(stat.drop('error', axis=1))
+stat
+
+
+##############################
+# Dense, 0 replaced by nan
+# ++++++++++++++++++++++++
+#
+# Instead of using a specific option to replace null values
+# with nan values, a custom transformer called
+# ReplaceTransformer is explicitly inserted into the pipeline.
+# A new converter is added to the list of supported models.
+# It is equivalent to the previous options except it is
+# more explicit.
+
+data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=False,
+                            insert_replace=True)
+stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1)
+if 'error' in stat.columns:
+    print(stat.drop('error', axis=1))
+stat
+
+######################################
+# Conclusion
+# ++++++++++
+#
+# Because :epkg:`onnxruntime` and ONNX do not support sparse data yet,
+# the conversion needs to be tuned depending on the model which
+# follows the TfIdf preprocessing, unless dense arrays are used.

From 5cfaa7348c34ddf8de764030b34c50efbc7c9337 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Thu, 27 May 2021 16:01:55 +0200
Subject: [PATCH 03/16] update travis link

---
 README.rst    | 4 ++--
 doc/index.rst | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index 369d5986..6e9e8409 100644
--- a/README.rst
+++ b/README.rst
@@ -2,8 +2,8 @@
 .. image:: https://circleci.com/gh/sdpython/onnxcustom/tree/master.svg?style=svg
     :target: https://circleci.com/gh/sdpython/onnxcustom/tree/master
 
-.. image:: https://travis-ci.org/sdpython/onnxcustom.svg?branch=master
-    :target: https://travis-ci.org/sdpython/onnxcustom
+.. image:: https://travis-ci.com/sdpython/onnxcustom.svg?branch=master
+    :target: https://travis-ci.com/sdpython/onnxcustom
     :alt: Build status
 
 .. image:: https://ci.appveyor.com/api/projects/status/a3sn45a2fayoxb5q?svg=true
diff --git a/doc/index.rst b/doc/index.rst
index 08adc8a2..09c8d45f 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -5,8 +5,8 @@ onnxcustom: deploy machine learned models
 .. image:: https://circleci.com/gh/sdpython/onnxcustom/tree/master.svg?style=svg
     :target: https://circleci.com/gh/sdpython/onnxcustom/tree/master
 
-.. image:: https://travis-ci.org/sdpython/onnxcustom.svg?branch=master
-    :target: https://travis-ci.org/sdpython/onnxcustom
+.. image:: https://travis-ci.com/sdpython/onnxcustom.svg?branch=master
+    :target: https://travis-ci.com/sdpython/onnxcustom
     :alt: Build status
 
 .. image:: https://ci.appveyor.com/api/projects/status/a3sn45a2fayoxb5q?svg=true

From 781b4f321ddb579629fe48b6573de0217decf8bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Sun, 11 Jul 2021 17:51:45 +0200
Subject: [PATCH 04/16] Fixes example for scikit-learn>=0.24

---
 examples/plot_cbegin_opset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/plot_cbegin_opset.py b/examples/plot_cbegin_opset.py
index 7bc0d82b..0fa3bdcd 100644
--- a/examples/plot_cbegin_opset.py
+++ b/examples/plot_cbegin_opset.py
@@ -43,7 +43,7 @@
 
 X, y = make_blobs(n_samples=100, n_features=2)
 
-model = IsolationForest(3)
+model = IsolationForest(n_estimators=3)
 model.fit(X)
 labels = model.predict(X)
 

From 75398af1a24952cf734b91f6541fb30ebcb643d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Sun, 11 Jul 2021 17:54:53 +0200
Subject: [PATCH 05/16] Update config.yml

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5c9b0d87..0e5eaa33 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,7 +2,7 @@ version: 2
 jobs:
   build:
     docker:
-      - image: circleci/python:3.8.7
+      - image: circleci/python:3.9.5
     
     working_directory: ~/repo
     

From 88fa3aa81e66ce7a231c98c9d9a326446aa984af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Sun, 11 Jul 2021 18:08:32 +0200
Subject: [PATCH 06/16] Update requirements-dev.txt

---
 requirements-dev.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index de1ab1a6..4078c9e5 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -9,7 +9,7 @@ mlinsights
 mlprodict>=0.5
 nbsphinx
 onnx>=1.8.0
-git+https://github.com/xadupre/onnxconverter-common.git@jenkins
+onnxconverter-common
 onnxruntime>=1.6.0
 pillow
 py-spy
@@ -19,7 +19,7 @@ pyquickhelper>=1.10
 pytest
 pytest-cov
 scikit-learn>=0.24
-git+https://github.com/xadupre/sklearn-onnx.git@jenkins
+skl2onnx>=1.9.0
 sphinx
 sphinxcontrib-blockdiag
 sphinx-gallery

From f35f2f5e004741e902930172435460f4a06d758d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Sun, 11 Jul 2021 18:20:25 +0200
Subject: [PATCH 07/16] Update plot_usparse_xgboost.py

---
 examples/plot_usparse_xgboost.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py
index 8449ef38..5ea7cfe7 100644
--- a/examples/plot_usparse_xgboost.py
+++ b/examples/plot_usparse_xgboost.py
@@ -34,6 +34,7 @@
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
+from sklearn.experimental import enable_hist_gradient_boosting 
 from sklearn.ensemble import (
     RandomForestClassifier, HistGradientBoostingClassifier)
 from xgboost import XGBClassifier

From 31d90c0345553d9712fe65ea2099d36ad045f141 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Sun, 11 Jul 2021 18:38:13 +0200
Subject: [PATCH 08/16] update example

---
 examples/plot_usparse_xgboost.py | 13 ++++++-------
 requirements-dev.txt             |  4 ++--
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py
index 5ea7cfe7..b556f24c 100644
--- a/examples/plot_usparse_xgboost.py
+++ b/examples/plot_usparse_xgboost.py
@@ -27,7 +27,6 @@
 import warnings
 import numpy
 import pandas
-import onnxruntime as rt
 from tqdm import tqdm
 from sklearn.compose import ColumnTransformer
 from sklearn.datasets import load_iris
@@ -48,6 +47,7 @@
     convert_xgboost)
 from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
     convert_lightgbm)
+from mlprodict.onnxrt import OnnxInference
 
 
 update_registered_converter(
@@ -161,18 +161,16 @@ def make_pipelines(df_train, y_train, models=None,
         with open('model.onnx', 'wb') as f:
             f.write(model_onnx.SerializeToString())
 
-        sess = rt.InferenceSession(model_onnx.SerializeToString())
+        oinf = OnnxInference(model_onnx)
         inputs = {"input": df[[0, 1]].values.astype(numpy.float32),
                   "text": df[["text"]].values}
-        pred_onx = sess.run(None, inputs)
+        pred_onx = oinf.run(inputs)
 
         diff = numpy.abs(
-            pred_onx[1].ravel() -
+            pred_onx['probabilities'].ravel() -
             pipe.predict_proba(df).ravel()).sum()
 
         if verbose:
-            from mlprodict.onnxrt import OnnxInference
-
             def td(a):
                 if hasattr(a, 'todense'):
                     b = a.todense()
@@ -191,7 +189,8 @@ def td(a):
 
         if diff > 0.1:
             for i, (l1, l2) in enumerate(
-                    zip(pipe.predict_proba(df), pred_onx[1])):
+                    zip(pipe.predict_proba(df),
+                    pred_onx['probabilities'])):
                 d = numpy.abs(l1 - l2).sum()
                 if verbose and d > 0.1:
                     print("\nDISCREPENCY DETAILS")
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 4078c9e5..6d172040 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -6,11 +6,11 @@ lightgbm
 loky
 matplotlib
 mlinsights
-mlprodict>=0.5
+mlprodict>=0.6
 nbsphinx
 onnx>=1.8.0
 onnxconverter-common
-onnxruntime>=1.6.0
+onnxruntime>=1.8.0
 pillow
 py-spy
 pandas

From c750bea40633a58cdda3b43e0596f8321af4e968 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Mon, 12 Jul 2021 16:20:34 +0200
Subject: [PATCH 09/16] Update requirements-dev.txt

---
 requirements-dev.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 6d172040..f60e2f44 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -8,7 +8,6 @@ matplotlib
 mlinsights
 mlprodict>=0.6
 nbsphinx
-onnx>=1.8.0
 onnxconverter-common
 onnxruntime>=1.8.0
 pillow

From e9bbecd6c11f53eaf5f019cf8aaf5248134f94b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Mon, 12 Jul 2021 20:38:53 +0200
Subject: [PATCH 10/16] split unit test

---
 ...les.py => test_documentation_examples1.py} |   6 +-
 tests/test_documentation_examples2.py         | 118 ++++++++++++++++++
 2 files changed, 122 insertions(+), 2 deletions(-)
 rename tests/{test_documentation_examples.py => test_documentation_examples1.py} (96%)
 create mode 100644 tests/test_documentation_examples2.py

diff --git a/tests/test_documentation_examples.py b/tests/test_documentation_examples1.py
similarity index 96%
rename from tests/test_documentation_examples.py
rename to tests/test_documentation_examples1.py
index e6a2c9ce..171afe2e 100644
--- a/tests/test_documentation_examples.py
+++ b/tests/test_documentation_examples1.py
@@ -25,9 +25,9 @@ def import_source(module_file_path, module_name):
     return module_spec.loader.exec_module(module)
 
 
-class TestDocumentationExample(unittest.TestCase):
+class TestDocumentationExample1(unittest.TestCase):
 
-    def test_documentation_examples(self):
+    def test_documentation_examples1(self):
 
         this = os.path.abspath(os.path.dirname(__file__))
         onxc = os.path.normpath(os.path.join(this, '..'))
@@ -40,6 +40,8 @@ def test_documentation_examples(self):
         found = os.listdir(fold)
         tested = 0
         for name in sorted(found):
+            if name.replace("\\", "/").split("/")[-1] >= "m":
+                break
 
             if '-v' in sys.argv:
                 if name.endswith('plot_bbegin_measure_time.py'):
diff --git a/tests/test_documentation_examples2.py b/tests/test_documentation_examples2.py
new file mode 100644
index 00000000..ba6ab119
--- /dev/null
+++ b/tests/test_documentation_examples2.py
@@ -0,0 +1,118 @@
+"""
+Tests examples from the documentation.
+"""
+import unittest
+from distutils.version import StrictVersion
+import os
+import sys
+import importlib
+import subprocess
+from datetime import datetime
+import onnxruntime
+
+
+def import_source(module_file_path, module_name):
+    if not os.path.exists(module_file_path):
+        raise FileNotFoundError(module_file_path)
+    module_spec = importlib.util.spec_from_file_location(
+        module_name, module_file_path)
+    if module_spec is None:
+        raise FileNotFoundError(
+            "Unable to find '{}' in '{}', cwd='{}'.".format(
+                module_name, module_file_path,
+                os.path.abspath(__file__)))
+    module = importlib.util.module_from_spec(module_spec)
+    return module_spec.loader.exec_module(module)
+
+
+class TestDocumentationExample2(unittest.TestCase):
+
+    def test_documentation_examples2(self):
+
+        this = os.path.abspath(os.path.dirname(__file__))
+        onxc = os.path.normpath(os.path.join(this, '..'))
+        pypath = os.environ.get('PYTHONPATH', None)
+        sep = ";" if sys.platform == 'win32' else ':'
+        pypath = "" if pypath in (None, "") else (pypath + sep)
+        pypath += onxc
+        os.environ['PYTHONPATH'] = pypath
+        fold = os.path.normpath(os.path.join(this, '..', 'examples'))
+        found = os.listdir(fold)
+        tested = 0
+        for name in sorted(found):
+            if name.replace("\\", "/").split("/")[-1] < "m":
+                continue
+
+            if '-v' in sys.argv:
+                if name.endswith('plot_bbegin_measure_time.py'):
+                    if __name__ == "__main__":
+                        print("%s: skip %r" % (
+                            datetime.now().strftime("%d-%m-%y %H:%M:%S"),
+                            name))
+                    continue
+
+            with self.subTest(name=name):
+                if name.startswith("plot_") and name.endswith(".py"):
+                    if (name == "plot_pipeline_lightgbm.py" and
+                            StrictVersion(onnxruntime.__version__) <
+                                StrictVersion('1.0.0')):
+                        continue
+                    if __name__ == "__main__" or "-v" in sys.argv:
+                        print("%s: run %r" % (
+                            datetime.now().strftime("%d-%m-%y %H:%M:%S"),
+                            name))
+                    sys.path.insert(0, fold)
+                    try:
+                        mod = import_source(fold, os.path.splitext(name)[0])
+                        assert mod is not None
+                    except FileNotFoundError:
+                        # try another way
+                        cmds = [sys.executable, "-u",
+                                os.path.join(fold, name)]
+                        p = subprocess.Popen(
+                            cmds, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+                        res = p.communicate()
+                        out, err = res
+                        st = err.decode('ascii', errors='ignore')
+                        if len(st) > 0 and 'Traceback' in st:
+                            if "No such file or directory: 'dot': 'dot'" in st:
+                                # dot not installed, this part
+                                # is tested in onnx framework
+                                pass
+                            elif '"dot" not found in path.' in st:
+                                # dot not installed, this part
+                                # is tested in onnx framework
+                                pass
+                            elif "No module named 'xgboost'" in st:
+                                # xgboost not installed on CI
+                                pass
+                            elif ("cannot import name 'LightGbmModelContainer'"
+                                    " from 'onnxmltools.convert.common."
+                                    "_container'") in st:
+                                # onnxmltools not recent enough
+                                pass
+                            elif ('Please fix either the inputs or '
+                                    'the model.') in st:
+                                # onnxruntime datasets changed in master
+                                # branch, still the same in released
+                                # version on pypi
+                                pass
+                            elif 'dot: graph is too large' in st:
+                                # graph is too big
+                                pass
+                            else:
+                                raise RuntimeError(
+                                    "Example '{}' (cmd: {} - exec_prefix="
+                                    "'{}') failed due to\n{}"
+                                    "".format(name, cmds, sys.exec_prefix, st))
+                    finally:
+                        if sys.path[0] == fold:
+                            del sys.path[0]
+                    tested += 1
+        if tested == 0:
+            raise RuntimeError("No example was tested.")
+
+
+if __name__ == "__main__":
+    unittest.main()

From 55132cc8398489f321f2dacf5ec2bcca8cd22b67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Mon, 12 Jul 2021 21:15:35 +0200
Subject: [PATCH 11/16] ut

---
 tests/model.onnx                      | Bin 0 -> 6737 bytes
 tests/test_documentation_examples1.py |   2 +-
 tests/test_documentation_examples2.py |   2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 create mode 100644 tests/model.onnx

diff --git a/tests/model.onnx b/tests/model.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..283925c03b95ed1f938ac2fdd27250407e76e97d
GIT binary patch
literal 6737
zcmeGhYit}x`SyIzv3)sjoX9=$(g9U+O-$@0CbsU2FSZgj#!1@5rNNPV?49@QmG^cp
zyX)ALMx<&hq0$s2C{>b%m<Sb#w24#+e&E}LHvFJccoQBHs0AcYK@^Z8(IODc<HkF8
z=X2dyR6?ppp84*Z?>*nl?l&6=$q{=>?=}r%HWBRV?Csp13@chE`MufK?dyADC)$Am
zjymh07JE`DsaZ!gY}1mX1zmA4td1wbM^rkM<UpSfjiR7tl%^dya#FE0C8w*2(9yi2
zt5&iyrq-a4St!`517H5vw=?jG%Y&IYDU@nQL7Nc5P{;XxC>>o*uzjdvRA*5~Gssge
zBg3?cimpA34T&FhEX|lW#4%Q~4wKq|<h)|5+5DtpOsH8U?`Wox3=SWDaNm(RsVUWg
z+B7>$No?$?=4dC?4&R*Q$4+&l1#je5?iZnT5`jV7CuyIGBEMsHNQ~!<gai}isAEp4
z2DLUiQUR@`2d0`)pr}|=RAs6Sp`vDBQ!Qh<NF`!aw4{t@Pb&Jfnv_0BOkRf~RH`~#
zO12(XwmrOe+vxUfz1gvQ@N8de-Yj+&wY+88X2C)CAkPFDEi7VJBD|k1Xp#fi;m`Sz
zKq80CX$P-qV%13Dz<A-~YThv|?247FMWoiERYhesJ7Fostgadp&Lp#b4T=%GJ*Abh
zd2`xun4g4PGwR7(aWKqrHR&JOPaIv1S_~?Zvf3HEEa6lFwKAk;jH|OM#%839Khs3!
zHlvo3Y3f;<=7l{J%<GD64>g&FI@ILMm_q?<y*VdEQkzhCM%5-J9UFYU)BQCa$v_|4
ziW8Ow0EM*iLf&@dm{Y)zV`VFWX*xKqCrO8???*MWato&>DVsn-nhJ0V`%LDO{^4%2
zd}8A7MpYu@7|+3OmV`l)gk6he;Sp4wuc(GqmK{qm>;hJ-j^mZlr;A1+G-w)m#Yt91
zlA6k_X7kMMMT;=WQEt<Y1U097F>_9*kVhv6btR|ja%;&lb4pIr@t#xd#M%){RreXT
zTEvIPAXzu9py6C=fKP2i%{j8M>CUiW<l<A;y^b6#mn0OW>z~!eNtW;?;e27ukMo8#
zK1XtiH$LZI;C%P9OUfk~`@Do&nUMxgd}1h0<xD6O&P4DSs6Yf4q_}sUWps^rYi9{v
z5NoNXXZjbkRvYTzb$aKBHer#h1+WU%f>;%6A*_<MFm{Wz2zHsZXofeG;jKBtT1$qv
z)(mg4Or;S+A8+Cj<s1Cm*|TN%$Didr@W!QPfY{V)Wq9oukGt^plUH5%{>__Z41EO1
z&v_X8@EL$dFTF_UvT0bG{kse2&fZ0cF-gbRvt_v4c^lx`o5#}7^YShiF1PD0Ty44F
z!s+u@%J9wav;q9)>$!RO`6d-$)$T6=Jn`?LG+em#x(nMs<8$H7=VgFX=e}5mts8#`
z@V%x3Y53%=4fBxed)tKzCpNn9(Oatl4*YJ+g=em8apA_FP5|^@cm!bnsoiOK;L)GB
z@Wxv|D?{(M-b%x+8=o)3V_h!+%=K+b!v{hS&BOLzz3M`+<1rT=czUP|TjsCK!z+bf
zr(xtn8vs6b>8EM<)VkNp@ab1S3-Fh_kGl}rbgB$LQl0~d#vV$;UAZ<F*5B@OVP@{>
zd3g3KmJ2`V{c#z3zIHhc$0An&UO90P;BN&z4KLpOt_zP`oGZg$uir?M-M1b;cgZHE
zd)>}zhSRgNa#oX7`l=tvB;)ODLDBJpx03t;oHJz4L~u^9y%WJ16QD51aGDUMBUBu7
zXI!3~ccW`5uF!(b%$G(Uc=mD+7Nn9GUG$@PB}S^^Oj~r<k<*DtWj%vf%m~bzILl~O
z!o?)8o~=c-{%Wn<LXnGG$n9chtj8zva4h5J9CaE=gkA`u7>m%Ph978-y6Drq=HdSS
z&${=W_e1hO4+%LFulUhMmaat&zup)V?BykkQ#U_l<6&A|*x6#<dobS{jWFIm@*vJ5
z?jVly<S=NZTwGqY9D?QYxn?$sbey+BLH73-qNuqNF!HPzLIX$k9vZwiJG}4yDmXB_
zZ}0vg)-Q)<X-ZK-BgrCuX=x&5z2th#ok?pXXK87TxEkS?miFFwT5m%my`mQ)+=yPp
z*K1k6gym2!ho;^J(Lzx|Be)U0h_BbO9BHD2M)V@S5xt0C+7gjclq|v`&7DcB=MrR+
zi-YI<`2K;uxRRdLLG=CszlD-QiWF|-v?ImS|1ai_JRm2w7oPeukstl!(64S@sm&o?
zJbEK9BE69plitXSN^j)Fr8n{-(;IoQ>5aVT^hREO))t}l#b{koDi)`6e$>XYhdk_g
z29azb!6Es>pNk}a2rOb4jjvK0t&~fYvwU4kbMg0lo<ECw(9kE6LEi}CTR*zNDmOb1
zj@`Znq2|(c{FdtTv2N$=)_xfNt$9tr(OEh%bL$U#pr_~b9$~h8{*SKklM=t&y!$%D
z+kd|}9o6fpY-q;#j^pJ1kef!K_tp&R2i05SOs+7|4>TGc38nj|bgC|ND~?8c05233
zubZP{OdCd{Cd8DN+Drxfa@{U*&FL)dsJzA2W3i>6Zl%sVDT<<DT4@Z(8~r*IE19-K
z-r(77YBGHE@cxm5dym!J<;t<chYlUve4kcQb<I$>q&7sPc(@P!hyO_fRo`3id>}3*
z<34;jy=FF;;r|1btLV`+{DVsiLJ>(0As_Nb{G;vc<Bd$PkqTDIwV}$LYXpTNVcCy-
kx%H^}{*`iYIZXor<cl{CwBf5X@oPd{8i*m`hB_|&7s@dpbpQYW

literal 0
HcmV?d00001

diff --git a/tests/test_documentation_examples1.py b/tests/test_documentation_examples1.py
index 171afe2e..8b76954e 100644
--- a/tests/test_documentation_examples1.py
+++ b/tests/test_documentation_examples1.py
@@ -40,7 +40,7 @@ def test_documentation_examples1(self):
         found = os.listdir(fold)
         tested = 0
         for name in sorted(found):
-            if name.replace("\\", "/").split("/")[-1] >= "m":
+            if name >= "plot_u":
                 break
 
             if '-v' in sys.argv:
diff --git a/tests/test_documentation_examples2.py b/tests/test_documentation_examples2.py
index ba6ab119..5eec8e18 100644
--- a/tests/test_documentation_examples2.py
+++ b/tests/test_documentation_examples2.py
@@ -40,7 +40,7 @@ def test_documentation_examples2(self):
         found = os.listdir(fold)
         tested = 0
         for name in sorted(found):
-            if name.replace("\\", "/").split("/")[-1] < "m":
+            if name < "plot_u":
                 continue
 
             if '-v' in sys.argv:

From 2e4aa2f1861732b6926e8c25ecaf3e3c7e90cadd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Tue, 13 Jul 2021 00:44:29 +0200
Subject: [PATCH 12/16] skip one test on circleci

---
 .gitignore                            |   1 +
 tests/model.onnx                      | Bin 6737 -> 0 bytes
 tests/test_documentation_examples2.py |   2 ++
 3 files changed, 3 insertions(+)
 delete mode 100644 tests/model.onnx

diff --git a/.gitignore b/.gitignore
index e64bd609..aa155f86 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ tests/pipeline*.onnx
 temp_*
 examples/pipeline_lightgbm.onnx
 examples/model.onnx
+tests/model.onnx
diff --git a/tests/model.onnx b/tests/model.onnx
deleted file mode 100644
index 283925c03b95ed1f938ac2fdd27250407e76e97d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6737
zcmeGhYit}x`SyIzv3)sjoX9=$(g9U+O-$@0CbsU2FSZgj#!1@5rNNPV?49@QmG^cp
zyX)ALMx<&hq0$s2C{>b%m<Sb#w24#+e&E}LHvFJccoQBHs0AcYK@^Z8(IODc<HkF8
z=X2dyR6?ppp84*Z?>*nl?l&6=$q{=>?=}r%HWBRV?Csp13@chE`MufK?dyADC)$Am
zjymh07JE`DsaZ!gY}1mX1zmA4td1wbM^rkM<UpSfjiR7tl%^dya#FE0C8w*2(9yi2
zt5&iyrq-a4St!`517H5vw=?jG%Y&IYDU@nQL7Nc5P{;XxC>>o*uzjdvRA*5~Gssge
zBg3?cimpA34T&FhEX|lW#4%Q~4wKq|<h)|5+5DtpOsH8U?`Wox3=SWDaNm(RsVUWg
z+B7>$No?$?=4dC?4&R*Q$4+&l1#je5?iZnT5`jV7CuyIGBEMsHNQ~!<gai}isAEp4
z2DLUiQUR@`2d0`)pr}|=RAs6Sp`vDBQ!Qh<NF`!aw4{t@Pb&Jfnv_0BOkRf~RH`~#
zO12(XwmrOe+vxUfz1gvQ@N8de-Yj+&wY+88X2C)CAkPFDEi7VJBD|k1Xp#fi;m`Sz
zKq80CX$P-qV%13Dz<A-~YThv|?247FMWoiERYhesJ7Fostgadp&Lp#b4T=%GJ*Abh
zd2`xun4g4PGwR7(aWKqrHR&JOPaIv1S_~?Zvf3HEEa6lFwKAk;jH|OM#%839Khs3!
zHlvo3Y3f;<=7l{J%<GD64>g&FI@ILMm_q?<y*VdEQkzhCM%5-J9UFYU)BQCa$v_|4
ziW8Ow0EM*iLf&@dm{Y)zV`VFWX*xKqCrO8???*MWato&>DVsn-nhJ0V`%LDO{^4%2
zd}8A7MpYu@7|+3OmV`l)gk6he;Sp4wuc(GqmK{qm>;hJ-j^mZlr;A1+G-w)m#Yt91
zlA6k_X7kMMMT;=WQEt<Y1U097F>_9*kVhv6btR|ja%;&lb4pIr@t#xd#M%){RreXT
zTEvIPAXzu9py6C=fKP2i%{j8M>CUiW<l<A;y^b6#mn0OW>z~!eNtW;?;e27ukMo8#
zK1XtiH$LZI;C%P9OUfk~`@Do&nUMxgd}1h0<xD6O&P4DSs6Yf4q_}sUWps^rYi9{v
z5NoNXXZjbkRvYTzb$aKBHer#h1+WU%f>;%6A*_<MFm{Wz2zHsZXofeG;jKBtT1$qv
z)(mg4Or;S+A8+Cj<s1Cm*|TN%$Didr@W!QPfY{V)Wq9oukGt^plUH5%{>__Z41EO1
z&v_X8@EL$dFTF_UvT0bG{kse2&fZ0cF-gbRvt_v4c^lx`o5#}7^YShiF1PD0Ty44F
z!s+u@%J9wav;q9)>$!RO`6d-$)$T6=Jn`?LG+em#x(nMs<8$H7=VgFX=e}5mts8#`
z@V%x3Y53%=4fBxed)tKzCpNn9(Oatl4*YJ+g=em8apA_FP5|^@cm!bnsoiOK;L)GB
z@Wxv|D?{(M-b%x+8=o)3V_h!+%=K+b!v{hS&BOLzz3M`+<1rT=czUP|TjsCK!z+bf
zr(xtn8vs6b>8EM<)VkNp@ab1S3-Fh_kGl}rbgB$LQl0~d#vV$;UAZ<F*5B@OVP@{>
zd3g3KmJ2`V{c#z3zIHhc$0An&UO90P;BN&z4KLpOt_zP`oGZg$uir?M-M1b;cgZHE
zd)>}zhSRgNa#oX7`l=tvB;)ODLDBJpx03t;oHJz4L~u^9y%WJ16QD51aGDUMBUBu7
zXI!3~ccW`5uF!(b%$G(Uc=mD+7Nn9GUG$@PB}S^^Oj~r<k<*DtWj%vf%m~bzILl~O
z!o?)8o~=c-{%Wn<LXnGG$n9chtj8zva4h5J9CaE=gkA`u7>m%Ph978-y6Drq=HdSS
z&${=W_e1hO4+%LFulUhMmaat&zup)V?BykkQ#U_l<6&A|*x6#<dobS{jWFIm@*vJ5
z?jVly<S=NZTwGqY9D?QYxn?$sbey+BLH73-qNuqNF!HPzLIX$k9vZwiJG}4yDmXB_
zZ}0vg)-Q)<X-ZK-BgrCuX=x&5z2th#ok?pXXK87TxEkS?miFFwT5m%my`mQ)+=yPp
z*K1k6gym2!ho;^J(Lzx|Be)U0h_BbO9BHD2M)V@S5xt0C+7gjclq|v`&7DcB=MrR+
zi-YI<`2K;uxRRdLLG=CszlD-QiWF|-v?ImS|1ai_JRm2w7oPeukstl!(64S@sm&o?
zJbEK9BE69plitXSN^j)Fr8n{-(;IoQ>5aVT^hREO))t}l#b{koDi)`6e$>XYhdk_g
z29azb!6Es>pNk}a2rOb4jjvK0t&~fYvwU4kbMg0lo<ECw(9kE6LEi}CTR*zNDmOb1
zj@`Znq2|(c{FdtTv2N$=)_xfNt$9tr(OEh%bL$U#pr_~b9$~h8{*SKklM=t&y!$%D
z+kd|}9o6fpY-q;#j^pJ1kef!K_tp&R2i05SOs+7|4>TGc38nj|bgC|ND~?8c05233
zubZP{OdCd{Cd8DN+Drxfa@{U*&FL)dsJzA2W3i>6Zl%sVDT<<DT4@Z(8~r*IE19-K
z-r(77YBGHE@cxm5dym!J<;t<chYlUve4kcQb<I$>q&7sPc(@P!hyO_fRo`3id>}3*
z<34;jy=FF;;r|1btLV`+{DVsiLJ>(0As_Nb{G;vc<Bd$PkqTDIwV}$LYXpTNVcCy-
kx%H^}{*`iYIZXor<cl{CwBf5X@oPd{8i*m`hB_|&7s@dpbpQYW

diff --git a/tests/test_documentation_examples2.py b/tests/test_documentation_examples2.py
index 5eec8e18..4fe88bf6 100644
--- a/tests/test_documentation_examples2.py
+++ b/tests/test_documentation_examples2.py
@@ -9,6 +9,7 @@
 import subprocess
 from datetime import datetime
 import onnxruntime
+from pyquickhelper.pycode import skipif_circleci
 
 
 def import_source(module_file_path, module_name):
@@ -27,6 +28,7 @@ def import_source(module_file_path, module_name):
 
 class TestDocumentationExample2(unittest.TestCase):
 
+    @skipif_circleci('too long')
     def test_documentation_examples2(self):
 
         this = os.path.abspath(os.path.dirname(__file__))

From 39a277832026aae110c0dd9852866affde3dc412 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Tue, 13 Jul 2021 01:21:54 +0200
Subject: [PATCH 13/16] Update plot_usparse_xgboost.py

---
 examples/plot_usparse_xgboost.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py
index b556f24c..2a1d185b 100644
--- a/examples/plot_usparse_xgboost.py
+++ b/examples/plot_usparse_xgboost.py
@@ -33,7 +33,7 @@
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
-from sklearn.experimental import enable_hist_gradient_boosting 
+from sklearn.experimental import enable_hist_gradient_boosting  # pylint: disable=F401
 from sklearn.ensemble import (
     RandomForestClassifier, HistGradientBoostingClassifier)
 from xgboost import XGBClassifier
@@ -190,7 +190,7 @@ def td(a):
         if diff > 0.1:
             for i, (l1, l2) in enumerate(
                     zip(pipe.predict_proba(df),
-                    pred_onx['probabilities'])):
+                        pred_onx['probabilities'])):
                 d = numpy.abs(l1 - l2).sum()
                 if verbose and d > 0.1:
                     print("\nDISCREPENCY DETAILS")

From d9d563bf92834823c6106a6ac450ba3b1afc5831 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Tue, 13 Jul 2021 01:32:09 +0200
Subject: [PATCH 14/16] Update plot_usparse_xgboost.py

---
 examples/plot_usparse_xgboost.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py
index 2a1d185b..40f71cca 100644
--- a/examples/plot_usparse_xgboost.py
+++ b/examples/plot_usparse_xgboost.py
@@ -33,7 +33,8 @@
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
-from sklearn.experimental import enable_hist_gradient_boosting  # pylint: disable=F401
+from sklearn.experimental import (  # pylint: disable=F401
+    enable_hist_gradient_boosting)
 from sklearn.ensemble import (
     RandomForestClassifier, HistGradientBoostingClassifier)
 from xgboost import XGBClassifier

From 436cacdfd2980cc704151beca716fd8c580a2df7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Tue, 13 Jul 2021 01:41:56 +0200
Subject: [PATCH 15/16] lint

---
 .circleci/config.yml             | 12 ++++++------
 examples/plot_usparse_xgboost.py |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0e5eaa33..c0af75d8 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -39,12 +39,6 @@ jobs:
             . venv/bin/activate
             python setup.py build_ext --inplace
 
-      - run:
-          name: run tests
-          command: |
-            . venv/bin/activate
-            coverage run  --omit=tests/test_*.py -m unittest discover tests -v
-
       - run:
           name: flake8
           command: |
@@ -53,6 +47,12 @@ jobs:
             python -m flake8 onnxcustom
             python -m flake8 examples
 
+      - run:
+          name: run tests
+          command: |
+            . venv/bin/activate
+            coverage run  --omit=tests/test_*.py -m unittest discover tests -v
+
       - run:
           name: coverage
           command: |
diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py
index 40f71cca..cdbad137 100644
--- a/examples/plot_usparse_xgboost.py
+++ b/examples/plot_usparse_xgboost.py
@@ -34,7 +34,7 @@
 from sklearn.preprocessing import StandardScaler
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
 from sklearn.experimental import (  # pylint: disable=F401
-    enable_hist_gradient_boosting)
+    enable_hist_gradient_boosting)  # pylint: disable=F401
 from sklearn.ensemble import (
     RandomForestClassifier, HistGradientBoostingClassifier)
 from xgboost import XGBClassifier

From 9d7f4f2b5387cb5dc49f7b21efd5266ace755fb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xavier=20dupr=C3=A9?= <xavier.dupre@gmail.com>
Date: Tue, 13 Jul 2021 01:43:37 +0200
Subject: [PATCH 16/16] Update plot_usparse_xgboost.py

---
 examples/plot_usparse_xgboost.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py
index cdbad137..6b1860e4 100644
--- a/examples/plot_usparse_xgboost.py
+++ b/examples/plot_usparse_xgboost.py
@@ -33,8 +33,8 @@
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
-from sklearn.experimental import (  # pylint: disable=F401
-    enable_hist_gradient_boosting)  # pylint: disable=F401
+from sklearn.experimental import (  # noqa
+    enable_hist_gradient_boosting)  # noqa
 from sklearn.ensemble import (
     RandomForestClassifier, HistGradientBoostingClassifier)
 from xgboost import XGBClassifier