diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5c9b0d87..c0af75d8 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,7 +2,7 @@ version: 2
 jobs:
   build:
     docker:
-      - image: circleci/python:3.8.7
+      - image: circleci/python:3.9.5
     
     working_directory: ~/repo
     
@@ -39,12 +39,6 @@ jobs:
             . venv/bin/activate
             python setup.py build_ext --inplace
 
-      - run:
-          name: run tests
-          command: |
-            . venv/bin/activate
-            coverage run  --omit=tests/test_*.py -m unittest discover tests -v
-
       - run:
           name: flake8
           command: |
@@ -53,6 +47,12 @@ jobs:
             python -m flake8 onnxcustom
             python -m flake8 examples
 
+      - run:
+          name: run tests
+          command: |
+            . venv/bin/activate
+            coverage run  --omit=tests/test_*.py -m unittest discover tests -v
+
       - run:
           name: coverage
           command: |
diff --git a/.gitignore b/.gitignore
index e3ed7ec6..aa155f86 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,5 @@ examples/squeezenet1.1-7.onnx
 tests/pipeline*.onnx
 temp_*
 examples/pipeline_lightgbm.onnx
+examples/model.onnx
+tests/model.onnx
diff --git a/README.rst b/README.rst
index 369d5986..6e9e8409 100644
--- a/README.rst
+++ b/README.rst
@@ -2,8 +2,8 @@
 .. image:: https://circleci.com/gh/sdpython/onnxcustom/tree/master.svg?style=svg
     :target: https://circleci.com/gh/sdpython/onnxcustom/tree/master
 
-.. image:: https://travis-ci.org/sdpython/onnxcustom.svg?branch=master
-    :target: https://travis-ci.org/sdpython/onnxcustom
+.. image:: https://travis-ci.com/sdpython/onnxcustom.svg?branch=master
+    :target: https://travis-ci.com/sdpython/onnxcustom
     :alt: Build status
 
 .. image:: https://ci.appveyor.com/api/projects/status/a3sn45a2fayoxb5q?svg=true
diff --git a/doc/index.rst b/doc/index.rst
index 08adc8a2..09c8d45f 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -5,8 +5,8 @@ onnxcustom: deploy machine learned models
 .. image:: https://circleci.com/gh/sdpython/onnxcustom/tree/master.svg?style=svg
     :target: https://circleci.com/gh/sdpython/onnxcustom/tree/master
 
-.. image:: https://travis-ci.org/sdpython/onnxcustom.svg?branch=master
-    :target: https://travis-ci.org/sdpython/onnxcustom
+.. image:: https://travis-ci.com/sdpython/onnxcustom.svg?branch=master
+    :target: https://travis-ci.com/sdpython/onnxcustom
     :alt: Build status
 
 .. image:: https://ci.appveyor.com/api/projects/status/a3sn45a2fayoxb5q?svg=true
diff --git a/doc/tutorial.rst b/doc/tutorial.rst
index 9f30cba5..34867134 100644
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -16,6 +16,7 @@ involving operator not actually implemented in
     tutorial_1-5_external
     tutorial_2_new_converter
     tutorial_3_new_operator
+    tutorial_4_complex
 
 The tutorial was tested with following version:
 
diff --git a/doc/tutorial_4_complex.rst b/doc/tutorial_4_complex.rst
new file mode 100644
index 00000000..4d20ed74
--- /dev/null
+++ b/doc/tutorial_4_complex.rst
@@ -0,0 +1,10 @@
+
+Complex Scenarios
+=================
+
+Discrepancies may happen. Let's look at some unexpected cases.
+
+.. toctree::
+    :maxdepth: 1
+
+    auto_examples/plot_usparse_xgboost
diff --git a/examples/plot_cbegin_opset.py b/examples/plot_cbegin_opset.py
index 7bc0d82b..0fa3bdcd 100644
--- a/examples/plot_cbegin_opset.py
+++ b/examples/plot_cbegin_opset.py
@@ -43,7 +43,7 @@
 
 X, y = make_blobs(n_samples=100, n_features=2)
 
-model = IsolationForest(3)
+model = IsolationForest(n_estimators=3)
 model.fit(X)
 labels = model.predict(X)
 
diff --git a/examples/plot_usparse_xgboost.py b/examples/plot_usparse_xgboost.py
new file mode 100644
index 00000000..6b1860e4
--- /dev/null
+++ b/examples/plot_usparse_xgboost.py
@@ -0,0 +1,301 @@
+"""
+.. _example-sparse-tfidf:
+
+TfIdf and sparse matrices
+=========================
+
+.. index:: xgboost, lightgbm, sparse, ensemble
+
+`TfidfVectorizer <https://scikit-learn.org/stable/modules/
+generated/sklearn.feature_extraction.text.TfidfVectorizer.html>`_
+usually creates sparse data. If the data is sparse enough, matrices
+usually stay sparse all along the pipeline until the predictor
+is trained. Sparse matrices do not distinguish null from missing values
+as neither is stored in the dataset. Because some predictors
+do make that distinction, this ambiguity may introduce discrepancies
+when converted into ONNX. This example looks into several configurations.
+
+.. contents::
+    :local:
+
+Imports, setups
++++++++++++++++
+
+All imports. It also registers ONNX converters for :epkg:`xgboost`
+and :epkg:`lightgbm`.
+"""
+import warnings
+import numpy
+import pandas
+from tqdm import tqdm
+from sklearn.compose import ColumnTransformer
+from sklearn.datasets import load_iris
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
+from sklearn.experimental import (  # noqa
+    enable_hist_gradient_boosting)  # noqa
+from sklearn.ensemble import (
+    RandomForestClassifier, HistGradientBoostingClassifier)
+from xgboost import XGBClassifier
+from lightgbm import LGBMClassifier
+from skl2onnx.common.data_types import FloatTensorType, StringTensorType
+from skl2onnx import to_onnx, update_registered_converter
+from skl2onnx.sklapi import CastTransformer, ReplaceTransformer
+from skl2onnx.common.shape_calculator import (
+    calculate_linear_classifier_output_shapes)
+from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
+    convert_xgboost)
+from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
+    convert_lightgbm)
+from mlprodict.onnxrt import OnnxInference
+
+
+update_registered_converter(
+    XGBClassifier, 'XGBoostXGBClassifier',
+    calculate_linear_classifier_output_shapes, convert_xgboost,
+    options={'nocl': [True, False], 'zipmap': [True, False, 'columns']})
+update_registered_converter(
+    LGBMClassifier, 'LightGbmLGBMClassifier',
+    calculate_linear_classifier_output_shapes, convert_lightgbm,
+    options={'nocl': [True, False], 'zipmap': [True, False]})
+
+
+##########################################
+# Artificial datasets
+# +++++++++++++++++++++++++++
+#
+# Iris + a text column.
+
+cst = ['class zero', 'class one', 'class two']
+
+data = load_iris()
+X = data.data[:, :2]
+y = data.target
+
+df = pandas.DataFrame(X)
+df["text"] = [cst[i] for i in y]
+
+
+ind = numpy.arange(X.shape[0])
+numpy.random.shuffle(ind)
+X = X[ind, :].copy()
+y = y[ind].copy()
+
+
+##########################################
+# Train ensemble after sparse
+# +++++++++++++++++++++++++++
+#
+# The example uses the Iris dataset with an artificial text column
+# preprocessed with a tf-idf. `sparse_threshold=1.` prevents
+# sparse matrices from being converted into dense matrices.
+
+
+def make_pipelines(df_train, y_train, models=None,
+                   sparse_threshold=1., replace_nan=False,
+                   insert_replace=False, verbose=False):
+
+    if models is None:
+        models = [
+            RandomForestClassifier, HistGradientBoostingClassifier,
+            XGBClassifier, LGBMClassifier]
+
+    pipes = []
+    for model in tqdm(models):
+
+        if model == HistGradientBoostingClassifier:
+            kwargs = dict(max_iter=5)
+        elif model == XGBClassifier:
+            kwargs = dict(n_estimators=5, use_label_encoder=False)
+        else:
+            kwargs = dict(n_estimators=5)
+
+        if insert_replace:
+            pipe = Pipeline([
+                ('union', ColumnTransformer([
+                    ('scale1', StandardScaler(), [0, 1]),
+                    ('subject',
+                     Pipeline([
+                         ('count', CountVectorizer()),
+                         ('tfidf', TfidfTransformer()),
+                         ('repl', ReplaceTransformer()),
+                     ]), "text"),
+                ], sparse_threshold=sparse_threshold)),
+                ('cast', CastTransformer()),
+                ('cls', model(max_depth=3, **kwargs)),
+            ])
+        else:
+            pipe = Pipeline([
+                ('union', ColumnTransformer([
+                    ('scale1', StandardScaler(), [0, 1]),
+                    ('subject',
+                     Pipeline([
+                         ('count', CountVectorizer()),
+                         ('tfidf', TfidfTransformer())
+                     ]), "text"),
+                ], sparse_threshold=sparse_threshold)),
+                ('cast', CastTransformer()),
+                ('cls', model(max_depth=3, **kwargs)),
+            ])
+
+        try:
+            pipe.fit(df_train, y_train)
+        except TypeError as e:
+            obs = dict(model=model.__name__, pipe=pipe, error=e)
+            pipes.append(obs)
+            continue
+
+        options = {model: {'zipmap': False}}
+        if replace_nan:
+            options[TfidfTransformer] = {'nan': True}
+
+        # convert
+        with warnings.catch_warnings(record=False):
+            warnings.simplefilter("ignore", (FutureWarning, UserWarning))
+            model_onnx = to_onnx(
+                pipe,
+                initial_types=[('input', FloatTensorType([None, 2])),
+                               ('text', StringTensorType([None, 1]))],
+                target_opset=12, options=options)
+
+        with open('model.onnx', 'wb') as f:
+            f.write(model_onnx.SerializeToString())
+
+        oinf = OnnxInference(model_onnx)
+        inputs = {"input": df[[0, 1]].values.astype(numpy.float32),
+                  "text": df[["text"]].values}
+        pred_onx = oinf.run(inputs)
+
+        diff = numpy.abs(
+            pred_onx['probabilities'].ravel() -
+            pipe.predict_proba(df).ravel()).sum()
+
+        if verbose:
+            def td(a):
+                if hasattr(a, 'todense'):
+                    b = a.todense()
+                    ind = set(a.indices)
+                    for i in range(b.shape[1]):
+                        if i not in ind:
+                            b[0, i] = numpy.nan
+                    return b
+                return a
+
+            oinf = OnnxInference(model_onnx)
+            pred_onx2 = oinf.run(inputs)
+            diff2 = numpy.abs(
+                pred_onx2['probabilities'].ravel() -
+                pipe.predict_proba(df).ravel()).sum()
+
+        if diff > 0.1:
+            for i, (l1, l2) in enumerate(
+                    zip(pipe.predict_proba(df),
+                        pred_onx['probabilities'])):
+                d = numpy.abs(l1 - l2).sum()
+                if verbose and d > 0.1:
+                    print("\nDISCREPENCY DETAILS")
+                    print(d, i, l1, l2)
+                    pre = pipe.steps[0][-1].transform(df)
+                    print("idf", pre[i].dtype, td(pre[i]))
+                    pre2 = pipe.steps[1][-1].transform(pre)
+                    print("cas", pre2[i].dtype, td(pre2[i]))
+                    inter = oinf.run(inputs, intermediate=True)
+                    onx = inter['tfidftr_norm']
+                    print("onx", onx.dtype, onx[i])
+                    onx = inter['variable3']
+
+        obs = dict(model=model.__name__,
+                   discrepencies=diff,
+                   model_onnx=model_onnx, pipe=pipe)
+        if verbose:
+            obs['discrepency2'] = diff2
+        pipes.append(obs)
+
+    return pipes
+
+
+data_sparse = make_pipelines(df, y)
+stat = pandas.DataFrame(data_sparse).drop(['model_onnx', 'pipe'], axis=1)
+if 'error' in stat.columns:
+    print(stat.drop('error', axis=1))
+stat
+
+############################
+# Sparse data hurts.
+#
+# Dense data
+# ++++++++++
+#
+# Let's replace sparse data with dense by using `sparse_threshold=0.`
+
+
+data_dense = make_pipelines(df, y, sparse_threshold=0.)
+stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1)
+if 'error' in stat.columns:
+    print(stat.drop('error', axis=1))
+stat
+
+####################################
+# This is much better. Let's compare how the preprocessing
+# is applied to the data.
+
+print("sparse")
+print(data_sparse[-1]['pipe'].steps[0][-1].transform(df)[:2])
+print()
+print("dense")
+print(data_dense[-1]['pipe'].steps[0][-1].transform(df)[:2])
+
+####################################
+# This shows `RandomForestClassifier
+# <https://scikit-learn.org/stable/modules/generated/
+# sklearn.ensemble.RandomForestClassifier.html>`_,
+# `XGBClassifier <https://xgboost.readthedocs.io/
+# en/latest/python/python_api.html>`_ do not process
+# sparse and dense matrices
+# the same way, as opposed to `LGBMClassifier
+# <https://lightgbm.readthedocs.io/en/latest/
+# pythonapi/lightgbm.LGBMClassifier.html>`_.
+# And `HistGradientBoostingClassifier
+# <https://scikit-learn.org/stable/modules/generated/
+# sklearn.ensemble.HistGradientBoostingClassifier.html>`_
+# fails.
+#
+# Dense data with nan
+# +++++++++++++++++++
+#
+# Let's keep sparse data in the scikit-learn pipeline but
+# replace null values by nan in the onnx graph.
+
+data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=True)
+stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1)
+if 'error' in stat.columns:
+    print(stat.drop('error', axis=1))
+stat
+
+
+##############################
+# Dense, 0 replaced by nan
+# ++++++++++++++++++++++++
+#
+# Instead of using a specific option to replace null values
+# with nan values, a custom transformer called
+# ReplaceTransformer is explicitly inserted into the pipeline.
+# A new converter is added to the list of supported models.
+# It is equivalent to the previous option except it is
+# more explicit.
+
+data_dense = make_pipelines(df, y, sparse_threshold=1., replace_nan=False,
+                            insert_replace=True)
+stat = pandas.DataFrame(data_dense).drop(['model_onnx', 'pipe'], axis=1)
+if 'error' in stat.columns:
+    print(stat.drop('error', axis=1))
+stat
+
+######################################
+# Conclusion
+# ++++++++++
+#
+# Unless dense arrays are used, because :epkg:`onnxruntime`
+# and ONNX do not support sparse data yet, the conversion needs to be
+# tuned depending on the model which follows the TfIdf preprocessing.
diff --git a/requirements-dev.txt b/requirements-dev.txt
index de1ab1a6..f60e2f44 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -6,11 +6,10 @@ lightgbm
 loky
 matplotlib
 mlinsights
-mlprodict>=0.5
+mlprodict>=0.6
 nbsphinx
-onnx>=1.8.0
-git+https://github.com/xadupre/onnxconverter-common.git@jenkins
-onnxruntime>=1.6.0
+onnxconverter-common
+onnxruntime>=1.8.0
 pillow
 py-spy
 pandas
@@ -19,7 +18,7 @@ pyquickhelper>=1.10
 pytest
 pytest-cov
 scikit-learn>=0.24
-git+https://github.com/xadupre/sklearn-onnx.git@jenkins
+skl2onnx>=1.9.0
 sphinx
 sphinxcontrib-blockdiag
 sphinx-gallery
diff --git a/tests/test_documentation_examples.py b/tests/test_documentation_examples1.py
similarity index 97%
rename from tests/test_documentation_examples.py
rename to tests/test_documentation_examples1.py
index e6a2c9ce..8b76954e 100644
--- a/tests/test_documentation_examples.py
+++ b/tests/test_documentation_examples1.py
@@ -25,9 +25,9 @@ def import_source(module_file_path, module_name):
     return module_spec.loader.exec_module(module)
 
 
-class TestDocumentationExample(unittest.TestCase):
+class TestDocumentationExample1(unittest.TestCase):
 
-    def test_documentation_examples(self):
+    def test_documentation_examples1(self):
 
         this = os.path.abspath(os.path.dirname(__file__))
         onxc = os.path.normpath(os.path.join(this, '..'))
@@ -40,6 +40,8 @@ def test_documentation_examples(self):
         found = os.listdir(fold)
         tested = 0
         for name in sorted(found):
+            if name >= "plot_u":
+                break
 
             if '-v' in sys.argv:
                 if name.endswith('plot_bbegin_measure_time.py'):
diff --git a/tests/test_documentation_examples2.py b/tests/test_documentation_examples2.py
new file mode 100644
index 00000000..4fe88bf6
--- /dev/null
+++ b/tests/test_documentation_examples2.py
@@ -0,0 +1,120 @@
+"""
+Tests examples from the documentation.
+"""
+import unittest
+from distutils.version import StrictVersion
+import os
+import sys
+import importlib
+import subprocess
+from datetime import datetime
+import onnxruntime
+from pyquickhelper.pycode import skipif_circleci
+
+
+def import_source(module_file_path, module_name):
+    if not os.path.exists(module_file_path):
+        raise FileNotFoundError(module_file_path)
+    module_spec = importlib.util.spec_from_file_location(
+        module_name, module_file_path)
+    if module_spec is None:
+        raise FileNotFoundError(
+            "Unable to find '{}' in '{}', cwd='{}'.".format(
+                module_name, module_file_path,
+                os.path.abspath(__file__)))
+    module = importlib.util.module_from_spec(module_spec)
+    return module_spec.loader.exec_module(module)
+
+
+class TestDocumentationExample2(unittest.TestCase):
+
+    @skipif_circleci('too long')
+    def test_documentation_examples2(self):
+
+        this = os.path.abspath(os.path.dirname(__file__))
+        onxc = os.path.normpath(os.path.join(this, '..'))
+        pypath = os.environ.get('PYTHONPATH', None)
+        sep = ";" if sys.platform == 'win32' else ':'
+        pypath = "" if pypath in (None, "") else (pypath + sep)
+        pypath += onxc
+        os.environ['PYTHONPATH'] = pypath
+        fold = os.path.normpath(os.path.join(this, '..', 'examples'))
+        found = os.listdir(fold)
+        tested = 0
+        for name in sorted(found):
+            if name < "plot_u":
+                continue
+
+            if '-v' in sys.argv:
+                if name.endswith('plot_bbegin_measure_time.py'):
+                    if __name__ == "__main__":
+                        print("%s: skip %r" % (
+                            datetime.now().strftime("%d-%m-%y %H:%M:%S"),
+                            name))
+                    continue
+
+            with self.subTest(name=name):
+                if name.startswith("plot_") and name.endswith(".py"):
+                    if (name == "plot_pipeline_lightgbm.py" and
+                            StrictVersion(onnxruntime.__version__) <
+                                StrictVersion('1.0.0')):
+                        continue
+                    if __name__ == "__main__" or "-v" in sys.argv:
+                        print("%s: run %r" % (
+                            datetime.now().strftime("%d-%m-%y %H:%M:%S"),
+                            name))
+                    sys.path.insert(0, fold)
+                    try:
+                        mod = import_source(fold, os.path.splitext(name)[0])
+                        assert mod is not None
+                    except FileNotFoundError:
+                        # try another way
+                        cmds = [sys.executable, "-u",
+                                os.path.join(fold, name)]
+                        p = subprocess.Popen(
+                            cmds, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+                        res = p.communicate()
+                        out, err = res
+                        st = err.decode('ascii', errors='ignore')
+                        if len(st) > 0 and 'Traceback' in st:
+                            if "No such file or directory: 'dot': 'dot'" in st:
+                                # dot not installed, this part
+                                # is tested in onnx framework
+                                pass
+                            elif '"dot" not found in path.' in st:
+                                # dot not installed, this part
+                                # is tested in onnx framework
+                                pass
+                            elif "No module named 'xgboost'" in st:
+                                # xgboost not installed on CI
+                                pass
+                            elif ("cannot import name 'LightGbmModelContainer'"
+                                    " from 'onnxmltools.convert.common."
+                                    "_container'") in st:
+                                # onnxmltools not recent enough
+                                pass
+                            elif ('Please fix either the inputs or '
+                                    'the model.') in st:
+                                # onnxruntime datasets changed in master
+                                # branch, still the same in released
+                                # version on pypi
+                                pass
+                            elif 'dot: graph is too large' in st:
+                                # graph is too big
+                                pass
+                            else:
+                                raise RuntimeError(
+                                    "Example '{}' (cmd: {} - exec_prefix="
+                                    "'{}') failed due to\n{}"
+                                    "".format(name, cmds, sys.exec_prefix, st))
+                    finally:
+                        if sys.path[0] == fold:
+                            del sys.path[0]
+                    tested += 1
+        if tested == 0:
+            raise RuntimeError("No example was tested.")
+
+
+if __name__ == "__main__":
+    unittest.main()