diff --git a/.gitignore b/.gitignore
index 2441bb9..3efae15 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,3 +70,4 @@ _doc/examples/ort_cpu_ortvalue.csv
 _unittests/ut_documentation/data
 _unittests/ut_documentation/ort_*.csv
 _doc/examples/*splits*.png
+_doc/examples/eager*.png
diff --git a/_doc/examples/data/plot_benchmark_eager_mode.csv b/_doc/examples/data/plot_benchmark_eager_mode.csv
new file mode 100644
index 0000000..53be524
--- /dev/null
+++ b/_doc/examples/data/plot_benchmark_eager_mode.csv
@@ -0,0 +1,211 @@
+name,N,time
+numpy,1,1.2711102034680112e-06
+ort-eager,1,1.13358634176204e-05
+ort,1,6.808883180725622e-06
+ort-ov-eager,1,1.1388225820771748e-05
+ort-ov,1,6.520444517861111e-06
+ort-vect-ov-eager,1,1.2170951596653534e-05
+ort-vect-ov,1,8.38620085684813e-06
+ort-ov-bind-eager,1,1.3180973615906224e-05
+ort-ov-bind,1,8.696321750822233e-06
+ort-ov-eager-gpu,1,7.056212242768735e-05
+ort-ov-gpu,1,3.481692618334281e-05
+ort-vect-ov-eager-gpu,1,3.555464748193922e-05
+ort-vect-ov-gpu,1,2.4811498815044608e-05
+ort-ov-bind-eager-gpu,1,4.045496078652671e-05
+ort-ov-bind-gpu,1,2.7586736226665607e-05
+numpy,2,2.734601275551176e-06
+ort-eager,2,1.2075403671577034e-05
+ort,2,7.642062485847675e-06
+ort-ov-eager,2,1.221340531746166e-05
+ort-ov,2,7.38951311719538e-06
+ort-vect-ov-eager,2,1.2355767047175994e-05
+ort-vect-ov,2,9.376237369151918e-06
+ort-ov-bind-eager,2,1.3248451201744923e-05
+ort-ov-bind,2,9.651492869523173e-06
+ort-ov-eager-gpu,2,7.31769550059523e-05
+ort-ov-gpu,2,3.5939188217001456e-05
+ort-vect-ov-eager-gpu,2,3.82672866985782e-05
+ort-vect-ov-gpu,2,3.82050041403828e-05
+ort-ov-bind-eager-gpu,2,3.984901017352264e-05
+ort-ov-bind-gpu,2,2.7437667967271557e-05
+numpy,5,3.371258473438108e-06
+ort-eager,5,1.315288568274623e-05
+ort,5,7.733609667891108e-06
+ort-ov-eager,5,1.1391451651786244e-05
+ort-ov,5,7.110731346965438e-06
+ort-vect-ov-eager,5,1.2713854214334899e-05
+ort-vect-ov,5,8.936898459814504e-06
+ort-ov-bind-eager,5,1.3004758204068473e-05
+ort-ov-bind,5,9.634384796281332e-06
+ort-ov-eager-gpu,5,7.204147345595518e-05
+ort-ov-gpu,5,3.4798317110824925e-05
+ort-vect-ov-eager-gpu,5,3.641794677098572e-05
+ort-vect-ov-gpu,5,2.5500802780277797e-05
+ort-ov-bind-eager-gpu,5,3.897886624590421e-05
+ort-ov-bind-gpu,5,2.6711501113916796e-05
+numpy,10,3.99789578824459e-06
+ort-eager,10,1.2989837375196468e-05
+ort,10,8.63157538790818e-06
+ort-ov-eager,10,1.188740721009972e-05
+ort-ov,10,7.536177843282319e-06
+ort-vect-ov-eager,10,1.2986518222054369e-05
+ort-vect-ov,10,9.170969528794047e-06
+ort-ov-bind-eager,10,1.3924449184394586e-05
+ort-ov-bind,10,1.0086918655638783e-05
+ort-ov-eager-gpu,10,7.289471841246194e-05
+ort-ov-gpu,10,3.498931489656308e-05
+ort-vect-ov-eager-gpu,10,3.650360053679982e-05
+ort-vect-ov-gpu,10,2.4900201075085014e-05
+ort-ov-bind-eager-gpu,10,3.90811277641589e-05
+ort-ov-bind-gpu,10,2.6648614517192252e-05
+numpy,20,5.098671245506269e-06
+ort-eager,20,1.54399778226383e-05
+ort,20,9.723499106239505e-06
+ort-ov-eager,20,1.3414586822862827e-05
+ort-ov,20,8.937757405819314e-06
+ort-vect-ov-eager,20,1.453850413616068e-05
+ort-vect-ov,20,1.0608864048945493e-05
+ort-ov-bind-eager,20,1.5095967328085041e-05
+ort-ov-bind,20,1.1017649669708521e-05
+ort-ov-eager-gpu,20,7.634075496307745e-05
+ort-ov-gpu,20,3.63207728828633e-05
+ort-vect-ov-eager-gpu,20,3.689129719487773e-05
+ort-vect-ov-gpu,20,2.45928172943865e-05
+ort-ov-bind-eager-gpu,20,3.898421608087886e-05
+ort-ov-bind-gpu,20,2.8260121573261993e-05
+numpy,50,9.188749538155977e-06
+ort-eager,50,1.9810953781562276e-05
+ort,50,1.3596643890503427e-05
+ort-ov-eager,50,1.6675594745339046e-05
+ort-ov,50,1.19556604668197e-05
+ort-vect-ov-eager,50,1.8200560648235596e-05
+ort-vect-ov,50,1.3913243298719186e-05
+ort-ov-bind-eager,50,1.8310926928434794e-05
+ort-ov-bind,50,1.4871118216568608e-05
+ort-ov-eager-gpu,50,8.813127975112625e-05
+ort-ov-gpu,50,3.9916390796898574e-05
+ort-vect-ov-eager-gpu,50,3.693576212247193e-05
+ort-vect-ov-gpu,50,2.4740043233934736e-05
+ort-ov-bind-eager-gpu,50,3.9929252907388496e-05
+ort-ov-bind-gpu,50,2.6972319681159e-05
+numpy,100,1.5112248534907798e-05
+ort-eager,100,2.7329642718366113e-05
+ort,100,1.9632307746985428e-05
+ort-ov-eager,100,2.2391379095478763e-05
+ort-ov,100,1.6873115156259802e-05
+ort-vect-ov-eager,100,2.6304204571192804e-05
+ort-vect-ov,100,1.8689596296323967e-05
+ort-ov-bind-eager,100,2.377623776390361e-05
+ort-ov-bind,100,1.9281802418245318e-05
+ort-ov-eager-gpu,100,0.00010847913871082117
+ort-ov-gpu,100,4.537494692204859e-05
+ort-vect-ov-eager-gpu,100,3.7859098558298876e-05
+ort-vect-ov-gpu,100,2.502607185493015e-05
+ort-ov-bind-eager-gpu,100,4.014694452891318e-05
+ort-ov-bind-gpu,100,2.7043863977353597e-05
+numpy,200,2.917542704614346e-05
+ort-eager,200,4.151081535441569e-05
+ort,200,3.140525549455308e-05
+ort-ov-eager,200,3.316792525208345e-05
+ort-ov,200,2.6052282897360398e-05
+ort-vect-ov-eager,200,3.334714772805632e-05
+ort-vect-ov,200,2.832363693054924e-05
+ort-ov-bind-eager,200,3.367213000414696e-05
+ort-ov-bind,200,2.900986954031129e-05
+ort-ov-eager-gpu,200,0.00014148382873901602
+ort-ov-gpu,200,6.213097129367551e-05
+ort-vect-ov-eager-gpu,200,3.718186268147814e-05
+ort-vect-ov-gpu,200,2.4685012253127547e-05
+ort-ov-bind-eager-gpu,200,3.9644076090321376e-05
+ort-ov-bind-gpu,200,2.7330868070964348e-05
+numpy,500,6.52488797458863e-05
+ort-eager,500,8.870200997398021e-05
+ort,500,6.801239449073611e-05
+ort-ov-eager,500,6.0614623033606835e-05
+ort-ov,500,5.285868684601302e-05
+ort-vect-ov-eager,500,6.496559108347305e-05
+ort-vect-ov,500,5.4099669294624495e-05
+ort-ov-bind-eager,500,6.085498981571537e-05
+ort-ov-bind,500,5.60753888691455e-05
+ort-ov-eager-gpu,500,0.0002297830632507649
+ort-ov-gpu,500,0.00010358515326009876
+ort-vect-ov-eager-gpu,500,3.7701112708672274e-05
+ort-vect-ov-gpu,500,2.5178429152380514e-05
+ort-ov-bind-eager-gpu,500,4.01915926792862e-05
+ort-ov-bind-gpu,500,2.758014015853405e-05
+numpy,1000,0.00012219335462987602
+ort-eager,1000,0.0001355954237847054
+ort,1000,0.00011039443873431181
+ort-ov-eager,1000,8.403399988310412e-05
+ort-ov,1000,6.696581017376497e-05
+ort-vect-ov-eager,1000,6.78829272345523e-05
+ort-vect-ov,1000,6.176580714617249e-05
+ort-ov-bind-eager,1000,7.399409556026926e-05
+ort-ov-bind,1000,8.521743366293715e-05
+ort-ov-eager-gpu,1000,0.00037906445510571405
+ort-ov-gpu,1000,0.00011412395285309153
+ort-vect-ov-eager-gpu,1000,4.747279511276083e-05
+ort-vect-ov-gpu,1000,2.876006557321218e-05
+ort-ov-bind-eager-gpu,1000,4.042265625333283e-05
+ort-ov-bind-gpu,1000,2.8134765074655568e-05
+numpy,2000,0.0002388510924116914
+ort-eager,2000,0.00044227690687553297
+ort,2000,0.00020837497959660012
+ort-ov-eager,2000,0.00013327595543086043
+ort-ov,2000,0.00010776344534693932
+ort-vect-ov-eager,2000,0.0001355220409125456
+ort-vect-ov,2000,7.620716032499271e-05
+ort-ov-bind-eager,2000,0.00017507457412964056
+ort-ov-bind,2000,7.764993359129954e-05
+ort-ov-eager-gpu,2000,0.0006141934830035704
+ort-ov-gpu,2000,0.00018641628445784667
+ort-vect-ov-eager-gpu,2000,3.88771848275694e-05
+ort-vect-ov-gpu,2000,2.6443645550637557e-05
+ort-ov-bind-eager-gpu,2000,4.167438551815132e-05
+ort-ov-bind-gpu,2000,2.8127812820070603e-05
+numpy,5000,0.0006044526570335482
+ort-eager,5000,0.0008715660814956135
+ort,5000,0.00042021945334932547
+ort-ov-eager,5000,0.0001657667077476314
+ort-ov,5000,0.000347322947345674
+ort-vect-ov-eager,5000,0.00019283137195050083
+ort-vect-ov,5000,0.00017095919352986158
+ort-ov-bind-eager,5000,0.00017653256604124022
+ort-ov-bind,5000,0.00037077100113402684
+ort-ov-eager-gpu,5000,0.0010676379005114236
+ort-ov-gpu,5000,0.00037449517998886244
+ort-vect-ov-eager-gpu,5000,4.251485171897168e-05
+ort-vect-ov-gpu,5000,2.990845970852279e-05
+ort-ov-bind-eager-gpu,5000,4.457795402295021e-05
+ort-ov-bind-gpu,5000,3.0521600616767125e-05
+numpy,10000,0.0011958029187683547
+ort-eager,10000,0.003115397562699703
+ort,10000,0.0012505987010143222
+ort-ov-eager,10000,0.0003016990199482635
+ort-ov,10000,0.0002381296440338095
+ort-vect-ov-eager,10000,0.00032288288507102567
+ort-vect-ov,10000,0.0003793603486143226
+ort-ov-bind-eager,10000,0.000622660714379024
+ort-ov-bind,10000,0.00017427370838094048
+ort-ov-eager-gpu,10000,0.0021718234987929464
+ort-ov-gpu,10000,0.0008711783358683953
+ort-vect-ov-eager-gpu,10000,6.672728953785018e-05
+ort-vect-ov-gpu,10000,4.796367579114598e-05
+ort-ov-bind-eager-gpu,10000,9.261158346715901e-05
+ort-ov-bind-gpu,10000,4.8890200975750176e-05
+numpy,20000,0.0027364153356757015
+ort-eager,20000,0.0067275110429719735
+ort,20000,0.003118208863518455
+ort-ov-eager,20000,0.0005569194742877569
+ort-ov,20000,0.001740831701317802
+ort-vect-ov-eager,20000,0.0009374135284145412
+ort-vect-ov,20000,0.0009072552202269435
+ort-ov-bind-eager,20000,0.0008944365921813776
+ort-ov-bind,20000,0.002242563001345843
+ort-ov-eager-gpu,20000,0.00445372259709984
+ort-ov-gpu,20000,0.001619209784881345
+ort-vect-ov-eager-gpu,20000,9.884369165564959e-05
+ort-vect-ov-gpu,20000,8.044799324125051e-05
+ort-ov-bind-eager-gpu,20000,0.00011459709441458637
+ort-ov-bind-gpu,20000,7.989798905327916e-05
diff --git a/_doc/examples/plot_benchmark_eager_mode.py b/_doc/examples/plot_benchmark_eager_mode.py
index ab01136..d82fd1b 100644
--- a/_doc/examples/plot_benchmark_eager_mode.py
+++ b/_doc/examples/plot_benchmark_eager_mode.py
@@ -20,7 +20,8 @@
 It is possible to do the same with :epkg:`onnxruntime`.
 This example compares the performance of a couple of
 scenarios. This work is close to what is done in example
-:ref:`benchmark-ort-api`.
+:ref:`benchmark-ort-api`. The example compares the performance
+of a couple of methods for CPU and GPU.
 
 .. contents::
     :local:
@@ -46,10 +47,17 @@
     make_model, make_node,
     make_graph, make_tensor_value_info)
 from onnxruntime import (
-    get_all_providers, InferenceSession, __version__ as ort_version)
+    get_all_providers, InferenceSession, __version__ as ort_version,
+    RunOptions)
 from onnxruntime.capi._pybind_state import (  # pylint: disable=E0611
     OrtDevice as C_OrtDevice,
-    OrtMemType, OrtValue as C_OrtValue)
+    OrtMemType, OrtValue as C_OrtValue,
+    SessionIOBinding as C_SessionIOBinding)
+try:
+    from onnxruntime.capi._pybind_state import OrtValueVector
+except ImportError:
+    # You need onnxruntime>=1.14
+    OrtValueVector = None
 from mlprodict.testing.experimental_c_impl.experimental_c import code_optimisation
 
 ############################################
@@ -144,6 +152,70 @@ def f_ort_ov(X):
     return Z
 
 
+cpu_device = C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)
+
+
+def f_ort_ov_bind_eager(X):
+    "ort-ov-bind-eager"
+    bind = C_SessionIOBinding(sess_add._sess)  # one binding reused by both runs
+    bind.bind_ortvalue_input("X", X)
+    bind.bind_output("Z", cpu_device)  # output "Z" is bound to cpu_device
+    sess_add._sess.run_with_iobinding(bind, None)  # first run of sess_add
+    T = bind.get_outputs()[0]
+    bind.bind_ortvalue_input("X", T)  # first result becomes the next input
+    sess_add._sess.run_with_iobinding(bind, None)  # second run of sess_add
+    return bind.get_outputs()[0]
+
+
+def f_ort_ov_bind(X):
+    "ort-ov-bind"
+    bind = C_SessionIOBinding(sess_add2._sess)  # binding for sess_add2
+    bind.bind_ortvalue_input("X", X)
+    bind.bind_output("Z", cpu_device)  # output "Z" is bound to cpu_device
+    sess_add2._sess.run_with_iobinding(bind, None)  # single run of sess_add2
+    return bind.get_outputs()[0]
+
+#######################################
+# onnxruntime >= 1.14 introduces a vector of OrtValues
+# to bypass the building of a dictionary.
+
+
+if OrtValueVector is not None:
+
+    run_options = RunOptions()
+    devices = [C_OrtDevice(C_OrtDevice.cpu(), OrtMemType.DEFAULT, 0)]
+
+    def f_ort_vect_ov_eager(X):
+        "ort-vect-ov-eager"
+        vect_in = OrtValueVector()
+        vect_in.push_back(X)  # the only input, matched with the name "X"
+        vect_out = OrtValueVector()
+        temp_vect_out = OrtValueVector()  # receives the first run's output
+        sess_add._sess.run_with_ortvaluevector(
+            run_options, ["X"], vect_in, ["Z"], temp_vect_out, devices)
+        assert len(temp_vect_out) == 1
+        sess_add._sess.run_with_ortvaluevector(  # 2nd run reads temp_vect_out
+            run_options, ["X"], temp_vect_out, ["Z"], vect_out, devices)
+        assert len(vect_out) == 1
+        return vect_out[0]
+
+    def f_ort_vect_ov(X):
+        "ort-vect-ov"
+        vect_in = OrtValueVector()
+        vect_in.push_back(X)  # the only input, matched with the name "X"
+        vect_out = OrtValueVector()
+        sess_add2._sess.run_with_ortvaluevector(  # single run of sess_add2
+            run_options, ["X"], vect_in, ["Z"], vect_out, devices)
+        assert len(vect_out) == 1
+        return vect_out[0]
+
+else:
+    f_ort_vect_ov_eager = None
+    f_ort_vect_ov = None
+
+#########################################
+# If GPU is available.
+
 if sess_add_gpu is not None:
 
     def f_ort_ov_eager_gpu(X):
@@ -157,9 +229,71 @@ def f_ort_ov_gpu(X):
         Z = sess_add2_gpu._sess.run_with_ort_values({'X': X}, ['Z'], None)[0]
         return Z
 
+    gpu_device = C_OrtDevice(C_OrtDevice.cuda(), OrtMemType.DEFAULT, 0)
+
+    def f_ort_ov_bind_eager_gpu(X):
+        "ort-ov-bind-eager-gpu"
+        bind = C_SessionIOBinding(sess_add_gpu._sess)  # reused by both runs
+        bind.bind_ortvalue_input("X", X)
+        bind.bind_output("Z", gpu_device)  # output "Z" is bound to gpu_device
+        sess_add_gpu._sess.run_with_iobinding(bind, None)  # first run
+        T = bind.get_outputs()[0]
+        bind.bind_ortvalue_input("X", T)  # first result becomes the next input
+        sess_add_gpu._sess.run_with_iobinding(bind, None)  # second run
+        return bind.get_outputs()[0]
+
+    def f_ort_ov_bind_gpu(X):
+        "ort-ov-bind-gpu"
+        bind = C_SessionIOBinding(sess_add2_gpu._sess)  # binding for sess_add2_gpu
+        bind.bind_ortvalue_input("X", X)
+        bind.bind_output("Z", gpu_device)  # output "Z" is bound to gpu_device
+        sess_add2_gpu._sess.run_with_iobinding(bind, None)  # single run
+        return bind.get_outputs()[0]
+
+    if OrtValueVector is not None:
+
+        run_options = RunOptions()
+        devices_gpu = [C_OrtDevice(C_OrtDevice.cuda(), OrtMemType.DEFAULT, 0)]  # own name: must not rebind the CPU `devices`
+
+        def f_ort_vect_ov_eager_gpu(X):
+            "ort-vect-ov-eager-gpu"
+            vect_in = OrtValueVector()
+            vect_in.push_back(X)  # the only input, matched with the name "X"
+            vect_out = OrtValueVector()
+            temp_vect_out = OrtValueVector()  # receives the first run's output
+            sess_add_gpu._sess.run_with_ortvaluevector(
+                run_options, ["X"], vect_in, ["Z"], temp_vect_out, devices_gpu)
+            sess_add_gpu._sess.run_with_ortvaluevector(  # 2nd run reads temp_vect_out
+                run_options, ["X"], temp_vect_out, ["Z"], vect_out, devices_gpu)
+            assert len(vect_out) == 1
+            return vect_out[0]
+
+        def f_ort_vect_ov_gpu(X):
+            "ort-vect-ov-gpu"
+            vect_in = OrtValueVector()
+            vect_in.push_back(X)  # the only input, matched with the name "X"
+            vect_out = OrtValueVector()
+            # crashes on the next line
+            sess_add2_gpu._sess.run_with_ortvaluevector(
+                run_options, ["X"], vect_in, ["Z"], vect_out, devices_gpu)
+            assert len(vect_out) == 1
+            return vect_out[0]
+
+    else:
+        f_ort_vect_ov_eager_gpu = None
+        f_ort_vect_ov_gpu = None
+
 else:
     f_ort_ov_eager_gpu = None
     f_ort_ov_gpu = None
+    f_ort_vect_ov_eager_gpu = None
+    f_ort_vect_ov_gpu = None
+    f_ort_ov_bind_eager_gpu = None
+    f_ort_ov_bind_gpu = None
+
+
+#######################################
+# Let's now check all these functions produce the same results.
 
 X = numpy.random.rand(10, CST.shape[1]).astype(CST.dtype)
 
@@ -167,33 +301,59 @@ def f_ort_ov_gpu(X):
 Xov = C_OrtValue.ortvalue_from_numpy(X, device)
 
 Ys = [
-    f_numpy(X),
-    f_ort_eager(X),
-    f_ort(X),
-    f_ort_ov_eager(Xov),
-    f_ort_ov(Xov),
+    (f_numpy, X),
+    (f_ort_eager, X),
+    (f_ort, X),
+    (f_ort_ov_eager, Xov),
+    (f_ort_ov, Xov),
+    (f_ort_ov_bind_eager, Xov),
+    (f_ort_ov_bind, Xov),
 ]
+
+if OrtValueVector is not None:
+    Ys.extend([
+        (f_ort_vect_ov_eager, Xov),
+        (f_ort_vect_ov, Xov),
+    ])
+
 if sess_add_gpu is not None:
     device_gpu = C_OrtDevice(C_OrtDevice.cuda(), OrtMemType.DEFAULT, 0)
     try:
         Xov_gpu = C_OrtValue.ortvalue_from_numpy(X, device_gpu)
         Ys.extend([
-            f_ort_ov_eager_gpu(Xov_gpu),
-            f_ort_ov_gpu(Xov_gpu),
+            (f_ort_ov_eager_gpu, Xov_gpu),
+            (f_ort_ov_gpu, Xov_gpu),
+            (f_ort_ov_bind_eager_gpu, Xov_gpu),
+            (f_ort_ov_bind_gpu, Xov_gpu),
         ])
+        if OrtValueVector is not None:
+            Ys.extend([
+                (f_ort_vect_ov_gpu, Xov_gpu),
+                (f_ort_vect_ov_eager_gpu, Xov_gpu),
+            ])
     except RuntimeError:
         # cuda is not available
         sess_add_gpu = None
-        sess_add2_gpu
+        sess_add2_gpu = None
         f_ort_ov_eager_gpu = None
         f_ort_ov_gpu = None
-
-for i in range(1, len(Ys)):
+        f_ort_ov_bind_eager_gpu = None
+        f_ort_ov_bind_gpu = None
+        f_ort_vect_ov_eager_gpu = None
+        f_ort_vect_ov_gpu = None
+
+results = []  # one output per (function, input) pair stored in Ys
+for fct, x in Ys:
+    print(
+        f"check function {fct.__name__!r} and input type {x.__class__.__name__!r}")
+    results.append(fct(x))
+
-for i in range(1, len(Ys)):
+for i in range(1, len(results)):  # results[0] comes from f_numpy, the reference
     try:
-        assert_allclose(Ys[0], Ys[i])
+        assert_allclose(results[0], results[i])
     except TypeError:
         # OrtValue
-        assert_allclose(Ys[0], Ys[i].numpy())
+        assert_allclose(results[0], results[i].numpy())  # retry after .numpy()
 
 ##########################################
 # All outputs are the same.
@@ -203,9 +363,15 @@ def f_ort_ov_gpu(X):
 # +++++++++++++++++++++++
 
 
-def benchmark(repeat=100):
-    fcts = [f_numpy, f_ort_eager, f_ort, f_ort_ov_eager, f_ort_ov,
-            f_ort_ov_eager_gpu, f_ort_ov_gpu]
+def benchmark(repeat=500000):
+    fcts = [
+        f_numpy, f_ort_eager, f_ort, f_ort_ov_eager, f_ort_ov,
+        f_ort_vect_ov_eager, f_ort_vect_ov,
+        f_ort_ov_bind_eager, f_ort_ov_bind,
+        f_ort_ov_eager_gpu, f_ort_ov_gpu,
+        f_ort_vect_ov_eager_gpu, f_ort_vect_ov_gpu,
+        f_ort_ov_bind_eager_gpu, f_ort_ov_bind_gpu,
+    ]
     data = []
     for N in tqdm([1, 2, 5, 10, 20, 50, 100, 200, 500,
                    1000, 2000, 5000, 10000, 20000]):
@@ -216,26 +382,27 @@ def benchmark(repeat=100):
             device_gpu = C_OrtDevice(C_OrtDevice.cuda(), OrtMemType.DEFAULT, 0)
             Xov_gpu = C_OrtValue.ortvalue_from_numpy(X, device_gpu)
 
+        r = max(1, min(500, int(repeat / N)))  # repetitions for this N, never 0
         for f in fcts:
             if f is None:
                 continue
             obs = {'name': f.__doc__, "N": N}
             if "-gpu" in f.__doc__:
                 begin = time.perf_counter()
-                for r in range(repeat):
+                for _rep in range(r):  # must not reuse `r`: it is the divisor below
                     _ = f(Xov_gpu)
                 end = time.perf_counter() - begin
             elif "-ov" in f.__doc__:
                 begin = time.perf_counter()
-                for r in range(repeat):
+                for _rep in range(r):
                     _ = f(Xov)
                 end = time.perf_counter() - begin
             else:
                 begin = time.perf_counter()
-                for r in range(repeat):
+                for _rep in range(r):
                     _ = f(X)
                 end = time.perf_counter() - begin
-            obs['time'] = end / repeat
+            obs['time'] = end / r  # average time of one call
             data.append(obs)
             data.append(obs)
 
     return pandas.DataFrame(data)
@@ -251,27 +418,53 @@ def benchmark(repeat=100):
 # ++++++
 
 def make_graph(df):
-    fig, ax = plt.subplots(2, 3, figsize=(12, 8))
+
+    def subgraph(row, cols):
+        if "numpy" not in cols:
+            cols = cols + ["numpy"]  # new list: the caller's `cols` stays intact
+        piv = piv_all[cols].copy()
+        piv.plot(ax=ax[row, 0],
+                 title="Time execution(s)" if row == 0 else "",
+                 logy=True, logx=True)
+        piv2 = piv / piv.index.values.reshape((-1, 1))  # time divided by N
+        piv2.plot(ax=ax[row, 1],
+                  title="Time(s) per execution / N" if row == 0 else "",
+                  logx=True)
+        piv3 = piv / piv["numpy"].values.reshape((-1, 1))  # ratio to numpy column
+        piv3.plot(ax=ax[row, 2],
+                  title="Ratio against numpy" if row == 0 else "",
+                  logy=True, logx=True)
+        for j in range(0, 3):
+            ax[row, j].legend(fontsize="x-small")
+
+    fig, ax = plt.subplots(5, 3, figsize=(15, 9))
+    fig.suptitle("Time execution Eager Add + Add - lower is better")
 
     piv_all = df.pivot(index="N", columns="name", values="time")
 
-    # no gpu
-    piv = piv_all[[c for c in piv_all.columns if "gpu" not in c]].copy()
-    piv.plot(ax=ax[0, 0], title="Time(s) per execution", logy=True, logx=True)
-    piv2 = piv / piv.index.values.reshape((-1, 1))
-    piv2.plot(ax=ax[0, 1], title="Time(s) per execution / N", logx=True)
-    piv3 = piv / piv["numpy"].values.reshape((-1, 1))
-    piv3.plot(ax=ax[0, 2], title="Ratio against numpy (lower is better)",
-              logy=True, logx=True)
-
-    # ort value
-    piv = piv_all[[c for c in piv_all.columns if "ov" in c or "numpy" in c]].copy()
-    piv.plot(ax=ax[1, 0], title="Time(s) per execution", logy=True, logx=True)
-    piv2 = piv / piv.index.values.reshape((-1, 1))
-    piv2.plot(ax=ax[1, 1], title="Time(s) per execution / N", logx=True)
-    piv3 = piv / piv["numpy"].values.reshape((-1, 1))
-    piv3.plot(ax=ax[1, 2], title="Ratio against numpy (lower is better)",
-              logy=True, logx=True)
+    # no gpu, no vect, no bind
+    subgraph(0, [c for c in piv_all.columns
+                 if "-gpu" not in c and "-vect" not in c and "-bind" not in c])
+
+    # no gpu, ov, no bind
+    subgraph(1, [c for c in piv_all.columns
+                 if "-gpu" not in c and "-ov" in c and "-bind" not in c])
+
+    # no gpu, vect or bind
+    subgraph(2, [c for c in piv_all.columns
+                 if "-gpu" not in c and ("-bind" in c or '-vect' in c)])
+
+    # gpu, no bind
+    cols = [c for c in piv_all.columns
+            if "-gpu" in c and "-ov" in c and "-bind" not in c]
+    subgraph(3, cols)
+
+    # gpu, vect or bind
+    cols = [c for c in piv_all.columns
+            if "-gpu" in c and ("-bind" in c or '-vect' in c)]
+    subgraph(4, cols)
+    fig.savefig("eager_mode_cpu.png" if len(cols) == 0
+                else "eager_mode_gpu.png", dpi=250)
     return fig, ax
 
 
@@ -286,7 +479,16 @@ def make_graph(df):
 # is using the direct python API. This could be improved by using :epkg:`cython`.
 # Eager mode must use :epkg:`OrtValue`. It is faster and it reduces the differences
 # between using two additions in a single graph or two graphs of a single addition
-# on CPU. On GPU, it is still faster but eager mode is significantly slower.
+# on CPU. On GPU, it is still faster but eager mode is slightly slower with
+# method `run_with_ortvaluevector` or `run_with_iobinding`. Both
+# methods show similar performances.
+#
+# However, method `run_with_ort_values` is not recommended
+# because the output device cannot be specified. Therefore,
+# :epkg:`onnxruntime` requests the output on CPU. On eager mode,
+# this output is used again as an input for the second call to
+# `run_with_ort_values` and the data needs to be copied from CPU
+# to GPU.
 
 if not has_cuda:
     print("With GPU")
diff --git a/_doc/sphinxdoc/source/tutorials/tutorial_parallel/index.rst b/_doc/sphinxdoc/source/tutorials/tutorial_parallel/index.rst
index 57a3f61..9965c36 100644
--- a/_doc/sphinxdoc/source/tutorials/tutorial_parallel/index.rst
+++ b/_doc/sphinxdoc/source/tutorials/tutorial_parallel/index.rst
@@ -26,19 +26,18 @@ The tutorial was tested with following version:
 .. runpython::
     :showcode:
 
+    import sys
     import numpy
     import scipy
     import onnx
     import onnxruntime
     import onnxcustom
+    import sklearn
     import torch
 
     print("python {}".format(sys.version_info))
-    mods = [numpy, scipy, sklearn, lightgbm, xgboost,
-            onnx, onnxmltools, onnxruntime, onnxcustom,
-            onnxconverter_common,
-            skl2onnx, mlprodict, pyquickhelper,
-            torch]
+    mods = [numpy, scipy, sklearn, onnx,
+            onnxruntime, onnxcustom, torch]
     mods = [(m.__name__, m.__version__) for m in mods]
     mx = max(len(_[0]) for _ in mods) + 1
     for name, vers in sorted(mods):