From 64d31fda37b8bc341e0b22ca276d2402f4022b50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 21 Jul 2020 00:23:45 +0200 Subject: [PATCH 1/9] Fixes #142, add operator BatchNormalization --- .../ut_onnxrt/test_onnxrt_python_runtime_.py | 43 +++++++++++++++++++ mlprodict/__init__.py | 2 +- mlprodict/onnxrt/ops_cpu/_op_list.py | 1 + .../onnxrt/ops_cpu/op_batch_normalization.py | 36 ++++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 mlprodict/onnxrt/ops_cpu/op_batch_normalization.py diff --git a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py index 6fec65530..e52e04090 100644 --- a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py +++ b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py @@ -22,6 +22,7 @@ from sklearn.utils.testing import ignore_warnings from skl2onnx.algebra.onnx_ops import ( # pylint: disable=E0611 OnnxAbs, OnnxAdd, OnnxArgMax, OnnxArgMin, + OnnxBatchNormalization, OnnxConcat, OnnxCeil, OnnxClip, OnnxConstant, OnnxConstantOfShape, OnnxDequantizeLinear, @@ -54,6 +55,7 @@ from mlprodict.tools.asv_options_helper import ( get_opset_number_from_onnx, get_ir_version_from_onnx) from mlprodict.onnxrt.validate.validate_python import validate_python_inference +from mlprodict.onnxrt.ops_cpu.op_batch_normalization import _batchnorm_test_mode from mlprodict.onnxrt.ops_cpu._op_onnx_numpy import ( # pylint: disable=E0611 topk_element_min_double, topk_element_max_double, topk_element_fetch_double, topk_element_min_float, topk_element_max_float, topk_element_fetch_float, @@ -393,6 +395,47 @@ def test_onnxt_runtime_argmin_12(self): self.assertEqualArray(numpy.array([2, 1], dtype=numpy.int64), got['Y'], decimal=6) + def test_onnxt_batch_normalization(self): + # input size: (1, 2, 1, 3) + x = numpy.array([[[[-1, 0, 1]], [[2, 3, 4]]]]).astype(numpy.float32) + s = numpy.array([1.0, 1.5]).astype(numpy.float32) + bias = numpy.array([0, 1]).astype(numpy.float32) + mean = numpy.array([0, 3]).astype(numpy.float32) + var = numpy.array([1, 1.5]).astype(numpy.float32) + y = _batchnorm_test_mode(x, s, bias, mean, var).astype(numpy.float32) + + onx = OnnxBatchNormalization( + 'X', s, bias, mean, var, output_names=['Y'], + op_version=get_opset_number_from_onnx()) + model_def = onx.to_onnx({'X': x.astype(numpy.float32)}, + target_opset=get_opset_number_from_onnx()) + oinf = OnnxInference(model_def) + got = oinf.run({'X': x}) + self.assertEqual(list(sorted(got)), ['Y']) + self.assertEqualArray(y, got['Y']) + + # input size: (2, 3, 4, 5) + x = numpy.random.randn(2, 3, 4, 5).astype(numpy.float32) + s = numpy.random.randn(3).astype(numpy.float32) + bias = numpy.random.randn(3).astype(numpy.float32) + mean = numpy.random.randn(3).astype(numpy.float32) + var = numpy.random.rand(3).astype(numpy.float32) + epsilon = 1e-2 + y = _batchnorm_test_mode( + x, s, bias, mean, var, epsilon).astype(numpy.float32) + + onx = OnnxBatchNormalization( + 'X', s, bias, mean, var, + output_names=['Y'], epsilon=epsilon, + op_version=get_opset_number_from_onnx()) + model_def = onx.to_onnx({'X': x.astype(numpy.float32)}, + target_opset=get_opset_number_from_onnx()) + oinf = OnnxInference(model_def) + got = oinf.run({'X': x}) + self.assertEqual(list(sorted(got)), ['Y']) + self.assertEqualArray(y, got['Y']) + python_tested.append(OnnxBatchNormalization) + def test_onnxt_runtime_ceil(self): self.common_test_onnxt_runtime_unary(OnnxCeil, numpy.ceil) diff --git a/mlprodict/__init__.py b/mlprodict/__init__.py index 
2995fb794..50e5777b6 100644 --- a/mlprodict/__init__.py +++ b/mlprodict/__init__.py @@ -4,7 +4,7 @@ @brief Ways to speed up predictions for a machine learned model. """ -__version__ = "0.4.1215" +__version__ = "0.4.1207" __author__ = "Xavier Dupré" diff --git a/mlprodict/onnxrt/ops_cpu/_op_list.py b/mlprodict/onnxrt/ops_cpu/_op_list.py index 283352162..573d19227 100644 --- a/mlprodict/onnxrt/ops_cpu/_op_list.py +++ b/mlprodict/onnxrt/ops_cpu/_op_list.py @@ -11,6 +11,7 @@ from .op_argmax import ArgMax from .op_argmin import ArgMin from .op_array_feature_extractor import ArrayFeatureExtractor +from .op_batch_normalization import BatchNormalization from .op_binarizer import Binarizer from .op_cast import Cast from .op_cdist import CDist diff --git a/mlprodict/onnxrt/ops_cpu/op_batch_normalization.py b/mlprodict/onnxrt/ops_cpu/op_batch_normalization.py new file mode 100644 index 000000000..a7f6dd083 --- /dev/null +++ b/mlprodict/onnxrt/ops_cpu/op_batch_normalization.py @@ -0,0 +1,36 @@ +# -*- encoding: utf-8 -*- +# pylint: disable=E0203,E1101,C0111 +""" +@file +@brief Runtime operator. +""" +import numpy +from ._op import OpRun + + +def _batchnorm_test_mode(x, s, bias, mean, var, epsilon=1e-5): + dims_x = len(x.shape) + dim_ones = (1,) * (dims_x - 2) + s = s.reshape(-1, *dim_ones) + bias = bias.reshape(-1, *dim_ones) + mean = mean.reshape(-1, *dim_ones) + var = var.reshape(-1, *dim_ones) + return s * (x - mean) / numpy.sqrt(var + epsilon) + bias + + +class BatchNormalization(OpRun): + + atts = {'epsilon': 1e-5, 'momentum': 0.9} + + def __init__(self, onnx_node, desc=None, **options): + OpRun.__init__(self, onnx_node, desc=desc, + expected_attributes=BatchNormalization.atts, + **options) + + def _run(self, x, scale, bias, mean, var): # pylint: disable=W0221 + res = _batchnorm_test_mode( + x, scale, bias, mean, var, epsilon=self.epsilon) + return (res, ) + + def _infer_shapes(self, x, scale, bias, mean, var): # pylint: disable=W0221 + return (x, ) From 74ad29fd73cec323253616556cc40007f8283b4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 21 Jul 2020 10:40:46 +0200 Subject: [PATCH 2/9] Fixes #143, add operator GlobalAveragePool --- .../ut_onnxrt/test_onnxrt_python_runtime_.py | 37 ++++++++++++++++++- mlprodict/__init__.py | 2 +- mlprodict/onnxrt/ops_cpu/_op_list.py | 1 + .../onnxrt/ops_cpu/op_global_average_pool.py | 33 +++++++++++++++++ 4 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 mlprodict/onnxrt/ops_cpu/op_global_average_pool.py diff --git a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py index e52e04090..bbdd667c7 100644 --- a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py +++ b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py @@ -29,7 +29,7 @@ OnnxDiv, OnnxEinsum, OnnxEqual, OnnxErf, OnnxExp, OnnxEyeLike, OnnxFlatten, OnnxFloor, - OnnxGreater, OnnxGemm, + OnnxGreater, OnnxGemm, OnnxGlobalAveragePool, OnnxIdentity, OnnxIsNaN, OnnxLog, OnnxLpNormalization, OnnxMatMul, OnnxMax, OnnxMean, OnnxMin, OnnxMul, @@ -56,6 +56,7 @@ get_opset_number_from_onnx, get_ir_version_from_onnx) from mlprodict.onnxrt.validate.validate_python import validate_python_inference from mlprodict.onnxrt.ops_cpu.op_batch_normalization import _batchnorm_test_mode +from mlprodict.onnxrt.ops_cpu.op_global_average_pool import _global_average_pool from mlprodict.onnxrt.ops_cpu._op_onnx_numpy import ( # pylint: disable=E0611 topk_element_min_double, topk_element_max_double, topk_element_fetch_double, 
topk_element_min_float, topk_element_max_float, topk_element_fetch_float, @@ -395,7 +396,7 @@ def test_onnxt_runtime_argmin_12(self): self.assertEqualArray(numpy.array([2, 1], dtype=numpy.int64), got['Y'], decimal=6) - def test_onnxt_batch_normalization(self): + def test_onnxt_runtime_batch_normalization(self): # input size: (1, 2, 1, 3) x = numpy.array([[[[-1, 0, 1]], [[2, 3, 4]]]]).astype(numpy.float32) s = numpy.array([1.0, 1.5]).astype(numpy.float32) @@ -899,6 +900,38 @@ def do_test_onnxt_runtime_gemm(self, runtime): self.assertEqualArray(numpy.dot(X, idi.T) + cst, got['Y'], decimal=6) + def test_onnxt_runtime_global_average_pool(self): + x = numpy.random.randn(1, 3, 5, 5).astype(numpy.float32) + y = _global_average_pool(x).astype(numpy.float32) + + onx = OnnxGlobalAveragePool( + 'X', output_names=['Y'], + op_version=get_opset_number_from_onnx()) + model_def = onx.to_onnx({'X': x.astype(numpy.float32)}, + target_opset=get_opset_number_from_onnx()) + oinf = OnnxInference(model_def) + got = oinf.run({'X': x}) + self.assertEqual(list(sorted(got)), ['Y']) + self.assertEqualArray(y, got['Y']) + + x = numpy.array([[[ + [1, 2, 3], + [4, 5, 6], + [7, 8, 9], + ]]]).astype(numpy.float32) + y = numpy.array([[[[5]]]]).astype(numpy.float32) + onx = OnnxGlobalAveragePool( + 'X', output_names=['Y'], + op_version=get_opset_number_from_onnx()) + model_def = onx.to_onnx({'X': x.astype(numpy.float32)}, + target_opset=get_opset_number_from_onnx()) + oinf = OnnxInference(model_def) + got = oinf.run({'X': x}) + self.assertEqual(list(sorted(got)), ['Y']) + self.assertEqualArray(y, got['Y']) + + python_tested.append(OnnxGlobalAveragePool) + def test_onnxt_runtime_greater(self): self.common_test_onnxt_runtime_binary(OnnxGreater, numpy.greater) diff --git a/mlprodict/__init__.py b/mlprodict/__init__.py index 50e5777b6..dcded8681 100644 --- a/mlprodict/__init__.py +++ b/mlprodict/__init__.py @@ -4,7 +4,7 @@ @brief Ways to speed up predictions for a machine learned model. """ -__version__ = "0.4.1207" +__version__ = "0.4.1208" __author__ = "Xavier Dupré" diff --git a/mlprodict/onnxrt/ops_cpu/_op_list.py b/mlprodict/onnxrt/ops_cpu/_op_list.py index 573d19227..628ebb734 100644 --- a/mlprodict/onnxrt/ops_cpu/_op_list.py +++ b/mlprodict/onnxrt/ops_cpu/_op_list.py @@ -36,6 +36,7 @@ from .op_gather import Gather from .op_gather_elements import GatherElements from .op_gemm import Gemm +from .op_global_average_pool import GlobalAveragePool from .op_greater import Greater from .op_floor import Floor from .op_identity import Identity diff --git a/mlprodict/onnxrt/ops_cpu/op_global_average_pool.py b/mlprodict/onnxrt/ops_cpu/op_global_average_pool.py new file mode 100644 index 000000000..dfa6cce69 --- /dev/null +++ b/mlprodict/onnxrt/ops_cpu/op_global_average_pool.py @@ -0,0 +1,33 @@ +# -*- encoding: utf-8 -*- +# pylint: disable=E0203,E1101,C0111 +""" +@file +@brief Runtime operator.
+""" +import numpy +from ..shape_object import ShapeObject +from ._op import OpRun + + +def _global_average_pool(x): + spatial_shape = numpy.ndim(x) - 2 + y = numpy.average( + x, axis=tuple(range(spatial_shape, spatial_shape + 2))) + for _ in range(spatial_shape): + y = numpy.expand_dims(y, -1) + return y + + +class GlobalAveragePool(OpRun): + + def __init__(self, onnx_node, desc=None, **options): + OpRun.__init__(self, onnx_node, desc=desc, + **options) + + def _run(self, x): # pylint: disable=W0221 + res = _global_average_pool(x) + return (res, ) + + def _infer_shapes(self, x): # pylint: disable=W0221 + shape = x.shape[:2] + (1, ) * (len(x.shape) - 2) + return (ShapeObject(shape, dtype=x.dtype), ) From 5a352e93f636d692913fa5597e04605e8b2f8651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 21 Jul 2020 13:48:46 +0200 Subject: [PATCH 3/9] add runtime python_compiled_debug --- _unittests/ut_module/test_dl_mobilenet.py | 47 ++++++++++++ _unittests/ut_onnxrt/test_onnxrt_compiled.py | 21 ++++++ .../ut_onnxrt/test_onnxrt_python_runtime_.py | 31 +++++++- mlprodict/__init__.py | 2 +- mlprodict/onnxrt/onnx_inference.py | 59 +++++++++++---- mlprodict/onnxrt/onnx_inference_node.py | 2 +- mlprodict/onnxrt/ops_cpu/op_conv.py | 14 +++- .../onnxrt/ops_cpu/op_global_average_pool.py | 2 + mlprodict/onnxrt/ops_cpu/op_reshape.py | 11 ++- mlprodict/onnxrt/shape_object.py | 31 ++++++++ mlprodict/onnxrt/validate/validate_python.py | 4 +- mlprodict/tools/code_helper.py | 74 ++++++++++++++++++- requirements.txt | 1 + 13 files changed, 276 insertions(+), 23 deletions(-) create mode 100644 _unittests/ut_module/test_dl_mobilenet.py diff --git a/_unittests/ut_module/test_dl_mobilenet.py b/_unittests/ut_module/test_dl_mobilenet.py new file mode 100644 index 000000000..1147580cf --- /dev/null +++ b/_unittests/ut_module/test_dl_mobilenet.py @@ -0,0 +1,47 @@ +""" +@brief test log(time=6s) +""" +import unittest +import numpy +from pyquickhelper.pycode import ExtTestCase +from pyensae.datasource import download_data +from mlprodict.onnxrt import OnnxInference + + +class TestLONGMobileNet(ExtTestCase): + + def test_mobilenet(self): + src = ("https://s3.amazonaws.com/onnx-model-zoo/mobilenet/" + "mobilenetv2-1.0/") + model_file = "mobilenetv2-1.0.onnx" + download_data(model_file, website=src) + X = numpy.random.rand(1, 3, 224, 224).astype(dtype=numpy.float32) + res = [] + for i, rt in enumerate(['python', 'python_compiled_debug', + 'python_compiled', 'onnxruntime1']): + oinf = OnnxInference(model_file, runtime=rt) + self.assertNotEmpty(oinf) + self.assertEqual(oinf.input_names[:1], ['data']) + if hasattr(oinf, 'inits_'): + self.assertIn( + "mobilenetv20_features_conv0_weight", oinf.inits_) + self.assertEqualArray( + (0, -1), oinf.inits_["reshape_attr_tensor421"]['value']) + name = oinf.input_names[0] + out = oinf.output_names[0] + Y = oinf.run({name: X}) + if any(map(numpy.isnan, Y[out].ravel())): + raise AssertionError( + "Runtime {}:{} produces NaN.\n{}".format(i, rt, Y[out])) + res.append((rt, Y[out])) + for rt, r in res[1:]: + try: + self.assertEqual(r[0].shape, r.shape) + self.assertEqualArray(r[0], r) + except AssertionError as e: + raise AssertionError( + "Issue with runtime: '{}'.".format(rt)) from e + + +if __name__ == "__main__": + unittest.main() diff --git a/_unittests/ut_onnxrt/test_onnxrt_compiled.py b/_unittests/ut_onnxrt/test_onnxrt_compiled.py index 0171db0e1..759abad04 100644 --- a/_unittests/ut_onnxrt/test_onnxrt_compiled.py +++ b/_unittests/ut_onnxrt/test_onnxrt_compiled.py @@ 
-41,6 +41,27 @@ def test_onnxt_idi(self): self.assertIn('(Y, ) = n0_add(X, Ad_Addcst)', code) self.assertIn(' def compiled_run(dict_inputs):', str(oinf)) + def test_onnxt_idi_debug(self): + idi = numpy.identity(2) + onx = OnnxAdd('X', idi, output_names=['Y'], + op_version=get_opset_number_from_onnx()) + model_def = onx.to_onnx({'X': idi.astype(numpy.float32)}) + + oinf = OnnxInference(model_def, runtime="python_compiled_debug") + res, out, err = self.capture( + lambda: oinf.run({'X': idi.astype(numpy.float32)})) + self.assertEmpty(err) + self.assertIn("-='i.X'", out) + self.assertIn("-='o.Y'", out) + self.assertEqual(idi * 2, res['Y']) + self.assertIn('_run_compiled', oinf.__dict__) + self.assertIn('_run_compiled_code', oinf.__dict__) + code = oinf._run_compiled_code # pylint: disable=W0212,E1101 + self.assertIsInstance(code, str) + self.assertIn('def compiled_run(dict_inputs):', code) + self.assertIn('(Y, ) = n0_add(X, Ad_Addcst)', code) + self.assertIn(' def compiled_run(dict_inputs):', str(oinf)) + @skipif_circleci('fails to finish') def test_onnxt_iris_adaboost_regressor_dt(self): iris = load_iris() diff --git a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py index bbdd667c7..0e720264c 100644 --- a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py +++ b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py @@ -23,7 +23,7 @@ from skl2onnx.algebra.onnx_ops import ( # pylint: disable=E0611 OnnxAbs, OnnxAdd, OnnxArgMax, OnnxArgMin, OnnxBatchNormalization, - OnnxConcat, + OnnxConcat, OnnxConv, OnnxCeil, OnnxClip, OnnxConstant, OnnxConstantOfShape, OnnxDequantizeLinear, OnnxDiv, @@ -546,6 +546,35 @@ def test_onnxt_runtime_constant_of_shape(self): oinfpy = OnnxInference(model_def, runtime="python", inplace=True) validate_python_inference(oinfpy, {'X': x}) + def test_onnxt_runtime_conv(self): + x = numpy.array([[[[0., 1., 2., 3., 4.], # (1, 1, 5, 5) input tensor + [5., 6., 7., 8., 9.], + [10., 11., 12., 13., 14.], + [15., 16., 17., 18., 19.], + [20., 21., 22., 23., 24.]]]]).astype(numpy.float32) + W = numpy.array([[[[1., 1., 1.], # (1, 1, 3, 3) tensor for convolution weights + [1., 1., 1.], + [1., 1., 1.]]]]).astype(numpy.float32) + + y_with_padding = numpy.array([[[[12., 21., 27., 33., 24.], # (1, 1, 5, 5) output tensor + [33., 54., 63., 72., 51.], + [63., 99., 108., 117., 81.], + [93., 144., 153., 162., 111.], + [72., 111., 117., 123., 84.]]]]).astype(numpy.float32) + + onx = OnnxConv( + 'X', W, output_names=['Y'], + kernel_shape=[3, 3], pads=[1, 1, 1, 1], + op_version=get_opset_number_from_onnx()) + model_def = onx.to_onnx({'X': x.astype(numpy.float32)}, + target_opset=get_opset_number_from_onnx()) + oinf = OnnxInference(model_def) + got = oinf.run({'X': x}) + self.assertEqual(list(sorted(got)), ['Y']) + self.assertEqualArray(y_with_padding, got['Y']) + + python_tested.append(OnnxConv) + def test_onnxt_runtime_cum_sum(self): from skl2onnx.algebra.onnx_ops import OnnxCumSum # pylint: disable=E0611 diff --git a/mlprodict/__init__.py b/mlprodict/__init__.py index dcded8681..edf340154 100644 --- a/mlprodict/__init__.py +++ b/mlprodict/__init__.py @@ -4,7 +4,7 @@ @brief Ways to speed up predictions for a machine learned model. 
""" -__version__ = "0.4.1208" +__version__ = "0.4.1209" __author__ = "Xavier Dupré" diff --git a/mlprodict/onnxrt/onnx_inference.py b/mlprodict/onnxrt/onnx_inference.py index e4a6d9460..9e26ffbb2 100644 --- a/mlprodict/onnxrt/onnx_inference.py +++ b/mlprodict/onnxrt/onnx_inference.py @@ -159,9 +159,9 @@ def _init(self): self.to_dot = self.exporters_.to_dot self.to_python = self.exporters_.to_python - if self.runtime == 'python_compiled': + if self.runtime in ('python_compiled', 'python_compiled_debug'): # switch the inference method to the compiled one - _, fct, code = self._build_compile_run() + _, fct, code = self._build_compile_run('debug' in self.runtime) setattr(self, '_run_compiled', fct) setattr(self, '_run_compiled_code', code) self._run = self._run_sequence_runtime_compiled @@ -235,7 +235,8 @@ def input_names_shapes(self): Returns the names and shapes of all inputs. This method assumes all inputs are tensors. """ - return [(_.name, _var_as_dict(_)['type']['shape']) for _ in self.obj.graph.input] + return [(_.name, _var_as_dict(_)['type']['shape']) + for _ in self.obj.graph.input] @property def input_names_shapes_types(self): @@ -260,7 +261,8 @@ def output_names_shapes(self): Returns the names and shapes of all outputs. This method assumes all inputs are tensors. """ - return [(_.name, _var_as_dict(_)['type']['shape']) for _ in self.obj.graph.output] + return [(_.name, _var_as_dict(_)['type']['shape']) + for _ in self.obj.graph.output] def global_index(self, name): """ @@ -962,15 +964,16 @@ def _guess_inplace(self, input_inplace=False): return inplaces - def _build_compile_run(self): + def _build_compile_run(self, debug=False): """ Rewrite the run function in python, compiles it, and adds it as a method. - @return method name, callable object + @param debug insert debugging code + @return method name, callable object .. exref:: - :title: Run a model with runtime python_compile + :title: Run a model with runtime 'python_compiled' The following code trains a model and compute the predictions with runtime ``'python_compiled'``. 
@@ -1008,17 +1011,41 @@ def _build_compile_run(self): print(oinf2) """ # inits + inputs = self.input_names code = ['def compiled_run(dict_inputs):'] + if debug: + code.append(" printed = {}") context = {} for k, v in self.inits_.items(): - context[k] = v['value'] - code.append(" # init: {0}".format(k)) + if k.startswith("_OPT_"): + raise RuntimeError( # pragma: no cover + "The runtime cannot handle any constant name " + "starting with '_OPT_': '{}'.".format(k)) + if k in inputs: + context["_OPT_" + k] = v['value'] + code.append(" # init: _OPT_{0}".format(k)) + if debug: + code.append( + " debug_print('c.[_OPT_{0}]', _OPT_{0}, printed)".format(k)) + else: + context[k] = v['value'] + code.append(" # init: {0}".format(k)) + if debug: + code.append( + " debug_print('c.[{0}]', {0}, printed)".format(k)) # method signature - inputs = self.input_names code.append(" # inputs") for inp in inputs: - code.append(" {0} = dict_inputs['{0}']".format(inp)) + if '_OPT_' + inp in context: + # optional inputs + code.append( + " {0} = dict_inputs.get('{0}', _OPT_{0})".format(inp)) + else: + code.append(" {0} = dict_inputs['{0}']".format(inp)) + if debug: + code.append( + " debug_print('i.{0}', {0}, printed)".format(inp)) # code for i, node in enumerate(self.sequence_): @@ -1026,6 +1053,11 @@ def _build_compile_run(self): context[name] = node.ops_._run code.append(' ({1}, ) = {2}({0})'.format( ', '.join(node.inputs), ', '.join(node.outputs), name)) + if debug: + code.append(" print('''# {}''')".format(code[-1][4:])) + for o in node.outputs: + code.append( + " debug_print('o.{0}', {0}, printed)".format(o)) # return code.append(' return {') @@ -1039,7 +1071,8 @@ def _build_compile_run(self): obj = compile(final_code, "", 'exec') fcts_obj = [_ for _ in obj.co_consts if _ is not None and not isinstance(_, (bool, str, int))] - fct = make_callable("compiled_run", fcts_obj[0], final_code, context) + fct = make_callable( + "compiled_run", fcts_obj[0], final_code, context, debug) # end return "compiled_run", fct, final_code @@ -1054,6 +1087,6 @@ def reduce_size(self, pickable=False): del self.graph_ if not pickable: del self.obj - if self.runtime == 'python_compiled': + if self.runtime in ('python_compiled', 'python_compiled_debug'): del self.sequence_ gc.collect() diff --git a/mlprodict/onnxrt/onnx_inference_node.py b/mlprodict/onnxrt/onnx_inference_node.py index 17d3e0511..c305dd761 100644 --- a/mlprodict/onnxrt/onnx_inference_node.py +++ b/mlprodict/onnxrt/onnx_inference_node.py @@ -102,7 +102,7 @@ def setup_runtime(self, runtime=None, variables=None, rt_class=None, self.ops_ = load_op(self.onnx_node, desc=self.desc, options=options if options else None, variables=variables, dtype=dtype) - elif runtime == 'python_compiled': + elif runtime in ('python_compiled', 'python_compiled_debug'): options['provider'] = 'python' self.ops_ = load_op(self.onnx_node, desc=self.desc, options=options if options else None, diff --git a/mlprodict/onnxrt/ops_cpu/op_conv.py b/mlprodict/onnxrt/ops_cpu/op_conv.py index dece134a7..5c6b3f5f4 100644 --- a/mlprodict/onnxrt/ops_cpu/op_conv.py +++ b/mlprodict/onnxrt/ops_cpu/op_conv.py @@ -6,7 +6,7 @@ """ import numpy from ._op import OpRun -# from ..shape_object import ShapeObject +from ..shape_object import ShapeObjectFct from .op_conv_ import ConvFloat, ConvDouble # pylint: disable=E0611 @@ -41,5 +41,13 @@ def _run(self, X, W, B=None): # pylint: disable=W0221 return (self.rt64_.compute(X, W, B), ) def _infer_shapes(self, X, W, B=None): # pylint: disable=W0221 - raise NotImplementedError() # 
pragma: no cover - # return (args[0].concat_columns(self.axis, *(args[1:])), ) + + def compute_shape(xshape, wshape, bshape): + xs = numpy.ones(xshape, dtype=numpy.float32) + ws = numpy.ones(wshape, dtype=numpy.float32) + bs = (numpy.ones(bshape, dtype=numpy.float32) + if bshape is not None else None) + res = self.rt32_.compute(xs, ws, bs) + return res.shape + + return (ShapeObjectFct(compute_shape, X, W, B, name="Conv", dtype=X.dtype), ) diff --git a/mlprodict/onnxrt/ops_cpu/op_global_average_pool.py b/mlprodict/onnxrt/ops_cpu/op_global_average_pool.py index dfa6cce69..c93b63db9 100644 --- a/mlprodict/onnxrt/ops_cpu/op_global_average_pool.py +++ b/mlprodict/onnxrt/ops_cpu/op_global_average_pool.py @@ -29,5 +29,7 @@ def _run(self, x): # pylint: disable=W0221 return (res, ) def _infer_shapes(self, x): # pylint: disable=W0221 + if x.shape is None: + return (ShapeObject(None, dtype=x.dtype), ) shape = x.shape[:2] + (1, ) * (len(x.shape) - 2) return (ShapeObject(shape, dtype=x.dtype), ) diff --git a/mlprodict/onnxrt/ops_cpu/op_reshape.py b/mlprodict/onnxrt/ops_cpu/op_reshape.py index 222c1564d..544f2ec80 100644 --- a/mlprodict/onnxrt/ops_cpu/op_reshape.py +++ b/mlprodict/onnxrt/ops_cpu/op_reshape.py @@ -4,17 +4,26 @@ @file @brief Runtime operator. """ +import numpy from ._op import OpRun from ..shape_object import ShapeObject + +def reshape_reference_implementation(data, shape): + new_shape = numpy.copy(shape) + zeros_index = numpy.where(shape == 0) + new_shape[zeros_index] = numpy.array(data.shape)[zeros_index] + reshaped = numpy.reshape(data, new_shape) + return reshaped + + class Reshape(OpRun): def __init__(self, onnx_node, desc=None, **options): OpRun.__init__(self, onnx_node, desc=desc, **options) def _run(self, data, shape): # pylint: disable=W0221 - return (data.reshape(shape), ) + return (reshape_reference_implementation(data, shape), ) def _infer_shapes(self, data, shape): # pylint: disable=W0221 return (ShapeObject(None, dtype=data.dtype), ) diff --git a/mlprodict/onnxrt/shape_object.py b/mlprodict/onnxrt/shape_object.py index cf4465d2c..a524389fc 100644 --- a/mlprodict/onnxrt/shape_object.py +++ b/mlprodict/onnxrt/shape_object.py @@ -880,3 +880,34 @@ def gather_shape(input, indices, axis): shape.append(input[i]) return ShapeObject(shape, dtype=input._dtype) + + +class ShapeObjectFct(ShapeObject): + """ + Computes a shape depending on a user-defined function. + See @see cl Conv for an example. + """ + + def __init__(self, fct, *shapes, dtype=None, name=None): + """ + @param fct function + @param shapes shapes sent to fct + @param dtype dtype + @param name optional, for debugging purposes + """ + ShapeObject.__init__(self, None, dtype=dtype, name=name) + self._fct = fct + self._shapes = shapes + + def evaluate(self, **kwargs): + """ + Evaluates the shape. + """ + vs = [] + for v in self._shapes: + d = v.evaluate(**kwargs) + vs.append(d) + res = self._fct(*vs) + if self.name is not None: + res.name = self.name + return res diff --git a/mlprodict/onnxrt/validate/validate_python.py b/mlprodict/onnxrt/validate/validate_python.py index 0e4965906..052229a52 100644 --- a/mlprodict/onnxrt/validate/validate_python.py +++ b/mlprodict/onnxrt/validate/validate_python.py @@ -10,7 +10,7 @@ from ...tools.code_helper import make_callable -def _make_callable(fct, obj, code, gl): +def _make_callable(fct, obj, code, gl, debug): """ Same function as @see fn make_callable but deals with functions which have an undefined number of arguments.
@@ -20,7 +20,7 @@ def pyrt_Concat_(*inputs, axis=0): if fct == "pyrt_Concat": return pyrt_Concat_ - return make_callable(fct, obj, code, gl) + return make_callable(fct, obj, code, gl, debug) def validate_python_inference(oinf, inputs, tolerance=0.): diff --git a/mlprodict/tools/code_helper.py b/mlprodict/tools/code_helper.py index 4a13dd8ba..e67e5a4ea 100644 --- a/mlprodict/tools/code_helper.py +++ b/mlprodict/tools/code_helper.py @@ -20,7 +20,72 @@ def change_style(name): return s2 if not keyword.iskeyword(s2) else s2 + "_" -def make_callable(fct, obj, code, gl): +def numpy_min_max(x, fct): + """ + Returns the minimum or the maximum of an array depending on *fct*. + Deals with text as well. + """ + try: + if hasattr(x, 'todense'): + x = x.todense() + if x.dtype.kind.lower() not in 'uc': + return fct(x) + try: # pragma: no cover + x = x.ravel() + except AttributeError: # pragma: no cover + pass + keep = list(filter(lambda s: isinstance(s, str), x)) + if len(keep) == 0: # pragma: no cover + return numpy.nan + keep.sort() + val = keep[0] + if len(val) > 10: # pragma: no cover + val = val[:10] + '...' + return "%r" % val + except (ValueError, TypeError): + return '?' + + +def numpy_min(x): + """ + Returns the minimum of an array. + Deals with text as well. + """ + return numpy_min_max(x, lambda x: x.min()) + + +def numpy_max(x): + """ + Returns the maximum of an array. + Deals with text as well. + """ + return numpy_min_max(x, lambda x: x.max()) + + +def debug_print(k, obj, printed): + """ + Displays information on an object. + + @param k name + @param obj object + @param printed memorizes objects already printed + """ + if k not in printed: + printed[k] = obj + if hasattr(obj, 'shape'): + print("-='{}' shape={} dtype={} min={} max={}{}".format( + k, obj.shape, obj.dtype, numpy_min(obj), + numpy_max(obj), + ' (sparse)' if 'coo_matrix' in str(type(obj)) else '')) + elif (isinstance(obj, list) and len(obj) > 0 and + not isinstance(obj[0], dict)): + print("-='{}' list len={} min={} max={}".format( + k, len(obj), min(obj), max(obj))) + else: + print("-='{}' type={}".format(k, type(obj))) + + +def make_callable(fct, obj, code, gl, debug): """ Creates a callable function able to cope with default values as the combination @@ -31,6 +96,7 @@ def make_callable(fct, obj, code, gl): @param obj output of function *compile* @param code code including the signature @param gl context (local and global) + @param debug adds debug functions to the context @return callable functions """ cst = "def " + fct + "(" @@ -62,6 +128,12 @@ def make_callable(fct, obj, code, gl): if int(f) == f: f = int(f) defs.append((name, f)) + + # debug + if debug: + gl = gl.copy() + gl['debug_print'] = debug_print + gl['print'] = print # specific if "value=array([0.], dtype=float32)" in sig: defs.append(('value', numpy.array([0.], dtype=numpy.float32))) diff --git a/requirements.txt b/requirements.txt index b1ef85cfa..346309b5e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ onnx>=1.7.0 git+https://github.com/xadupre/onnxconverter-common.git@jenkins openpyxl pybind11 +pyensae py-cpuinfo pyinstrument pylint>=2.4.2 From b787581a0dd296614ca53a298baafbea6397f4b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 21 Jul 2020 15:52:48 +0200 Subject: [PATCH 4/9] pep8 --- mlprodict/onnxrt/onnx2py_helper.py | 4 ++-- mlprodict/onnxrt/validate/validate_python.py | 2 +- mlprodict/tools/code_helper.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlprodict/onnxrt/onnx2py_helper.py b/mlprodict/onnxrt/onnx2py_helper.py index
561ebd848..310a108f5 100644 --- a/mlprodict/onnxrt/onnx2py_helper.py +++ b/mlprodict/onnxrt/onnx2py_helper.py @@ -321,7 +321,7 @@ def numpy_min(x): try: if hasattr(x, 'todense'): x = x.todense() - if x.dtype.kind.lower() not in 'uc': + if x.dtype.kind.lower() not in 'c': return x.min() try: # pragma: no cover x = x.ravel() @@ -347,7 +347,7 @@ def numpy_max(x): try: if hasattr(x, 'todense'): x = x.todense() - if x.dtype.kind.lower() not in 'uc': + if x.dtype.kind.lower() not in 'c': return x.max() try: # pragma: no cover x = x.ravel() diff --git a/mlprodict/onnxrt/validate/validate_python.py b/mlprodict/onnxrt/validate/validate_python.py index 052229a52..e0c5496f6 100644 --- a/mlprodict/onnxrt/validate/validate_python.py +++ b/mlprodict/onnxrt/validate/validate_python.py @@ -71,7 +71,7 @@ def validate_python_inference(oinf, inputs, tolerance=0.): continue sobj = str(obj) if '' in sobj and fct in sobj: - fcts_local[fct] = _make_callable(fct, obj, code, gl) + fcts_local[fct] = _make_callable(fct, obj, code, gl, False) gl.update(fcts_local) loc = inputs diff --git a/mlprodict/tools/code_helper.py b/mlprodict/tools/code_helper.py index e67e5a4ea..b3bc103a1 100644 --- a/mlprodict/tools/code_helper.py +++ b/mlprodict/tools/code_helper.py @@ -28,7 +28,7 @@ def numpy_min_max(x, fct): try: if hasattr(x, 'todense'): x = x.todense() - if x.dtype.kind.lower() not in 'uc': + if x.dtype.kind.lower() not in 'c': return fct(x) try: # pragma: no cover x = x.ravel() From 7701e0aac61e5ab8d576b2354f202da1298aa956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 22 Jul 2020 17:32:16 +0200 Subject: [PATCH 5/9] Update test_onnxrt_python_runtime_custom.py --- _unittests/ut_onnxrt/test_onnxrt_python_runtime_custom.py | 1 + 1 file changed, 1 insertion(+) diff --git a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_custom.py b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_custom.py index e08d92364..c91c575f8 100644 --- a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_custom.py +++ b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_custom.py @@ -90,6 +90,7 @@ def test_onnxt_runtime_solve(self): validate_python_inference( oinfpy, {'A': A.astype(numpy.float32), 'Y': Y.astype(numpy.float32)}) + python_tested.append(OnnxSolve) if __name__ == "__main__": From e69d73a26ca4aac311ab3dcca3ccb644878b5950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 28 Jul 2020 19:00:24 +0200 Subject: [PATCH 6/9] style and unit test --- _doc/sphinxdoc/source/conf.py | 2 +- _doc/sphinxdoc/source/phdoc_static/my-styles.css | 2 +- _unittests/ut_onnxrt/test_onnxrt_python_runtime_.py | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/_doc/sphinxdoc/source/conf.py b/_doc/sphinxdoc/source/conf.py index 7487b80f5..67f76cb3d 100644 --- a/_doc/sphinxdoc/source/conf.py +++ b/_doc/sphinxdoc/source/conf.py @@ -16,7 +16,7 @@ try: import generate_visual_graphs import generate_automated_pages -except ImportError: +except ImportError: # pragma: no cover this = os.path.dirname(__file__) sys.path.append(os.path.join(this, '_exts')) import generate_visual_graphs diff --git a/_doc/sphinxdoc/source/phdoc_static/my-styles.css b/_doc/sphinxdoc/source/phdoc_static/my-styles.css index 9839a8e33..785aa4d5e 100644 --- a/_doc/sphinxdoc/source/phdoc_static/my-styles.css +++ b/_doc/sphinxdoc/source/phdoc_static/my-styles.css @@ -13,7 +13,7 @@ div.body ul { } div.body li { - line-height: 1em; + line-height: 1.1em; } .wy-nav-top { diff --git a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py 
b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py index 0e9916d90..649a1b930 100644 --- a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py +++ b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py @@ -400,7 +400,9 @@ def test_onnxt_runtime_atan(self): self.common_test_onnxt_runtime_unary(OnnxAtan, numpy.arctan) def test_onnxt_runtime_atan2(self): - test_pairs = [[y, x] for x in [3., -4., 0.] for y in [5., -6., 0.]] + test_pairs = [[y, x] + for x in [3., -4., 0., -1., 1.] + for y in [5., -6., 0., -1., 1.]] y_val = numpy.array([y for y, x in test_pairs], dtype=numpy.float32) x_val = numpy.array([x for y, x in test_pairs], dtype=numpy.float32) From 95b9c7decd782c40b6c3bf27df3297bafc6d3382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 29 Jul 2020 17:00:10 +0200 Subject: [PATCH 7/9] Fixes #150, fix missing initialization of output result --- _unittests/ut_module/test_dl_mobilenet.py | 8 ++- _unittests/ut_onnxrt/test_cpu_ops.py | 36 +++++++++++- _unittests/ut_tools/test_code_helper.py | 35 ++++++++++++ mlprodict/onnxrt/onnx_inference.py | 2 +- mlprodict/onnxrt/ops_cpu/op_conv.py | 3 +- mlprodict/onnxrt/ops_cpu/op_conv_.cpp | 57 ++++++++++--------- .../onnxrt/ops_cpu/op_conv_matrices_.hpp | 28 +++++---- mlprodict/tools/code_helper.py | 47 +++++++++++++++ 8 files changed, 173 insertions(+), 43 deletions(-) create mode 100644 _unittests/ut_tools/test_code_helper.py diff --git a/_unittests/ut_module/test_dl_mobilenet.py b/_unittests/ut_module/test_dl_mobilenet.py index 1147580cf..a2572b013 100644 --- a/_unittests/ut_module/test_dl_mobilenet.py +++ b/_unittests/ut_module/test_dl_mobilenet.py @@ -1,5 +1,5 @@ """ -@brief test log(time=6s) +@brief test log(time=7s) """ import unittest import numpy @@ -35,9 +35,11 @@ def test_mobilenet(self): "Runtime {}:{} produces NaN.\n{}".format(i, rt, Y[out])) res.append((rt, Y[out])) for rt, r in res[1:]: + exp = numpy.squeeze(res[0][1]) + got = numpy.squeeze(r) try: - self.assertEqual(res[0][1].shape, r.shape) - self.assertEqualArray(res[0][1], r) + self.assertEqual(exp.shape, got.shape) + self.assertEqualArray(got, exp) except AssertionError as e: raise AssertionError( "Issue with runtime: '{}'.".format(rt)) from e diff --git a/_unittests/ut_onnxrt/test_cpu_ops.py b/_unittests/ut_onnxrt/test_cpu_ops.py index 94a7b4a37..7f8e3b574 100644 --- a/_unittests/ut_onnxrt/test_cpu_ops.py +++ b/_unittests/ut_onnxrt/test_cpu_ops.py @@ -1,13 +1,17 @@ """ -@brief test log(time=2s) +@brief test log(time=3s) """ import unittest from logging import getLogger import numpy import onnx from pyquickhelper.pycode import ExtTestCase +from skl2onnx.algebra.onnx_ops import ( # pylint: disable=E0611 + OnnxConv) from mlprodict.onnxrt.ops_cpu.op_conv import Conv from mlprodict.onnxrt.onnx2py_helper import _var_as_dict +from mlprodict.tools.asv_options_helper import get_opset_number_from_onnx +from mlprodict.onnxrt import OnnxInference class TestCpuOps(ExtTestCase): @@ -46,6 +50,36 @@ def test_cpu_conv(self): [72., 111., 117., 123., 84.]]]]).astype(numpy.float32) self.assertEqualArray(exp, got) + def test_cpu_conv_init(self): + x = numpy.random.rand(1, 96, 56, 56).astype(numpy.float32) + W = numpy.random.rand(24, 96, 1, 1).astype(numpy.float32) + + onx = OnnxConv( + 'X', W, output_names=['Y'], + auto_pad='NOTSET', group=1, dilations=[1, 1], + kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], + op_version=get_opset_number_from_onnx()) + model_def = onx.to_onnx({'X': x.astype(numpy.float32), + 'W': W.astype(numpy.float32)}, + target_opset=get_opset_number_from_onnx())
+ oinf = OnnxInference(model_def) + oinfrt = OnnxInference(model_def, runtime='onnxruntime1') + for _ in range(0, 3): + x = numpy.random.rand(1, 96, 56, 56).astype(numpy.float32) + W = numpy.random.rand(24, 96, 1, 1).astype(numpy.float32) + got = oinf.run({'X': x, 'W': W}) + gotrt = oinfrt.run({'X': x, 'W': W}) + diff = list(numpy.abs((gotrt['Y'] - got['Y']).ravel())) + sdiff = list(sorted(diff)) + if sdiff[-1] > 1e-5: + raise AssertionError("runtimes disagree {}".format(sdiff[-5:])) + for ii in range(len(diff)): # pylint: disable=C0200 + if numpy.isnan(diff[ii]): + raise AssertionError( + "runtimes disagree about nan {}: {} # {} ? {}".format( + ii, diff[ii], gotrt['Y'].ravel()[ii], got['Y'].ravel()[ii])) + self.assertEqualArray(gotrt['Y'], got['Y'], decimal=5) + if __name__ == "__main__": unittest.main() diff --git a/_unittests/ut_tools/test_code_helper.py b/_unittests/ut_tools/test_code_helper.py new file mode 100644 index 000000000..1dc2f4fbe --- /dev/null +++ b/_unittests/ut_tools/test_code_helper.py @@ -0,0 +1,35 @@ +""" +@brief test log(time=8s) +""" +import os +import unittest +import pickle +import numpy +from pyquickhelper.pycode import ExtTestCase, get_temp_folder +from mlprodict.tools.code_helper import debug_print, debug_dump + + +class TestCodeHelper(ExtTestCase): + + def test_debug_print(self): + _, out, err = self.capture( + lambda: debug_print('r', numpy.array([0, 1], dtype=numpy.float32), {})) + self.assertIn("'r'", out) + self.assertEmpty(err) + + def test_debug_dump(self): + temp = get_temp_folder(__file__, "temp_debug_dump") + obj = {'in': [numpy.array([0, 1]), numpy.array([1, 2])], + 'out': [numpy.array([0, numpy.nan])]} + _, out, __ = self.capture( + lambda: debug_dump("rrr", obj, temp)) + self.assertIn("NAN-notin-out", out) + files = os.listdir(temp) + self.assertEqual(len(files), 1) + with open(os.path.join(temp, files[0]), 'rb') as f: + obj2 = pickle.load(f) + self.assertEqual(list(obj.keys()), list(obj2.keys())) + + +if __name__ == "__main__": + unittest.main() diff --git a/mlprodict/onnxrt/onnx_inference.py b/mlprodict/onnxrt/onnx_inference.py index d5b202455..63f348ce6 100644 --- a/mlprodict/onnxrt/onnx_inference.py +++ b/mlprodict/onnxrt/onnx_inference.py @@ -1066,7 +1066,7 @@ def clean_name(name): for o in node.outputs: code.append( " debug_print('o.{0}', {1}, printed)".format( - clean_name(o), o)) + clean_name(o), o)) # return code.append(' return {') diff --git a/mlprodict/onnxrt/ops_cpu/op_conv.py b/mlprodict/onnxrt/ops_cpu/op_conv.py index 5c6b3f5f4..5b2bcfff6 100644 --- a/mlprodict/onnxrt/ops_cpu/op_conv.py +++ b/mlprodict/onnxrt/ops_cpu/op_conv.py @@ -50,4 +50,5 @@ def compute_shape(xshape, wshape, bshape): res = self.rt32_.compute(xs, ws, bs) return res.shape - return (ShapeObjectFct(compute_shape, X, W, B, name="Conv", dtype=X.dtype), ) + return (ShapeObjectFct( + compute_shape, X, W, B, name="Conv", dtype=X.dtype), ) diff --git a/mlprodict/onnxrt/ops_cpu/op_conv_.cpp b/mlprodict/onnxrt/ops_cpu/op_conv_.cpp index 5996e306f..3ae775d28 100644 --- a/mlprodict/onnxrt/ops_cpu/op_conv_.cpp +++ b/mlprodict/onnxrt/ops_cpu/op_conv_.cpp @@ -93,7 +93,7 @@ void Conv::init( array2vector(pads_, pads, int64_t); array2vector(strides_, strides, int64_t); } - + template void Conv::compute_kernel_shape(const std::vector& weight_shape, @@ -101,11 +101,13 @@ void Conv::compute_kernel_shape(const std::vector& weight_shape, if (kernel_shape_.size() > 0) { kernel_shape = kernel_shape_; if (kernel_shape.size() + 2 != weight_shape.size()) - throw 
std::runtime_error("kernel_shape num_dims is not compatible with W num_dims (1)."); + throw std::runtime_error( + "kernel_shape num_dims is not compatible with W num_dims (1)."); for (size_t i = 0; i < kernel_shape.size(); ++i) if (kernel_shape[i] != weight_shape[i + 2]) - throw std::runtime_error("kernel_shape num_dims is not compatible with W num_dims (2)."); + throw std::runtime_error( + "kernel_shape num_dims is not compatible with W num_dims (2)."); } else { auto& weight_dims = weight_shape; @@ -116,19 +118,19 @@ void Conv::compute_kernel_shape(const std::vector& weight_shape, template py::array_t Conv::compute(py::array_t X, py::array_t W, py::array_t B) const { - + std::vector x_dims; arrayshape2vector(x_dims, X); std::vector w_dims; arrayshape2vector(w_dims, W); - + const int64_t N = x_dims[0]; // const int64_t C = x_dims[1]; const int64_t M = w_dims[0]; std::vector kernel_shape; compute_kernel_shape(w_dims, kernel_shape); - + std::vector pads(pads_); if (pads.empty()) pads.resize(kernel_shape.size() * 2, 0); @@ -146,7 +148,7 @@ py::array_t Conv::compute(py::array_t X, py::array_t W, py::array_t< std::vector input_shape(x_dims.begin() + 2, x_dims.end()); infer_output_shape(input_shape, kernel_shape, strides, dilations, pads, y_dims); std::vector output_shape(y_dims.begin() + 2, y_dims.end()); - + // py::array::ShapeContainer shape(y_dims); // auto total_size = flattened_dimension(y_dims); py::array_t Y(y_dims); @@ -173,13 +175,13 @@ void Conv::infer_output_shape( size_t rank = input_shape.size(); int64_t dim_size; - + for (size_t dim = 0; dim < rank; ++dim) { if (dim >= strides_p.size() || dim >= kernel_shape.size() || dim >= dilations_p.size() || dim >= pads_p.size() || rank + dim >= pads_p.size()) - throw std::runtime_error("Failure."); - + throw std::runtime_error("Failure in infer_output_shape."); + dim_size = 0; ComputePadAndOutputShape( input_shape[dim], strides_p[dim], kernel_shape[dim], @@ -187,7 +189,7 @@ void Conv::infer_output_shape( &pads_p.at(input_shape.size() + dim), &dim_size, ForceSymmetricAutoPadding); if (dim_size <= 0) - throw std::runtime_error("Invalid argument."); + throw std::runtime_error("Invalid argument in infer_output_shape."); output_shape.push_back(dim_size); } } @@ -216,6 +218,7 @@ void Conv::compute_gil_free( const int64_t input_image_size = flattened_dimension(input_shape); const int64_t output_image_size = flattened_dimension(output_shape); + const int64_t y_size = flattened_dimension(y_dims); const int64_t kernel_size = flattened_dimension(kernel_shape); const int64_t X_offset = C / group_ * input_image_size; const int64_t Y_offset = flattened_dimension(y_dims) / y_dims[0] / group_; @@ -228,6 +231,10 @@ void Conv::compute_gil_free( const T* Xdata = X.data(0); T* Ydata = (T*)Y.data(0); + T* yptr; + size_t k2; + + std::fill(Ydata, Ydata + y_size, (T)0); std::vector image_shape(x_dims.begin() + 1, x_dims.end()); std::vector col_buffer_shape{kernel_dim}; @@ -237,7 +244,7 @@ void Conv::compute_gil_free( const size_t kernel_rank = kernel_shape.size(); for (int image_id = 0; image_id < N; ++image_id) { - for (int group_id = 0; group_id < group_; ++group_id) { + for (int group_id = 0; group_id < group_; ++group_id) { if (kernel_rank == 2) { Im2col_NCHW( Xdata + group_id * X_offset, @@ -274,20 +281,18 @@ void Conv::compute_gil_free( gemm( false, false, - M / group_, // m - output_image_size, // n - kernel_dim, // k - 1, // alpha - W.data(0) + group_id * W_offset, // *a - col_buffer_data, // *b - 0, // beta + (size_t)(M / group_), // m + 
(size_t)(output_image_size), // n + (size_t)kernel_dim, // k + (T)1, // alpha + (const T*)W.data(0) + group_id * W_offset, // *a + (const T*)col_buffer_data, // *b + (T)0, // beta (T*)Ydata + group_id * Y_offset // *c ); } if (b_dims.size() != 0 && b_dims[0] != 0) { - T* yptr; - size_t k2; const T* ptrb = B.data(0); for(size_t k = 0; k < (size_t)M; ++k, ++ptrb) { yptr = Ydata + k; @@ -298,7 +303,7 @@ void Conv::compute_gil_free( Xdata += X_offset * group_; Ydata += Y_offset * group_; - } + } } @@ -328,7 +333,7 @@ PYBIND11_MODULE(op_conv_, m) { in :epkg:`onnxruntime`.)pbdoc" #endif ; - + py::class_ clf (m, "ConvFloat", R"pbdoc(Implements float runtime for operator Conv. The code is inspired from `conv.cc `_ @@ -338,8 +343,8 @@ in :epkg:`onnxruntime`. Supports float only.)pbdoc"); clf.def("init", &ConvFloat::init, "Initializes the runtime with the ONNX attributes."); clf.def("compute", &ConvFloat::compute, - "Computes the output for operator Conv."); - + "Computes the output for operator Conv."); + py::class_ cld (m, "ConvDouble", R"pbdoc(Implements float runtime for operator Conv. The code is inspired from `conv.cc `_ @@ -349,7 +354,7 @@ in :epkg:`onnxruntime`. Supports double only.)pbdoc"); cld.def("init", &ConvDouble::init, "Initializes the runtime with the ONNX attributes."); cld.def("compute", &ConvDouble::compute, - "Computes the output for operator Conv."); + "Computes the output for operator Conv."); } #endif diff --git a/mlprodict/onnxrt/ops_cpu/op_conv_matrices_.hpp b/mlprodict/onnxrt/ops_cpu/op_conv_matrices_.hpp index b0285d883..37e89581d 100644 --- a/mlprodict/onnxrt/ops_cpu/op_conv_matrices_.hpp +++ b/mlprodict/onnxrt/ops_cpu/op_conv_matrices_.hpp @@ -136,11 +136,11 @@ void Im2col_NCHW( int64_t stride_h, int64_t stride_w, T* data_col, T padding_value = 0) { const int64_t output_h = - (height + pad_b + pad_t - (dilation_h * (kernel_h - 1) + 1)) / stride_h + - 1; + (height + pad_b + pad_t - (dilation_h * (kernel_h - 1) + 1)) + / stride_h + 1; const int64_t output_w = - (width + pad_l + pad_r - (dilation_w * (kernel_w - 1) + 1)) / stride_w + - 1; + (width + pad_l + pad_r - (dilation_w * (kernel_w - 1) + 1)) + / stride_w + 1; // Fast path for zero padding and no dilation // From Torch, THNN_(unfolded_copy) @@ -236,7 +236,8 @@ void ComputePadAndOutputShape( case AutoPadType::SAME_UPPER: case AutoPadType::SAME_LOWER: { if (dilation != 1) - throw std::runtime_error("Dilation not supported for AutoPadType::SAME_UPPER or AutoPadType::SAME_LOWER."); + throw std::runtime_error( + "Dilation not supported for AutoPadType::SAME_UPPER or AutoPadType::SAME_LOWER."); int64_t legacy_target_size = (in_dim + stride - 1) / stride; int64_t pad_needed = (legacy_target_size - 1) * stride + kernel - in_dim; *out_dim = (in_dim + pad_needed - dkernel) / stride + 1; @@ -251,32 +252,37 @@ void ComputePadAndOutputShape( *pad_tail = pad_needed - *pad_head; } break; default: - throw std::runtime_error("Invalid argument."); + throw std::runtime_error("Invalid argument in ComputePadAndOutputShape."); } } } - +// The function adds value to C, assuming this array +// was initialized. 
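+// In BLAS terms, a sketch of the contract the non-transposed branch
+// below implements: C <- alpha * A * B + beta * C, with A of shape
+// M x K, B of shape K x N and C of shape M x N, all stored row-major.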
template <typename NTYPE> void gemm(bool transA, bool transB, size_t M, size_t N, size_t K, NTYPE alpha, const NTYPE* A, const NTYPE* B, NTYPE beta, NTYPE* C) { + if (!transA && !transB) { // a A B + b C, dimension = M * N NTYPE* begin; register NTYPE val; - size_t i, j, k; + NTYPE val0; + size_t i, j, k; const NTYPE *pA, *pB; for(i = 0, begin = C; i < M; ++i) { for(j = 0; j < N; ++j, ++begin) { - val = *begin * beta; + val0 = *begin * beta; + val = 0; pA = A + i * K; pB = B + j; for(k = K; k > 0; --k, ++pA, pB += N) - val += *pA * *pB * alpha; - *begin = val; + val += *pA * *pB; + *begin = val0 + val * alpha; } } return; diff --git a/mlprodict/tools/code_helper.py b/mlprodict/tools/code_helper.py index b3bc103a1..72b8874df 100644 --- a/mlprodict/tools/code_helper.py +++ b/mlprodict/tools/code_helper.py @@ -2,6 +2,7 @@ @file @brief A couple of tools unrelated to what the package does. """ +import pickle import keyword import re import types @@ -62,6 +63,52 @@ def numpy_max(x): return numpy_min_max(x, lambda x: x.max()) +def debug_dump(clname, obj, folder=None, ops=None): + """ + Dumps an object for debugging purposes. + + @param clname name of the class or operator being dumped + @param obj object + @param folder folder + @param ops runtime operator the values come from + @return filename + """ + def debug_print_(obj, prefix=''): + name = clname + if isinstance(obj, dict): + if 'in' in obj and 'out' in obj: + nan_in = any(map(lambda o: any(map(numpy.isnan, o.ravel())), + obj['in'])) + nan_out = any(map(lambda o: any(map(numpy.isnan, o.ravel())), + obj['out'])) + if not nan_in and nan_out: + print("NAN-notin-out ", name, prefix, + {k: getattr(ops, k, '?') for k in getattr(ops, 'atts', {})}) + return True + return False + for k, v in obj.items(): + debug_print_([v], k) + return None + if isinstance(obj, list): + for i, o in enumerate(obj): + if o is None: + continue + if any(map(numpy.isnan, o.ravel())): + print("NAN", prefix, i, name, o.shape) + return None + raise NotImplementedError( + "Unable to debug object of type {}.".format(type(obj))) + + dump = debug_print_(obj) + if dump: + name = 'cpu-{}-{}-{}.pkl'.format( + clname, id(obj), id(ops)) + if folder is not None: + name = "/".join([folder, name]) + with open(name, 'wb') as f: + pickle.dump(obj, f) + return name + return None + + def debug_print(k, obj, printed): """ Displays information on an object.
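Why the zero-fill added to compute_gil_free matters: gemm scales the existing content of the output buffer by beta, and with IEEE 754 floats a stray NaN in uninitialized memory survives even when beta is 0:

>>> 0.0 * float('nan')
nan

Hence the new std::fill(Ydata, Ydata + y_size, (T)0) before the convolution loop, which establishes the precondition stated in the comment added above gemm ("assuming this array was initialized").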
From ae1b47216d3e3ec009cff47e01f719157ee8bace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 30 Jul 2020 13:08:12 +0200 Subject: [PATCH 8/9] Force operator Conv to use contiguous arrays --- _unittests/ut_onnxrt/test_cpu_ops.py | 54 ++++++++++++++++++++++++++- mlprodict/onnxrt/ops_cpu/_op.py | 7 ++++ mlprodict/onnxrt/ops_cpu/op_conv_.cpp | 8 +++- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/_unittests/ut_onnxrt/test_cpu_ops.py b/_unittests/ut_onnxrt/test_cpu_ops.py index 7f8e3b574..b03808c25 100644 --- a/_unittests/ut_onnxrt/test_cpu_ops.py +++ b/_unittests/ut_onnxrt/test_cpu_ops.py @@ -55,7 +55,7 @@ def test_cpu_conv_init(self): W = numpy.random.rand(24, 96, 1, 1).astype(numpy.float32) onx = OnnxConv( - 'X', W, output_names=['Y'], + 'X', 'W', output_names=['Y'], auto_pad='NOTSET', group=1, dilations=[1, 1], kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], op_version=get_opset_number_from_onnx()) @@ -80,6 +80,58 @@ def test_cpu_conv_init(self): ii, diff[ii], gotrt['Y'].ravel()[ii], got['Y'].ravel()[ii])) self.assertEqualArray(gotrt['Y'], got['Y'], decimal=5) + def test_cpu_conv_group(self): + x = numpy.random.rand(1, 3, 3, 4).astype(numpy.float32) + W = numpy.random.rand(9, 1, 3, 3).astype(numpy.float32) + + onx = OnnxConv( + 'X', 'W', output_names=['Y'], + auto_pad='NOTSET', group=3, dilations=[1, 1], + kernel_shape=[3, 3], strides=[1, 1], + op_version=get_opset_number_from_onnx()) + model_def = onx.to_onnx({'X': x.astype(numpy.float32), + 'W': W.astype(numpy.float32)}, + target_opset=get_opset_number_from_onnx()) + oinf = OnnxInference(model_def) + oinfrt = OnnxInference(model_def, runtime='onnxruntime1') + d = oinf.sequence_[-1].ops_.atts_value + self.assertIsInstance(d, dict) + self.assertEqual(d['kernel_shape'].tolist(), [3, 3]) + + xs = [ + numpy.random.rand(1, 3, 3, 4).astype(numpy.float32), + numpy.array([1.0, 4.0, 7.0, 10.0, 13.0, 16.0, 19.0, 22.0, 25.0, 28.0, 31.0, + 34.0, 2.0, 5.0, 8.0, 11.0, 14.0, 17.0, 20.0, 23.0, 26.0, 29.0, + 32.0, 35.0, 3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0, 27.0, + 30.0, 33.0, 36.0], dtype=numpy.float32).reshape((1, 3, 3, 4))] + Ws = [ + numpy.random.rand(9, 1, 3, 3).astype(numpy.float32), + numpy.array([1.0, 10.0, 19.0, 28.0, 37.0, 46.0, 55.0, 64.0, + 73.0, 2.0, 11.0, 20.0, 29.0, 38.0, 47.0, 56.0, 65.0, 74.0, + 3.0, 12.0, 21.0, 30.0, 39.0, 48.0, 57.0, 66.0, 75.0, 4.0, + 13.0, 22.0, 31.0, 40.0, 49.0, 58.0, 67.0, 76.0, 5.0, 14.0, + 23.0, 32.0, 41.0, 50.0, 59.0, 68.0, 77.0, 6.0, 15.0, 24.0, + 33.0, 42.0, 51.0, 60.0, 69.0, 78.0, 7.0, 16.0, 25.0, 34.0, + 43.0, 52.0, 61.0, 70.0, 79.0, 8.0, 17.0, 26.0, 35.0, 44.0, + 53.0, 62.0, 71.0, 80.0, 9.0, 18.0, 27.0, 36.0, 45.0, 54.0, + 63.0, 72.0, 81.0], dtype=numpy.float32).reshape((9, 1, 3, 3))] + + for x, W in zip(xs, Ws): + x = numpy.asfortranarray(x) + W = numpy.asfortranarray(W) + got = oinf.run({'X': x, 'W': W}) + gotrt = oinfrt.run({'X': x, 'W': W}) + diff = list(numpy.abs((gotrt['Y'] - got['Y']).ravel())) + sdiff = list(sorted(diff)) + if sdiff[-1] > 1e-5: + raise AssertionError("runtimes disagree {}".format(sdiff[-5:])) + for ii in range(len(diff)): # pylint: disable=C0200 + if numpy.isnan(diff[ii]): + raise AssertionError( + "runtimes disagree about nan {}: {} # {} ?
{}".format( + ii, diff[ii], gotrt['Y'].ravel()[ii], got['Y'].ravel()[ii])) + self.assertEqualArray(gotrt['Y'], got['Y'], decimal=5) + if __name__ == "__main__": unittest.main() diff --git a/mlprodict/onnxrt/ops_cpu/_op.py b/mlprodict/onnxrt/ops_cpu/_op.py index 994396791..c0078677a 100644 --- a/mlprodict/onnxrt/ops_cpu/_op.py +++ b/mlprodict/onnxrt/ops_cpu/_op.py @@ -274,6 +274,13 @@ def _to_python_numpy(self, inputs, numpy_name): return ("import numpy", "return numpy.%s(%s)" % (numpy_name, ", ".join(inputs))) + @property + def atts_value(self): + "Returns all parameters in a dictionary." + if hasattr(self, 'atts'): + return {k: getattr(self, k) for k in self.atts} + return None + class OpRunUnary(OpRun): """ diff --git a/mlprodict/onnxrt/ops_cpu/op_conv_.cpp b/mlprodict/onnxrt/ops_cpu/op_conv_.cpp index 3ae775d28..7ecf89564 100644 --- a/mlprodict/onnxrt/ops_cpu/op_conv_.cpp +++ b/mlprodict/onnxrt/ops_cpu/op_conv_.cpp @@ -44,7 +44,9 @@ class Conv { py::array_t pads, py::array_t strides); - py::array_t compute(py::array_t X, py::array_t W, py::array_t B) const; + py::array_t compute(py::array_t X, + py::array_t W, + py::array_t B) const; private: @@ -117,7 +119,9 @@ void Conv::compute_kernel_shape(const std::vector& weight_shape, template -py::array_t Conv::compute(py::array_t X, py::array_t W, py::array_t B) const { +py::array_t Conv::compute(py::array_t X, + py::array_t W, + py::array_t B) const { std::vector x_dims; arrayshape2vector(x_dims, X); From dd3f6e87836660d2afce40354f67f0d39f56686a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 30 Jul 2020 13:57:55 +0200 Subject: [PATCH 9/9] pep8 --- mlprodict/onnxrt/ops_cpu/_op.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlprodict/onnxrt/ops_cpu/_op.py b/mlprodict/onnxrt/ops_cpu/_op.py index c0078677a..ab898dfe1 100644 --- a/mlprodict/onnxrt/ops_cpu/_op.py +++ b/mlprodict/onnxrt/ops_cpu/_op.py @@ -278,7 +278,8 @@ def _to_python_numpy(self, inputs, numpy_name): def atts_value(self): "Returns all parameters in a dictionary." if hasattr(self, 'atts'): - return {k: getattr(self, k) for k in self.atts} + return {k: getattr(self, k) + for k in self.atts} # pylint: disable=E1101 return None