sdpython · xadupre · Mar 10, 2022 · Mar 8, 2022 · Mar 8, 2022 · Mar 9, 2022
diff --git a/_unittests/ut_onnx_conv/test_onnx_conv_tree_ensemble.py b/_unittests/ut_onnx_conv/test_onnx_conv_tree_ensemble.py
@@ -0,0 +1,208 @@
+"""
+@brief      test log(time=20s)
+"""
+import unittest
+import numpy
+from onnx.checker import check_model
+from onnxruntime import __version__ as ort_version
+from pyquickhelper.pycode import ExtTestCase, ignore_warnings
+from pyquickhelper.texthelper.version_helper import compare_module_version
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
+from sklearn.ensemble import (
+    RandomForestRegressor, GradientBoostingRegressor,
+    HistGradientBoostingRegressor,
+    RandomForestClassifier, GradientBoostingClassifier,
+    HistGradientBoostingClassifier)
+from lightgbm import LGBMRegressor, LGBMClassifier
+from xgboost import XGBRegressor, XGBClassifier
+from mlprodict.onnxrt import OnnxInference
+from mlprodict.onnx_conv import to_onnx
+from mlprodict.plotting.text_plot import onnx_simple_text_plot
+# from mlprodict import (
+#     __max_supported_opsets_experimental__ as __max_supported_opsets__)
+from mlprodict import __max_supported_opsets__
+
+ort_version = ".".join(ort_version.split('.')[:2])  # keep "major.minor" only
+
+
+class TestOnnxConvTreeEnsemble(ExtTestCase):
+
+    def common_test_regressor(self, runtime, models=None, dtypes=None):
+        iris = load_iris()
+        X, y = iris.data, iris.target
+        X_train, X_test, y_train, _ = train_test_split(X, y)
+        if models is None:
+            models = [
+                DecisionTreeRegressor(max_depth=2),
+                HistGradientBoostingRegressor(max_iter=2, max_depth=2),
+                GradientBoostingRegressor(n_estimators=2, max_depth=2),
+                RandomForestRegressor(n_estimators=2, max_depth=2),
+            ]
+
+        if dtypes is None:
+            dtypes = [numpy.float64, numpy.float32]
+        for gbm in models:
+            gbm.fit(X_train, y_train)
+            exp = gbm.predict(X_test).ravel()
+            for dtype in dtypes:
+                decimal = {numpy.float32: 5, numpy.float64: 12}[dtype]
+                if (dtype == numpy.float64 and gbm.__class__ in {
+                        LGBMRegressor}):
+                    decimal = 7
+                elif (dtype == numpy.float64 and gbm.__class__ in {
+                        XGBRegressor}):
+                    decimal = 7
+                xt = X_test.astype(dtype)
+                for opset in [(16, 3), (15, 1)]:
+                    if opset[1] > __max_supported_opsets__['ai.onnx.ml']:
+                        continue
+                    with self.subTest(runtime=runtime, dtype=dtype,
+                                      model=gbm.__class__.__name__,
+                                      opset=opset):
+                        onx = to_onnx(gbm, xt,  # options={'zipmap': False},
+                                      target_opset={
+                                          '': opset[0], 'ai.onnx.ml': opset[1]},
+                                      rewrite_ops=True)
+                        if dtype == numpy.float64:
+                            sonx = str(onx)
+                            if 'double' not in sonx and "_as_tensor" not in sonx:
+                                raise AssertionError(
+                                    "Issue with %s." % str(onx))
+                        try:
+                            check_model(onx)
+                        except Exception as e:
+                            raise AssertionError(
+                                "Issue with %s." % str(onx)) from e
+                        output = onx.graph.output[0].type.tensor_type.elem_type
+                        self.assertEqual(
+                            output, {numpy.float32: 1, numpy.float64: 11}[dtype])
+                        oif = OnnxInference(onx, runtime=runtime)
+                        self.assertEqual({numpy.float32: 'tensor(float)',
+                                          numpy.float64: 'tensor(double)'}[dtype],
+                                         oif.output_names_shapes_types[0][2])
+                        got = oif.run({'X': xt})
+                        try:
+                            self.assertEqualArray(exp, got['variable'].ravel(),
+                                                  decimal=decimal)
+                        except AssertionError as e:
+                            raise AssertionError(
+                                "Discrepancies %s." % str(onx)) from e
+                        self.assertEqual(got['variable'].dtype, dtype)
+
+    @ignore_warnings((RuntimeWarning, UserWarning))
+    def test_regressor_python(self):
+        self.common_test_regressor('python')  # default models and dtypes
+
+    @ignore_warnings((RuntimeWarning, UserWarning))
+    def test_regressor_python_lgbm(self):
+        self.common_test_regressor(  # default dtypes: float64 and float32
+            'python', [LGBMRegressor(max_iter=3, max_depth=2, verbosity=-1)])
+
+    @ignore_warnings((RuntimeWarning, UserWarning))
+    def test_regressor_python_xgb(self):
+        self.common_test_regressor(
+            'python', [XGBRegressor(max_iter=3, max_depth=2, verbosity=0)],
+            dtypes=[numpy.float32])
+
+    @unittest.skipIf(compare_module_version(ort_version, '1.12') < 0,
+                     reason="missing runtime")  # presumably onnxruntime < 1.12
+    @ignore_warnings((RuntimeWarning, UserWarning))
+    def test_regressor_onnxruntime(self):
+        self.common_test_regressor('onnxruntime1')
+
+    def common_test_classifier(self, runtime, models=None, dtypes=None):
+        iris = load_iris()
+        X, y = iris.data, iris.target
+        X_train, X_test, y_train, _ = train_test_split(X, y)
+        if models is None:
+            models = [
+                DecisionTreeClassifier(max_depth=2),
+                RandomForestClassifier(n_estimators=2, max_depth=2),
+                HistGradientBoostingClassifier(max_iter=2, max_depth=2),
+                GradientBoostingClassifier(n_estimators=2, max_depth=2),
+            ]
+
+        if dtypes is None:
+            dtypes = [numpy.float64, numpy.float32]
+        for gbm in models:
+            gbm.fit(X_train, y_train)
+            exp = gbm.predict_proba(X_test).ravel()
+            for dtype in dtypes:
+                decimal = {numpy.float32: 6, numpy.float64: 7}[dtype]
+                if (dtype == numpy.float64 and
+                        gbm.__class__ in {DecisionTreeClassifier,
+                                          GradientBoostingClassifier}):
+                    decimal = 12
+                xt = X_test.astype(dtype)
+                for opset in [(15, 1), (16, 3)]:
+                    if opset[1] > __max_supported_opsets__['ai.onnx.ml']:
+                        continue
+                    with self.subTest(runtime=runtime, dtype=dtype,
+                                      model=gbm.__class__.__name__,
+                                      opset=opset):
+                        onx = to_onnx(gbm, xt, options={'zipmap': False},
+                                      target_opset={
+                                          '': opset[0],
+                                          'ai.onnx.ml': opset[1]},
+                                      rewrite_ops=True)
+                        if dtype == numpy.float64 and (
+                                opset[1] >= 3 or
+                                gbm.__class__ not in {
+                                    RandomForestClassifier,
+                                    HistGradientBoostingClassifier}):
+                            sonx = str(onx)
+                            if 'double' not in sonx and "_as_tensor" not in sonx:
+                                raise AssertionError(
+                                    "Issue with %s." % str(onx))
+                        output = onx.graph.output[1].type.tensor_type.elem_type
+                        self.assertEqual(
+                            output, {numpy.float32: 1, numpy.float64: 11}[dtype])
+                        oif = OnnxInference(onx, runtime=runtime)
+                        self.assertEqual({numpy.float32: 'tensor(float)',
+                                          numpy.float64: 'tensor(double)'}[dtype],
+                                         oif.output_names_shapes_types[1][2])
+                        got = oif.run({'X': xt})
+                        try:
+                            self.assertEqualArray(
+                                exp, got['probabilities'].ravel(), decimal=decimal)
+                        except AssertionError as e:
+                            raise AssertionError(
+                                "Discrepancies with onx=%s\n%s." % (
+                                    onnx_simple_text_plot(onx),
+                                    str(onx))) from e
+                        self.assertEqual(got['probabilities'].dtype, dtype)
+
+    @ignore_warnings((RuntimeWarning, UserWarning))
+    def test_classifier_python(self):
+        self.common_test_classifier('python')  # default models and dtypes
+
+    @unittest.skipIf(compare_module_version(ort_version, '1.12') < 0,
+                     reason="missing runtime")  # presumably onnxruntime < 1.12
+    @ignore_warnings((RuntimeWarning, UserWarning))
+    def test_classifier_onnxruntime(self):
+        self.common_test_classifier('onnxruntime1')
+
+    @ignore_warnings((RuntimeWarning, UserWarning))
+    def test_classifier_python_lgbm(self):
+        # lightgbm classifiers are only checked with float32 inputs here
+        self.common_test_classifier(
+            'python', [LGBMClassifier(max_iter=3, max_depth=2, verbosity=-1)],
+            dtypes=[numpy.float32])
+
+    @ignore_warnings((RuntimeWarning, UserWarning))
+    def test_classifier_python_xgb(self):
+        # xgboost is implemented with floats
+        self.common_test_classifier(
+            'python', [XGBClassifier(max_iter=2, max_depth=2, verbosity=0)],
+            dtypes=[numpy.float32])
+
+
+if __name__ == "__main__":
+    # import logging
+    # logger = logging.getLogger('mlprodict.onnx_conv')
+    # logger.setLevel(logging.DEBUG)
+    # logging.basicConfig(level=logging.DEBUG)
+    # TestOnnxConvTreeEnsemble().test_regressor_python_lgbm()
+    unittest.main(verbosity=2)  # verbosity=2 reports every test by name
diff --git a/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py b/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py
@@ -65,7 +65,7 @@ def test_onnxrt_python_lightgbm_categorical(self):
         cat_cols_actual = ["A", "B", "C", "D"]
         X[cat_cols_actual] = X[cat_cols_actual].astype('category')
         X_test[cat_cols_actual] = X_test[cat_cols_actual].astype('category')
-        gbm0 = LGBMClassifier().fit(X, y)
+        gbm0 = LGBMClassifier(verbosity=-1).fit(X, y)
         exp = gbm0.predict(X_test, raw_scores=False)
         self.assertNotEmpty(exp)
 
@@ -77,7 +77,7 @@ def test_onnxrt_python_lightgbm_categorical(self):
 
         X = X[['C']].values.astype(numpy.float32)
         X_test = X_test[['C']].values.astype(numpy.float32)
-        gbm0 = LGBMClassifier().fit(X, y, categorical_feature=[0])
+        gbm0 = LGBMClassifier(verbosity=-1).fit(X, y, categorical_feature=[0])
         exp = gbm0.predict_proba(X_test, raw_scores=False)
         model_def = to_onnx(gbm0, X)
         self.assertIn('ZipMap', str(model_def))
@@ -118,7 +118,7 @@ def test_onnxrt_python_lightgbm_categorical3(self):
         cat_cols_actual = ["A", "B", "C", "D"]
         X[cat_cols_actual] = X[cat_cols_actual].astype('category')
         X_test[cat_cols_actual] = X_test[cat_cols_actual].astype('category')
-        gbm0 = LGBMClassifier().fit(X, y)
+        gbm0 = LGBMClassifier(verbosity=-1).fit(X, y)
         exp = gbm0.predict(X_test, raw_scores=False)
         self.assertNotEmpty(exp)
 
@@ -132,7 +132,7 @@ def test_onnxrt_python_lightgbm_categorical3(self):
 
         X = X[['C']].values.astype(numpy.float32)
         X_test = X_test[['C']].values.astype(numpy.float32)
-        gbm0 = LGBMClassifier().fit(X, y, categorical_feature=[0])
+        gbm0 = LGBMClassifier(verbosity=-1).fit(X, y, categorical_feature=[0])
         exp = gbm0.predict_proba(X_test, raw_scores=False)
         model_def = to_onnx(gbm0, X, target_opset=TARGET_OPSET)
         self.assertIn('ZipMap', str(model_def))
@@ -167,7 +167,7 @@ def test_onnxrt_python_lightgbm_categorical_iris(self):
         y_train = y_train % 2
 
         # Classic
-        gbm = LGBMClassifier()
+        gbm = LGBMClassifier(verbosity=-1)
         gbm.fit(X_train, y_train)
         exp = gbm.predict_proba(X_test)
         onx = to_onnx(gbm, initial_types=[
@@ -223,7 +223,7 @@ def test_onnxrt_python_lightgbm_categorical_iris_booster3(self):
         self.assertEqual(y_train.shape, (X_train.shape[0], ))
 
         # Classic
-        gbm = LGBMClassifier()
+        gbm = LGBMClassifier(verbosity=-1)
         gbm.fit(X_train, y_train)
         exp = gbm.predict_proba(X_test)
         onx = to_onnx(gbm, initial_types=[
@@ -272,7 +272,7 @@ def test_onnxrt_python_lightgbm_categorical_iris_booster3_real(self):
             X, y, random_state=11)
 
         # Classic
-        gbm = LGBMClassifier()
+        gbm = LGBMClassifier(verbosity=-1)
         gbm.fit(X_train, y_train)
         exp = gbm.predict_proba(X_test)
         onx = to_onnx(gbm.booster_, initial_types=[
@@ -390,7 +390,8 @@ def test_lightgbm_booster_classifier(self):
         model = lgb_train({'boosting_type': 'rf', 'objective': 'binary',
                            'n_estimators': 3, 'min_child_samples': 1,
                            'subsample_freq': 1, 'bagging_fraction': 0.5,
-                           'feature_fraction': 0.5, 'average_output': True},
+                           'feature_fraction': 0.5, 'average_output': True,
+                           'verbosity': -1},
                           data)
         model_onnx = to_onnx(model, X, verbose=0, rewrite_ops=True,
                              target_opset=TARGET_OPSET)
@@ -441,7 +442,7 @@ def test_missing_values(self):
 
         regressor = LGBMRegressor(
             objective="regression", min_data_in_bin=1, min_data_in_leaf=1,
-            n_estimators=1, learning_rate=1)
+            n_estimators=1, learning_rate=1, verbosity=-1)
         regressor.fit(_X_train, _y)
         regressor_onnx = to_onnx(
             regressor, initial_types=_INITIAL_TYPES, rewrite_ops=True,
@@ -471,7 +472,7 @@ def test_missing_values_rf(self):
             ("input", FloatTensorType([None, _X_train.shape[1]]))]
 
         regressor = LGBMRegressor(
-            objective="regression", boosting_type='rf',
+            objective="regression", boosting_type='rf', verbosity=-2,
             n_estimators=10, bagging_freq=1, bagging_fraction=0.5)
         regressor.fit(_X_train, _y)
         regressor_onnx = to_onnx(
@@ -531,7 +532,7 @@ def test_objective(self):
         for objective in _objectives:
             with self.subTest(X=_X, objective=objective):
                 initial_types = self._calc_initial_types(_X)
-                regressor = LGBMRegressor(objective=objective)
+                regressor = LGBMRegressor(objective=objective, verbosity=-1)
                 regressor.fit(_X, _Y)
                 regressor_onnx = to_onnx(
                     regressor, initial_types=initial_types,
@@ -564,7 +565,8 @@ def test_objective_boosting_rf(self):
                 initial_types = self._calc_initial_types(_X)
                 regressor = LGBMRegressor(
                     objective=objective, boosting='rf', bagging_freq=3,
-                    bagging_fraction=0.5, n_estimators=10)
+                    bagging_fraction=0.5, n_estimators=10,
+                    verbosity=-1)
                 regressor.fit(_X, _Y)
                 regressor_onnx = to_onnx(
                     regressor, initial_types=initial_types,
@@ -582,7 +584,7 @@ def test_lgbm_regressor10(self):
         X, y = data.data, data.target
         X = X.astype(numpy.float32)
         X_train, X_test, y_train, _ = train_test_split(X, y, random_state=0)
-        reg = LGBMRegressor(max_depth=2, n_estimators=4, seed=0)
+        reg = LGBMRegressor(max_depth=2, n_estimators=4, seed=0, verbosity=-1)
         reg.fit(X_train, y_train)
         expected = reg.predict(X_test)
 
@@ -608,7 +610,8 @@ def test_lgbm_regressor(self):
         X, y = data.data, data.target
         X = X.astype(numpy.float32)
         X_train, X_test, y_train, _ = train_test_split(X, y, random_state=0)
-        reg = LGBMRegressor(max_depth=2, n_estimators=100, seed=0)
+        reg = LGBMRegressor(max_depth=2, n_estimators=100,
+                            seed=0, verbosity=-1)
         reg.fit(X_train, y_train)
         expected = reg.predict(X_test)
 
@@ -648,4 +651,4 @@ def test_lgbm_regressor(self):
 
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main(verbosity=2)
diff --git a/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm_bug.py b/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm_bug.py
@@ -83,7 +83,7 @@ def test_missing_values(self):
         from lightgbm import LGBMRegressor
         regressor = LGBMRegressor(
             objective="regression", min_data_in_bin=1, min_data_in_leaf=1,
-            n_estimators=1, learning_rate=1)
+            n_estimators=1, learning_rate=1, verbosity=-1)
 
         y = numpy.array([0, 0, 1, 1, 1])
         X_train = numpy.array(
@@ -121,7 +121,7 @@ def test_lightgbm_regressor(self):
                     break
                 model = LGBMRegressor(
                     max_depth=mx, n_estimators=ne, min_child_samples=1,
-                    learning_rate=0.0000001)
+                    learning_rate=0.0000001, verbosity=-1)
                 model.fit(X, y)
                 expected = model.predict(X)
 
@@ -166,7 +166,7 @@ def test_lightgbm_regressor_double(self):
                     break
                 model = LGBMRegressor(
                     max_depth=mx, n_estimators=ne, min_child_samples=1,
-                    learning_rate=0.0000001)
+                    learning_rate=0.0000001, verbosity=-1)
                 model.fit(X, y)
                 expected = model.predict(X)
                 model_onnx = to_onnx(
@@ -198,4 +198,4 @@ def test_lightgbm_regressor_double(self):
 
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main(verbosity=2)
diff --git a/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py b/_unittests/ut_onnxrt/test_onnxrt_python_runtime_.py
@@ -1314,7 +1314,6 @@ def test_onnxt_runtime_concat(self):
                    'Y': Y.astype(numpy.float32)},
             got, OnnxConcat, model_def)
 
-        python_tested.append(OnnxConstantOfShape)
         oinfpy = OnnxInference(model_def, runtime="python", inplace=True)
         validate_python_inference(
             oinfpy, {'X': X.astype(numpy.float32),

diff --git a/_unittests/ut_onnxrt/test_onnxrt_validate_documentation.py b/_unittests/ut_onnxrt/test_onnxrt_validate_documentation.py
@@ -38,7 +38,8 @@ def test_validate_sklearn_store_models(self):
 
     @skipif_circleci('too long')
     @ignore_warnings(category=(UserWarning, ConvergenceWarning,
-                               RuntimeWarning, SyntaxWarning))
+                               RuntimeWarning, SyntaxWarning,
+                               ConvergenceWarning))
     def test_write_documentation_converters(self):
         fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
         subs = []