# tests/test_sklearn_gradient_boosting_converters.py

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

from logging import getLogger
import unittest
import numpy as np
from distutils.version import StrictVersion
from pandas import DataFrame
from sklearn.datasets import make_classification
from sklearn.ensemble import (
    GradientBoostingClassifier,
    GradientBoostingRegressor
)
from sklearn.model_selection import train_test_split
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
from skl2onnx.common.data_types import onnx_built_with_ml
from test_utils import dump_binary_classification, dump_multiple_classification
from test_utils import dump_data_and_model, fit_regression_model
from onnxruntime import InferenceSession, __version__

# onnxruntime version at or below which the classifier checks in this file
# are skipped (``skipIf`` decorators) or allowed to fail (``allow_failure``
# expressions built from this value).
threshold = "0.4.0"


class TestSklearnGradientBoostingModels(unittest.TestCase):
    """Conversion tests for scikit-learn gradient boosting estimators.

    Every test converts a ``GradientBoostingClassifier`` or a
    ``GradientBoostingRegressor`` with ``convert_sklearn``. The result is
    then either run through an onnxruntime ``InferenceSession`` and
    compared with scikit-learn, or handed to one of the ``test_utils``
    dump helpers.
    """

    # ``allow_failure`` expression shared by all the regressor tests.
    _REGRESSOR_ALLOW_FAILURE = (
        "StrictVersion(onnxruntime.__version__)"
        " <= StrictVersion('0.2.1')")

    def setUp(self):
        """Disable the ``skl2onnx`` logger while a test is running."""
        log = getLogger('skl2onnx')
        # Restore the previous flag once the test is over so that the
        # setting does not leak into other code run in the same process.
        self.addCleanup(setattr, log, 'disabled', log.disabled)
        log.disabled = True

    def _check_regressor_conversion(self, estimator, basename,
                                    tensor_type=FloatTensorType,
                                    **fit_kwargs):
        """Fit *estimator*, convert it and dump the data and both models.

        :param estimator: unfitted regressor given to
            ``fit_regression_model``
        :param basename: ``basename`` forwarded to ``dump_data_and_model``
        :param tensor_type: tensor type class used to declare the single
            ``"input"`` of the converted model
        :param fit_kwargs: extra keyword arguments forwarded to
            ``fit_regression_model`` (e.g. ``is_int=True``)
        """
        model, X = fit_regression_model(estimator, **fit_kwargs)
        model_onnx = convert_sklearn(
            model,
            "gradient boosting regression",
            [("input", tensor_type([None, X.shape[1]]))],
        )
        self.assertIsNotNone(model_onnx)
        dump_data_and_model(
            X,
            model,
            model_onnx,
            basename=basename,
            allow_failure=self._REGRESSOR_ALLOW_FAILURE,
        )

    @unittest.skipIf(not onnx_built_with_ml(),
                     reason="Requires ONNX-ML extension.")
    @unittest.skipIf(
        StrictVersion(__version__) <= StrictVersion(threshold),
        reason="Depends on PR #1015 onnxruntime.")
    def test_gradient_boosting_classifier1Deviance(self):
        """Compare a one-tree binary classifier with onnxruntime.

        The model is converted once as fitted and then again after its
        ``init_.class_prior_`` has been overwritten with several values.
        """
        model = GradientBoostingClassifier(n_estimators=1, max_depth=2)
        X, y = make_classification(10, n_features=4, random_state=42)
        X = X[:, :2]
        model.fit(X, y)

        for cl in [None, 0.231, 1e-6, 0.9]:
            if cl is not None:
                # Overwrite the prior of the initial estimator in place.
                model.init_.class_prior_ = np.array([cl, cl])
            initial_types = [('input', FloatTensorType((None, X.shape[1])))]
            model_onnx = convert_sklearn(model, initial_types=initial_types)
            onnx_text = str(model_onnx)
            if "Regressor" in onnx_text:
                # The text of a converted classifier must not mention
                # "Regressor" anywhere.
                self.fail(onnx_text)
            sess = InferenceSession(model_onnx.SerializeToString())
            res = sess.run(None, {'input': X.astype(np.float32)})
            pred = model.predict_proba(X)
            # Only the first entry of the first sample is compared.
            delta = abs(res[1][0][0] - pred[0, 0])
            if delta > 1e-5:
                rows = ["diff", str(delta),
                        "X", str(X),
                        "base_values_", str(model.init_.class_prior_),
                        "predicted_label", str(model.predict(X)),
                        "expected", str(pred),
                        "onnxruntime", str(DataFrame(res[1])),
                        "model", onnx_text]
                self.fail("\n---\n".join(rows))
        dump_binary_classification(
            model, suffix="1Deviance",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('%s')" % threshold)

    @unittest.skipIf(not onnx_built_with_ml(),
                     reason="Requires ONNX-ML extension.")
    def test_gradient_boosting_classifier3(self):
        """Pass a three-tree classifier to ``dump_binary_classification``."""
        model = GradientBoostingClassifier(n_estimators=3)
        dump_binary_classification(
            model, suffix="3",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('%s')" % threshold)

    @unittest.skipIf(not onnx_built_with_ml(),
                     reason="Requires ONNX-ML extension.")
    def test_gradient_boosting_classifier_multi(self):
        """Pass a three-tree classifier to ``dump_multiple_classification``."""
        model = GradientBoostingClassifier(n_estimators=3)
        dump_multiple_classification(
            model,
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('%s')" % threshold,
        )

    def test_gradient_boosting_regressor_ls_loss(self):
        """Convert a three-tree regressor trained with ``loss="ls"``."""
        self._check_regressor_conversion(
            GradientBoostingRegressor(n_estimators=3, loss="ls"),
            "SklearnGradientBoostingRegressionLsLoss")

    def test_gradient_boosting_regressor_lad_loss(self):
        """Convert a three-tree regressor trained with ``loss="lad"``."""
        self._check_regressor_conversion(
            GradientBoostingRegressor(n_estimators=3, loss="lad"),
            "SklearnGradientBoostingRegressionLadLoss")

    def test_gradient_boosting_regressor_huber_loss(self):
        """Convert a three-tree regressor trained with ``loss="huber"``."""
        self._check_regressor_conversion(
            GradientBoostingRegressor(n_estimators=3, loss="huber"),
            "SklearnGradientBoostingRegressionHuberLoss")

    def test_gradient_boosting_regressor_quantile_loss(self):
        """Convert a three-tree regressor trained with ``loss="quantile"``."""
        self._check_regressor_conversion(
            GradientBoostingRegressor(n_estimators=3, loss="quantile"),
            "SklearnGradientBoostingRegressionQuantileLoss")

    def test_gradient_boosting_regressor_int(self):
        """Convert a default regressor declared with an int64 input."""
        self._check_regressor_conversion(
            GradientBoostingRegressor(random_state=42),
            "SklearnGradientBoostingRegressionInt-Dec4",
            tensor_type=Int64TensorType,
            is_int=True)

    def test_gradient_boosting_regressor_zero_init(self):
        """Convert a thirty-tree regressor created with ``init="zero"``."""
        self._check_regressor_conversion(
            GradientBoostingRegressor(n_estimators=30, init="zero",
                                      random_state=42),
            "SklearnGradientBoostingRegressionZeroInit-Dec4")

    @unittest.skipIf(
        StrictVersion(__version__) <= StrictVersion(threshold),
        reason="Depends on PR #1015 onnxruntime.")
    def test_gradient_boosting_regressor_learning_rate(self):
        """Compare probability agreement with label agreement.

        A default ``GradientBoostingClassifier`` is converted and run with
        onnxruntime. The test passes when the share of probabilities close
        to scikit-learn's (``atol=1e-4``) equals the share of labels equal
        to scikit-learn's.
        """
        # NOTE(review): make_classification is not seeded here, so the
        # data differs from one run to the next; only the split is seeded.
        X, y = make_classification(
            n_features=100, n_samples=1000, n_classes=2, n_informative=8)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.5, random_state=42)
        model = GradientBoostingClassifier().fit(X_train, y_train)
        onnx_model = convert_sklearn(
            model, 'lr2', [('input', FloatTensorType(X_test.shape))])
        sess = InferenceSession(onnx_model.SerializeToString())
        res = sess.run(None, input_feed={'input': X_test.astype(np.float32)})
        expected_proba = model.predict_proba(X_test)
        # Each item of res[1] is indexed by the keys it iterates over,
        # which gives one row of probabilities per sample.
        onnx_proba = [[row[label] for label in row] for row in res[1]]
        r1 = np.mean(np.isclose(expected_proba, onnx_proba, atol=1e-4))
        r2 = np.mean(res[0] == model.predict(X_test))
        # assertEqual keeps the check alive under ``python -O``, where a
        # bare ``assert`` statement would be stripped.
        self.assertEqual(r1, r2)

# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()