Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
increase testing coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
sdpython committed Jan 8, 2020
1 parent e6ba168 commit 64ede6f
Show file tree
Hide file tree
Showing 14 changed files with 2,073 additions and 32 deletions.
183 changes: 183 additions & 0 deletions _unittests/ut_onnx_conv/test_skl2onnx_ensemble.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""
@brief test tree node (time=12s)
"""
import unittest
from logging import getLogger
import numpy
from pyquickhelper.pycode import ExtTestCase, get_temp_folder
from sklearn.datasets import make_regression, make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import (
RandomForestClassifier, RandomForestRegressor,
ExtraTreesClassifier, ExtraTreesRegressor
)
try:
from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=W0611
from sklearn.ensemble import (
HistGradientBoostingClassifier,
HistGradientBoostingRegressor
)
except ImportError:
HistGradientBoostingClassifier = None
HistGradientBoostingRegressor = None
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn
from mlprodict.testing.test_utils import (
dump_binary_classification,
dump_data_and_model,
dump_multiple_classification,
dump_multiple_regression,
dump_single_regression,
fit_multilabel_classification_model,
)
from mlprodict.onnx_conv import register_rewritten_operators


class TestSklearnTreeEnsembleModels(ExtTestCase):

folder = get_temp_folder(__file__, "temp_dump")

def setUp(self):
logger = getLogger('skl2onnx')
logger.disabled = True
register_rewritten_operators()

def test_random_forest_classifier(self):
model = RandomForestClassifier(n_estimators=3)
dump_binary_classification(model, folder=self.folder)
dump_multiple_classification(model, folder=self.folder)

def test_random_forest_regressor(self):
model = RandomForestRegressor(n_estimators=3)
dump_single_regression(model, folder=self.folder)
dump_multiple_regression(model, folder=self.folder)

def test_extra_trees_classifier(self):
model = ExtraTreesClassifier(n_estimators=3)
dump_binary_classification(model, folder=self.folder)
dump_multiple_classification(model, folder=self.folder)

def test_extra_trees_regressor(self):
model = ExtraTreesRegressor(n_estimators=3)
dump_single_regression(model, folder=self.folder)
dump_multiple_regression(model, folder=self.folder)

def common_test_model_hgb_regressor(self, add_nan=False):
model = HistGradientBoostingRegressor(max_iter=5, max_depth=2)
X, y = make_regression(n_features=10, n_samples=1000,
n_targets=1, random_state=42)
if add_nan:
rows = numpy.random.randint(0, X.shape[0] - 1, X.shape[0] // 3)
cols = numpy.random.randint(0, X.shape[1] - 1, X.shape[0] // 3)
X[rows, cols] = numpy.nan

X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5,
random_state=42)
model.fit(X_train, y_train)

model_onnx = convert_sklearn(
model, "unused", [("input", FloatTensorType([None, X.shape[1]]))])
self.assertIsNotNone(model_onnx)
X_test = X_test.astype(numpy.float32)[:5]
dump_data_and_model(X_test, model, model_onnx, folder=self.folder)

def test_model_hgb_regressor_nonan(self):
self.common_test_model_hgb_regressor(False)

def test_model_hgb_regressor_nan(self):
self.common_test_model_hgb_regressor(True)

def common_test_model_hgb_classifier(self, add_nan=False, n_classes=2):
model = HistGradientBoostingClassifier(max_iter=5, max_depth=2)
X, y = make_classification(n_features=10, n_samples=1000,
n_informative=4, n_classes=n_classes,
random_state=42)
if add_nan:
rows = numpy.random.randint(0, X.shape[0] - 1, X.shape[0] // 3)
cols = numpy.random.randint(0, X.shape[1] - 1, X.shape[0] // 3)
X[rows, cols] = numpy.nan

X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.5,
random_state=42)
model.fit(X_train, y_train)

model_onnx = convert_sklearn(
model, "unused", [("input", FloatTensorType([None, X.shape[1]]))])
self.assertIsNotNone(model_onnx)
X_test = X_test.astype(numpy.float32)[:5]

dump_data_and_model(X_test, model, model_onnx, folder=self.folder)

def test_model_hgb_classifier_nonan(self):
self.common_test_model_hgb_classifier(False)

def test_model_hgb_classifier_nan(self):
self.common_test_model_hgb_classifier(True)

def test_model_hgb_classifier_nonan_multi(self):
self.common_test_model_hgb_classifier(False, n_classes=3)

def test_model_hgb_classifier_nan_multi(self):
self.common_test_model_hgb_classifier(True, n_classes=3)

def test_model_random_forest_classifier_multilabel(self):
model, X_test = fit_multilabel_classification_model(
RandomForestClassifier(random_state=42))
options = {id(model): {'zipmap': False}}
model_onnx = convert_sklearn(
model, "scikit-learn RandomForestClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
options=options)
self.assertTrue(model_onnx is not None)
self.assertNotIn('zipmap', str(model_onnx).lower())
dump_data_and_model(X_test, model, model_onnx,
basename="SklearnRandomForestClassifierMultiLabel-Out0",
folder=self.folder)

def test_model_random_forest_classifier_multilabel_low_samples(self):
model, X_test = fit_multilabel_classification_model(
RandomForestClassifier(random_state=42), n_samples=4)
options = {id(model): {'zipmap': False}}
model_onnx = convert_sklearn(
model, "scikit-learn RandomForestClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
options=options)
self.assertTrue(model_onnx is not None)
self.assertNotIn('zipmap', str(model_onnx).lower())
dump_data_and_model(X_test, model, model_onnx,
basename="SklearnRandomForestClassifierMultiLabelLowSamples-Out0",
folder=self.folder)

def test_model_extra_trees_classifier_multilabel(self):
model, X_test = fit_multilabel_classification_model(
ExtraTreesClassifier(random_state=42))
options = {id(model): {'zipmap': False}}
model_onnx = convert_sklearn(
model, "scikit-learn ExtraTreesClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
options=options)
self.assertTrue(model_onnx is not None)
self.assertNotIn('zipmap', str(model_onnx).lower())
dump_data_and_model(X_test, model, model_onnx,
basename="SklearnExtraTreesClassifierMultiLabel-Out0",
folder=self.folder)

def test_model_extra_trees_classifier_multilabel_low_samples(self):
model, X_test = fit_multilabel_classification_model(
ExtraTreesClassifier(random_state=42), n_samples=10)
options = {id(model): {'zipmap': False}}
model_onnx = convert_sklearn(
model, "scikit-learn ExtraTreesClassifier",
[("input", FloatTensorType([None, X_test.shape[1]]))],
options=options)
self.assertTrue(model_onnx is not None)
self.assertNotIn('zipmap', str(model_onnx).lower())
dump_data_and_model(X_test, model, model_onnx,
basename="SklearnExtraTreesClassifierMultiLabelLowSamples-Out0",
folder=self.folder)


if __name__ == "__main__":
TestSklearnTreeEnsembleModels().setUp()
TestSklearnTreeEnsembleModels().test_random_forest_classifier()
unittest.main()
10 changes: 5 additions & 5 deletions mlprodict/asv_benchmark/create_asv.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import re
from onnx.defs import onnx_opset_version # pylint: disable=W0611
try:
from pyquickhelper.pycode.code_helper import remove_extra_spaces_and_pep8
from pyquickhelper.pycode.code_helper import remove_extra_spaces_and_pep8 # pragma: no cover
except ImportError:
remove_extra_spaces_and_pep8 = lambda code, *args, **kwargs: code
try:
Expand All @@ -26,7 +26,7 @@
add_model_import_init,
find_missing_sklearn_imports)
except ImportError:
from mlprodict.asv_benchmark._create_asv_helper import (
from mlprodict.asv_benchmark._create_asv_helper import ( # pragma: no cover
default_asv_conf,
flask_helper,
pyspy_template,
Expand All @@ -44,22 +44,22 @@
from ..onnxrt.validate.validate import (
_retrieve_problems_extra, _get_problem_data, _merge_options)
from ..tools.asv_options_helper import shorten_onnx_options
except (ValueError, ImportError):
except (ValueError, ImportError): # pragma: no cover
from mlprodict.onnxrt.validate.validate_helper import (
get_opset_number_from_onnx, sklearn_operators)
from mlprodict.onnxrt.validate.validate import (
_retrieve_problems_extra, _get_problem_data, _merge_options)
from mlprodict.tools.asv_options_helper import shorten_onnx_options
try:
from ..testing.verify_code import verify_code
except (ValueError, ImportError):
except (ValueError, ImportError): # pragma: no cover
from mlprodict.testing.verify_code import verify_code

# exec function does not import models but potentially
# requires all specific models used to defines scenarios
try:
from ..onnxrt.validate.validate_scenarios import * # pylint: disable=W0614,W0401
except ValueError:
except ValueError: # pragma: no cover
# Skips this step if used in a benchmark.
pass

Expand Down
9 changes: 5 additions & 4 deletions mlprodict/onnxrt/ops_cpu/_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,14 @@ def __init__(self, onnx_node, desc=None, expected_attributes=None,

for k, v in self._schema.attributes.items():
if not hasattr(self, k):
import pprint
raise RuntimeError(
import pprint # pragma: no cover
raise RuntimeError( # pragma: no cover
"Attribute '{}' is expected based on ONNX specifications "
"for node '{}' and options {}.".format(
k, onnx_node.op_type, pprint.pformat(options)))

def _find_custom_operator_schema(self, op_name):
raise NotImplementedError(
raise NotImplementedError( # pragma: no cover
"This method should be overwritten for operator '{}'.".format(op_name))

def __str__(self):
Expand All @@ -108,7 +108,8 @@ def _run(self, *args, **kwargs):
"""
Should be overwritten.
"""
raise NotImplementedError("This method should be overwritten.")
raise NotImplementedError(
"This method should be overwritten.") # pragma: no cover

def run(self, *args, **kwargs): # pylint: disable=E0202
"""
Expand Down
2 changes: 1 addition & 1 deletion mlprodict/onnxrt/ops_cpu/op_tree_ensemble_classifier_.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ void RuntimeTreeEnsembleClassifier<NTYPE>::init(
array2vector(class_weights_, class_weights, NTYPE);
array2vector(base_values_, base_values, NTYPE);
if (classlabels_strings.size() > 0)
throw std::runtime_error("This runtime only handles integers.");
throw std::runtime_error("This runtime only handles integers for class labels.");
// classlabels_strings_ = classlabels_strings;
array2vector(classlabels_int64s_, classlabels_int64s, int64_t);
post_transform_ = to_POST_EVAL_TRANSFORM(post_transform);
Expand Down
20 changes: 18 additions & 2 deletions mlprodict/onnxrt/ops_cpu/op_zipmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
@file
@brief Runtime operator.
"""
import numpy
from ._op import OpRun
from ..shape_object import ShapeObject

Expand All @@ -30,6 +31,11 @@ def __init__(self, rev_keys, values, mat=None):
@param mat matrix if values is a row index,
one or two dimensions
"""
if not isinstance(mat, numpy.ndarray):
raise TypeError('matrix is expected, got {}.'.format(type(mat)))
if len(mat.shape) not in (2, 3):
raise ValueError("matrix must have two or three dimensions but got {}"
".".format(mat.shape))
dict.__init__(self)
self._rev_keys = rev_keys
self._values = values
Expand All @@ -41,8 +47,7 @@ def __getitem__(self, key):
"""
if self._mat is None:
return self._values[self._rev_keys[key]]
else:
return self._mat[self._values, self._rev_keys[key]]
return self._mat[self._values, self._rev_keys[key]]

def __setitem__(self, pos, value):
raise RuntimeError(
Expand Down Expand Up @@ -87,6 +92,9 @@ def asdict(self):
res[k] = v
return res

def __str__(self):
return "ZipMap(%r)" % str(self.asdict())


class ArrayZipMapDictionary(list):
"""
Expand All @@ -104,6 +112,11 @@ def __init__(self, rev_keys, mat):
@param mat matrix if values is a row index,
one or two dimensions
"""
if not isinstance(mat, numpy.ndarray):
raise TypeError('matrix is expected, got {}.'.format(type(mat)))
if len(mat.shape) not in (2, 3):
raise ValueError("matrix must have two or three dimensions but got {}"
".".format(mat.shape))
list.__init__(self)
self._rev_keys = rev_keys
self._mat = mat
Expand Down Expand Up @@ -154,6 +167,9 @@ def columns(self):
def is_zip_map(self):
return True

def __str__(self):
return 'ZipMaps[%s]' % ', '.join(map(str, self))


class ZipMap(OpRun):
"""
Expand Down
11 changes: 6 additions & 5 deletions mlprodict/onnxrt/shape_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def evaluate(self, **kwargs):
Evaluates the object, reduces the expression
to a number or a string.
"""
raise NotImplementedError()
raise NotImplementedError() # pragma: no cover


class ShapeOperator(BaseDimensionShape):
Expand Down Expand Up @@ -61,7 +61,7 @@ def to_string(self, use_x=True):
@return a string
"""
raise NotImplementedError(
raise NotImplementedError( # pragma: no cover
"Operator '{}' does not implement 'to_string': {}.".format(
self.__class__.__name__, repr(self)))

Expand Down Expand Up @@ -98,7 +98,8 @@ def _evaluate_string_(self, args, **kwargs):
@param kwargs value for the variables.
@return string or integer
"""
raise NotImplementedError("This function must be overwritten.")
raise NotImplementedError(
"This function must be overwritten.") # pragma: no cover


class ShapeBinaryOperator(ShapeOperator):
Expand All @@ -117,9 +118,9 @@ def __init__(self, name, fct, fct_string, x, y):
"""
ShapeOperator.__init__(self, name, fct, fct_string, x, y)
if isinstance(x, tuple):
raise TypeError('x cannot be a tuple')
raise TypeError('x cannot be a tuple') # pragma: no cover
if isinstance(y, tuple):
raise TypeError('y cannot be a tuple')
raise TypeError('y cannot be a tuple') # pragma: no cover

def to_string(self, use_x=True):
"""
Expand Down
Loading

0 comments on commit 64ede6f

Please sign in to comment.