Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
Checks dump options
Browse files Browse the repository at this point in the history
  • Loading branch information
sdpython committed Mar 28, 2020
1 parent ba7b4c8 commit 44b29b2
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 16 deletions.
63 changes: 63 additions & 0 deletions _unittests/ut_onnxrt/test_onnxrt_validate_dump_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
@brief test log(time=5s)
"""
import os
import unittest
import pickle
from logging import getLogger
from pandas import DataFrame
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import (
get_temp_folder, ExtTestCase, skipif_circleci, unittest_require_at_least
)
from sklearn.exceptions import ConvergenceWarning
try:
from sklearn.utils._testing import ignore_warnings
except ImportError:
from sklearn.utils.testing import ignore_warnings
import skl2onnx
from mlprodict.onnxrt.validate import enumerate_validated_operator_opsets


class TestOnnxrtValidateDumpAll(ExtTestCase):
    """Validates the ``dump_all`` / ``dump_folder`` options of
    :func:`enumerate_validated_operator_opsets`."""

    @unittest_require_at_least(skl2onnx, '1.5.9999')
    @skipif_circleci("too long")
    @ignore_warnings(category=(UserWarning, ConvergenceWarning, RuntimeWarning))
    def test_validate_sklearn_operators_dump_all(self):
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        # Silence skl2onnx's own logging during the conversion loop.
        getLogger('skl2onnx').disabled = True
        verbose = 0 if __name__ != "__main__" else 1
        temp = get_temp_folder(
            __file__, "temp_validate_sklearn_operators_dump_all")

        # dump_all=True without a dump_folder is invalid and must raise.
        def run_without_folder():
            return list(enumerate_validated_operator_opsets(
                verbose, models={"DecisionTreeClassifier"},
                filter_exp=lambda m, p: '64' not in p,
                fLOG=fLOG, dump_all=True))

        self.assertRaise(run_without_folder, ValueError)

        # Same call with a destination folder succeeds and yields rows.
        rows = list(enumerate_validated_operator_opsets(
            verbose, models={"DecisionTreeClassifier"},
            filter_exp=lambda m, p: '64' not in p,
            fLOG=fLOG, dump_all=True, dump_folder=temp))
        self.assertGreater(len(rows), 1)
        df = DataFrame(rows)
        self.assertGreater(df.shape[1], 1)
        fLOG("output results")
        df.to_csv(os.path.join(temp, "sklearn_opsets_report.csv"), index=False)
        df.to_excel(os.path.join(
            temp, "sklearn_opsets_report.xlsx"), index=False)

        # The dump must contain a pickled bundle with model, data and ONNX bytes.
        stored = os.path.join(
            temp, "dump-i-python-DecisionTreeClassifier-default-b-cl-tree._classes.DecisionTreeClassifierzipmapFalse-op11-nf4.pkl")
        with open(stored, "rb") as f:
            obj = pickle.load(f)
        for expected_key in ('onnx_bytes', 'skl_model', 'X_test', 'Xort_test'):
            self.assertIn(expected_key, obj)

# Allow executing this test module directly from the command line.
if __name__ == "__main__":
    unittest.main()
2 changes: 1 addition & 1 deletion mlprodict/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
@brief Ways to speed up predictions for a machine learned model.
"""

__version__ = "0.3.1048"
__version__ = "0.3.1050"
__author__ = "Xavier Dupré"


Expand Down
2 changes: 2 additions & 0 deletions mlprodict/onnxrt/validate/validate_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ def dump_into_folder(dump_folder, obs_op=None, is_error=True,
@param kwargs additional parameters
@return name
"""
if dump_folder is None:
raise ValueError("dump_folder cannot be None.")
optim = obs_op.get('optim', '')
optim = str(optim)
optim = optim.replace("<class 'sklearn.", "")
Expand Down
46 changes: 31 additions & 15 deletions mlprodict/onnxrt/validate/validate_scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
VotingClassifier, AdaBoostRegressor, VotingRegressor,
ExtraTreesRegressor, ExtraTreesClassifier,
RandomForestRegressor, RandomForestClassifier,
HistGradientBoostingRegressor, HistGradientBoostingClassifier
HistGradientBoostingRegressor, HistGradientBoostingClassifier,
AdaBoostClassifier
)
from sklearn.feature_extraction import DictVectorizer, FeatureHasher
from sklearn.feature_selection import (
Expand All @@ -25,10 +26,12 @@
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier, OutputCodeClassifier
from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier, ClassifierChain, RegressorChain
from sklearn.neighbors import LocalOutlierFactor, KNeighborsRegressor, KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import Normalizer, PowerTransformer
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
from sklearn.svm import SVC, NuSVC, SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, ExtraTreeClassifier

try:
from sklearn.ensemble import StackingClassifier, StackingRegressor
except ImportError:
Expand All @@ -51,10 +54,12 @@ def build_custom_scenarios():
# skips
SparseCoder: None,
# scenarios
AdaBoostClassifier: [
('default', {'n_estimators': 5},
{'conv_options': [{AdaBoostClassifier: {'zipmap': False}}]}),
],
AdaBoostRegressor: [
('default', {
'n_estimators': 5,
}),
('default', {'n_estimators': 5}),
],
CalibratedClassifierCV: [
('sgd', {
Expand All @@ -67,11 +72,20 @@ def build_custom_scenarios():
'base_estimator': LogisticRegression(solver='liblinear'),
})
],
DecisionTreeClassifier: [
('default', {}, {'conv_options': [
{DecisionTreeClassifier: {'zipmap': False}}]})
],
DictVectorizer: [
('default', {}),
],
ExtraTreeClassifier: [
('default', {},
{'conv_options': [{ExtraTreeClassifier: {'zipmap': False}}]}),
],
ExtraTreesClassifier: [
('default', {'n_estimators': 10}),
('default', {'n_estimators': 10},
{'conv_options': [{ExtraTreesClassifier: {'zipmap': False}}]}),
],
ExtraTreesRegressor: [
('default', {'n_estimators': 10}),
Expand Down Expand Up @@ -117,7 +131,8 @@ def build_custom_scenarios():
}, ['cluster']),
],
HistGradientBoostingClassifier: [
('default', {'max_iter': 10}),
('default', {'max_iter': 10},
{'conv_options': [{HistGradientBoostingClassifier: {'zipmap': False}}]}),
],
HistGradientBoostingRegressor: [
('default', {'max_iter': 10}),
Expand Down Expand Up @@ -154,9 +169,7 @@ def build_custom_scenarios():
('default', {'n_components': 2}),
],
LocalOutlierFactor: [
('novelty', {
'novelty': True,
}),
('novelty', {'novelty': True}),
],
LogisticRegression: [
('liblinear', {'solver': 'liblinear', },
Expand All @@ -168,6 +181,10 @@ def build_custom_scenarios():
{'conv_options': [{LogisticRegression: {'raw_scores': True, 'zipmap': False}}],
'subset_problems': ['~b-cl-dec', '~m-cl-dec']}),
],
MLPClassifier: [
('default', {}, {'conv_options': [
{MLPClassifier: {'zipmap': False}}]}),
],
MultiOutputClassifier: [
('logreg', {
'estimator': LogisticRegression(solver='liblinear'),
Expand Down Expand Up @@ -260,9 +277,8 @@ def build_custom_scenarios():
}),
],
SGDClassifier: [
('log', {
'loss': 'log',
}),
('log', {'loss': 'log'},
{'conv_options': [{SGDClassifier: {'zipmap': False}}]}),
],
SparseRandomProjection: [
('eps95', {'eps': 0.95}),
Expand Down Expand Up @@ -293,7 +309,7 @@ def build_custom_scenarios():
('lr2', LogisticRegression(
solver='liblinear', fit_intercept=False)),
],
})
}, {'conv_options': [{VotingClassifier: {'zipmap': False}}]})
],
VotingRegressor: [
('linreg', {
Expand All @@ -313,7 +329,7 @@ def build_custom_scenarios():
('lr2', LogisticRegression(
solver='liblinear', fit_intercept=False)),
],
})
}, {'conv_options': [{StackingClassifier: {'zipmap': False}}]})
],
StackingRegressor: [
('linreg', {
Expand Down

0 comments on commit 44b29b2

Please sign in to comment.