Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
Experiment around random forest
Browse files Browse the repository at this point in the history
  • Loading branch information
sdpython committed Aug 20, 2019
1 parent 6f0f472 commit 2880f49
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 5 deletions.
2 changes: 1 addition & 1 deletion _doc/notebooks/onnx_float32_and_64.ipynb
Expand Up @@ -1029,7 +1029,7 @@
}
],
"source": [
"from mlprodict.onnxrt.side_by_side import side_by_side_by_values\n",
"from mlprodict.onnxrt.validate.side_by_side import side_by_side_by_values\n",
"sbs = side_by_side_by_values([(oinf32, {'X': X_test.astype(numpy.float32)}),\n",
" (oinf64, {'X': X_test.astype(numpy.float64)})])\n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion _doc/notebooks/onnx_sbs.ipynb
Expand Up @@ -1338,7 +1338,7 @@
}
],
"source": [
"from mlprodict.onnxrt.side_by_side import side_by_side_by_values\n",
"from mlprodict.onnxrt.validate.side_by_side import side_by_side_by_values\n",
"from pandas import DataFrame\n",
"\n",
"def run_sbs(r1, r2, r3, x):\n",
Expand Down
110 changes: 110 additions & 0 deletions _unittests/ut_onnxrt/test_onnxrt_python_runtime_ml_tree_rf.py
@@ -0,0 +1,110 @@
"""
@brief test log(time=2s)
"""
import unittest
from logging import getLogger
import warnings
import numpy
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.utils.testing import ignore_warnings
from sklearn.ensemble import RandomForestRegressor
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import ExtTestCase
from mlprodict.onnxrt import OnnxInference, to_onnx
from mlprodict.onnxrt.validate import enumerate_validated_operator_opsets


class TestOnnxrtPythonRuntimeMlTreeRF(ExtTestCase):
    """Checks the python ONNX runtime for *TreeEnsembleRegressor*
    obtained by converting a :epkg:`RandomForestRegressor`,
    for both float32 and float64 inputs."""

    def setUp(self):
        # Silence skl2onnx logging so the test output stays readable.
        logger = getLogger('skl2onnx')
        logger.disabled = True

    def onnxrt_python_RandomForestRegressor_dtype(self, dtype, n=37, full=False):
        """Trains a random forest on Iris, converts it to ONNX with the
        requested *dtype*, runs the python runtime and compares the
        predictions against :epkg:`scikit-learn`.

        @param      dtype   *numpy.float32* or *numpy.float64*, used both
                            for the conversion and to cast the test data
        @param      n       start index of the compared slice of rows
                            (NOTE(review): overwritten below in both
                            branches, so the argument is effectively
                            ignored — confirm whether that is intended)
        @param      full    if True, trains a forest with default
                            hyperparameters on a different random split,
                            otherwise a small forest (10 trees, depth 4)
        """
        iris = load_iris()
        X, y = iris.data, iris.target
        # A different split seed is used for the full model (13 vs 11).
        X_train, X_test, y_train, _ = train_test_split(X, y,
                                                       random_state=11 if not full else 13)
        X_test = X_test.astype(dtype)
        if full:
            clr = RandomForestRegressor(n_jobs=1)
        else:
            clr = RandomForestRegressor(n_estimators=10, n_jobs=1, max_depth=4)
        clr.fit(X_train, y_train)

        # rewrite_ops=True swaps in the rewritten converters so that
        # TreeEnsembleRegressor also works with double precision.
        model_def = to_onnx(clr, X_train.astype(dtype),
                            dtype=dtype, rewrite_ops=True)
        oinf = OnnxInference(model_def)

        # First node of the inference sequence — expected to hold the
        # tree ensemble runtime (checked through the string dump below).
        tt = oinf.sequence_[0].ops_

        text = "\n".join(map(lambda x: str(x.ops_), oinf.sequence_))
        self.assertIn("TreeEnsembleRegressor", text)
        # *n* is overwritten here: the method argument has no effect.
        if full:
            n = 34
            X_test = X_test[n:n + 5]
        else:
            n = 37
            X_test = X_test[n:n + 5]
        # Two rows slightly perturbed around the first one, to exercise
        # behaviour close to the tree thresholds.
        # (NOTE(review): indentation reconstructed — confirm this line is
        # meant to run for both branches of the if/else above.)
        X_test = numpy.vstack([X_test, X_test[:1].copy() * 1.01,
                               X_test[:1].copy() * 0.99])
        y = oinf.run({'X': X_test})
        self.assertEqual(list(sorted(y)), ['variable'])
        lexp = clr.predict(X_test)
        # NOTE(review): the block below gathers sorted thresholds from the
        # runtime and from scikit-learn but never asserts on them — it
        # looks like debugging leftovers from the experiment.  Beware that
        # vals.sort() mutates tt.rt_.nodes_values_ in place, and that the
        # loop variable shadows *tt*.
        vals = tt.rt_.nodes_values_
        ori = []
        for tt in clr.estimators_:
            ori.extend(tt.tree_.threshold)
        ori.sort()
        vals.sort()
        tori = numpy.array(ori)
        tval = numpy.array(vals)
        # -2 is the threshold value scikit-learn stores for leaves;
        # filter those out before comparing.
        tval = tval[tori > -2]
        tori = tori[tori > -2]
        self.assertEqualArray(lexp, y['variable'])

    @ignore_warnings(category=(UserWarning, RuntimeWarning, DeprecationWarning))
    def test_onnxrt_python_RandomForestRegressor(self):
        # float32 may fail with small numerical discrepancies; only that
        # specific failure is tolerated.  float64 must match exactly.
        try:
            self.onnxrt_python_RandomForestRegressor_dtype(numpy.float32)
        except AssertionError as e:
            self.assertIn("Max absolute difference", str(e))
        self.onnxrt_python_RandomForestRegressor_dtype(numpy.float64)

    @ignore_warnings(category=(UserWarning, RuntimeWarning, DeprecationWarning))
    def test_onnxrt_python_RandomForestRegressor_full(self):
        # Same tolerance logic as above, applied to the full model.
        try:
            self.onnxrt_python_RandomForestRegressor_dtype(
                numpy.float32, full=True)
        except AssertionError as e:
            self.assertIn("Max absolute difference", str(e))
        try:
            self.onnxrt_python_RandomForestRegressor_dtype(
                numpy.float64, full=True)
        except AssertionError as e:
            # still issues: the double-precision full model does not match
            # yet, so the failure is only reported as a warning.
            # NOTE(review): warnings.warn is given the exception object
            # itself rather than str(e) — works, but unconventional.
            warnings.warn(e)

    @ignore_warnings(category=(UserWarning, RuntimeWarning, DeprecationWarning))
    def test_rt_RandomForestRegressor_python(self):
        """Runs the full validation pipeline for RandomForestRegressor on
        the python runtime (opset 11, double-precision regression only)."""
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        logger = getLogger('skl2onnx')
        logger.disabled = True
        # Verbose output only when the file is run as a script.
        verbose = 1 if __name__ == "__main__" else 0

        debug = True
        buffer = []

        # Collects validation log lines instead of printing them.
        def myprint(*args, **kwargs):
            buffer.append(" ".join(map(str, args)))

        rows = list(enumerate_validated_operator_opsets(
            verbose, models={"RandomForestRegressor"}, opset_min=11, opset_max=11, fLOG=myprint,
            runtime='python', debug=debug, filter_exp=lambda m, p: p == "~b-reg-64"))
        self.assertGreater(len(rows), 1)
        self.assertGreater(len(buffer), 1 if debug else 0)


if __name__ == "__main__":
    # Allows running this test file directly as a script.
    unittest.main()
2 changes: 1 addition & 1 deletion mlprodict/onnxrt/ops_cpu/_op.py
Expand Up @@ -78,7 +78,7 @@ def __str__(self):
if k in {'desc', 'onnx_node'}:
continue
if 'a' <= k[0] <= 'z' and k[-1] != '_':
atts.append(' {0}={1}'.format(k, v))
atts.append(' {0}={1},'.format(k, v))
atts.append(')')
return "\n".join(atts)

Expand Down
3 changes: 3 additions & 0 deletions mlprodict/onnxrt/ops_cpu/op_tree_ensemble_regressor_.cpp
Expand Up @@ -338,6 +338,8 @@ void RuntimeTreeEnsembleRegressor<NTYPE>::compute_gil_free(
for (size_t j = 0; j < roots_.size(); j++) {
//walk each tree from its root
ProcessTreeNode(scores, roots_[j], x_data, current_weight_0);
// printf(" i=%d: tree %d: %f, %f, %f\n", i, j,
// std::get<0>(scores[0]), std::get<1>(scores[0]), std::get<2>(scores[0]));
}
//find aggregate, could use a heap here if there are many classes
std::vector<NTYPE> outputs;
Expand All @@ -356,6 +358,7 @@ void RuntimeTreeEnsembleRegressor<NTYPE>::compute_gil_free(
val += std::get<2>(scores[j]); // third element of tuple is max
}
}
// printf("-i=%d val=%f\n", i, val);
outputs.push_back(val);
}
write_scores(outputs, post_transform_, (NTYPE*)Z_.data(i * n_targets_), -1);
Expand Down
2 changes: 1 addition & 1 deletion mlprodict/onnxrt/validate/validate.py
Expand Up @@ -302,7 +302,7 @@ def enumerate_compatible_opset(model, opset_min=9, opset_max=None,

def fct_conv(itt=inst, it=init_types[0][1], ops=opset, options=conv_options): # pylint: disable=W0102
return to_onnx(itt, it, target_opset=ops, options=options,
dtype=init_types[0][1], rewrite_ops=runtime == "python")
dtype=init_types[0][1], rewrite_ops=runtime in ('', None, 'python'))

if verbose >= 2 and fLOG is not None:
fLOG("[enumerate_compatible_opset] conversion to onnx")
Expand Down
5 changes: 4 additions & 1 deletion mlprodict/onnxrt/validate/validate_helper.py
Expand Up @@ -177,7 +177,10 @@ def to_onnx(model, X=None, name=None, initial_types=None,
@return converted model
The function rewrites function *to_onnx* from :epkg:`sklearn-onnx`
but may changes a few converters if
but may change a few converters if *rewrite_ops* is True.
For example, :epkg:`ONNX` only supports *TreeEnsembleRegressor*
for float but not for double. It becomes available
if ``dtype=numpy.float64`` and ``rewrite_ops=True``.
"""
from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin
from skl2onnx.algebra.type_helper import guess_initial_types
Expand Down

0 comments on commit 2880f49

Please sign in to comment.