3,144 changes: 1,572 additions & 1,572 deletions _doc/notebooks/onnx_fft.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion _doc/sphinxdoc/source/api/sklearn.rst

This file was deleted.

85 changes: 83 additions & 2 deletions _unittests/ut_onnx_conv/test_lightgbm_tree_structure.py
@@ -4,12 +4,23 @@
import unittest
from logging import getLogger
import copy
import json
import base64
import lzma
import numpy
from pandas import DataFrame
from pyquickhelper.pycode import ExtTestCase

try:
    from pyquickhelper.pycode.unittest_cst import decompress_cst
except ImportError:
    decompress_cst = lambda d: json.loads(
        lzma.decompress(base64.b64decode(b"".join(d))))

from skl2onnx.common.data_types import FloatTensorType
from sklearn.datasets import load_iris
from mlprodict.onnx_conv.operator_converters.conv_lightgbm import modify_tree_for_rule_in_set
from mlprodict.onnx_conv.helpers.lgbm_helper import (
modify_tree_for_rule_in_set, restore_lgbm_info)
from mlprodict.onnx_conv.parsers.parse_lightgbm import MockWrappedLightGbmBoosterClassifier
from mlprodict.onnx_conv import register_converters, to_onnx
from mlprodict.onnxrt import OnnxInference
@@ -30,6 +41,30 @@ def count_nodes(tree, done=None):
return nb


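# Helper that strips a LightGBM dump down to its bare structure:
# split/leaf statistics are removed and leaf values zeroed out,
# presumably the preprocessing applied before compressing tree3 below.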
def clean_tree(tree):
    def walk_through(tree):
        if 'tree_structure' in tree:
            for w in walk_through(tree['tree_structure']):
                yield w
        yield tree
        if 'left_child' in tree:
            for w in walk_through(tree['left_child']):
                yield w
        if 'right_child' in tree:
            for w in walk_through(tree['right_child']):
                yield w

    nodes = list(walk_through(tree))
    for node in nodes:
        for k in ['split_gain', 'split_feature', 'split_index', 'leaf_count',
                  'internal_value', 'internal_weight', 'internal_count',
                  'leaf_weight']:
            if k in node:
                del node[k]
        for k in ['leaf_value']:
            if k in node:
                node[k] = 0


tree2 = {'average_output': False,
'feature_names': ['c1', 'c2', 'c3', 'c4'],
'label_index': 0,
@@ -140,6 +175,29 @@ def count_nodes(tree, done=None):
'version': 'v2'}


# This constant was built by applying the function
# pyquickhelper.pycode.unittest_cst.compress_cst.

tree3 = decompress_cst([
b'/Td6WFoAAATm1rRGAgAhARYAAAB0L+Wj4Ck9A2tdAD2IiodjqVNsvcJJI6C9h2Y0CbG5b7',
b'OaqsqxvLBzg7BltxogYoUzxj35qbUETbBAyJeMccezEDeIKOT1GB+I50txUuc8zkWDcp/n',
b'kx2YhORZxAyj55pXJF/xW5aySLknuTn/5cRfSL9AGF7dHdW9k8RqP5GONWx3YvvnP0tCW0',
b'lGKd5caxoNFaB5tg+je6f0s6N6QQo8wqrBPtjJ7bQf50vFrpYgkQNAEZIVutpzgE9c4o1L',
b'Uv/vJgnhQXOpk/4hOCV2q8VG+jD9oIjPINOOZ642k2QmsdWC+l3XagJnbN9dqT/4C9ehfM',
b'nf6Bw5XcRXD4rtmOyUq/ocuh1WfPinlKd/Jn0YOydq1FpH+VNSUjjPUGJbJal4Pa6jcx/Y',
b'9mcBjp9kP1pM5wkCJ52Kv12UQ/+2j+n0rUQbbqs10iFJo4h4KB/Ie/bugBLNItmNhNhDP4',
b'36Q6jCsLsXlu0gTiWZfGQapR+DJIsVKHh9GeagotXpHTwYX72KrTFwIdxgf9Y2X1EUqiJV',
b'wXdP7GprCs9QsIvCkqW59hPNStt2tyWtlSsXsnjU5e0Jn3USVHOcbwCBSpCtFlpg8tiS9m',
b'Zv1TIGj9cvEk1Ke9p6bZelvtXqHJRISJ8fCVjrqTnEjyUdPaG1wmqCyz7NFEkngrBinY7e',
b'ZMHmO1y6IhLI1zN0kq8zBHIQeqUruYgBatPI6jI585wQ6mYCobgQc7B6Ae6XlgOthATrr2',
b'oDdnIeAPeUKVMXPIq9NnwlwsyNEoTddI42NiMde8jVzVm4wwwnqrmbKlJsi5LJhRQlaEFX',
b'etzNn7llkCSwv88gYhcaDWP3Ewchse2iQDkJ0dPZhx0FB18X6wvEcwkt/H+dzTgAYOCSkr',
b'T3thNkPCvQ4keiRzHiWNzLc+NAhz5NX8BXsVQFkEyf4oUkKHjy053LBmXpHM75LBhdJmFH',
b'vqRENHF6QgiPLAjc/1NHatYLcY0VRetr55Bp2jWU+z75P2TrMkTHFnjbOEQ3p13USzVmnq',
b'3d0EUvp5Q5dUPDFAIhkH+oUkgK4lX2xlyEGh+23EqQtmkjOyKj7HPHoPZo2AjASlRTc78u',
b'1c9nWkTbwBGbZUsMmWzyjbDe/h2Yi2GvkSkIh8UKtYDlTzpT62G9Chf5N9HEfFjQWcdCEi',
b'7Y3Hx86ee03jpP42ssAADRqUIMvx3yYwABhwe+UgAA2u9V4LHEZ/sCAAAAAARZWg=='])


class TestLightGbmTreeStructure(ExtTestCase):

    def setUp(self):
@@ -223,7 +281,6 @@ def test_onnxrt_python_lightgbm_categorical2(self):
self.assertEqual(nb2, 18)

def test_mock_lightgbm(self):

tree = copy.deepcopy(tree2)
nb1 = sum(count_nodes(t['tree_structure']) for t in tree['tree_info'])
model = MockWrappedLightGbmBoosterClassifier(tree)
@@ -258,6 +315,30 @@ def test_mock_lightgbm(self):
prob = DataFrame(pred["output_probability"]).values
self.assertEqual(prob.shape, (row, 2))

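    # modify_tree_for_rule_in_set applied to two fresh copies of tree3,
    # each time with the node list precomputed by restore_lgbm_info,
    # must yield identical trees: the rewrite is deterministic.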
    def test_mock_lightgbm_info(self):
        tree = copy.deepcopy(tree3)
        info = restore_lgbm_info(tree)
        modify_tree_for_rule_in_set(tree, info=info)
        expected = tree
        tree = copy.deepcopy(tree3)
        info = restore_lgbm_info(tree)
        modify_tree_for_rule_in_set(tree, info=info)
        self.assertEqual(expected, tree)

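    # Profiles a hundred runs of modify_tree_for_rule_in_set with the
    # precomputed info; self.profile comes from ExtTestCase and its text
    # report is expected to contain the 'cumtime' column.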
    def test_mock_lightgbm_profile(self):
        tree = copy.deepcopy(tree3)
        info = restore_lgbm_info(tree)
        self.assertIsInstance(info, list)
        self.assertGreater(len(info), 1)

        def g():
            for _ in range(0, 100):
                modify_tree_for_rule_in_set(tree, info=info)

        p2 = self.profile(g)[1]
        self.assertIn('cumtime', p2)
        if __name__ == "__main__":
            print(p2)


if __name__ == "__main__":
unittest.main()
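For reference, the compressed tree3 constant in this test is exactly the kind of fixture the decompress_cst fallback above expects: a JSON dump of the booster, lzma-compressed, base64-encoded and split into byte chunks. Below is a minimal round-trip sketch using only the standard library; the helper names are hypothetical and the real pyquickhelper.pycode.unittest_cst.compress_cst may format its output differently.

import base64
import json
import lzma


def compress_cst_sketch(obj, width=74):
    # Serialize to JSON, compress with lzma, base64-encode, then cut the
    # result into fixed-width byte chunks that can be pasted as a constant.
    enc = base64.b64encode(lzma.compress(json.dumps(obj).encode('utf-8')))
    return [enc[i:i + width] for i in range(0, len(enc), width)]


def decompress_cst_sketch(chunks):
    # Mirror of the fallback defined in the test: join the chunks,
    # base64-decode, decompress and parse the JSON payload.
    return json.loads(lzma.decompress(base64.b64decode(b"".join(chunks))))


sample = {'average_output': False, 'tree_info': []}
assert decompress_cst_sketch(compress_cst_sketch(sample)) == sample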
115 changes: 113 additions & 2 deletions _unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py
@@ -1,5 +1,5 @@
"""
@brief test log(time=3s)
@brief test log(time=6s)
"""
import sys
import unittest
@@ -200,6 +200,116 @@ def test_onnxrt_python_lightgbm_categorical_iris(self):
values = pandas.DataFrame(got['output_probability']).values
self.assertEqualArray(exp, values[:, 1], decimal=5)

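    # An LGBMClassifier and a Booster trained with
    # categorical_feature=['c1', 'c2'] on integer features are both
    # converted to ONNX and their probabilities compared with LightGBM's.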
    @skipif_circleci('stuck')
    @unittest.skipIf(sys.platform == 'darwin', 'stuck')
    @ignore_warnings((RuntimeWarning, UserWarning))
    def test_onnxrt_python_lightgbm_categorical_iris_booster3(self):
        from lightgbm import LGBMClassifier, Dataset, train as lgb_train

        iris = load_iris()
        X, y = iris.data, iris.target
        X = (X * 10).astype(numpy.int32)
        X_train, X_test, y_train, _ = train_test_split(
            X, y, random_state=11)
        other_x = numpy.random.randint(
            0, high=10, size=(1500, X_train.shape[1]))
        X_train = numpy.vstack([X_train, other_x]).astype(dtype=numpy.int32)
        y_train = numpy.hstack(
            [y_train, numpy.zeros(500) + 3, numpy.zeros(500) + 4,
             numpy.zeros(500) + 5]).astype(dtype=numpy.int32)
        self.assertEqual(y_train.shape, (X_train.shape[0], ))

        # Classic
        gbm = LGBMClassifier()
        gbm.fit(X_train, y_train)
        exp = gbm.predict_proba(X_test)
        onx = to_onnx(gbm, initial_types=[
            ('X', Int64TensorType([None, X_train.shape[1]]))])
        self.assertIn('ZipMap', str(onx))
        oif = OnnxInference(onx)
        got = oif.run({'X': X_test})
        values = pandas.DataFrame(got['output_probability']).values
        self.assertEqualArray(exp, values, decimal=5)

        # categorical_feature=[0, 1]
        train_data = Dataset(
            X_train, label=y_train,
            feature_name=['c1', 'c2', 'c3', 'c4'],
            categorical_feature=['c1', 'c2'])

        params = {
            "boosting_type": "gbdt",
            "learning_rate": 0.05,
            "n_estimators": 2,
            "objective": "binary",
            "max_bin": 5,
            "min_child_samples": 100,
            'verbose': -1,
        }

        booster = lgb_train(params, train_data)
        exp = booster.predict(X_test)

        onx = to_onnx(booster, initial_types=[
            ('X', Int64TensorType([None, X_train.shape[1]]))])
        self.assertIn('ZipMap', str(onx))
        oif = OnnxInference(onx)
        got = oif.run({'X': X_test})
        values = pandas.DataFrame(got['output_probability']).values
        self.assertEqualArray(exp, values[:, 1], decimal=5)

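    # Same scenario with float32 features and a multiclass objective;
    # the raw Booster (gbm.booster_) is converted directly instead of
    # the scikit-learn wrapper.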
    @skipif_circleci('stuck')
    @unittest.skipIf(sys.platform == 'darwin', 'stuck')
    @ignore_warnings((RuntimeWarning, UserWarning))
    def test_onnxrt_python_lightgbm_categorical_iris_booster3_real(self):
        from lightgbm import LGBMClassifier, Dataset, train as lgb_train

        iris = load_iris()
        X, y = iris.data, iris.target
        X = (X * 10).astype(numpy.float32)
        X_train, X_test, y_train, _ = train_test_split(
            X, y, random_state=11)

        # Classic
        gbm = LGBMClassifier()
        gbm.fit(X_train, y_train)
        exp = gbm.predict_proba(X_test)
        onx = to_onnx(gbm.booster_, initial_types=[
            ('X', FloatTensorType([None, X_train.shape[1]]))])
        self.assertIn('ZipMap', str(onx))
        oif = OnnxInference(onx)
        got = oif.run({'X': X_test})
        values = pandas.DataFrame(got['output_probability']).values
        self.assertEqualArray(exp, values, decimal=5)

        # categorical_feature=[0, 1]
        train_data = Dataset(
            X_train, label=y_train,
            feature_name=['c1', 'c2', 'c3', 'c4'],
            categorical_feature=['c1', 'c2'])

        params = {
            "boosting_type": "gbdt",
            "learning_rate": 0.05,
            "n_estimators": 2,
            "objective": "multiclass",
            "max_bin": 5,
            "min_child_samples": 100,
            'verbose': -1,
            'num_class': 3,
        }

        booster = lgb_train(params, train_data)
        exp = booster.predict(X_test)

        onx = to_onnx(booster, initial_types=[
            ('X', FloatTensorType([None, X_train.shape[1]]))])
        self.assertIn('ZipMap', str(onx))
        oif = OnnxInference(onx)
        got = oif.run({'X': X_test})
        values = pandas.DataFrame(got['output_probability']).values
        self.assertEqualArray(exp, values, decimal=5)

@skipif_circleci('stuck')
@unittest.skipIf(sys.platform == 'darwin', 'stuck')
@ignore_warnings((RuntimeWarning, UserWarning))
@@ -286,9 +396,10 @@ def test_lightgbm_booster_classifier(self):
'subsample_freq': 1, 'bagging_fraction': 0.5,
'feature_fraction': 0.5},
data)
model_onnx = to_onnx(model, X)
model_onnx = to_onnx(model, X, verbose=2, rewrite_ops=True)
self.assertNotEmpty(model_onnx)


if __name__ == "__main__":
# TestOnnxrtRuntimeLightGbm().test_lightgbm_booster_classifier()
unittest.main()