Skip to content

Commit

Permalink
Fixes #18, add converters for xgboost (#192)
Browse files Browse the repository at this point in the history
* remove unnecessary print, add quote around filenames in some places

* replaces as_matrix by values (pandas warnings)

* changes variable name to avoid getting warnings about invalid names

* better consistency for converted, allows targeted onnx version to be None

* Revert "better consistency for converted, allows targetted onnx version to be None"

This reverts commit e257ca1.

* handle the comparison of ONNX versions in only one place

* fix bug with OneHotEncoder and scikit-learn 0.20

* release the constraint on scikit-learn (0.20.0 allowed)

* fix one type issue for Python 2.7

* add documentation to compare_strict_version

* Fixes #151, BernoulliNB converter

* Removes unused nodes in graph

* Addresses issue #143, enables build with keras 2.1.2

* Revert modifications due to a wrong merge

* update keras version

* Disable test on keras/mobilenet as it does not work

* add unit test for xception (failing)

* remove duplicate install

* skip unit test if not installed (tensorflow still not available on python 3.7)

* Fix when keras is not available

* Fix missing import

* Update test_single_operator_with_cntk_backend.py

* Set up CI with Azure Pipelines

* Update azure pipeline

* Skip a unit test if tensorflow is not installed

* merge

* missing import

* Revert "Merge branch 'master' of https://github.com/onnx/onnxmltools"

This reverts commit 178e763, reversing
changes made to 1a617ef.

* revert changes

* Revert changes

* \r

* \r

* first step in the migration of xgboost code

* XGBoost regression works

* Finalize xgboost converter

* Update README.md

* Add function has_tensorflow

* Update test_single_operator_with_cntk_backend.py

* better design for a unit test

* update xgboost classifier

* Delete test_keras_xception.py

* Delete requirements-deep.txt

* Delete test_keras_modebilenetv2.py

* less spaces

* lower precision for xgboost comparison tests

* disable xgboost testing on python 2
  • Loading branch information
xadupre authored and wenbingl committed Feb 14, 2019
1 parent 30d5fcf commit eef63ee
Show file tree
Hide file tree
Showing 24 changed files with 610 additions and 11 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ This package relies on ONNX, NumPy, and ProtoBuf. If you are converting a model
2. CoreMLTools
3. Keras (version 2.0.8 or higher) with the corresponding Tensorflow version
4. LightGBM (scikit-learn interface)
5. XGBoost (scikit-learn interface)
6. libsvm

# Examples
If you want the converted ONNX model to be compatible with a certain ONNX version, please specify the target_opset parameter upon invoking the convert function. The following Keras model conversion example demonstrates this below. You can identify the mapping from ONNX Operator Sets (referred to as opsets) to ONNX releases in the [versioning documentation](https://github.com/onnx/onnx/blob/master/docs/Versioning.md#released-versions).
Expand Down
2 changes: 2 additions & 0 deletions onnxmltools/convert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
from .main import convert_libsvm
from .main import convert_lightgbm
from .main import convert_sklearn
from .main import convert_xgboost

4 changes: 4 additions & 0 deletions onnxmltools/convert/common/_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ class LightGbmModelContainer(CommonSklearnModelContainer):
pass


class XGBoostModelContainer(CommonSklearnModelContainer):
    """Model container used for XGBoost models.

    It defines nothing of its own; all behaviour is inherited from
    ``CommonSklearnModelContainer``.
    """


class KerasModelContainer(RawModelContainer):

def __init__(self, keras_model):
Expand Down
4 changes: 4 additions & 0 deletions onnxmltools/convert/common/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ def __init__(self, shape=None, doc_string=''):

def to_onnx_type(self):
    """Placeholder: this implementation always raises ``NotImplementedError``."""
    raise NotImplementedError()

def __repr__(self):
    """Return ``ClassName(shape, 'doc_string')`` built from this instance's
    class name, ``shape`` and ``doc_string`` attributes."""
    return "{}({}, '{}')".format(self.__class__.__name__, self.shape, self.doc_string)


class Int64Type(DataType):
Expand Down
3 changes: 3 additions & 0 deletions onnxmltools/convert/common/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import abc
import six


@six.add_metaclass(abc.ABCMeta)
class ModelContainer:
__metaclass = abc.ABCMeta
Expand Down Expand Up @@ -41,6 +42,7 @@ def add_node(self, op_type, inputs, outputs, op_domain='', op_version=1, **attrs
"""
return


@six.add_metaclass(abc.ABCMeta)
class OperatorBase:
__metaclass__ = abc.ABCMeta
Expand Down Expand Up @@ -77,6 +79,7 @@ def original_operator(self):
"""
pass


@six.add_metaclass(abc.ABCMeta)
class ScopeBase:
__metaclass__ = abc.ABCMeta
Expand Down
3 changes: 2 additions & 1 deletion onnxmltools/convert/common/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def build_from_onnx(onnx_nodes, nchw_inputs, inputs, outputs):
ln = LinkedNode(o_)
view.append(ln)
for var_ in o_.output:
assert var_map.get(var_) is None
if var_map.get(var_) is not None:
raise RuntimeError("Duplicated output name (accross all nodes) '{0}'".format(var_))
var_map[var_] = ln

additional_nodes = []
Expand Down
4 changes: 2 additions & 2 deletions onnxmltools/convert/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def xgboost_installed():
try:
_LIB.XGBoosterDumpModelEx
except AttributeError:
# The version is now recent enough even though it is version 0.6.
# The version is not recent enough even though it is version 0.6.
# You need to install xgboost from github and not from pypi.
return False
from xgboost import __version__
Expand Down Expand Up @@ -290,7 +290,7 @@ def check_input_and_output_numbers(operator, input_count_range=None, output_coun
if max_output_count is not None and len(operator.outputs) > max_output_count:
raise RuntimeError(
'For operator %s (type: %s), at most %s outputs(s) is(are) supported but we got %s output(s) which are %s' \
% (operator.full_name, operator.type, max_output_count, len(operator.outputs), operator.outputs_full_names))
% (operator.full_name, operator.type, max_output_count, len(operator.outputs), operator.output_full_names))


def check_input_and_output_types(operator, good_input_types=None, good_output_types=None):
Expand Down
4 changes: 2 additions & 2 deletions onnxmltools/convert/lightgbm/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def _get_lightgbm_operator_name(model_type):
'''
Get operator name of the input argument
:param model_type: A scikit-learn object (e.g., SGDClassifier and Binarizer)
:param model_type: A lightgbm object.
:return: A string which stands for the type of the input model in our conversion framework
'''
if model_type not in lightgbm_operator_name_map:
Expand Down Expand Up @@ -60,7 +60,7 @@ def _parse_lightgbm(scope, model, inputs):
This is a delegate function. It doesn't nothing but invoke the correct parsing function according to the input
model's type.
:param scope: Scope object
:param model: A scikit-learn object (e.g., OneHotEncoder and LogisticRegression)
:param model: A lightgbm object
:param inputs: A list of variables
:return: The output variables produced by the input model
'''
Expand Down
10 changes: 10 additions & 0 deletions onnxmltools/convert/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,13 @@ def convert_sklearn(model, name=None, initial_types=None, doc_string='', target_
from skl2onnx.convert import convert_sklearn as convert_skl2onnx
return convert_skl2onnx(model, name, initial_types, doc_string, target_opset,
custom_conversion_functions, custom_shape_calculators)


def convert_xgboost(*args, **kwargs):
    '''
    Entry point for converting an xgboost model.

    All positional and keyword arguments are forwarded unchanged to
    ``convert`` from the ``.xgboost.convert`` module, which is imported
    lazily so that xgboost is only needed when this function is called.

    :return: Whatever ``.xgboost.convert.convert`` returns
    :raises RuntimeError: if ``utils.xgboost_installed()`` reports that xgboost is unavailable
    '''
    if utils.xgboost_installed():
        from .xgboost.convert import convert
        return convert(*args, **kwargs)

    raise RuntimeError('xgboost is not installed. Please install xgboost to use this feature.')


7 changes: 7 additions & 0 deletions onnxmltools/convert/xgboost/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

from .convert import convert
91 changes: 91 additions & 0 deletions onnxmltools/convert/xgboost/_parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

from ..common._container import XGBoostModelContainer
from ..common._topology import *

from xgboost import XGBRegressor, XGBClassifier

# Model classes that are parsed as classifiers (they get a label output and a
# probabilities output instead of a single output variable).
xgboost_classifier_list = [XGBClassifier]

# Associate types with our operator names.
xgboost_operator_name_map = {
    XGBClassifier: 'XGBClassifier',
    XGBRegressor: 'XGBRegressor',
}


def _get_xgboost_operator_name(model_type):
    '''
    Look up the operator name registered for an xgboost model class.

    :param model_type: The class of a xgboost object (a key of xgboost_operator_name_map)
    :return: A string which stands for the type of the input model in our conversion framework
    :raises ValueError: if model_type has no entry in xgboost_operator_name_map
    '''
    if model_type in xgboost_operator_name_map:
        return xgboost_operator_name_map[model_type]
    raise ValueError("No proper operator name found for '%s'" % model_type)


def _parse_xgboost_simple_model(scope, model, inputs):
    '''
    Declare the operator for a single (non-pipeline) xgboost model together with its output variables.

    :param scope: Scope object in which the operator and its output variables are declared
    :param model: A xgboost object
    :param inputs: A list of variables, assigned as the inputs of the declared operator
    :return: The operator's list of output variables, which will be passed to the next stage
    '''
    model_type = type(model)
    operator = scope.declare_local_operator(_get_xgboost_operator_name(model_type), model)
    operator.inputs = inputs

    if model_type in xgboost_classifier_list:
        # Classifiers get two outputs: the label and the probabilities of all classes.
        # Their declared types are not necessarily correct; they are fixed in the shape inference phase.
        output_names = ('label', 'probabilities')
    else:
        # Every other supported model is assumed to produce a single float tensor.
        output_names = ('variable',)

    # Declare every output first, then attach them to the operator in the same order.
    declared = [scope.declare_local_variable(output_name, FloatTensorType())
                for output_name in output_names]
    for output_variable in declared:
        operator.outputs.append(output_variable)
    return operator.outputs


def _parse_xgboost(scope, model, inputs):
    '''
    This is a delegate function. It does nothing but invoke the parsing function for the input model;
    currently every model is handled by _parse_xgboost_simple_model.

    :param scope: Scope object
    :param model: A xgboost object
    :param inputs: A list of variables
    :return: The output variables produced by the input model
    '''
    outputs = _parse_xgboost_simple_model(scope, model, inputs)
    return outputs


def parse_xgboost(model, initial_types=None, target_opset=None,
                  custom_conversion_functions=None, custom_shape_calculators=None):
    '''
    Build the Topology describing a xgboost model.

    A root scope is declared, one input variable is declared per entry of initial_types, the model is parsed
    into an operator, and the resulting inputs and outputs are registered on the model container.

    :param model: A xgboost object
    :param initial_types: A list of (variable name, type) pairs describing the model inputs. Although the
        default is None, a value is required.
    :param target_opset: Forwarded to Topology
    :param custom_conversion_functions: Forwarded to Topology
    :param custom_shape_calculators: Forwarded to Topology
    :return: The Topology object holding the parsed model
    :raises ValueError: if initial_types is None
    '''
    # Fail early with an explicit message: iterating over None below would otherwise raise an
    # opaque TypeError after the container and topology had already been created.
    if initial_types is None:
        raise ValueError('Initial types are required: a list of (variable name, type) pairs '
                         'describing the model inputs.')

    raw_model_container = XGBoostModelContainer(model)
    topology = Topology(raw_model_container,
                        initial_types=initial_types, target_opset=target_opset,
                        custom_conversion_functions=custom_conversion_functions,
                        custom_shape_calculators=custom_shape_calculators)
    scope = topology.declare_scope('__root__')

    # Declare all input variables before registering any of them on the container.
    inputs = [scope.declare_local_variable(var_name, initial_type)
              for var_name, initial_type in initial_types]
    for variable in inputs:
        raw_model_container.add_input(variable)

    outputs = _parse_xgboost(scope, model, inputs)
    for variable in outputs:
        raw_model_container.add_output(variable)

    return topology
16 changes: 16 additions & 0 deletions onnxmltools/convert/xgboost/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Common function to converters and shape calculators.
"""

def get_xgb_params(xgb_node):
    """
    Retrieves parameters of a model.

    :param xgb_node: the model object to inspect
    :return: the result of ``xgb_node.get_xgb_params()`` when the object
        has a ``kwargs`` attribute, otherwise the object's ``__dict__``
        itself (not a copy)
    """
    if not hasattr(xgb_node, 'kwargs'):
        # XGBoost < 0.7
        return xgb_node.__dict__

    # XGBoost >= 0.7
    return xgb_node.get_xgb_params()
44 changes: 44 additions & 0 deletions onnxmltools/convert/xgboost/convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

from uuid import uuid4
from ...proto import onnx, get_opset_number_from_onnx
from ..common._topology import convert_topology
from ._parse import parse_xgboost

# Invoke the registration of all our converters and shape calculators
# from . import shape_calculators
from . import operator_converters, shape_calculators


def convert(model, name=None, initial_types=None, doc_string='', target_opset=None,
            targeted_onnx=onnx.__version__, custom_conversion_functions=None,
            custom_shape_calculators=None):
    '''
    Produce an ONNX model equivalent to the given xgboost model.

    :param model: A xgboost model
    :param name: The name of the graph (type: GraphProto) in the produced ONNX model (type: ModelProto).
        When None, a random hexadecimal UUID string is used.
    :param initial_types: a python list. Each element is a tuple of a variable name and a type defined in
        data_types.py. Required even though the default is None.
    :param doc_string: A string attached onto the produced ONNX model
    :param target_opset: number, for example, 7 for ONNX 1.2, and 8 for ONNX 1.3. A falsy value selects the
        result of get_opset_number_from_onnx().
    :param targeted_onnx: A string (for example, '1.1.2' and '1.2') used to specify the targeted ONNX version of
        the produced model. If ONNXMLTools cannot find a compatible ONNX python package, an error may be thrown.
    :param custom_conversion_functions: a dictionary for specifying the user customized conversion function
    :param custom_shape_calculators: a dictionary for specifying the user customized shape calculator
    :return: An ONNX model (type: ModelProto) which is equivalent to the input xgboost model
    :raises ValueError: if initial_types is None
    '''
    if initial_types is None:
        raise ValueError('Initial types are required. See usage of convert(...) in '
                         'onnxmltools.convert.xgboost.convert for details')

    graph_name = uuid4().hex if name is None else name
    opset = target_opset or get_opset_number_from_onnx()

    topology = parse_xgboost(model, initial_types, opset,
                             custom_conversion_functions, custom_shape_calculators)
    topology.compile()
    return convert_topology(topology, graph_name, doc_string, opset, targeted_onnx)
Loading

0 comments on commit eef63ee

Please sign in to comment.