Skip to content

Commit

Permalink
Fix DecisionTree, RandomForest, StopWords, Tokenizer failing in #468 (sparkml converters) (#471)
Browse files Browse the repository at this point in the history

* enable spark on CI
* update init.py
* update CI
  • Loading branch information
xadupre committed Jun 30, 2021
1 parent 582540e commit 26e2429
Show file tree
Hide file tree
Showing 58 changed files with 724 additions and 677 deletions.
19 changes: 9 additions & 10 deletions .azure-pipelines/linux-CI-nightly.yml
Expand Up @@ -13,14 +13,16 @@ jobs:
vmImage: 'Ubuntu-16.04'
strategy:
matrix:
Python36-nightly:
python.version: '3.6'
ONNX_PATH: onnx==1.7.0
Python39-nightly:
python.version: '3.9'
ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly
COREML_PATH: git+https://github.com/apple/coremltools@3.1
Python38-nightly:
python.version: '3.8'
ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly
COREML_PATH: git+https://github.com/apple/coremltools@3.1
Python37-nightly:
python.version: '3.7'
ONNX_PATH: onnx==1.8.0
ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly
COREML_PATH: git+https://github.com/apple/coremltools@3.1
maxParallel: 3
Expand All @@ -43,20 +45,17 @@ jobs:
conda install -c conda-forge cmake
python -m pip install $(COREML_PATH)
python -m pip install $(ONNX_PATH)
python -m pip install tensorflow-cpu==1.15.0
python -m pip install tf2onnx==1.5.6
python -m pip install git+https://github.com/microsoft/onnxconverter-common
python -m pip install git+https://github.com/onnx/keras-onnx
python -m pip install hummingbird-ml --no-deps
python -m pip install -r requirements.txt
python -m pip install -r requirements-dev.txt
python -m pip install $(ORT_PATH)
python -m pip install pytest
displayName: 'Install dependencies'
- script: |
python -c "import onnxconverter_common"
python -c "import onnxruntime"
pip install -e .
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
python -c "import onnxruntime;print(onnxruntime.__version__)"
pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
displayName: 'pytest - onnxmltools'
Expand Down
49 changes: 24 additions & 25 deletions .azure-pipelines/linux-conda-CI.yml
Expand Up @@ -10,15 +10,27 @@ jobs:

- job: 'Test'
pool:
vmImage: 'Ubuntu-16.04'
vmImage: 'ubuntu-latest'
strategy:
matrix:
Python36-141-RT050:
python.version: '3.6'
ONNX_PATH: onnx==1.4.1
ONNXRT_PATH: onnxruntime==0.5.0
Python39-190-RT180-xgb11:
python.version: '3.9'
ONNX_PATH: onnx==1.9.0
ONNXRT_PATH: onnxruntime==1.8.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
xgboost.version: ''
xgboost.version: '>=1.2'
Python38-181-RT170-xgb11:
python.version: '3.8'
ONNX_PATH: onnx==1.8.1
ONNXRT_PATH: onnxruntime==1.7.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
xgboost.version: '>=1.2'
Python37-180-RT160-xgb11:
python.version: '3.7'
ONNX_PATH: onnx==1.8.0
ONNXRT_PATH: onnxruntime==1.6.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
xgboost.version: '>=1.2'
Python37-150-RT100:
python.version: '3.7'
ONNX_PATH: onnx==1.5.0
Expand Down Expand Up @@ -49,18 +61,6 @@ jobs:
ONNXRT_PATH: onnxruntime==1.6.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
xgboost.version: '>=1.0'
Python37-180-RT160-xgb11:
python.version: '3.7'
ONNX_PATH: onnx==1.8.0
ONNXRT_PATH: onnxruntime==1.6.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
xgboost.version: '>=1.2'
Python38-181-RT170-xgb11:
python.version: '3.7'
ONNX_PATH: onnx==1.8.1
ONNXRT_PATH: onnxruntime==1.7.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
xgboost.version: '>=1.2'
maxParallel: 3

steps:
Expand All @@ -81,11 +81,9 @@ jobs:
conda install -c conda-forge cmake
pip install $(COREML_PATH)
pip install $(ONNX_PATH)
python -m pip install tensorflow-cpu==1.15.0
python -m pip install tf2onnx==1.5.6
python -m pip install git+https://github.com/microsoft/onnxconverter-common
python -m pip install git+https://github.com/onnx/keras-onnx
pip install hummingbird-ml --no-deps
pip install -r requirements.txt
pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
pip install -r requirements-dev.txt
pip install xgboost$(xgboost.version)
pip install $(ONNXRT_PATH)
Expand All @@ -101,9 +99,10 @@ jobs:
displayName: 'local installation'
- script: |
python -c "import onnxconverter_common"
python -c "import onnxruntime"
pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
export PYTHONPATH=.
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
python -c "import onnxruntime;print(onnxruntime.__version__)"
pytest tests --doctest-modules --junitxml=junit/test-results.xml
displayName: 'pytest - onnxmltools'
- task: PublishTestResults@2
Expand Down
22 changes: 10 additions & 12 deletions .azure-pipelines/win32-CI-nightly.yml
Expand Up @@ -10,17 +10,19 @@ jobs:

- job: 'Test'
pool:
vmImage: 'vs2017-win2016'
vmImage: 'windows-latest'
strategy:
matrix:
Python36-nightly:
python.version: '3.6'
ONNX_PATH: onnx==1.7.0
Python39-nightly:
python.version: '3.9'
ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly
COREML_PATH: git+https://github.com/apple/coremltools@3.1
Python38-nightly:
python.version: '3.8'
ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly
COREML_PATH: git+https://github.com/apple/coremltools@3.1
Python37-nightly:
python.version: '3.7'
ONNX_PATH: onnx==1.8.0
ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly
COREML_PATH: git+https://github.com/apple/coremltools@3.1
maxParallel: 3
Expand All @@ -40,22 +42,18 @@ jobs:
- script: |
call activate py$(python.version)
python -m pip install --upgrade pip numpy
echo Test numpy installation... && python -c "import numpy"
pip install %COREML_PATH% %ONNX_PATH%
python -m pip install tensorflow-cpu==1.15.0
python -m pip install tf2onnx==1.5.6
python -m pip install git+https://github.com/microsoft/onnxconverter-common
python -m pip install git+https://github.com/onnx/keras-onnx
echo Test onnxconverter-common installation... && python -c "import onnxconverter_common"
pip install humming-bird-ml --no-deps
pip install -r requirements.txt
pip install -r requirements-dev.txt
pip install %ONNXRT_PATH%
echo Test onnxruntime installation... && python -c "import onnxruntime"
displayName: 'Install dependencies'
- script: |
call activate py$(python.version)
pip install -e .
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
python -c "import onnxruntime;print(onnxruntime.__version__)"
python -m pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
displayName: 'pytest - onnxmltools'
Expand Down
51 changes: 21 additions & 30 deletions .azure-pipelines/win32-conda-CI.yml
Expand Up @@ -10,20 +10,27 @@ jobs:

- job: 'Test'
pool:
vmImage: 'vs2017-win2016'
vmImage: 'windows-latest'
strategy:
matrix:
Python36-141-RT030:
python.version: '3.6'
ONNX_PATH: onnx==1.4.1
ONNXRT_PATH: onnxruntime==0.3.0
Python39-190-RT180:
python.version: '3.9'
ONNX_PATH: onnx==1.9.0
ONNXRT_PATH: onnxruntime==1.8.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python37-150-RT040:
Python38-181-RT170:
python.version: '3.8'
ONNX_PATH: onnx==1.8.1
ONNXRT_PATH: onnxruntime==1.7.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python37-180-RT160:
python.version: '3.7'
ONNX_PATH: onnx==1.5.0
ONNXRT_PATH: onnxruntime==0.4.0
ONNX_PATH: onnx==1.8.0
ONNXRT_PATH: onnxruntime==1.6.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Expand All @@ -41,20 +48,6 @@ jobs:
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python37-180-RT160:
python.version: '3.7'
ONNX_PATH: onnx==1.8.0
ONNXRT_PATH: onnxruntime==1.6.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python38-181-RT170:
python.version: '3.8'
ONNX_PATH: onnx==1.8.1
ONNXRT_PATH: onnxruntime==1.7.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

maxParallel: 3

steps:
Expand All @@ -74,17 +67,12 @@ jobs:
python -m pip install --upgrade pip numpy
echo Test numpy installation... && python -c "import numpy"
python -m pip install %COREML_PATH% %ONNX_PATH%
python -m pip install tensorflow-cpu==1.15.0
python -m pip install tf2onnx==1.5.6
python -m pip install git+https://github.com/microsoft/onnxconverter-common
python -m pip install git+https://github.com/onnx/keras-onnx
echo Test onnxconverter-common installation... && python -c "import onnxconverter_common"
python -m pip install humming-bird-ml --no-deps
python -m pip install -r requirements.txt
python -m pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio===0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
python -m pip install -r requirements-dev.txt
python -m pip install %ONNXRT_PATH%
python -m pip install scikit-learn$(sklearn.version)
echo Test onnxruntime installation... && python -c "import onnxruntime"
echo "debug environment" && path
python -m pip show pytest
displayName: 'Install dependencies'
Expand All @@ -96,7 +84,10 @@ jobs:
- script: |
call activate py$(python.version)
python -m pip install -e .
python -m pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
export PYTHONPATH=.
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
python -c "import onnxruntime;print(onnxruntime.__version__)"
python -m pytest tests --doctest-modules --junitxml=junit/test-results.xml
displayName: 'pytest - onnxmltools'
- task: PublishTestResults@2
Expand Down
1 change: 0 additions & 1 deletion README.md
Expand Up @@ -9,7 +9,6 @@

# Introduction
ONNXMLTools enables you to convert models from different machine learning toolkits into [ONNX](https://onnx.ai). Currently the following toolkits are supported:
* Keras (a wrapper of [keras2onnx converter](https://github.com/onnx/keras-onnx/))
* Tensorflow (a wrapper of [tf2onnx converter](https://github.com/onnx/tensorflow-onnx/))
* scikit-learn (a wrapper of [skl2onnx converter](https://github.com/onnx/sklearn-onnx/))
* Apple Core ML
Expand Down
1 change: 0 additions & 1 deletion docs/index.rst
Expand Up @@ -32,7 +32,6 @@ Currently the following toolkits are supported:
* `XGBoost <https://xgboost.readthedocs.io/en/latest/>`_

*onnxmltools* leverages existing converting library,
`keras-onnx <https://github.com/onnx/keras-onnx>`_,
`sklearn-onnx <https://github.com/onnx/sklearn-onnx>`_,
`tensorflow-onnx <https://github.com/onnx/tensorflow-onnx>`_
and implements converters for the other libraries.
Expand Down
20 changes: 17 additions & 3 deletions onnxmltools/convert/common/utils.py
@@ -1,3 +1,17 @@
# SPDX-License-Identifier: Apache-2.0

from onnxconverter_common.utils import * # noqa
# SPDX-License-Identifier: Apache-2.0

try:
from onnxconverter_common.utils import hummingbird_installed # noqa
except ImportError:
def hummingbird_installed():
"""
Checks that *Hummingbird* is available.
"""
try:
import hummingbird.ml # noqa: F401

return True
except ImportError:
return False

from onnxconverter_common.utils import * # noqa
14 changes: 3 additions & 11 deletions onnxmltools/convert/lightgbm/convert.py
@@ -1,10 +1,9 @@
# SPDX-License-Identifier: Apache-2.0

from uuid import uuid4
import onnx
import lightgbm
import warnings
from onnxconverter_common.onnx_ex import get_maximum_opset_supported
import onnx
from ..common._topology import convert_topology
from ..common.utils import hummingbird_installed
from ._parse import parse_lightgbm, WrappedBooster
Expand Down Expand Up @@ -57,19 +56,12 @@ def convert(model, name=None, initial_types=None, doc_string='', target_opset=No
onnx_ml_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)

if without_onnx_ml:
from hummingbird.ml import convert
from hummingbird.ml import constants

if target_opset == 13:
warnings.warn('Pytorch-onnx does not support opset 13 yet, use opset 12 instead.')
target_opset = 12

from hummingbird.ml import convert, constants
extra_config = {}
extra_config[constants.ONNX_INITIAL_TYPES] = initial_types
# extra_config[constants.ONNX_INITIAL_TYPES] = initial_types
extra_config[constants.ONNX_OUTPUT_MODEL_NAME] = name
extra_config[constants.ONNX_TARGET_OPSET] = target_opset
onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config).model

return onnx_model

return onnx_ml_model
16 changes: 6 additions & 10 deletions onnxmltools/convert/sparkml/operator_converters/min_hash_lsh.py
@@ -1,12 +1,12 @@
# SPDX-License-Identifier: Apache-2.0

from onnx import onnx_pb as onnx_proto
from ...common._apply_operation import apply_add, apply_mul, apply_sum, apply_div, apply_sub, \
apply_concat, apply_cast
from ...common._apply_operation import (
apply_add, apply_mul, apply_sum, apply_div, apply_sub,
apply_concat, apply_cast)
from ...common._registration import register_converter, register_shape_calculator
from ...common.data_types import FloatTensorType
from ...common.data_types import FloatTensorType, DoubleTensorType
from ...common.utils import check_input_and_output_numbers, check_input_and_output_types
from ..utils import SparkMlConversionError
from .tree_ensemble_common import save_read_sparkml_model_data

MinHashLSH_HASH_PRIME = 2038074743
Expand All @@ -23,10 +23,7 @@ def get_rand_coefficients(operator):


def convert_min_hash_lsh(scope, operator, container):
spark = operator.raw_params['SparkSession']
int_type = onnx_proto.TensorProto.INT64
if spark.version < '2.4.0':
int_type = onnx_proto.TensorProto.INT32
rand_coefficients = get_rand_coefficients(operator)
coeffs = []
for i in range(0, len(rand_coefficients), 2):
Expand Down Expand Up @@ -75,11 +72,10 @@ def convert_min_hash_lsh(scope, operator, container):

def calculate_min_hash_lsh_output_shapes(operator):
check_input_and_output_numbers(operator, output_count_range=1)
check_input_and_output_types(operator, good_input_types=[FloatTensorType])
check_input_and_output_types(
operator, good_input_types=[FloatTensorType, DoubleTensorType])

N = operator.inputs[0].type.shape[0]
if N != 1:
raise SparkMlConversionError('MinHashLSHModel converter cannot handle batch size of more than 1')
C = len(get_rand_coefficients(operator)) // 2
operator.outputs[0].type = FloatTensorType([N, C])

Expand Down

0 comments on commit 26e2429

Please sign in to comment.