Fix DecisionTree, RandomForest, StopWords, Tokenizer failing in #468 (sparkml converters) (#471)

* enable spark on CI
* update init.py
* update CI
onnx · Jun 30, 2021 · 26e2429 · 26e2429
1 parent 582540e
commit 26e2429
Show file tree

Hide file tree

Showing 58 changed files with 724 additions and 677 deletions.
diff --git a/.azure-pipelines/linux-CI-nightly.yml b/.azure-pipelines/linux-CI-nightly.yml
@@ -13,14 +13,16 @@ jobs:
     vmImage: 'Ubuntu-16.04'
   strategy:
     matrix:
-      Python36-nightly:
-        python.version: '3.6'
-        ONNX_PATH: onnx==1.7.0
+      Python39-nightly:
+        python.version: '3.9'
+        ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly
+        COREML_PATH: git+https://github.com/apple/coremltools@3.1
+      Python38-nightly:
+        python.version: '3.8'
         ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly
         COREML_PATH: git+https://github.com/apple/coremltools@3.1
       Python37-nightly:
         python.version: '3.7'
-        ONNX_PATH: onnx==1.8.0
         ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly
         COREML_PATH: git+https://github.com/apple/coremltools@3.1
     maxParallel: 3
@@ -43,20 +45,17 @@ jobs:
       conda install -c conda-forge cmake
       python -m pip install $(COREML_PATH)
       python -m pip install $(ONNX_PATH)
-      python -m pip install tensorflow-cpu==1.15.0
-      python -m pip install tf2onnx==1.5.6
-      python -m pip install git+https://github.com/microsoft/onnxconverter-common
-      python -m pip install git+https://github.com/onnx/keras-onnx
+      python -m pip install hummingbird-ml --no-deps
       python -m pip install -r requirements.txt
       python -m pip install -r requirements-dev.txt
       python -m pip install $(ORT_PATH)
       python -m pip install pytest
     displayName: 'Install dependencies'
 
   - script: |
-      python -c "import onnxconverter_common"
-      python -c "import onnxruntime"
       pip install -e .
+      python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
+      python -c "import onnxruntime;print(onnxruntime.__version__)"
       pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
     displayName: 'pytest - onnxmltools'
 

diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml
@@ -10,15 +10,27 @@ jobs:
 
 - job: 'Test'
   pool:
-    vmImage: 'Ubuntu-16.04'
+    vmImage: 'ubuntu-latest'
   strategy:
     matrix:
-      Python36-141-RT050:
-        python.version: '3.6'
-        ONNX_PATH: onnx==1.4.1
-        ONNXRT_PATH: onnxruntime==0.5.0
+      Python39-190-RT180-xgb11:
+        python.version: '3.9'
+        ONNX_PATH: onnx==1.9.0
+        ONNXRT_PATH: onnxruntime==1.8.0
         COREML_PATH: git+https://github.com/apple/coremltools@3.1
-        xgboost.version: ''
+        xgboost.version: '>=1.2'
+      Python38-181-RT170-xgb11:
+        python.version: '3.8'
+        ONNX_PATH: onnx==1.8.1
+        ONNXRT_PATH: onnxruntime==1.7.0
+        COREML_PATH: git+https://github.com/apple/coremltools@3.1
+        xgboost.version: '>=1.2'
+      Python37-180-RT160-xgb11:
+        python.version: '3.7'
+        ONNX_PATH: onnx==1.8.0
+        ONNXRT_PATH: onnxruntime==1.6.0
+        COREML_PATH: git+https://github.com/apple/coremltools@3.1
+        xgboost.version: '>=1.2'
       Python37-150-RT100:
         python.version: '3.7'
         ONNX_PATH: onnx==1.5.0
@@ -49,18 +61,6 @@ jobs:
         ONNXRT_PATH: onnxruntime==1.6.0
         COREML_PATH: git+https://github.com/apple/coremltools@3.1
         xgboost.version: '>=1.0'
-      Python37-180-RT160-xgb11:
-        python.version: '3.7'
-        ONNX_PATH: onnx==1.8.0
-        ONNXRT_PATH: onnxruntime==1.6.0
-        COREML_PATH: git+https://github.com/apple/coremltools@3.1
-        xgboost.version: '>=1.2'
-      Python38-181-RT170-xgb11:
-        python.version: '3.7'
-        ONNX_PATH: onnx==1.8.1
-        ONNXRT_PATH: onnxruntime==1.7.0
-        COREML_PATH: git+https://github.com/apple/coremltools@3.1
-        xgboost.version: '>=1.2'
     maxParallel: 3
 
   steps:
@@ -81,11 +81,9 @@ jobs:
       conda install -c conda-forge cmake
       pip install $(COREML_PATH)
       pip install $(ONNX_PATH)
-      python -m pip install tensorflow-cpu==1.15.0
-      python -m pip install tf2onnx==1.5.6
-      python -m pip install git+https://github.com/microsoft/onnxconverter-common
-      python -m pip install git+https://github.com/onnx/keras-onnx
+      pip install hummingbird-ml --no-deps
       pip install -r requirements.txt
+      pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
       pip install -r requirements-dev.txt
       pip install xgboost$(xgboost.version)
       pip install $(ONNXRT_PATH)
@@ -101,9 +99,10 @@ jobs:
     displayName: 'local installation'
 
   - script: |
-      python -c "import onnxconverter_common"
-      python -c "import onnxruntime"
-      pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
+      export PYTHONPATH=.
+      python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
+      python -c "import onnxruntime;print(onnxruntime.__version__)"
+      pytest tests --doctest-modules --junitxml=junit/test-results.xml
     displayName: 'pytest - onnxmltools'
 
   - task: PublishTestResults@2

diff --git a/.azure-pipelines/win32-CI-nightly.yml b/.azure-pipelines/win32-CI-nightly.yml
@@ -10,17 +10,19 @@ jobs:
 
 - job: 'Test'
   pool:
-    vmImage: 'vs2017-win2016'
+    vmImage: 'windows-latest'
   strategy:
     matrix:
-      Python36-nightly:
-        python.version: '3.6'
-        ONNX_PATH: onnx==1.7.0
+      Python39-nightly:
+        python.version: '3.9'
+        ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly
+        COREML_PATH: git+https://github.com/apple/coremltools@3.1
+      Python38-nightly:
+        python.version: '3.8'
         ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly
         COREML_PATH: git+https://github.com/apple/coremltools@3.1
       Python37-nightly:
         python.version: '3.7'
-        ONNX_PATH: onnx==1.8.0
         ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly
         COREML_PATH: git+https://github.com/apple/coremltools@3.1
     maxParallel: 3
@@ -40,22 +42,18 @@ jobs:
   - script: |
       call activate py$(python.version)
       python -m pip install --upgrade pip numpy
-      echo Test numpy installation... && python -c "import numpy"
       pip install %COREML_PATH% %ONNX_PATH%
-      python -m pip install tensorflow-cpu==1.15.0
-      python -m pip install tf2onnx==1.5.6
-      python -m pip install git+https://github.com/microsoft/onnxconverter-common
-      python -m pip install git+https://github.com/onnx/keras-onnx
-      echo Test onnxconverter-common installation... && python -c "import onnxconverter_common"
+      pip install hummingbird-ml --no-deps
       pip install -r requirements.txt
       pip install -r requirements-dev.txt
       pip install %ONNXRT_PATH%
-      echo Test onnxruntime installation... && python -c "import onnxruntime"
     displayName: 'Install dependencies'
 
   - script: |
       call activate py$(python.version)
       pip install -e .
+      python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
+      python -c "import onnxruntime;print(onnxruntime.__version__)"
       python -m pytest tests  --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
     displayName: 'pytest - onnxmltools'
 

diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml
@@ -10,20 +10,27 @@ jobs:
 
 - job: 'Test'
   pool:
-    vmImage: 'vs2017-win2016'
+    vmImage: 'windows-latest'
   strategy:
     matrix:
-      Python36-141-RT030:
-        python.version: '3.6'
-        ONNX_PATH: onnx==1.4.1
-        ONNXRT_PATH: onnxruntime==0.3.0
+      Python39-190-RT180:
+        python.version: '3.9'
+        ONNX_PATH: onnx==1.9.0
+        ONNXRT_PATH: onnxruntime==1.8.0
         COREML_PATH: git+https://github.com/apple/coremltools@3.1
         sklearn.version: ''
 
-      Python37-150-RT040:
+      Python38-181-RT170:
+        python.version: '3.8'
+        ONNX_PATH: onnx==1.8.1
+        ONNXRT_PATH: onnxruntime==1.7.0
+        COREML_PATH: git+https://github.com/apple/coremltools@3.1
+        sklearn.version: ''
+
+      Python37-180-RT160:
         python.version: '3.7'
-        ONNX_PATH: onnx==1.5.0
-        ONNXRT_PATH: onnxruntime==0.4.0
+        ONNX_PATH: onnx==1.8.0
+        ONNXRT_PATH: onnxruntime==1.6.0
         COREML_PATH: git+https://github.com/apple/coremltools@3.1
         sklearn.version: ''
 
@@ -41,20 +48,6 @@ jobs:
         COREML_PATH: git+https://github.com/apple/coremltools@3.1
         sklearn.version: ''
 
-      Python37-180-RT160:
-        python.version: '3.7'
-        ONNX_PATH: onnx==1.8.0
-        ONNXRT_PATH: onnxruntime==1.6.0
-        COREML_PATH: git+https://github.com/apple/coremltools@3.1
-        sklearn.version: ''
-
-      Python38-181-RT170:
-        python.version: '3.8'
-        ONNX_PATH: onnx==1.8.1
-        ONNXRT_PATH: onnxruntime==1.7.0
-        COREML_PATH: git+https://github.com/apple/coremltools@3.1
-        sklearn.version: ''
-
     maxParallel: 3
 
   steps:
@@ -74,17 +67,12 @@ jobs:
       python -m pip install --upgrade pip numpy
       echo Test numpy installation... && python -c "import numpy"
       python -m pip install %COREML_PATH% %ONNX_PATH%
-      python -m pip install tensorflow-cpu==1.15.0
-      python -m pip install tf2onnx==1.5.6
-      python -m pip install git+https://github.com/microsoft/onnxconverter-common
-      python -m pip install git+https://github.com/onnx/keras-onnx
-      echo Test onnxconverter-common installation... && python -c "import onnxconverter_common"
+      python -m pip install hummingbird-ml --no-deps
       python -m pip install -r requirements.txt
+      python -m pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio===0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
       python -m pip install -r requirements-dev.txt
       python -m pip install %ONNXRT_PATH%
       python -m pip install scikit-learn$(sklearn.version)
-      echo Test onnxruntime installation... && python -c "import onnxruntime"
-      echo "debug environment" && path
       python -m pip show pytest
     displayName: 'Install dependencies'
 
@@ -96,7 +84,10 @@ jobs:
   - script: |
       call activate py$(python.version)
       python -m pip install -e .
-      python -m pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
+      set PYTHONPATH=.
+      python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
+      python -c "import onnxruntime;print(onnxruntime.__version__)"
+      python -m pytest tests --doctest-modules --junitxml=junit/test-results.xml
     displayName: 'pytest - onnxmltools'
 
   - task: PublishTestResults@2

diff --git a/README.md b/README.md
@@ -9,7 +9,6 @@
 
 # Introduction
 ONNXMLTools enables you to convert models from different machine learning toolkits into [ONNX](https://onnx.ai). Currently the following toolkits are supported:
-* Keras (a wrapper of [keras2onnx converter](https://github.com/onnx/keras-onnx/))
 * Tensorflow (a wrapper of [tf2onnx converter](https://github.com/onnx/tensorflow-onnx/))
 * scikit-learn (a wrapper of [skl2onnx converter](https://github.com/onnx/sklearn-onnx/))
 * Apple Core ML

diff --git a/docs/index.rst b/docs/index.rst
@@ -32,7 +32,6 @@ Currently the following toolkits are supported:
 * `XGBoost <https://xgboost.readthedocs.io/en/latest/>`_
 
 *onnxmltools* leverages existing converting library,
-`keras-onnx <https://github.com/onnx/keras-onnx>`_,
 `sklearn-onnx <https://github.com/onnx/sklearn-onnx>`_,
 `tensorflow-onnx <https://github.com/onnx/tensorflow-onnx>`_
 and implements converters for the other libraries.

diff --git a/onnxmltools/convert/common/utils.py b/onnxmltools/convert/common/utils.py
@@ -1,3 +1,17 @@
-# SPDX-License-Identifier: Apache-2.0
-
-from onnxconverter_common.utils import *  # noqa
+# SPDX-License-Identifier: Apache-2.0
+
+try:
+    from onnxconverter_common.utils import hummingbird_installed  # noqa
+except ImportError:
+    def hummingbird_installed():
+        """
+        Checks that *Hummingbird* is available.
+        """
+        try:
+            import hummingbird.ml  # noqa: F401
+
+            return True
+        except ImportError:
+            return False
+
+from onnxconverter_common.utils import *  # noqa
diff --git a/onnxmltools/convert/lightgbm/convert.py b/onnxmltools/convert/lightgbm/convert.py
@@ -1,10 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from uuid import uuid4
+import onnx
 import lightgbm
-import warnings
 from onnxconverter_common.onnx_ex import get_maximum_opset_supported
-import onnx
 from ..common._topology import convert_topology
 from ..common.utils import hummingbird_installed
 from ._parse import parse_lightgbm, WrappedBooster
@@ -57,19 +56,12 @@ def convert(model, name=None, initial_types=None, doc_string='', target_opset=No
     onnx_ml_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
 
     if without_onnx_ml:
-        from hummingbird.ml import convert
-        from hummingbird.ml import constants
-
-        if target_opset == 13:
-            warnings.warn('Pytorch-onnx does not support opset 13 yet, use opset 12 instead.')
-            target_opset = 12
-
+        from hummingbird.ml import convert, constants
         extra_config = {}
-        extra_config[constants.ONNX_INITIAL_TYPES] = initial_types
+        # extra_config[constants.ONNX_INITIAL_TYPES] = initial_types
         extra_config[constants.ONNX_OUTPUT_MODEL_NAME] = name
         extra_config[constants.ONNX_TARGET_OPSET] = target_opset
         onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config).model
-
         return onnx_model
 
     return onnx_ml_model
diff --git a/onnxmltools/convert/sparkml/operator_converters/min_hash_lsh.py b/onnxmltools/convert/sparkml/operator_converters/min_hash_lsh.py
@@ -1,12 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from onnx import onnx_pb as onnx_proto
-from ...common._apply_operation import apply_add, apply_mul, apply_sum, apply_div, apply_sub, \
-    apply_concat, apply_cast
+from ...common._apply_operation import (
+    apply_add, apply_mul, apply_sum, apply_div, apply_sub,
+    apply_concat, apply_cast)
 from ...common._registration import register_converter, register_shape_calculator
-from ...common.data_types import FloatTensorType
+from ...common.data_types import FloatTensorType, DoubleTensorType
 from ...common.utils import check_input_and_output_numbers, check_input_and_output_types
-from ..utils import SparkMlConversionError
 from .tree_ensemble_common import save_read_sparkml_model_data
 
 MinHashLSH_HASH_PRIME = 2038074743
@@ -23,10 +23,7 @@ def get_rand_coefficients(operator):
 
 
 def convert_min_hash_lsh(scope, operator, container):
-    spark = operator.raw_params['SparkSession']
     int_type = onnx_proto.TensorProto.INT64
-    if spark.version < '2.4.0':
-        int_type = onnx_proto.TensorProto.INT32
     rand_coefficients = get_rand_coefficients(operator)
     coeffs = []
     for i in range(0, len(rand_coefficients), 2):
@@ -75,11 +72,10 @@ def convert_min_hash_lsh(scope, operator, container):
 
 def calculate_min_hash_lsh_output_shapes(operator):
     check_input_and_output_numbers(operator, output_count_range=1)
-    check_input_and_output_types(operator, good_input_types=[FloatTensorType])
+    check_input_and_output_types(
+        operator, good_input_types=[FloatTensorType, DoubleTensorType])
 
     N = operator.inputs[0].type.shape[0]
-    if N != 1:
-        raise SparkMlConversionError('MinHashLSHModel converter cannot handle batch size of more than 1')
     C = len(get_rand_coefficients(operator)) // 2
     operator.outputs[0].type = FloatTensorType([N, C])