diff --git a/_doc/notebooks/onnx_discrepencies.ipynb b/_doc/notebooks/onnx_discrepencies.ipynb
new file mode 100644
index 000000000..4cda7369d
--- /dev/null
+++ b/_doc/notebooks/onnx_discrepencies.ipynb
@@ -0,0 +1,1548 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Discrepencies with ONNX\n",
+ "\n",
+ "The notebook shows one example where the conversion leads with discrepencies if default options are used. It converts a pipeline with two steps, a scaler followed by a tree."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The bug this notebook is tracking does not always appear, it has a better chance to happen with integer features but that's not always the case. The notebook must be run again in that case."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from jyquickhelper import add_notebook_menu\n",
+ "add_notebook_menu()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data and first model\n",
+ "\n",
+ "We take a random datasets with mostly integers."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import math\n",
+ "import numpy\n",
+ "from sklearn.datasets import make_regression\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "X, y = make_regression(10000, 10)\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y)\n",
+ "\n",
+ "Xi_train, yi_train = X_train.copy(), y_train.copy()\n",
+ "Xi_test, yi_test = X_test.copy(), y_test.copy()\n",
+ "for i in range(X.shape[1]):\n",
+ " Xi_train[:, i] = (Xi_train[:, i] * math.pi * 2 ** i).astype(numpy.int64)\n",
+ " Xi_test[:, i] = (Xi_test[:, i] * math.pi * 2 ** i).astype(numpy.int64)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Pipeline(steps=[('scaler', StandardScaler()),\n",
+ " ('dt', DecisionTreeRegressor(max_depth=10))])"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.tree import DecisionTreeRegressor\n",
+ "\n",
+ "max_depth = 10\n",
+ "\n",
+ "model = Pipeline([\n",
+ " ('scaler', StandardScaler()),\n",
+ " ('dt', DecisionTreeRegressor(max_depth=max_depth))\n",
+ "])\n",
+ "\n",
+ "model.fit(Xi_train, yi_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([-157.23383218, 18.05104732, -121.42539005, 96.50791123,\n",
+ " -138.59042507])"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.predict(Xi_test[:5])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Other models:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model2 = Pipeline([\n",
+ " ('scaler', StandardScaler()),\n",
+ " ('dt', DecisionTreeRegressor(max_depth=max_depth))\n",
+ "])\n",
+ "model3 = Pipeline([\n",
+ " ('scaler', StandardScaler()),\n",
+ " ('dt', DecisionTreeRegressor(max_depth=3))\n",
+ "])\n",
+ "\n",
+ "\n",
+ "models = [\n",
+ " ('bug', Xi_test.astype(numpy.float32), model),\n",
+ " ('no scaler', Xi_test.astype(numpy.float32), \n",
+ " DecisionTreeRegressor(max_depth=max_depth).fit(Xi_train, yi_train)),\n",
+ " ('float', X_test.astype(numpy.float32),\n",
+ " model2.fit(X_train, y_train)),\n",
+ " ('max_depth=3', X_test.astype(numpy.float32),\n",
+ " model3.fit(X_train, y_train))\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Conversion to ONNX"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy\n",
+ "from mlprodict.onnx_conv import to_onnx\n",
+ "\n",
+ "onx = to_onnx(model, X_train[:1].astype(numpy.float32))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "OnnxInference(...)\n",
+ " def compiled_run(dict_inputs):\n",
+ " # inputs\n",
+ " X = dict_inputs['X']\n",
+ " (variable1, ) = n0_scaler(X)\n",
+ " (variable, ) = n1_treeensembleregressor(variable1)\n",
+ " return {\n",
+ " 'variable': variable,\n",
+ " }\n"
+ ]
+ }
+ ],
+ "source": [
+ "from mlprodict.onnxrt import OnnxInference\n",
+ "\n",
+ "oinfpy = OnnxInference(onx, runtime=\"python_compiled\")\n",
+ "print(oinfpy)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " runtime | \n",
+ " diff | \n",
+ " v[1853] | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " sklearn | \n",
+ " 0.000000 | \n",
+ " 33.825028 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " python | \n",
+ " 131.483138 | \n",
+ " 165.308167 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " python_compiled | \n",
+ " 131.483138 | \n",
+ " 165.308167 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " onnxruntime1 | \n",
+ " 131.483138 | \n",
+ " 165.308167 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " runtime diff v[1853]\n",
+ "0 sklearn 0.000000 33.825028\n",
+ "1 python 131.483138 165.308167\n",
+ "2 python_compiled 131.483138 165.308167\n",
+ "3 onnxruntime1 131.483138 165.308167"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas\n",
+ "\n",
+ "X32 = Xi_test.astype(numpy.float32)\n",
+ "y_skl = model.predict(X32)\n",
+ "\n",
+ "obs = [dict(runtime='sklearn', diff=0)]\n",
+ "for runtime in ['python', 'python_compiled', 'onnxruntime1']:\n",
+ " oinf = OnnxInference(onx, runtime=runtime)\n",
+ " y_onx = oinf.run({'X': X32})['variable']\n",
+ " delta = numpy.abs(y_skl - y_onx.ravel())\n",
+ " am = delta.argmax()\n",
+ " obs.append(dict(runtime=runtime, diff=delta.max()))\n",
+ " obs[-1]['v[%d]' % am] = y_onx.ravel()[am]\n",
+ " obs[0]['v[%d]' % am] = y_skl.ravel()[am]\n",
+ "\n",
+ "pandas.DataFrame(obs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The pipeline shows huge discrepencies. They appear for a pipeline *StandardScaler* + *DecisionTreeRegressor* applied in integer features. They disappear if floats are used, or if the scaler is removed. The bug also disappear if the tree is not big enough (max_depth=4 instread of 5)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " runtime | \n",
+ " diff | \n",
+ " name | \n",
+ " v[1853] | \n",
+ " v[1567] | \n",
+ " v[1015] | \n",
+ " v[2] | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " sklearn | \n",
+ " 0.000000 | \n",
+ " sklearn | \n",
+ " 33.825028 | \n",
+ " 288.589432 | \n",
+ " 361.193069 | \n",
+ " -140.648473 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " python | \n",
+ " 131.483138 | \n",
+ " bug | \n",
+ " 165.308167 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " python_compiled | \n",
+ " 131.483138 | \n",
+ " bug | \n",
+ " 165.308167 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " onnxruntime1 | \n",
+ " 131.483138 | \n",
+ " bug | \n",
+ " 165.308167 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " python | \n",
+ " 0.000015 | \n",
+ " no scaler | \n",
+ " NaN | \n",
+ " 288.589447 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " python_compiled | \n",
+ " 0.000015 | \n",
+ " no scaler | \n",
+ " NaN | \n",
+ " 288.589447 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " onnxruntime1 | \n",
+ " 0.000015 | \n",
+ " no scaler | \n",
+ " NaN | \n",
+ " 288.589447 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " python | \n",
+ " 0.000015 | \n",
+ " float | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 361.193054 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " python_compiled | \n",
+ " 0.000015 | \n",
+ " float | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 361.193054 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " onnxruntime1 | \n",
+ " 0.000015 | \n",
+ " float | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 361.193054 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " python | \n",
+ " 0.000005 | \n",
+ " max_depth=3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " -140.648468 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " python_compiled | \n",
+ " 0.000005 | \n",
+ " max_depth=3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " -140.648468 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " onnxruntime1 | \n",
+ " 0.000005 | \n",
+ " max_depth=3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " -140.648468 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " runtime diff name v[1853] v[1567] \\\n",
+ "0 sklearn 0.000000 sklearn 33.825028 288.589432 \n",
+ "1 python 131.483138 bug 165.308167 NaN \n",
+ "2 python_compiled 131.483138 bug 165.308167 NaN \n",
+ "3 onnxruntime1 131.483138 bug 165.308167 NaN \n",
+ "4 python 0.000015 no scaler NaN 288.589447 \n",
+ "5 python_compiled 0.000015 no scaler NaN 288.589447 \n",
+ "6 onnxruntime1 0.000015 no scaler NaN 288.589447 \n",
+ "7 python 0.000015 float NaN NaN \n",
+ "8 python_compiled 0.000015 float NaN NaN \n",
+ "9 onnxruntime1 0.000015 float NaN NaN \n",
+ "10 python 0.000005 max_depth=3 NaN NaN \n",
+ "11 python_compiled 0.000005 max_depth=3 NaN NaN \n",
+ "12 onnxruntime1 0.000005 max_depth=3 NaN NaN \n",
+ "\n",
+ " v[1015] v[2] \n",
+ "0 361.193069 -140.648473 \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "5 NaN NaN \n",
+ "6 NaN NaN \n",
+ "7 361.193054 NaN \n",
+ "8 361.193054 NaN \n",
+ "9 361.193054 NaN \n",
+ "10 NaN -140.648468 \n",
+ "11 NaN -140.648468 \n",
+ "12 NaN -140.648468 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "obs = [dict(runtime='sklearn', diff=0, name='sklearn')]\n",
+ "for name, x32, mod in models:\n",
+ " for runtime in ['python', 'python_compiled', 'onnxruntime1']:\n",
+ " lonx = to_onnx(mod, x32[:1])\n",
+ " loinf = OnnxInference(lonx, runtime=runtime)\n",
+ " y_skl = mod.predict(X32)\n",
+ " y_onx = loinf.run({'X': X32})['variable']\n",
+ " delta = numpy.abs(y_skl - y_onx.ravel())\n",
+ " am = delta.argmax()\n",
+ " obs.append(dict(runtime=runtime, diff=delta.max(), name=name))\n",
+ " obs[-1]['v[%d]' % am] = y_onx.ravel()[am]\n",
+ " obs[0]['v[%d]' % am] = y_skl.ravel()[am]\n",
+ "\n",
+ "df = pandas.DataFrame(obs)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | name | \n",
+ " bug | \n",
+ " float | \n",
+ " max_depth=3 | \n",
+ " no scaler | \n",
+ " sklearn | \n",
+ "
\n",
+ " \n",
+ " | runtime | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | onnxruntime1 | \n",
+ " 131.483138 | \n",
+ " 0.000015 | \n",
+ " 0.000005 | \n",
+ " 0.000015 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | python | \n",
+ " 131.483138 | \n",
+ " 0.000015 | \n",
+ " 0.000005 | \n",
+ " 0.000015 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | python_compiled | \n",
+ " 131.483138 | \n",
+ " 0.000015 | \n",
+ " 0.000005 | \n",
+ " 0.000015 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | sklearn | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "name bug float max_depth=3 no scaler sklearn\n",
+ "runtime \n",
+ "onnxruntime1 131.483138 0.000015 0.000005 0.000015 NaN\n",
+ "python 131.483138 0.000015 0.000005 0.000015 NaN\n",
+ "python_compiled 131.483138 0.000015 0.000005 0.000015 NaN\n",
+ "sklearn NaN NaN NaN NaN 0.0"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.pivot(\"runtime\", \"name\", \"diff\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Other way to converter\n",
+ "\n",
+ "ONNX does not support double for TreeEnsembleRegressor but that a new operator TreeEnsembleRegressorDouble was implemented into *mlprodict*. We need to update the conversion."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The mlprodict extension is already loaded. To reload it, use:\n",
+ " %reload_ext mlprodict\n"
+ ]
+ }
+ ],
+ "source": [
+ "%load_ext mlprodict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "onx32 = to_onnx(model, X_train[:1].astype(numpy.float32))\n",
+ "onx64 = to_onnx(model, X_train[:1].astype(numpy.float64), \n",
+ " dtype=numpy.float64, rewrite_ops=True)\n",
+ "%onnxview onx64"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " runtime | \n",
+ " diff | \n",
+ " v[1853] | \n",
+ " v[0] | \n",
+ " real | \n",
+ " error | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " sklearn | \n",
+ " 0.000000 | \n",
+ " 33.825028 | \n",
+ " -157.233832 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " python | \n",
+ " 131.483138 | \n",
+ " 165.308167 | \n",
+ " NaN | \n",
+ " float | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " python | \n",
+ " 0.000000 | \n",
+ " NaN | \n",
+ " -157.233832 | \n",
+ " double | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " python_compiled | \n",
+ " 131.483138 | \n",
+ " 165.308167 | \n",
+ " NaN | \n",
+ " float | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " python_compiled | \n",
+ " 0.000000 | \n",
+ " NaN | \n",
+ " -157.233832 | \n",
+ " double | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " onnxruntime1 | \n",
+ " 131.483138 | \n",
+ " 165.308167 | \n",
+ " NaN | \n",
+ " float | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " onnxruntime1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " double | \n",
+ " Unable to create InferenceSession due to '[ONN... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " runtime diff v[1853] v[0] real \\\n",
+ "0 sklearn 0.000000 33.825028 -157.233832 NaN \n",
+ "1 python 131.483138 165.308167 NaN float \n",
+ "2 python 0.000000 NaN -157.233832 double \n",
+ "3 python_compiled 131.483138 165.308167 NaN float \n",
+ "4 python_compiled 0.000000 NaN -157.233832 double \n",
+ "5 onnxruntime1 131.483138 165.308167 NaN float \n",
+ "6 onnxruntime1 NaN NaN NaN double \n",
+ "\n",
+ " error \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "5 NaN \n",
+ "6 Unable to create InferenceSession due to '[ONN... "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X32 = Xi_test.astype(numpy.float32)\n",
+ "X64 = Xi_test.astype(numpy.float64)\n",
+ "\n",
+ "obs = [dict(runtime='sklearn', diff=0)]\n",
+ "for runtime in ['python', 'python_compiled', 'onnxruntime1']:\n",
+ " for name, onx, xr in [('float', onx32, X32), ('double', onx64, X64)]:\n",
+ " try:\n",
+ " oinf = OnnxInference(onx, runtime=runtime)\n",
+ " except Exception as e:\n",
+ " obs.append(dict(runtime=runtime, error=str(e), real=name))\n",
+ " continue\n",
+ " y_skl = model.predict(xr)\n",
+ " y_onx = oinf.run({'X': xr})['variable']\n",
+ " delta = numpy.abs(y_skl - y_onx.ravel())\n",
+ " am = delta.argmax()\n",
+ " obs.append(dict(runtime=runtime, diff=delta.max(), real=name))\n",
+ " obs[-1]['v[%d]' % am] = y_onx.ravel()[am]\n",
+ " obs[0]['v[%d]' % am] = y_skl.ravel()[am]\n",
+ "\n",
+ "pandas.DataFrame(obs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We see that the use of double removes the discrepencies."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## OnnxPipeline\n",
+ "\n",
+ "Another way to reduce the number of discrepencies is to use a pipeline which converts every steps into ONNX before training the next one. That way, every steps is either trained on the inputs, either trained on the outputs produced by ONNX. Let's see how it works."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\base.py:209: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. Previously it would return None.\n",
+ " FutureWarning)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "OnnxPipeline(steps=[('scaler',\n",
+ " OnnxTransformer(onnx_bytes=b'\\x08\\x06\\x12\\x08skl2onnx\\x1a\\x051.7.0\"\\x07ai.onnx(\\x002\\x00:\\xf6\\x01\\n\\xa6\\x01\\n\\x01X\\x12\\x08variable\\x1a\\x06Scaler\"\\x06Scaler*=\\n\\x06offset=\\xf7P5\\xbc=E\\xd8p==u\\x02\\x9a\\xbd=\\x07_\\x98==\\x90\\xc5\\xa3\\xbe=\\x97I\\x87?=R\\xdd\\x81@=\\xafJ\\xaf\\xc0=\\x90\\xc5s@=\\xcf\\xad\\xbd@\\xa0\\x01\\x06*<\\n\\x05scale=\\xb...xb8>=\\xab4.>=z\\x9f\\xa9==V\\x95#==d\\xcd\\xa3<=;\\x7f$<=\\xae\\'\\xa5;=d\\x8c$;=0g\\xa3:=\\xa5\\x84\":\\xa0\\x01\\x06:\\nai.onnx.ml\\x12\\x1emlprodict_ONNX(StandardScaler)Z\\x11\\n\\x01X\\x12\\x0c\\n\\n\\x08\\x01\\x12\\x06\\n\\x00\\n\\x02\\x08\\nb\\x18\\n\\x08variable\\x12\\x0c\\n\\n\\x08\\x01\\x12\\x06\\n\\x00\\n\\x02\\x08\\nB\\x0e\\n\\nai.onnx.ml\\x10\\x01')),\n",
+ " ('dt', DecisionTreeRegressor(max_depth=10))])"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from mlprodict.sklapi import OnnxPipeline\n",
+ "\n",
+ "model_onx = OnnxPipeline([\n",
+ " ('scaler', StandardScaler()),\n",
+ " ('dt', DecisionTreeRegressor(max_depth=max_depth))\n",
+ "])\n",
+ "model_onx.fit(Xi_train, yi_train)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We see that the first steps was replaced by an object *OnnxTransformer* which wraps an ONNX file into a transformer following the *scikit-learn* API. The initial steps are still available."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[('scaler', StandardScaler()), ('dt', DecisionTreeRegressor(max_depth=10))]"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model_onx.raw_steps_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "models = [\n",
+ " ('bug', Xi_test.astype(numpy.float32), model),\n",
+ " ('OnnxPipeline', Xi_test.astype(numpy.float32), model_onx),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " runtime | \n",
+ " diff | \n",
+ " name | \n",
+ " v[260] | \n",
+ " v[309] | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " sklearn | \n",
+ " 0.000000 | \n",
+ " sklearn | \n",
+ " -191.618456 | \n",
+ " 270.969924 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " python | \n",
+ " 186.296484 | \n",
+ " bug | \n",
+ " -5.321973 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " python_compiled | \n",
+ " 186.296484 | \n",
+ " bug | \n",
+ " -5.321973 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " onnxruntime1 | \n",
+ " 186.296484 | \n",
+ " bug | \n",
+ " -5.321973 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " python | \n",
+ " 0.000015 | \n",
+ " OnnxPipeline | \n",
+ " NaN | \n",
+ " 270.969910 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " python_compiled | \n",
+ " 0.000015 | \n",
+ " OnnxPipeline | \n",
+ " NaN | \n",
+ " 270.969910 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " onnxruntime1 | \n",
+ " 0.000015 | \n",
+ " OnnxPipeline | \n",
+ " NaN | \n",
+ " 270.969910 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " runtime diff name v[260] v[309]\n",
+ "0 sklearn 0.000000 sklearn -191.618456 270.969924\n",
+ "1 python 186.296484 bug -5.321973 NaN\n",
+ "2 python_compiled 186.296484 bug -5.321973 NaN\n",
+ "3 onnxruntime1 186.296484 bug -5.321973 NaN\n",
+ "4 python 0.000015 OnnxPipeline NaN 270.969910\n",
+ "5 python_compiled 0.000015 OnnxPipeline NaN 270.969910\n",
+ "6 onnxruntime1 0.000015 OnnxPipeline NaN 270.969910"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "obs = [dict(runtime='sklearn', diff=0, name='sklearn')]\n",
+ "for name, x32, mod in models:\n",
+ " for runtime in ['python', 'python_compiled', 'onnxruntime1']:\n",
+ " lonx = to_onnx(mod, x32[:1])\n",
+ " loinf = OnnxInference(lonx, runtime=runtime)\n",
+ " y_skl = model_onx.predict(X32) # model_onx is the new baseline\n",
+ " y_onx = loinf.run({'X': X32})['variable']\n",
+ " delta = numpy.abs(y_skl - y_onx.ravel())\n",
+ " am = delta.argmax()\n",
+ " obs.append(dict(runtime=runtime, diff=delta.max(), name=name))\n",
+ " obs[-1]['v[%d]' % am] = y_onx.ravel()[am]\n",
+ " obs[0]['v[%d]' % am] = y_skl.ravel()[am]\n",
+ "\n",
+ "df = pandas.DataFrame(obs)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Training the next steps based on ONNX outputs is better. This is not completely satisfactory... Let's check the accuracy."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0.7483581090213741, 0.746017898986315)"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.score(Xi_test, yi_test), model_onx.score(Xi_test, yi_test)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Pretty close."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Final explanation: StandardScalerFloat\n",
+ "\n",
+ "We proposed two ways to have an ONNX pipeline which produces the same prediction as *scikit-learn*. Let's now replace the StandardScaler by a new one which outputs float and not double. It turns out that class *StandardScaler* computes ``X /= self.scale_`` but ONNX does ``X *= self.scale_inv_``. We need to implement this exact same operator with float32 to remove all discrepencies."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Pipeline(steps=[('scaler', StandardScalerFloat()),\n",
+ " ('dt', DecisionTreeRegressor(max_depth=10))])"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "class StandardScalerFloat(StandardScaler):\n",
+ " \n",
+ " def __init__(self, with_mean=True, with_std=True):\n",
+ " StandardScaler.__init__(self, with_mean=with_mean, with_std=with_std)\n",
+ " \n",
+ " def fit(self, X, y=None):\n",
+ " StandardScaler.fit(self, X, y)\n",
+ " if self.scale_ is not None:\n",
+ " self.scale_inv_ = (1. / self.scale_).astype(numpy.float32)\n",
+ " return self\n",
+ " \n",
+ " def transform(self, X):\n",
+ " X = X.copy()\n",
+ " if self.with_mean:\n",
+ " X -= self.mean_\n",
+ " if self.with_std:\n",
+ " X *= self.scale_inv_\n",
+ " return X\n",
+ "\n",
+ " \n",
+ "model_float = Pipeline([\n",
+ " ('scaler', StandardScalerFloat()),\n",
+ " ('dt', DecisionTreeRegressor(max_depth=max_depth))\n",
+ "])\n",
+ "\n",
+ "model_float.fit(Xi_train.astype(numpy.float32), yi_train.astype(numpy.float32))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unable to find a shape calculator for type ''.\n",
+ "It usually means the pipeline being converted contains a\n",
+ "transformer or a predictor with no corresponding converter\n",
+ "implemented in sklearn-onnx. If the converted is implemented\n",
+ "in another library, you need to register\n",
+ "the converted so that it can be used by sklearn-onnx (function\n",
+ "update_registered_converter). If the model is not yet covered\n",
+ "by sklearn-onnx, you may raise an issue to\n",
+ "https://github.com/onnx/sklearn-onnx/issues\n",
+ "to get the converter implemented or even contribute to the\n",
+ "project. If the model is a custom model, a new converter must\n",
+ "be implemented. Examples can be found in the gallery.\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "try:\n",
+ " onx_float = to_onnx(model_float, Xi_test[:1].astype(numpy.float))\n",
+ "except RuntimeError as e:\n",
+ " print(e)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We need to register a new converter so that *sklearn-onnx* knows how to convert the new scaler. We reuse the existing converters."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from skl2onnx import update_registered_converter\n",
+ "from skl2onnx.operator_converters.scaler_op import convert_sklearn_scaler\n",
+ "from skl2onnx.shape_calculators.scaler import calculate_sklearn_scaler_output_shapes\n",
+ "\n",
+ "\n",
+ "update_registered_converter(\n",
+ " StandardScalerFloat, \"SklearnStandardScalerFloat\",\n",
+ " calculate_sklearn_scaler_output_shapes,\n",
+ " convert_sklearn_scaler)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "models = [\n",
+ " ('bug', Xi_test.astype(numpy.float32), model),\n",
+ " ('FloatPipeline', Xi_test.astype(numpy.float32), model_float),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " runtime | \n",
+ " diff | \n",
+ " name | \n",
+ " v[71] | \n",
+ " v[114] | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " sklearn | \n",
+ " 0.000000 | \n",
+ " sklearn | \n",
+ " -5.321973 | \n",
+ " 300.935196 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " python | \n",
+ " 186.296481 | \n",
+ " bug | \n",
+ " -191.618454 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " python_compiled | \n",
+ " 186.296481 | \n",
+ " bug | \n",
+ " -191.618454 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " onnxruntime1 | \n",
+ " 186.296481 | \n",
+ " bug | \n",
+ " -191.618454 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " python | \n",
+ " 0.000015 | \n",
+ " FloatPipeline | \n",
+ " NaN | \n",
+ " 300.935181 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " python_compiled | \n",
+ " 0.000015 | \n",
+ " FloatPipeline | \n",
+ " NaN | \n",
+ " 300.935181 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " onnxruntime1 | \n",
+ " 0.000015 | \n",
+ " FloatPipeline | \n",
+ " NaN | \n",
+ " 300.935181 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " runtime diff name v[71] v[114]\n",
+ "0 sklearn 0.000000 sklearn -5.321973 300.935196\n",
+ "1 python 186.296481 bug -191.618454 NaN\n",
+ "2 python_compiled 186.296481 bug -191.618454 NaN\n",
+ "3 onnxruntime1 186.296481 bug -191.618454 NaN\n",
+ "4 python 0.000015 FloatPipeline NaN 300.935181\n",
+ "5 python_compiled 0.000015 FloatPipeline NaN 300.935181\n",
+ "6 onnxruntime1 0.000015 FloatPipeline NaN 300.935181"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "obs = [dict(runtime='sklearn', diff=0, name='sklearn')]\n",
+ "for name, x32, mod in models:\n",
+ " for runtime in ['python', 'python_compiled', 'onnxruntime1']:\n",
+ " lonx = to_onnx(mod, x32[:1])\n",
+ " loinf = OnnxInference(lonx, runtime=runtime)\n",
+ " y_skl = model_float.predict(X32) # we use model_float as a baseline\n",
+ " y_onx = loinf.run({'X': X32})['variable']\n",
+ " delta = numpy.abs(y_skl - y_onx.ravel())\n",
+ " am = delta.argmax()\n",
+ " obs.append(dict(runtime=runtime, diff=delta.max(), name=name))\n",
+ " obs[-1]['v[%d]' % am] = y_onx.ravel()[am]\n",
+ " obs[0]['v[%d]' % am] = y_skl.ravel()[am]\n",
+ "\n",
+ "df = pandas.DataFrame(obs)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That means than the differences between ``float32(X / Y)`` and ``float32(X) * float32(1 / Y)`` are big enough to select a different path in the decision tree. ``float32(X) / float32(Y)`` and ``float32(X) * float32(1 / Y)`` are also different enough to trigger a different path. Let's illustrate that on example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 1.9073486e-06 1.4210854715202004e-14\n",
+ "1 1.8626451e-09 1.3877787807814457e-17\n",
+ "2 0.0009765625 1.4551915228366852e-11\n"
+ ]
+ }
+ ],
+ "source": [
+ "a1 = numpy.random.randn(100, 2) * 10\n",
+ "a2 = a1.copy()\n",
+ "a2[:, 1] *= 1000\n",
+ "a3 = a1.copy()\n",
+ "a3[:, 0] *= 1000\n",
+ "\n",
+ "for i, a in enumerate([a1, a2, a3]):\n",
+ " a = a.astype(numpy.float32)\n",
+ " max_diff32 = numpy.max([\n",
+ " numpy.abs(numpy.float32(x[0]) / numpy.float32(x[1]) - \n",
+ " numpy.float32(x[0]) * (numpy.float32(1) / numpy.float32(x[1])))\n",
+ " for x in a])\n",
+ " max_diff64 = numpy.max([\n",
+ " numpy.abs(numpy.float64(x[0]) / numpy.float64(x[1]) - \n",
+ " numpy.float64(x[0]) * (numpy.float64(1) / numpy.float64(x[1])))\n",
+ " for x in a])\n",
+ " print(i, max_diff32, max_diff64)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The last random set shows very big differences, obviously big enough to trigger a different path in the graph. The difference for double could probably be significant in some cases, not enough on this example."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file
diff --git a/_doc/sphinxdoc/source/api/sklapi.rst b/_doc/sphinxdoc/source/api/sklapi.rst
index 824b92437..659d6e4e2 100644
--- a/_doc/sphinxdoc/source/api/sklapi.rst
+++ b/_doc/sphinxdoc/source/api/sklapi.rst
@@ -6,5 +6,17 @@ This is the main class which makes it easy to insert
to use the prediction from an :epkg:`ONNX` files into a :epkg:`scikit-learn`
pipeline.
+.. contents::
+ :local:
+
+OnnxTransformer
++++++++++++++++
+
.. autosignature:: mlprodict.sklapi.onnx_transformer.OnnxTransformer
:members:
+
+OnnxPipeline
+++++++++++++
+
+.. autosignature:: mlprodict.sklapi.onnx_pipeline.OnnxPipeline
+ :members:
diff --git a/_doc/sphinxdoc/source/conf.py b/_doc/sphinxdoc/source/conf.py
index d2fde23d6..5dd5ac846 100644
--- a/_doc/sphinxdoc/source/conf.py
+++ b/_doc/sphinxdoc/source/conf.py
@@ -78,6 +78,7 @@
'lightgbm': 'https://lightgbm.readthedocs.io/en/latest/',
'make_scorer': 'https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html',
'Minkowski distance': 'https://en.wikipedia.org/wiki/Minkowski_distance',
+ 'mlinsights': '',
'mlprodict': 'http://www.xavierdupre.fr/app/mlprodict/helpsphinx/index.html',
'openmp': 'https://www.openmp.org/',
'ONNX': 'https://onnx.ai/',
@@ -97,5 +98,6 @@
'run_asv.sh': 'https://github.com/sdpython/mlprodict/blob/master/bin/run_asv.sh',
'Rust': 'https://www.rust-lang.org/',
'sklearn-onnx': 'https://github.com/onnx/sklearn-onnx',
+ 'TransferTransformer': 'http://www.xavierdupre.fr/app/mlinsights/helpsphinx/mlinsights/mlmodel/transfer_transformer.html',
'xgboost': "https://xgboost.readthedocs.io/en/latest/",
})
diff --git a/_unittests/ut_documentation/test_run_notebooks_onnx_discrepencies.py b/_unittests/ut_documentation/test_run_notebooks_onnx_discrepencies.py
new file mode 100644
index 000000000..e94905058
--- /dev/null
+++ b/_unittests/ut_documentation/test_run_notebooks_onnx_discrepencies.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+"""
+@brief test log(time=30s)
+"""
+import os
+import unittest
+from pyquickhelper.loghelper import fLOG
+from pyquickhelper.ipythonhelper import test_notebook_execution_coverage
+from pyquickhelper.pycode import (
+ add_missing_development_version, ExtTestCase
+)
+import mlprodict
+
+
+class TestNotebookOnnxDiscrepencies(ExtTestCase):
+
+ def setUp(self):
+ add_missing_development_version(["jyquickhelper"], __file__, hide=True)
+
+ def test_notebook_onnx_discrenpencies(self):
+ fLOG(
+ __file__,
+ self._testMethodName,
+ OutputPrint=__name__ == "__main__")
+
+ self.assertNotEmpty(mlprodict is not None)
+ folder = os.path.join(os.path.dirname(__file__),
+ "..", "..", "_doc", "notebooks")
+ test_notebook_execution_coverage(__file__, "onnx_discrepencies", folder,
+ this_module_name="mlprodict", fLOG=fLOG)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py b/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py
index d8bb010fa..eb07181a7 100644
--- a/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py
+++ b/_unittests/ut_onnx_conv/test_onnxrt_runtime_lightgbm.py
@@ -6,7 +6,7 @@
import numpy
import pandas
from lightgbm import LGBMClassifier, Dataset, train as lgb_train
-from pyquickhelper.pycode import ExtTestCase, skipif_circleci
+from pyquickhelper.pycode import ExtTestCase, skipif_circleci, ignore_warnings
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from skl2onnx.common.data_types import (
@@ -26,6 +26,7 @@ def setUp(self):
register_converters()
@skipif_circleci('stuck')
+ @ignore_warnings((RuntimeWarning, UserWarning))
def test_onnxrt_python_lightgbm_categorical(self):
X = pandas.DataFrame({"A": numpy.random.permutation(['a', 'b', 'c', 'd'] * 75), # str
@@ -76,6 +77,7 @@ def test_onnxrt_python_lightgbm_categorical(self):
# self.assertEqualArray(exp, df.values, decimal=6)
@skipif_circleci('stuck')
+ @ignore_warnings((RuntimeWarning, UserWarning))
def test_onnxrt_python_lightgbm_categorical_iris(self):
iris = load_iris()
X, y = iris.data, iris.target
@@ -131,6 +133,7 @@ def test_onnxrt_python_lightgbm_categorical_iris(self):
self.assertEqualArray(exp, values[:, 1], decimal=5)
@skipif_circleci('stuck')
+ @ignore_warnings((RuntimeWarning, UserWarning))
def test_onnxrt_python_lightgbm_categorical_iris_dataframe(self):
iris = load_iris()
X, y = iris.data, iris.target
diff --git a/_unittests/ut_onnxrt/test_optim_onnx_identity.py b/_unittests/ut_onnxrt/test_optim_onnx_identity.py
index 515e89e30..13c75552e 100644
--- a/_unittests/ut_onnxrt/test_optim_onnx_identity.py
+++ b/_unittests/ut_onnxrt/test_optim_onnx_identity.py
@@ -30,8 +30,8 @@ def test_onnx_remove_identities(self):
'input', op_version=get_opset_number_from_onnx())
cdist = onnx_squareform_pdist(
cop, dtype=numpy.float32, op_version=get_opset_number_from_onnx())
- cop2 = OnnxIdentity(cdist, output_names=[
- 'cdist'], op_version=get_opset_number_from_onnx())
+ cop2 = OnnxIdentity(cdist, output_names=['cdist'],
+ op_version=get_opset_number_from_onnx())
model_def = cop2.to_onnx(
{'input': FloatTensorType()},
@@ -143,7 +143,7 @@ def onnx_test_knn_single_regressor(self, dtype, n_targets=1, debug=False,
self.assertIn('subgraphs_optim', stats)
def test_onnx_test_knn_single_regressor32(self):
- self.onnx_test_knn_single_regressor(numpy.float32, expected=[2, 1])
+ self.onnx_test_knn_single_regressor(numpy.float32, expected=[1, 1])
if __name__ == "__main__":
diff --git a/_unittests/ut_onnxrt/test_sklearn_helper.py b/_unittests/ut_onnxrt/test_sklearn_helper.py
index f1de4453c..cbfaceddf 100644
--- a/_unittests/ut_onnxrt/test_sklearn_helper.py
+++ b/_unittests/ut_onnxrt/test_sklearn_helper.py
@@ -122,7 +122,7 @@ def test_statistics_pipeline_sgd(self):
clr.fit(X_train, y_train)
onx = to_onnx(clr, X_train[:1].astype(numpy.float32))
ostats = onnx_statistics(onx)
- for k, v in {'nnodes': 9, 'doc_string': '', 'domain': 'ai.onnx', 'model_version': 0,
+ for k, v in {'nnodes': 8, 'doc_string': '', 'domain': 'ai.onnx', 'model_version': 0,
'producer_name': 'skl2onnx', 'ai.onnx.ml': 1}.items():
self.assertEqual(ostats[k], v)
self.assertIn('', ostats)
diff --git a/_unittests/ut_sklapi/test_onnx_pipeline.py b/_unittests/ut_sklapi/test_onnx_pipeline.py
new file mode 100644
index 000000000..e3c57bb23
--- /dev/null
+++ b/_unittests/ut_sklapi/test_onnx_pipeline.py
@@ -0,0 +1,138 @@
+"""
+@brief test log(time=4s)
+"""
+import unittest
+import numpy
+import onnxruntime
+from sklearn.decomposition import PCA
+from sklearn.preprocessing import StandardScaler
+from sklearn.datasets import load_iris
+from sklearn.linear_model import LogisticRegression
+from sklearn.mixture import GaussianMixture
+from pyquickhelper.pycode import ExtTestCase, ignore_warnings
+from mlinsights.mlmodel import TransferTransformer
+from mlprodict.onnx_conv import to_onnx
+from mlprodict.onnx_conv.register import _register_converters_mlinsights
+from mlprodict.onnxrt import OnnxInference
+from mlprodict.sklapi import OnnxPipeline, OnnxTransformer
+from mlprodict.tools import get_opset_number_from_onnx
+
+
+class TestOnnxPipeline(ExtTestCase):
+
+ def test_pipeline_iris(self):
+ iris = load_iris()
+ X, y = iris.data, iris.target
+ pipe = OnnxPipeline([
+ ('pca', PCA(n_components=2)),
+ ('no', StandardScaler()),
+ ('lr', LogisticRegression())],
+ enforce_float32=True,
+ op_version=get_opset_number_from_onnx())
+ pipe.fit(X, y)
+ pipe.fit(X, y)
+ self.assertTrue(hasattr(pipe, 'raw_steps_'))
+ self.assertEqual(len(pipe.steps), 3)
+ self.assertEqual(len(pipe.raw_steps_), 3)
+ self.assertIsInstance(pipe.steps[0][1], OnnxTransformer)
+ self.assertIsInstance(pipe.steps[1][1], OnnxTransformer)
+
+ X = X.astype(numpy.float32)
+ model_def = to_onnx(pipe, X[:1], target_opset=pipe.op_version,
+ options={id(pipe): {'zipmap': False}})
+ sess = OnnxInference(model_def)
+ res = sess.run({'X': X})
+ self.assertEqualArray(res["label"], pipe.predict(X))
+ self.assertEqualArray(res["probabilities"], pipe.predict_proba(X))
+
+ def test_transfer_transformer(self):
+ _register_converters_mlinsights(True)
+ iris = load_iris()
+ X, y = iris.data, iris.target
+ pipe = TransferTransformer(StandardScaler(), trainable=True)
+ pipe.fit(X, y)
+ model_def = to_onnx(pipe, X[:1])
+ sess = OnnxInference(model_def)
+ res = sess.run({'X': X})
+ exp = pipe.transform(X)
+ self.assertEqualArray(exp, res['variable'], decimal=5)
+
+ def test_transfer_logistic_regression(self):
+ _register_converters_mlinsights(True)
+ iris = load_iris()
+ X, y = iris.data, iris.target
+ pipe = TransferTransformer(
+ LogisticRegression(solver='liblinear'), trainable=True)
+ pipe.fit(X, y)
+ model_def = to_onnx(pipe, X[:1])
+ sess = OnnxInference(model_def)
+ res = sess.run({'X': X})
+ exp = pipe.transform(X)
+ self.assertEqualArray(exp, res['probabilities'], decimal=5)
+
+ def test_pipeline_pickable(self):
+ _register_converters_mlinsights(True)
+ iris = load_iris()
+ X, y = iris.data, iris.target
+ pipe = OnnxPipeline([
+ ('gm', TransferTransformer(StandardScaler(), trainable=True)),
+ ('lr', LogisticRegression())],
+ enforce_float32=True,
+ op_version=get_opset_number_from_onnx(),
+ options={'gm__score_samples': True})
+ pipe.fit(X, y)
+ pipe.fit(X, y)
+
+ self.assertTrue(hasattr(pipe, 'raw_steps_'))
+ self.assertEqual(len(pipe.steps), 2)
+ self.assertEqual(len(pipe.raw_steps_), 2)
+ self.assertIsInstance(pipe.steps[0][1], OnnxTransformer)
+
+ X = X.astype(numpy.float32)
+ model_def = to_onnx(pipe, X[:1], target_opset=pipe.op_version,
+ options={id(pipe): {'zipmap': False}})
+ sess = OnnxInference(model_def)
+ res = sess.run({'X': X})
+ self.assertEqual(list(sorted(res)), ['label', 'probabilities'])
+ self.assertEqualArray(res["label"], pipe.predict(X))
+ self.assertEqualArray(res["probabilities"], pipe.predict_proba(X))
+
+ @ignore_warnings(warns=FutureWarning)
+ def test_pipeline_pickable_options(self):
+ _register_converters_mlinsights(True)
+ iris = load_iris()
+ X, y = iris.data, iris.target
+ pipe = OnnxPipeline([
+ ('gm', TransferTransformer(
+ GaussianMixture(n_components=2),
+ trainable=True, method='predict_proba')),
+ ('lr', LogisticRegression())],
+ enforce_float32=True,
+ op_version=get_opset_number_from_onnx(),
+ options={'gm__score_samples': True,
+ 'lr__zipmap': False})
+ pipe.fit(X, y)
+ pipe.fit(X, y)
+
+ self.assertTrue(hasattr(pipe, 'raw_steps_'))
+ self.assertEqual(len(pipe.steps), 2)
+ self.assertEqual(len(pipe.raw_steps_), 2)
+ self.assertIsInstance(pipe.steps[0][1], OnnxTransformer)
+
+ X = X.astype(numpy.float32)
+ model_def = to_onnx(pipe, X[:1], target_opset=pipe.op_version,
+ options={id(pipe): {'zipmap': False}})
+ sess = OnnxInference(model_def, runtime="python_compiled")
+ self.assertIn("'probabilities': probabilities,", str(sess))
+ sess = onnxruntime.InferenceSession(model_def.SerializeToString())
+ r = sess.run(None, {'X': X})
+ self.assertEqual(len(r), 2)
+ sess = OnnxInference(model_def)
+ res = sess.run({'X': X})
+ self.assertEqual(list(sorted(res)), ['label', 'probabilities'])
+ self.assertEqualArray(res["label"], pipe.predict(X))
+ self.assertEqualArray(res["probabilities"], pipe.predict_proba(X))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/_unittests/ut_sklapi/test_onnx_transformer.py b/_unittests/ut_sklapi/test_onnx_transformer.py
index 000931027..d42ea503c 100644
--- a/_unittests/ut_sklapi/test_onnx_transformer.py
+++ b/_unittests/ut_sklapi/test_onnx_transformer.py
@@ -18,7 +18,7 @@
from mlprodict.tools import get_opset_number_from_onnx
-class TestInferenceSessionSklearn(ExtTestCase):
+class TestOnnxTransformer(ExtTestCase):
def setUp(self):
logger = getLogger('skl2onnx')
diff --git a/mlprodict/asv_benchmark/_create_asv_helper.py b/mlprodict/asv_benchmark/_create_asv_helper.py
index bcfd439f2..fde366d8f 100644
--- a/mlprodict/asv_benchmark/_create_asv_helper.py
+++ b/mlprodict/asv_benchmark/_create_asv_helper.py
@@ -157,8 +157,13 @@ def _sklearn_subfolder(model):
Returns the list of subfolders for a model.
"""
mod = model.__module__
+ if mod is not None and mod.startswith('mlinsights'):
+ return ['mlinsights', model.__name__]
spl = mod.split('.')
- pos = spl.index('sklearn')
+ try:
+ pos = spl.index('sklearn')
+ except ValueError: # pragma: no cover
+ raise ValueError("Unable to find 'sklearn' in '{}'.".format(mod))
res = spl[pos + 1: -1]
if len(res) == 0:
if spl[-1] == 'sklearn':
@@ -177,31 +182,31 @@ def _handle_init_files(model, flat, location, verbose, location_pyspy, fLOG):
if flat:
return ([], location, ".",
(None if location_pyspy is None else location_pyspy))
+
+ created = []
+ subf = _sklearn_subfolder(model)
+ subf = [_ for _ in subf if _[0] != '_' or _ == '_externals']
+ location_model = os.path.join(location, *subf)
+ prefix_import = "." * (len(subf) + 1)
+ if not os.path.exists(location_model):
+ os.makedirs(location_model)
+ for fold in [location_model, os.path.dirname(location_model),
+ os.path.dirname(os.path.dirname(location_model))]:
+ init = os.path.join(fold, '__init__.py')
+ if not os.path.exists(init):
+ with open(init, 'w') as _:
+ pass
+ created.append(init)
+ if verbose > 1 and fLOG is not None:
+ fLOG("[create_asv_benchmark] create '{}'.".format(init))
+ if location_pyspy is not None:
+ location_pyspy_model = os.path.join(location_pyspy, *subf)
+ if not os.path.exists(location_pyspy_model):
+ os.makedirs(location_pyspy_model)
else:
- created = []
- subf = _sklearn_subfolder(model)
- subf = [_ for _ in subf if _[0] != '_' or _ == '_externals']
- location_model = os.path.join(location, *subf)
- prefix_import = "." * (len(subf) + 1)
- if not os.path.exists(location_model):
- os.makedirs(location_model)
- for fold in [location_model, os.path.dirname(location_model),
- os.path.dirname(os.path.dirname(location_model))]:
- init = os.path.join(fold, '__init__.py')
- if not os.path.exists(init):
- with open(init, 'w') as _:
- pass
- created.append(init)
- if verbose > 1 and fLOG is not None:
- fLOG("[create_asv_benchmark] create '{}'.".format(init))
- if location_pyspy is not None:
- location_pyspy_model = os.path.join(location_pyspy, *subf)
- if not os.path.exists(location_pyspy_model):
- os.makedirs(location_pyspy_model)
- else:
- location_pyspy_model = None
+ location_pyspy_model = None
- return created, location_model, prefix_import, location_pyspy_model
+ return created, location_model, prefix_import, location_pyspy_model
def _asv_class_name(model, scenario, optimisation,
diff --git a/mlprodict/onnx_conv/convert.py b/mlprodict/onnx_conv/convert.py
index 5dd68b677..c5b3e5a8d 100644
--- a/mlprodict/onnx_conv/convert.py
+++ b/mlprodict/onnx_conv/convert.py
@@ -26,7 +26,8 @@ def convert_scorer(fct, initial_types, name=None,
dtype=numpy.float32,
custom_conversion_functions=None,
custom_shape_calculators=None,
- custom_parsers=None):
+ custom_parsers=None, white_op=None,
+ black_op=None, final_types=None):
"""
Converts a scorer into :epkg:`ONNX` assuming
there exists a converter associated to it.
@@ -54,6 +55,16 @@ def convert_scorer(fct, initial_types, name=None,
they can be rewritten, *custom_parsers* is a dictionary
``{ type: fct_parser(scope, model, inputs,
custom_parsers=None) }``
+ @param white_op white list of ONNX nodes allowed
+ while converting a pipeline, if empty,
+ all are allowed
+ @param black_op black list of ONNX nodes allowed
+ while converting a pipeline, if empty,
+ none are blacklisted
+ @param final_types a python list. Works the same way as
+ initial_types but not mandatory, it is used
+ to overwrites the type (if type is not None)
+ and the name of every output.
@return :epkg:`ONNX` graph
"""
if hasattr(fct, '_score_func'):
@@ -64,12 +75,13 @@ def convert_scorer(fct, initial_types, name=None,
if name is None:
name = "mlprodict_fct_ONNX(%s)" % fct.__name__
tr = CustomScorerTransform(fct.__name__, fct, kwargs)
- return convert_sklearn(tr, initial_types=initial_types,
- target_opset=target_opset, options=options,
- dtype=dtype,
- custom_conversion_functions=custom_conversion_functions,
- custom_shape_calculators=custom_shape_calculators,
- custom_parsers=custom_parsers)
+ return convert_sklearn(
+ tr, initial_types=initial_types,
+ target_opset=target_opset, options=options, dtype=dtype,
+ custom_conversion_functions=custom_conversion_functions,
+ custom_shape_calculators=custom_shape_calculators,
+ custom_parsers=custom_parsers, white_op=white_op,
+ black_op=black_op, final_types=final_types)
def guess_initial_types(X, initial_types):
@@ -186,7 +198,8 @@ def guess_schema_from_model(model, tensor_type=None, schema=None):
def to_onnx(model, X=None, name=None, initial_types=None,
target_opset=None, options=None,
- dtype=numpy.float32, rewrite_ops=False):
+ dtype=numpy.float32, rewrite_ops=False,
+ white_op=None, black_op=None, final_types=None):
"""
Converts a model using on :epkg:`sklearn-onnx`.
@@ -204,6 +217,16 @@ def to_onnx(model, X=None, name=None, initial_types=None,
@param dtype type to use to convert the model
@param rewrite_ops rewrites some existing converters,
the changes are permanent
+ @param white_op white list of ONNX nodes allowed
+ while converting a pipeline, if empty,
+ all are allowed
+ @param black_op black list of ONNX nodes allowed
+ while converting a pipeline, if empty,
+ none are blacklisted
+ @param final_types a python list. Works the same way as
+ initial_types but not mandatory, it is used
+ to overwrites the type (if type is not None)
+ and the name of every output.
@return converted model
The function rewrites function *to_onnx* from :epkg:`sklearn-onnx`
@@ -283,11 +306,13 @@ def to_onnx(model, X=None, name=None, initial_types=None,
print(onxp)
"""
if isinstance(model, OnnxOperatorMixin):
- if options is not None:
- raise NotImplementedError(
- "options not yet implemented for OnnxOperatorMixin.")
+ if not hasattr(model, 'op_version'):
+ raise RuntimeError(
+ "Missing attribute 'op_version' for type '{}'.".format(
+ type(model)))
return model.to_onnx(X=X, name=name, dtype=dtype,
- target_opset=target_opset)
+ options=options, black_op=black_op,
+ white_op=white_op, final_types=final_types)
if rewrite_ops:
old_values = register_rewritten_operators()
register_converters()
@@ -334,20 +359,17 @@ def _guess_type_(X, itype, dtype):
new_dtype = dts[0]
res = convert_scorer(model, initial_types, name=name,
target_opset=target_opset, options=options,
- dtype=new_dtype)
+ dtype=new_dtype, black_op=black_op,
+ white_op=white_op, final_types=final_types)
else:
if name is None:
name = "mlprodict_ONNX(%s)" % model.__class__.__name__
initial_types, dtype, new_dtype = _guess_type_(X, initial_types, dtype)
- try:
- res = convert_sklearn(model, initial_types=initial_types, name=name,
- target_opset=target_opset, options=options,
- dtype=new_dtype)
- except (TypeError, NameError):
- # older version of sklearn-onnx
- res = convert_sklearn(model, initial_types=initial_types, name=name,
- target_opset=target_opset, options=options)
+ res = convert_sklearn(model, initial_types=initial_types, name=name,
+ target_opset=target_opset, options=options,
+ dtype=new_dtype, black_op=black_op,
+ white_op=white_op, final_types=final_types)
if old_values is not None:
register_rewritten_operators(old_values)
diff --git a/mlprodict/onnx_conv/operator_converters/conv_lightgbm.py b/mlprodict/onnx_conv/operator_converters/conv_lightgbm.py
index d48a16f9f..fde1dbb72 100644
--- a/mlprodict/onnx_conv/operator_converters/conv_lightgbm.py
+++ b/mlprodict/onnx_conv/operator_converters/conv_lightgbm.py
@@ -11,6 +11,28 @@
from skl2onnx.common._apply_operation import apply_div, apply_reshape, apply_sub # pylint: disable=E0611
from skl2onnx.common.tree_ensemble import get_default_tree_classifier_attribute_pairs
from skl2onnx.proto import onnx_proto
+from skl2onnx.common.shape_calculator import (
+ calculate_linear_regressor_output_shapes,
+ calculate_linear_classifier_output_shapes
+)
+
+
+def calculate_lightgbm_output_shapes(operator):
+ """
+ Shape calculator for LightGBM Booster
+ (see :epkg:`lightgbm`).
+ """
+ op = operator.raw_operator
+ if hasattr(op, "_model_dict"):
+ objective = op._model_dict['objective']
+ else:
+ objective = op.objective_
+ if objective.startswith('binary') or objective.startswith('multiclass'):
+ return calculate_linear_classifier_output_shapes(operator)
+ if objective.startswith('regression'):
+ return calculate_linear_regressor_output_shapes(operator)
+ raise NotImplementedError(
+ "Objective '{}' is not implemented yet.".format(objective))
def _translate_split_criterion(criterion):
diff --git a/mlprodict/onnx_conv/operator_converters/conv_transfer_transformer.py b/mlprodict/onnx_conv/operator_converters/conv_transfer_transformer.py
new file mode 100644
index 000000000..77f71ed7d
--- /dev/null
+++ b/mlprodict/onnx_conv/operator_converters/conv_transfer_transformer.py
@@ -0,0 +1,108 @@
+"""
+@file
+@brief Converters for models from :epkg:`mlinsights`.
+"""
+from sklearn.base import ClassifierMixin
+from skl2onnx import get_model_alias
+from skl2onnx.common.data_types import FloatTensorType
+from skl2onnx.common._registration import get_shape_calculator
+from skl2onnx._parse import parse_sklearn
+from skl2onnx.common._apply_operation import apply_identity
+
+
+def _model_outputs(existing_scope, model, inputs, custom_parsers=None):
+ """
+ Retrieves the outputs of one particular models.
+ """
+ scope = existing_scope.temp()
+ if custom_parsers is not None and model in custom_parsers:
+ return custom_parsers[model](
+ scope, model, inputs, custom_parsers=custom_parsers)
+ return parse_sklearn(scope, model, inputs, custom_parsers=custom_parsers)
+
+
+def parser_transfer_transformer(scope, model, inputs, custom_parsers=None):
+ """
+ Parser for :epkg:`TransferTransformer`.
+ """
+ if custom_parsers is not None and model in custom_parsers:
+ return custom_parsers[model](
+ scope, model, inputs, custom_parsers=custom_parsers)
+
+ if model.method == 'predict_proba':
+ name = 'probabilities'
+ elif model.method == 'transform':
+ name = 'variable'
+ else:
+ raise NotImplementedError( # pragma: no cover
+ "Unable to defined the output for method='{}' and model='{}'.".format(
+ model.method, model.__class__.__name__))
+
+ prob = scope.declare_local_variable(name, FloatTensorType())
+ alias = get_model_alias(type(model))
+ this_operator = scope.declare_local_operator(alias, model)
+ this_operator.inputs = inputs
+ this_operator.outputs.append(prob)
+ return this_operator.outputs
+
+
+def shape_calculator_transfer_transformer(operator):
+ """
+ Shape calculator :epkg:`TransferTransformer`.
+ """
+ op = operator.raw_operator
+ alias = get_model_alias(type(op.estimator_))
+ calc = get_shape_calculator(alias)
+
+ scope = operator.scope_inst.temp()
+ this_operator = scope.declare_local_operator(alias)
+ this_operator.raw_operator = op.estimator_
+ this_operator.inputs = operator.inputs
+ res = _model_outputs(scope, op.estimator_, operator.inputs)
+ this_operator.outputs.extend([
+ scope.declare_local_variable(
+ "%sTTS" % r.onnx_name, r.type) for r in res])
+ this_operator.outputs = res
+ calc(this_operator)
+
+ if op.method == 'predict_proba':
+ operator.outputs[0].type = this_operator.outputs[1].type
+ elif op.method == 'transform':
+ operator.outputs[0].type = this_operator.outputs[0].type
+ else:
+ raise NotImplementedError( # pragma: no cover
+ "Unable to defined the output for method='{}' and model='{}'.".format(
+ op.method, op.__class__.__name__))
+
+
+def convert_transfer_transformer(scope, operator, container):
+ """
+ Converters for :epkg:`TransferTransformer`.
+ """
+ op = operator.raw_operator
+ op_type = get_model_alias(type(op.estimator_))
+
+ this_operator = scope.declare_local_operator(op_type)
+ this_operator.raw_operator = op.estimator_
+ this_operator.inputs = operator.inputs
+
+ if isinstance(op.estimator_, ClassifierMixin):
+ container.add_options(id(op.estimator_), {'zipmap': False})
+
+ res = _model_outputs(scope.temp(), op.estimator_, operator.inputs)
+ this_operator.outputs.extend([
+ scope.declare_local_variable(
+ "%sTTC" % r.onnx_name, r.type) for r in res])
+
+ if op.method == 'predict_proba':
+ index = 1
+ elif op.method == 'transform':
+ index = 0
+ else:
+ raise NotImplementedError( # pragma: no cover
+ "Unable to defined the output for method='{}' and model='{}'.".format(
+ op.method, op.__class__.__name__))
+
+ apply_identity(scope, this_operator.outputs[index].onnx_name,
+ operator.outputs[0].full_name, container,
+ operator_name=scope.get_unique_operator_name("IdentityTT"))
diff --git a/mlprodict/onnx_conv/register.py b/mlprodict/onnx_conv/register.py
index c47e03d57..cb5582cc9 100644
--- a/mlprodict/onnx_conv/register.py
+++ b/mlprodict/onnx_conv/register.py
@@ -7,6 +7,10 @@
import numbers
import numpy
from skl2onnx._parse import _parse_sklearn_classifier
+from skl2onnx import update_registered_converter
+from skl2onnx.common.shape_calculator import (
+ calculate_linear_classifier_output_shapes,
+ calculate_linear_regressor_output_shapes)
from .scorers import register_scorers
@@ -34,7 +38,6 @@ def _register_converters_lightgbm(exc=True):
@return list of models supported by the new converters
"""
registered = []
- from skl2onnx import update_registered_converter
try:
from lightgbm import LGBMClassifier
@@ -46,20 +49,13 @@ def _register_converters_lightgbm(exc=True):
"Cannot register LGBMClassifier due to '{}'.".format(e))
LGBMClassifier = None
if LGBMClassifier is not None:
- from .shape_calculators.shape_lightgbm import calculate_linear_classifier_output_shapes
- from .operator_converters.conv_lightgbm import convert_lightgbm
- try:
- update_registered_converter(
- LGBMClassifier, 'LgbmClassifier',
- calculate_linear_classifier_output_shapes,
- convert_lightgbm, parser=_parse_sklearn_classifier,
- options={'zipmap': [True, False], 'nocl': [True, False]})
- except TypeError:
- # skl2onnx <= 1.5
- update_registered_converter(
- LGBMClassifier, 'LgbmClassifier',
- calculate_linear_classifier_output_shapes,
- convert_lightgbm)
+ from .operator_converters.conv_lightgbm import (
+ convert_lightgbm, calculate_lightgbm_output_shapes)
+ update_registered_converter(
+ LGBMClassifier, 'LgbmClassifier',
+ calculate_lightgbm_output_shapes,
+ convert_lightgbm, parser=_parse_sklearn_classifier,
+ options={'zipmap': [True, False], 'nocl': [True, False]})
registered.append(LGBMClassifier)
try:
@@ -72,7 +68,6 @@ def _register_converters_lightgbm(exc=True):
"Cannot register LGBMRegressor due to '{}'.".format(e))
LGBMRegressor = None
if LGBMRegressor is not None:
- from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes
from .operator_converters.conv_lightgbm import convert_lightgbm
update_registered_converter(LGBMRegressor, 'LightGbmLGBMRegressor',
calculate_linear_regressor_output_shapes,
@@ -89,9 +84,8 @@ def _register_converters_lightgbm(exc=True):
"Cannot register LGBMRegressor due to '{}'.".format(e))
Booster = None
if Booster is not None:
- from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes
- from .operator_converters.conv_lightgbm import convert_lightgbm
- from .shape_calculators.shape_lightgbm import calculate_lightgbm_output_shapes
+ from .operator_converters.conv_lightgbm import (
+ convert_lightgbm, calculate_lightgbm_output_shapes)
from .parsers.parse_lightgbm import (
lightgbm_parser, WrappedLightGbmBooster,
WrappedLightGbmBoosterClassifier,
@@ -137,7 +131,6 @@ def _register_converters_xgboost(exc=True):
@return list of models supported by the new converters
"""
registered = []
- from skl2onnx import update_registered_converter
try:
from xgboost import XGBClassifier
@@ -149,20 +142,13 @@ def _register_converters_xgboost(exc=True):
"Cannot register XGBClassifier due to '{}'.".format(e))
XGBClassifier = None
if XGBClassifier is not None:
- from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from .operator_converters.conv_xgboost import convert_xgboost
- try:
- update_registered_converter(
- XGBClassifier, 'XGBoostXGBClassifier',
- calculate_linear_classifier_output_shapes,
- convert_xgboost, parser=_custom_parser_xgboost,
- options={'zipmap': [True, False], 'raw_scores': [True, False],
- 'nocl': [True, False]})
- except TypeError:
- # skl2onnx <= 1.5
- update_registered_converter(XGBClassifier, 'XGBoostXGBClassifier',
- calculate_linear_classifier_output_shapes,
- convert_xgboost)
+ update_registered_converter(
+ XGBClassifier, 'XGBoostXGBClassifier',
+ calculate_linear_classifier_output_shapes,
+ convert_xgboost, parser=_custom_parser_xgboost,
+ options={'zipmap': [True, False], 'raw_scores': [True, False],
+ 'nocl': [True, False]})
registered.append(XGBClassifier)
try:
@@ -175,7 +161,6 @@ def _register_converters_xgboost(exc=True):
"Cannot register LGBMRegressor due to '{}'.".format(e))
XGBRegressor = None
if XGBRegressor is not None:
- from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes
from .operator_converters.conv_xgboost import convert_xgboost
update_registered_converter(XGBRegressor, 'XGBoostXGBRegressor',
calculate_linear_regressor_output_shapes,
@@ -184,6 +169,41 @@ def _register_converters_xgboost(exc=True):
return registered
+def _register_converters_mlinsights(exc=True):
+ """
+ This functions registers additional converters
+ for :epkg:`mlinsights`.
+
+ @param exc if True, raises an exception if a converter cannot
+ registered (missing package for example)
+ @return list of models supported by the new converters
+ """
+ registered = []
+
+ try:
+ from mlinsights.mlmodel import TransferTransformer
+ except ImportError as e: # pragma: no cover
+ if exc:
+ raise e
+ else:
+ warnings.warn(
+ "Cannot register models from 'mlinsights' due to '{}'.".format(e))
+ TransferTransformer = None
+
+ if TransferTransformer is not None:
+ from .operator_converters.conv_transfer_transformer import (
+ shape_calculator_transfer_transformer, convert_transfer_transformer,
+ parser_transfer_transformer)
+ update_registered_converter(
+ TransferTransformer, 'MlInsightsTransferTransformer',
+ shape_calculator_transfer_transformer,
+ convert_transfer_transformer,
+ parser=parser_transfer_transformer)
+ registered.append(TransferTransformer)
+
+ return registered
+
+
def register_converters(exc=True):
"""
This functions registers additional converters
@@ -195,5 +215,6 @@ def register_converters(exc=True):
"""
ext = _register_converters_lightgbm(exc=exc)
ext += _register_converters_xgboost(exc=exc)
+ ext += _register_converters_mlinsights(exc=exc)
ext += register_scorers()
return ext
diff --git a/mlprodict/onnx_conv/shape_calculators/__init__.py b/mlprodict/onnx_conv/shape_calculators/__init__.py
deleted file mode 100644
index 12b134b02..000000000
--- a/mlprodict/onnx_conv/shape_calculators/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-"""
-@file
-@brief Shortcut to *shape_calculators*.
-"""
diff --git a/mlprodict/onnx_conv/shape_calculators/shape_lightgbm.py b/mlprodict/onnx_conv/shape_calculators/shape_lightgbm.py
deleted file mode 100644
index 0b6e76111..000000000
--- a/mlprodict/onnx_conv/shape_calculators/shape_lightgbm.py
+++ /dev/null
@@ -1,26 +0,0 @@
-"""
-@file
-@brief Shape calculator for LGBMClassifier, handles doubles.
-"""
-from skl2onnx.common.shape_calculator import (
- calculate_linear_regressor_output_shapes,
- calculate_linear_classifier_output_shapes
-)
-
-
-def calculate_lightgbm_output_shapes(operator):
- """
- Shape calculator for LightGBM Booster
- (see :epkg:`lightgbm`).
- """
- op = operator.raw_operator
- if not hasattr(op, "_model_dict"):
- raise TypeError("This converter does not apply on type '{}'."
- "".format(type(op)))
- if op._model_dict['objective'].startswith('binary'):
- return calculate_linear_classifier_output_shapes(operator)
- if op._model_dict['objective'].startswith('regression'):
- return calculate_linear_regressor_output_shapes(operator)
- raise NotImplementedError(
- "Objective '{}' is not implemented yet.".format(
- op._model_dict['objective']))
diff --git a/mlprodict/onnxrt/onnx_inference.py b/mlprodict/onnxrt/onnx_inference.py
index 89cc72e08..4af2ae8ea 100644
--- a/mlprodict/onnxrt/onnx_inference.py
+++ b/mlprodict/onnxrt/onnx_inference.py
@@ -112,6 +112,9 @@ def _init(self):
Prepares the instance to deliver predictions.
"""
self.graph_ = self.to_sequence()
+ if len(self.graph_['sequence']) == 0:
+ raise RuntimeError(
+ "No runnable nodes was found in the ONNX graph.")
self.outputs_ = self.graph_['outputs']
self.inputs_ = self.graph_['inputs']
self.target_opset_ = self.graph_['targets']
@@ -354,7 +357,16 @@ def to_sequence(self):
raise RuntimeError( # pragma: no cover
"A parameter has no (sparse) value '{}' for node '{}'\nv={}\ndobj=[{}]".format(
k, node.name, v, node))
- nodes[node.name] = OnnxInferenceNode(node, dobj, self.global_index)
+ if node.name in nodes:
+ i = 2
+ while True:
+ new_name = "%s_n%i" % (node.name, i)
+ if new_name not in nodes:
+ break
+ i += 1
+ else:
+ new_name = node.name
+ nodes[new_name] = OnnxInferenceNode(node, dobj, self.global_index)
# names
names = {}
diff --git a/mlprodict/onnxrt/optim/_onnx_optimisation_common.py b/mlprodict/onnxrt/optim/_onnx_optimisation_common.py
index 9889cb059..3f61125a7 100644
--- a/mlprodict/onnxrt/optim/_onnx_optimisation_common.py
+++ b/mlprodict/onnxrt/optim/_onnx_optimisation_common.py
@@ -35,6 +35,8 @@ def _apply_optimisation_on_graph(fct, onnx_model, recursive=True, debug_info=Non
new_model.doc_string = onnx_model.doc_string
if hasattr(onnx_model, 'value_info'):
graph.value_info.extend(onnx_model.value_info)
+ while len(new_model.opset_import) > 0: # pylint: disable=E1101
+ new_model.opset_import.pop() # pylint: disable=E1101
for oimp in onnx_model.opset_import:
op_set = new_model.opset_import.add() # pylint: disable=E1101
op_set.domain = oimp.domain
@@ -145,9 +147,9 @@ def _rename_node_input(onnx_node, old_name, new_name=None):
atts = new_atts
else:
atts = onnx_node.attribute
- node = _make_node(onnx_node.op_type, inputs,
- outputs, name=onnx_node.name,
- attributes=atts)
+ node = _make_node(
+ onnx_node.op_type, inputs, outputs, name=onnx_node.name,
+ domain=onnx_node.domain, attributes=atts)
return node
@@ -235,7 +237,7 @@ def _rename_node_output(onnx_node, old_name, new_name):
atts = new_atts
else:
atts = onnx_node.attribute
- node = _make_node(onnx_node.op_type, inputs,
- outputs, name=onnx_node.name,
- attributes=atts)
+ node = _make_node(
+ onnx_node.op_type, inputs, outputs, name=onnx_node.name,
+ domain=onnx_node.domain, attributes=atts)
return node
diff --git a/mlprodict/onnxrt/optim/onnx_optimisation_identity.py b/mlprodict/onnxrt/optim/onnx_optimisation_identity.py
index 3b9c7bc99..18205580e 100644
--- a/mlprodict/onnxrt/optim/onnx_optimisation_identity.py
+++ b/mlprodict/onnxrt/optim/onnx_optimisation_identity.py
@@ -43,13 +43,13 @@ def onnx_remove_node_identity(onnx_model, recursive=True, debug_info=None):
def retrieve_idnodes(graph, existing_nodes):
idnodes = []
- for i, (node, exnode) in enumerate(zip(graph.node, existing_nodes)):
+ for i, exnode in enumerate(existing_nodes):
if exnode is None:
continue
- if node.op_type == 'Identity':
- input = node.input[0]
- output = node.output[0]
- idnodes.append((i, node, input, output))
+ if exnode.op_type == 'Identity':
+ input = exnode.input[0]
+ output = exnode.output[0]
+ idnodes.append((i, exnode, input, output))
return idnodes
nodes = list(graph.node)
@@ -75,20 +75,25 @@ def retrieve_idnodes(graph, existing_nodes):
if out in nodes[j].input:
nodes[j] = _rename_node_input(nodes[j], out, inp)
rem += 1
- if nodes[j] == 'Identity':
+ if nodes[j].op_type == 'Identity':
restart = True
nodes[i] = None
rem += 1
continue
- if not restart and inp not in inputs:
- # We cannot change an input name.
+ if not restart and inp not in inputs and inp not in outputs:
+ # We cannot change an input name or an output name.
for j in range(len(nodes)): # pylint: disable=C0200
if nodes[j] is None:
continue
if inp in nodes[j].output:
nodes[j] = _rename_node_output(nodes[j], inp, out)
rem += 1
- if nodes[j] == 'Identity':
+ if nodes[j].op_type == 'Identity':
+ restart = True
+ if inp in nodes[j].input:
+ nodes[j] = _rename_node_input(nodes[j], inp, out)
+ rem += 1
+ if nodes[j].op_type == 'Identity':
restart = True
nodes[i] = None
rem += 1
diff --git a/mlprodict/sklapi/__init__.py b/mlprodict/sklapi/__init__.py
index aee923f41..c94311d5c 100644
--- a/mlprodict/sklapi/__init__.py
+++ b/mlprodict/sklapi/__init__.py
@@ -4,3 +4,4 @@
@brief Shortcut to *onnxrt*.
"""
from .onnx_transformer import OnnxTransformer
+from .onnx_pipeline import OnnxPipeline
diff --git a/mlprodict/sklapi/onnx_pipeline.py b/mlprodict/sklapi/onnx_pipeline.py
new file mode 100644
index 000000000..5dcf56188
--- /dev/null
+++ b/mlprodict/sklapi/onnx_pipeline.py
@@ -0,0 +1,212 @@
+"""
+@file
+@brief A pipeline which serializes into ONNX steps by steps.
+"""
+import numpy
+from sklearn.base import clone
+from sklearn.pipeline import Pipeline, _fit_transform_one
+from sklearn.utils.validation import check_memory
+from sklearn.utils import _print_elapsed_time
+from ..onnx_conv import to_onnx
+from .onnx_transformer import OnnxTransformer
+
+
+class OnnxPipeline(Pipeline):
+ """
+ The pipeline overwrites method *fit*, it trains and converts
+ every steps into ONNX before training the next step
+ in order to minimize discrepencies. By default,
+ ONNX is using float and not double which is the default
+ for :epkg:`scikit-learn`. It may introduce discrepencies
+ when a non-continuous model (mathematical definition) such
+ as tree ensemble and part of the pipeline.
+
+ Parameters
+ ----------
+
+ steps : list
+ List of (name, transform) tuples (implementing fit/transform) that are
+ chained, in the order in which they are chained, with the last object
+ an estimator.
+ memory : str or object with the joblib.Memory interface, default=None
+ Used to cache the fitted transformers of the pipeline. By default,
+ no caching is performed. If a string is given, it is the path to
+ the caching directory. Enabling caching triggers a clone of
+ the transformers before fitting. Therefore, the transformer
+ instance given to the pipeline cannot be inspected
+ directly. Use the attribute ``named_steps`` or ``steps`` to
+ inspect estimators within the pipeline. Caching the
+ transformers is advantageous when fitting is time consuming.
+ verbose : bool, default=False
+ If True, the time elapsed while fitting each step will be printed as it
+ is completed.
+ output_name: string
+ requested output name or None to request all and
+ have method *transform* to store all of them in a dataframe
+ enforce_float32 : boolean
+ :epkg:`onnxruntime` only supports *float32*,
+ :epkg:`scikit-learn` usually uses double floats, this parameter
+ ensures that every array of double floats is converted into
+ single floats
+ runtime: string, defined the runtime to use
+ as described in @see cl OnnxInference.
+ options: see @fn to_onnx
+ white_op: see @fn to_onnx
+ black_op: see @fn to_onnx
+ final_types: see @fn to_onnx
+ target_opset: ONNX targeted opset
+
+ The class stores transformers before converting them into ONNX
+ in attributes ``raw_steps_``.
+
+ See notebook :ref:`onnxdiscrepenciesrst` to see it can
+ be used to reduce discrepencies after it was converted into
+ *ONNX*.
+ """
+
+ def __init__(self, steps, *, memory=None, verbose=False,
+ output_name=None, enforce_float32=True,
+ runtime='python', op_version=None, options=None,
+ white_op=None, black_op=None, final_types=None):
+ Pipeline.__init__(
+ self, steps, memory=memory, verbose=verbose)
+ self.output_name = output_name
+ self.enforce_float32 = enforce_float32
+ self.runtime = runtime
+ self.op_version = op_version
+ self.options = options
+ self.white_op = white_op
+ self.white_op = white_op
+ self.black_op = black_op
+ self.final_types = final_types
+
+ def fit(self, X, y=None, **fit_params):
+ """
+ Fits the model, fits all the transforms one after the
+ other and transform the data, then fit the transformed
+ data using the final estimator.
+
+ Parameters
+ ----------
+ X : iterable
+ Training data. Must fulfill input requirements of first step of the
+ pipeline.
+ y : iterable, default=None
+ Training targets. Must fulfill label requirements for all steps of
+ the pipeline.
+ **fit_params : dict of string -> object
+ Parameters passed to the ``fit`` method of each step, where
+ each parameter name is prefixed such that parameter ``p`` for step
+ ``s`` has key ``s__p``.
+ Returns
+ -------
+ self : Pipeline
+ This estimator
+ """
+ fit_params_steps = self._check_fit_params(**fit_params)
+ Xt = self._fit(X, y, **fit_params_steps)
+ with _print_elapsed_time('OnnxPipeline',
+ self._log_message(len(self.steps) - 1)):
+ if self._final_estimator != 'passthrough':
+ fit_params_last_step = fit_params_steps[self.steps[-1][0]]
+ self._final_estimator.fit(Xt, y, **fit_params_last_step)
+
+ return self
+
+ def _fit(self, X, y=None, **fit_params_steps):
+ # shallow copy of steps - this should really be steps_
+ if hasattr(self, 'raw_steps_') and self.raw_steps_ is not None: # pylint: disable=E0203
+ # Let's reuse the previous training.
+ self.steps = list(self.raw_steps_) # pylint: disable=E0203
+ self.raw_steps_ = list(self.raw_steps_)
+ else:
+ self.steps = list(self.steps)
+ self.raw_steps_ = list(self.steps)
+
+ self._validate_steps()
+ # Setup the memory
+ memory = check_memory(self.memory)
+
+ fit_transform_one_cached = memory.cache(_fit_transform_one)
+
+ for (step_idx,
+ name,
+ transformer) in self._iter(with_final=False,
+ filter_passthrough=False):
+ if (transformer is None or transformer == 'passthrough'):
+ with _print_elapsed_time('Pipeline',
+ self._log_message(step_idx)):
+ continue
+
+ if hasattr(memory, 'location'):
+ # joblib >= 0.12
+ if memory.location is None:
+ # we do not clone when caching is disabled to
+ # preserve backward compatibility
+ cloned_transformer = transformer
+ else:
+ cloned_transformer = clone(transformer)
+ else:
+ cloned_transformer = clone(transformer)
+
+ # Fit or load from cache the current transformer
+ x_train = X
+ X, fitted_transformer = fit_transform_one_cached(
+ cloned_transformer, X, y, None,
+ message_clsname='Pipeline',
+ message=self._log_message(step_idx),
+ **fit_params_steps[name])
+ # Replace the transformer of the step with the fitted
+ # transformer. This is necessary when loading the transformer
+ # from the cache.
+ self.raw_steps_[step_idx] = (name, fitted_transformer)
+ self.steps[step_idx] = (
+ name, self._to_onnx(name, fitted_transformer, x_train))
+ return X
+
+ def _to_onnx(self, name, fitted_transformer, x_train):
+ """
+ Converts a transformer into ONNX.
+
+ @param fitted_transformer fitted transformer
+ @param x_train training dataset
+ @return corresponding @see cl OnnxTransformer
+ """
+ if not isinstance(x_train, numpy.ndarray):
+ raise RuntimeError( # pragma: no cover
+ "The pipeline only handle numpy arrays not {}.".format(
+ type(x_train)))
+ atts = {'options', 'white_op', 'black_op', 'final_types'}
+ kwargs = {k: getattr(self, k) for k in atts}
+ if self.enforce_float32 or x_train.dtype != numpy.float64:
+ x_train = x_train.astype(numpy.float32)
+ elif 'dtype' not in kwargs:
+ kwargs['dtype'] = numpy.float64
+ if 'options' in kwargs:
+ kwargs['options'] = self._preprocess_options(
+ name, kwargs['options'])
+ onx = to_onnx(fitted_transformer, x_train, **kwargs)
+ tr = OnnxTransformer(
+ onx.SerializeToString(), output_name=self.output_name,
+ enforce_float32=self.enforce_float32, runtime=self.runtime)
+ return tr.fit()
+
+ def _preprocess_options(self, name, options):
+ """
+ Preprocesses the options.
+
+ @param name option name
+ @param options conversion options
+ @return new options
+ """
+ if options is None:
+ return None
+ prefix = name + '__'
+ new_options = {}
+ for k, v in options.items():
+ if isinstance(k, str):
+ if k.startswith(prefix):
+ new_options[k[len(prefix):]] = v
+ else:
+ new_options[k] = v
+ return new_options
diff --git a/mlprodict/sklapi/onnx_transformer.py b/mlprodict/sklapi/onnx_transformer.py
index 41ca11c4a..05293318c 100644
--- a/mlprodict/sklapi/onnx_transformer.py
+++ b/mlprodict/sklapi/onnx_transformer.py
@@ -11,29 +11,35 @@
from skl2onnx.proto import TensorProto
from skl2onnx.helpers.onnx_helper import load_onnx_model, enumerate_model_node_outputs
from skl2onnx.helpers.onnx_helper import select_model_inputs_outputs
-from skl2onnx.common.data_types import FloatTensorType
+from skl2onnx.common.data_types import (
+ FloatTensorType, DoubleTensorType,
+ Int64TensorType)
from ..onnxrt import OnnxInference
from ..onnxrt.onnx2py_helper import _var_as_dict
class OnnxTransformer(BaseEstimator, TransformerMixin, OnnxOperatorMixin):
"""
- Calls :epkg:`onnxruntime` inference following :epkg:`scikit-learn` API
+ Calls :epkg:`onnxruntime` or the runtime implemented
+ in this package to transform input based on a ONNX graph.
+ It follows :epkg:`scikit-learn` API
so that it can be included in a :epkg:`scikit-learn` pipeline.
See notebook :ref:`transferlearningrst` for an example.
Parameters
----------
- onnx_bytes : bytes
+ onnx_bytes: bytes
output_name: string
requested output name or None to request all and
have method *transform* to store all of them in a dataframe
- enforce_float32 : boolean
+ enforce_float32: boolean
:epkg:`onnxruntime` only supports *float32*,
:epkg:`scikit-learn` usually uses double floats, this parameter
ensures that every array of double floats is converted into
single floats
+ runtime: string, defined the runtime to use
+ as described in @see cl OnnxInference.
"""
def __init__(self, onnx_bytes, output_name=None, enforce_float32=True,
@@ -54,9 +60,10 @@ def __repr__(self): # pylint: disable=W0222
ob = self.onnx_bytes
if len(ob) > 20:
ob = ob[:10] + b"..." + ob[-10:]
- return "{0}(onnx_bytes={1}, output_name={2}, enforce_float32={3}, runtime='{4}')".format(
- self.__class__.__name__, ob, self.output_name,
- self.enforce_float32, self.runtime)
+ return ("{0}(onnx_bytes={1}, output_name={2}, enforce_float32={3}, "
+ "runtime='{4}')".format(
+ self.__class__.__name__, ob, self.output_name,
+ self.enforce_float32, self.runtime))
def fit(self, X=None, y=None, **fit_params):
"""
@@ -141,12 +148,11 @@ def transform(self, X, y=None, **inputs):
if self.output_name or len(outputs) == 1:
if isinstance(outputs[0], list):
return pandas.DataFrame(outputs[0])
- else:
- return outputs[0]
- else:
- names = self.output_name if self.output_name else [
- o.name for o in self.onnxrt_.output_names]
- return pandas.DataFrame({k: v for k, v in zip(names, outputs)})
+ return outputs[0]
+
+ names = self.output_name if self.output_name else [
+ o.name for o in self.onnxrt_.output_names]
+ return pandas.DataFrame({k: v for k, v in zip(names, outputs)})
def fit_transform(self, X, y=None, **inputs):
"""
@@ -193,7 +199,7 @@ def enumerate_create(onnx_bytes, output_names=None, enforce_float32=True):
enforce_float32=enforce_float32)
yield out, tr
- def onnx_parser(self, inputs=None):
+ def onnx_parser(self, scope=None, inputs=None):
"""
Returns a parser for this model.
"""
@@ -201,6 +207,9 @@ def onnx_parser(self, inputs=None):
self.parsed_inputs_ = inputs
def parser():
+ if (not hasattr(self, 'onnxrt_') or # pragma: no cover
+ not hasattr(self.onnxrt_, 'output_names')):
+ raise RuntimeError('OnnxTransformer not fit.')
return self.onnxrt_.output_names
return parser
@@ -221,9 +230,13 @@ def shape_calculator(operator):
elem = var['type']['elem']
if elem == 'float':
out_op.type = FloatTensorType(shape=shape)
+ elif elem == 'int64':
+ out_op.type = Int64TensorType(shape=shape)
+ elif elem == 'double':
+ out_op.type = DoubleTensorType(shape=shape)
else:
raise NotImplementedError(
- "Noy yet implemented for elem_type:\n{}".format(elem))
+ "Not yet implemented for elem_type:\n{}".format(elem))
return shape_calculator
def onnx_converter(self):
@@ -249,7 +262,6 @@ def clean_initializer_name(name, scope):
return scope.get_unique_variable_name(name)
def converter(scope, operator, container):
-
op = operator.raw_operator
graph = op.onnxrt_.obj.graph
@@ -283,10 +295,12 @@ def converter(scope, operator, container):
container.nodes.append(n)
for node in graph.node:
- n = helper.make_node(node.op_type,
- [name_mapping[o] for o in node.input],
- [name_mapping[o] for o in node.output],
- name=node_mapping[node.name] if node.name else None)
+ n = helper.make_node(
+ node.op_type,
+ [name_mapping[o] for o in node.input],
+ [name_mapping[o] for o in node.output],
+ name=node_mapping[node.name] if node.name else None,
+ domain=node.domain if node.domain else None)
n.attribute.extend(node.attribute) # pylint: disable=E1101
container.nodes.append(n)
diff --git a/requirements.txt b/requirements.txt
index 7e4c5a00b..0e1243481 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@ cpyquickhelper
jyquickhelper
lightgbm
memory_profiler
+mlinsights>=0.2.450
onnx>=1.7.0
git+https://github.com/xadupre/onnxconverter-common.git@jenkins
openpyxl
@@ -13,7 +14,7 @@ pybind11
py-cpuinfo
pyinstrument
pylint>=2.4.2
-pyquickhelper>=1.9.3393
+pyquickhelper>=1.9.3396
git+https://github.com/xadupre/sklearn-onnx.git@jenkins
sphinx
sphinx_readable_theme
diff --git a/requirements_conda.txt b/requirements_conda.txt
index fa66811fc..1bcd3c5ed 100644
--- a/requirements_conda.txt
+++ b/requirements_conda.txt
@@ -2,6 +2,7 @@ cffi
Cython
datashape
jinja2
+joblib>=0.12
jupyter
matplotlib
notebook>=5.0.0
@@ -9,6 +10,6 @@ numba
numpy>=1.17.2
pandas
pillow
-scikit-learn>=0.21.3
+scikit-learn>=0.22
scipy
Sphinx
diff --git a/setup.py b/setup.py
index 8c4235080..826aa29cf 100644
--- a/setup.py
+++ b/setup.py
@@ -366,20 +366,22 @@ def write_version():
packages=packages,
package_dir=package_dir,
package_data=package_data,
- setup_requires=["pybind11", "numpy", "onnx>=1.6", "scikit-learn",
+ setup_requires=["pybind11", "numpy", "onnx>=1.7", "scikit-learn",
"jinja2", 'cython'],
- install_requires=["pybind11", "numpy", "onnx>=1.6", 'scipy>=1.0.0',
+ install_requires=["pybind11", "numpy>=1.17", "onnx>=1.7", 'scipy>=1.0.0',
'jinja2', 'cython'],
extras_require={
- 'onnx_conv': ['scikit-learn>=0.21', 'skl2onnx>=1.6.9',
- 'joblib', 'threadpoolctl'],
+ 'onnx_conv': ['scikit-learn>=0.21', 'skl2onnx>=1.7',
+ 'joblib', 'threadpoolctl', 'mlinsights>=0.2.450',
+ 'lightgbm', 'xgboost'],
'sklapi': ['scikit-learn>=0.21', 'joblib', 'threadpoolctl'],
- 'onnx_val': ['scikit-learn>=0.21', 'skl2onnx>=1.6.9',
- 'onnxconverter-common>=1.6.9',
+ 'onnx_val': ['scikit-learn>=0.21', 'skl2onnx>=1.7',
+ 'onnxconverter-common>=1.7',
'onnxruntime>=1.1.0', 'joblib', 'threadpoolctl'],
- 'all': ['scikit-learn>=0.21', 'skl2onnx>=1.6.9',
- 'onnxconverter-common>=1.6.9',
- 'onnxruntime>=1.1.0', 'scipy' 'joblib', 'pandas',
- 'threadpoolctl'],
+ 'all': ['scikit-learn>=0.21', 'skl2onnx>=1.7',
+ 'onnxconverter-common>=1.7',
+ 'onnxruntime>=1.3.0', 'scipy' 'joblib', 'pandas',
+ 'threadpoolctl', 'mlinsights>=0.2.450',
+ 'lightgbm', 'xgboost'],
},
)