4 changes: 2 additions & 2 deletions .circleci/config.yml
@@ -49,8 +49,8 @@ jobs:
          name: flake8
          command: |
            . venv/bin/activate
            python -m flake8 onnxcustom --max-line-length 90
            python -m flake8 _doc/examples --max-line-length 90
            python -m flake8 onnxcustom --max-line-length 100
            python -m flake8 _doc/examples --max-line-length 100

      - run:
          name: run tests
1 change: 1 addition & 0 deletions .gitignore
@@ -60,3 +60,4 @@ _unittests/ut_documentation/plot_linear_regression.png
_unittests/ut_documentation/summary.csv
_unittests/ut_documentation/_test_example.txt
_unittests/ut_documentation/_test_example.txt
something
160 changes: 160 additions & 0 deletions _doc/examples/plot_funny_sigmoid.py
@@ -0,0 +1,160 @@
"""
.. _l-example-discrepencies-sigmoid:

Funny discrepancies
===================

The sigmoid function is :math:`sig(x) = \\frac{1}{1 + e^{-x}}`.
For very small or very large values, implementations have to approximate it
and they do not all do it the same way. It is usually a tradeoff between
precision and computation time.

.. index:: discrepancies, sigmoid

.. contents::
:local:


Precision
+++++++++

This section compares the precision of a couple of implementations
of the sigmoid function. The custom implementation uses
a Taylor expansion of the exponential function:
:math:`e^x \\sim 1 + x + \\frac{x^2}{2} + ... + \\frac{x^n}{n!} + o(x^n)`.

"""
import time
import numpy
import pandas
from tqdm import tqdm
from scipy.special import expit

from skl2onnx.algebra.onnx_ops import OnnxSigmoid
from skl2onnx.common.data_types import FloatTensorType
from onnxruntime import InferenceSession
from mlprodict.onnxrt import OnnxInference
from onnxcustom import get_max_opset
import matplotlib.pyplot as plt

one = numpy.array([1], dtype=numpy.float64)


def taylor_approximation_exp(x, degre=50):
    # truncated expansion 1 + x + x^2/2 + ... + x^degre / degre!
    y = numpy.ones(x.shape, dtype=x.dtype)
    a = numpy.ones(x.shape, dtype=x.dtype)
    for i in range(1, degre + 1):
        a *= x / i
        y += a
    return y


def taylor_sigmoid(x, degre=50):
    # sig(x) = 1 / (1 + e^{-x}), with e^{-x} replaced by its truncated expansion
    den = one + taylor_approximation_exp(-x, degre)
    return one / den
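

# A quick comparison, for illustration only, between the Taylor-based
# implementation and scipy's expit on one arbitrarily chosen value.
print(taylor_sigmoid(numpy.array([-4.], dtype=numpy.float64), 50))
print(expit(numpy.array([-4.], dtype=numpy.float64)))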


opset = get_max_opset()
N = 300
min_values = [-20 + float(i) * 10 / N for i in range(N)]
data = numpy.array([0], dtype=numpy.float32)

node = OnnxSigmoid('X', op_version=opset, output_names=['Y'])
onx = node.to_onnx({'X': FloatTensorType()},
                   {'Y': FloatTensorType()},
                   target_opset=opset)
rts = ['numpy', 'python', 'onnxruntime', 'taylor20', 'taylor40']

oinf = OnnxInference(onx)
sess = InferenceSession(onx.SerializeToString())

graph = []
for mv in tqdm(min_values):
    data[0] = mv
    for rt in rts:
        lab = ""
        if rt == 'numpy':
            y = expit(data)
        elif rt == 'python':
            y = oinf.run({'X': data})['Y']
            # scaled by 1.2 so that the curves are not superimposed
            y *= 1.2
            lab = "x1.2"
        elif rt == 'onnxruntime':
            y = sess.run(None, {'X': data})[0]
        elif rt == 'taylor40':
            y = taylor_sigmoid(data, 40)
            # scaled by 0.8 so that the curves are not superimposed
            y *= 0.8
            lab = "x0.8"
        elif rt == 'taylor20':
            y = taylor_sigmoid(data, 20)
            # scaled by 0.6 so that the curves are not superimposed
            y *= 0.6
            lab = "x0.6"
        else:
            raise AssertionError("Unknown runtime %r." % rt)
        value = y[0]
        graph.append(dict(rt=rt + lab, x=mv, y=value))

#############################################
# Graph.

_, ax = plt.subplots(1, 1, figsize=(12, 4))
df = pandas.DataFrame(graph)
piv = df.pivot('x', 'rt', 'y')
print(piv.T.head())
piv.plot(ax=ax, logy=True)

##############################################
# :math:`log(sig(x)) = -log(1 + e^{-x})`. When *x* is very negative,
# :math:`log(sig(x)) \\sim x`, so the curve becomes a straight line on the
# logarithmic scale. That explains the graph.
# We also see that :epkg:`onnxruntime` is less precise for these values.
# What's the benefit?
#
# Computation time
# ++++++++++++++++

graph = []
for mv in tqdm(min_values):
    data = numpy.array([mv] * 10000, dtype=numpy.float32)
    for rt in rts:
        begin = time.perf_counter()
        if rt == 'numpy':
            y = expit(data)
        elif rt == 'python':
            y = oinf.run({'X': data})['Y']
        elif rt == 'onnxruntime':
            y = sess.run(None, {'X': data})[0]
        elif rt == 'taylor40':
            y = taylor_sigmoid(data, 40)
        elif rt == 'taylor20':
            y = taylor_sigmoid(data, 20)
        else:
            raise AssertionError("Unknown runtime %r." % rt)
        duration = time.perf_counter() - begin
        graph.append(dict(rt=rt, x=mv, y=duration))

_, ax = plt.subplots(1, 1, figsize=(12, 4))
df = pandas.DataFrame(graph)
piv = df.pivot('x', 'rt', 'y')
piv.plot(ax=ax, logy=True)

#############################################
# Conclusion
# ++++++++++
#
# The implementation from :epkg:`onnxruntime` is faster but much less
# precise for extreme values. That explains why the probabilities may be
# very different when an observation is far from every classification
# boundary. In that case, the :epkg:`onnxruntime` implementation of the
# sigmoid function returns 0 where :func:`scipy.special.expit` still
# returns a small non-zero value.
# The probabilities of a logistic regression are obtained by transforming
# the raw scores with the sigmoid function and then normalizing them.
# If the raw scores are very negative, the sum of the probabilities becomes
# zero with :epkg:`onnxruntime` and the normalization fails.
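
# A small illustrative sketch of the failure mode described above (not part
# of the benchmark; the threshold below is arbitrary): if every per-class
# score is so negative that the sigmoid is rounded to zero, the sum of the
# probabilities is zero and normalizing by it is impossible.
raw_scores = numpy.array([-40.0, -45.0], dtype=numpy.float32)
smooth = expit(raw_scores)  # tiny but non-zero values
print(smooth / smooth.sum())  # still a valid probability distribution
rounded = numpy.where(smooth < 1e-15, 0., smooth)  # mimics a runtime returning 0
print(rounded.sum())  # 0: the normalization would divide by zero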

# plt.show()
11 changes: 8 additions & 3 deletions _doc/sphinxdoc/source/api/training.rst
@@ -26,7 +26,7 @@ BaseEstimator

Ancestor to both classes wrapping the :epkg:`onnxruntime` API.

.. autosignature:: onnxcustom.training.base_estimator.BaseEstimator
.. autosignature:: onnxcustom.training._base_estimator.BaseEstimator
    :members:

Exceptions
@@ -52,6 +52,8 @@ be combination of L1, L2 losses and L1, L2 penalties.

.. autosignature:: onnxcustom.utils.orttraining_helper.get_train_initializer

.. autosignature:: onnxcustom.utils.onnx_rewriter.onnx_rewrite_operator

.. _l-api-prt-gradient-optimizer:

OrtGradientOptimizer
@@ -100,11 +102,14 @@ LearningLoss
.. autosignature:: onnxcustom.training.sgd_learning_loss.ElasticLearningLoss
    :members:

.. autosignature:: onnxcustom.training.sgd_learning_loss.NegLogLearningLoss
    :members:

.. autosignature:: onnxcustom.training.sgd_learning_loss.SquareLearningLoss
    :members:
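
A short usage sketch for the negative log loss, adapted from the unit tests
added in this pull request (``onx``, ``X`` and ``y`` are assumed to come from
a binary classifier already converted to ONNX; the numeric values are only
illustrative)::

    from onnxcustom.training.optimizers_partial import (
        OrtGradientForwardBackwardOptimizer)
    from onnxcustom.training.sgd_learning_rate import LearningRateSGDNesterov
    from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss

    train_session = OrtGradientForwardBackwardOptimizer(
        onx, ['coef', 'intercept'],
        learning_rate=LearningRateSGDNesterov(
            1e-4, nesterov=False, momentum=0.9),
        learning_loss=NegLogLearningLoss(),
        warm_start=False, max_iter=100, batch_size=10)
    train_session.fit(X, y)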

Loss function
+++++++++++++
Loss functions
++++++++++++++

.. autosignature:: onnxcustom.utils.onnx_function.function_onnx_graph

@@ -53,7 +53,7 @@ optimized weights.

:epkg:`onnxruntime-training` does not implement loss functions.
That must be done independently. That's what function
:func:`onnxcustom.utils.orttraining_helper.add_loss_output>` does.
:func:`onnxcustom.utils.orttraining_helper.add_loss_output` does.
It implements a couple of usual losses in ONNX.
Another function :func:`onnxcustom.utils.orttraining_helper.get_train_initializer`
guesses all the coefficients of an ONNX graph if the user does not specify any.
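
A minimal sketch, adapted from the unit tests added in this pull request,
of how a loss output is appended before training (``onx``, ``X_train`` and
``y_train`` are assumed to come from a classifier already converted to ONNX;
the argument values are only illustrative)::

    from onnxcustom.utils.orttraining_helper import add_loss_output
    from onnxcustom.training.optimizers import OrtGradientOptimizer

    # append a log loss computed from the classifier output at index 1
    onx_loss = add_loss_output(onx, 'log', output_index=1)
    train_session = OrtGradientOptimizer(
        onx_loss, ['intercept', 'coef'], learning_rate=1e-3)
    train_session.fit(X_train, y_train.reshape((-1, 1)), use_numpy=True)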
1 change: 1 addition & 0 deletions _doc/sphinxdoc/source/tutorial_skl/tutorial_1_simple.rst
@@ -21,6 +21,7 @@ used in the ONNX graph.
    ../gyexamples/plot_dbegin_options_list
    ../gyexamples/plot_dbegin_options_zipmap
    ../gyexamples/plot_ebegin_float_double
    ../gyexamples/plot_funny_sigmoid
    ../gyexamples/plot_fbegin_investigate
    ../gyexamples/plot_gbegin_dataframe
    ../gyexamples/plot_gbegin_transfer_learning
@@ -117,11 +117,11 @@ Cache
+++++

Base class :class:`BaseLearningOnnx
<onnxcustom.training.base_onnx_function.BaseLearningOnnx>` implements
<onnxcustom.training._base_onnx_function.BaseLearningOnnx>` implements
methods :meth:`_bind_input_ortvalue
<onnxcustom.training.base_onnx_function.BaseLearningOnnx._bind_input_ortvalue>`
<onnxcustom.training._base_onnx_function.BaseLearningOnnx._bind_input_ortvalue>`
and :meth:`_bind_output_ortvalue
<onnxcustom.training.base_onnx_function.BaseLearningOnnx._bind_output_ortvalue>`
<onnxcustom.training._base_onnx_function.BaseLearningOnnx._bind_output_ortvalue>`
used by the three components mentioned above. They cache the bound pointers
(the value returned by `c_ortvalue.data_ptr()`) and do not bind again
if the method is called again with a different `OrtValue` but the same pointer
42 changes: 42 additions & 0 deletions _unittests/ut_module/test_onnx_runtimes.py
@@ -0,0 +1,42 @@
"""
@brief test log(time=0s)
"""
import unittest
import numpy
from scipy.special import expit # pylint: disable=E0611
from pyquickhelper.pycode import ExtTestCase
from skl2onnx.algebra.onnx_ops import OnnxSigmoid # pylint: disable=E0611
from skl2onnx.common.data_types import FloatTensorType
from mlprodict.onnxrt import OnnxInference
from onnxcustom import get_max_opset


class TestOnnxRuntimes(ExtTestCase):
    """Test style."""

    def test_check(self):
        opset = get_max_opset()
        min_values = [-41.621277, -40.621277, -30.621277, -20.621277,
                      -19, -18, -17, -15, -14, -13, -12, -11, -10, -5, -2]
        data = numpy.array(
            [[0]],
            dtype=numpy.float32)

        node = OnnxSigmoid('X', op_version=opset, output_names=['Y'])
        onx = node.to_onnx({'X': FloatTensorType()},
                           {'Y': FloatTensorType()},
                           target_opset=opset)
        rts = ['numpy', 'python', 'onnxruntime1']
        for mv in min_values:
            data[:, 0] = mv
            for rt in rts:
                if rt == 'numpy':
                    y = expit(data)
                else:
                    oinf = OnnxInference(onx, runtime=rt)
                    y = oinf.run({'X': data})['Y']
                self.assertNotEmpty(y)


if __name__ == "__main__":
    unittest.main()
2 changes: 1 addition & 1 deletion _unittests/ut_training/test_optimizers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
@brief test log(time=8s)
@brief test log(time=9s)
"""
import os
import unittest
95 changes: 95 additions & 0 deletions _unittests/ut_training/test_optimizers_classification.py
@@ -0,0 +1,95 @@
"""
@brief test log(time=8s)
"""
import unittest
from pyquickhelper.pycode import ExtTestCase, get_temp_folder
import numpy
from onnx.helper import set_model_props
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from mlprodict.onnx_conv import to_onnx
from mlprodict.plotting.text_plot import onnx_simple_text_plot
from mlprodict.onnx_tools.onnx_manipulations import select_model_inputs_outputs
# from mlprodict.onnxrt import OnnxInference
from onnxcustom import __max_supported_opset__ as opset
try:
    from onnxruntime import TrainingSession
except ImportError:
    # onnxruntime not training
    TrainingSession = None


class TestOptimizersClassification(ExtTestCase):

    @unittest.skipIf(TrainingSession is None, reason="not training")
    def test_ort_gradient_optimizers_binary(self):
        from onnxcustom.utils.orttraining_helper import add_loss_output
        from onnxcustom.training.optimizers import OrtGradientOptimizer
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10, random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
        X_train, _, y_train, __ = train_test_split(X, y)
        reg = SGDClassifier(loss='log')
        reg.fit(X_train, y_train)
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearClassifier'},
                      options={'zipmap': False})
        set_model_props(onx, {'info': 'unit test'})
        onx_loss = add_loss_output(onx, 'log', output_index=1)
        inits = ['intercept', 'coef']
        train_session = OrtGradientOptimizer(
            onx_loss, inits, learning_rate=1e-3)
        self.assertRaise(lambda: train_session.get_state(), AttributeError)
        train_session.fit(X_train, y_train.reshape((-1, 1)), use_numpy=True)
        state_tensors = train_session.get_state()
        self.assertEqual(len(state_tensors), 2)
        r = repr(train_session)
        self.assertIn("OrtGradientOptimizer(model_onnx=", r)
        self.assertIn("learning_rate='invscaling'", r)
        losses = train_session.train_losses_
        self.assertGreater(len(losses), 1)
        self.assertFalse(any(map(numpy.isnan, losses)))

    @unittest.skipIf(TrainingSession is None, reason="not training")
    def test_ort_gradient_optimizers_fw_nesterov_binary(self):
        from onnxcustom.training.optimizers_partial import (
            OrtGradientForwardBackwardOptimizer)
        from onnxcustom.training.sgd_learning_rate import (
            LearningRateSGDNesterov)
        from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10, random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
        X_train, _, y_train, __ = train_test_split(X, y)
        reg = SGDClassifier(loss='log')
        reg.fit(X_train, y_train)
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearRegressor'},
                      options={'zipmap': False,
                               'raw_scores': True})
        onx = select_model_inputs_outputs(onx, outputs=['score'])
        self.assertIn("output: name='score'",
                      onnx_simple_text_plot(onx))
        set_model_props(onx, {'info': 'unit test'})
        inits = ['coef', 'intercept']

        train_session = OrtGradientForwardBackwardOptimizer(
            onx, inits,
            learning_rate=LearningRateSGDNesterov(
                1e-4, nesterov=False, momentum=0.9),
            learning_loss=NegLogLearningLoss(),
            warm_start=False, max_iter=100, batch_size=10)
        self.assertIsInstance(train_session.learning_loss, NegLogLearningLoss)
        self.assertEqual(train_session.learning_loss.eps, 1e-5)
        train_session.fit(X, y)
        temp = get_temp_folder(
            __file__, "temp_ort_gradient_optimizers_fw_nesterov_binary")
        train_session.save_onnx_graph(temp)


if __name__ == "__main__":
    TestOptimizersClassification().test_ort_gradient_optimizers_fw_nesterov_binary()
    unittest.main()
@@ -401,4 +401,4 @@ def test_forward_training_logreg(self):

if __name__ == "__main__":
    # TestOrtTrainingForwardBackward().forward_no_training(verbose=True)
    unittest.main(verbosity=2)
    unittest.main(verbosity=2, failfast=True)