238 changes: 238 additions & 0 deletions _unittests/ut_training/test_optimizers_nan.py
@@ -0,0 +1,238 @@
"""
@brief test log(time=8s)
"""
import unittest
import logging
import numpy
from onnx import TensorProto
from pyquickhelper.pycode import ExtTestCase
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier, SGDRegressor
from mlprodict.onnx_conv import to_onnx
from mlprodict.plotting.text_plot import onnx_simple_text_plot
from mlprodict.onnx_tools.onnx_manipulations import select_model_inputs_outputs
# from mlprodict.onnxrt import OnnxInference
try:
from onnxruntime import TrainingSession
except ImportError:
    # onnxruntime is not built with training support
TrainingSession = None
from onnxcustom import __max_supported_opset__ as opset
from onnxcustom.training.excs import ConvergenceError


class TestOptimizersNan(ExtTestCase):

@classmethod
def setUpClass(cls):
logger = logging.getLogger('skl2onnx')
logger.setLevel(logging.WARNING)
logger = logging.getLogger('onnxcustom')
logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.WARNING)

@unittest.skipIf(TrainingSession is None, reason="not training")
def test_ort_gradient_optimizers_reg(self):
self.wtest_ort_gradient_optimizers_reg(False)

@unittest.skipIf(TrainingSession is None, reason="not training")
def test_ort_gradient_optimizers_reg_w(self):
self.wtest_ort_gradient_optimizers_reg(True)

def wtest_ort_gradient_optimizers_reg(self, use_weight=False):
from onnxcustom.utils.orttraining_helper import add_loss_output
from onnxcustom.training.optimizers import OrtGradientOptimizer
X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
y[0, 0] += 1
y[-1, 0] += 1
w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
reg = SGDRegressor()
reg.fit(X_train, y_train.ravel())
onx = to_onnx(reg, X_train, target_opset=opset,
black_op={'LinearRegressor'})
onx_loss = add_loss_output(
onx, 'squared_error',
weight_name='weight' if use_weight else None)
inits = ['intercept', 'coef']
inputs = onx_loss.graph.input
self.assertEqual(len(inputs), 3 if use_weight else 2)
train_session = OrtGradientOptimizer(
onx_loss, inits, learning_rate=1e9)
self.assertRaise(lambda: train_session.get_state(), AttributeError)
if use_weight:
self.assertRaise(
lambda: train_session.fit(
X_train, y_train.reshape((-1, 1)),
w_train.reshape((-1, 1)), use_numpy=False),
ConvergenceError)
else:
self.assertRaise(
lambda: train_session.fit(
X_train, y_train.reshape((-1, 1)), use_numpy=False),
ConvergenceError)
state_tensors = train_session.get_state()
self.assertEqual(len(state_tensors), 2)
r = repr(train_session)
self.assertIn("OrtGradientOptimizer(model_onnx=", r)
self.assertIn("learning_rate='invscaling'", r)
losses = train_session.train_losses_
self.assertGreater(len(losses), 1)
if any(map(numpy.isnan, losses)):
raise AssertionError(losses)

@unittest.skipIf(TrainingSession is None, reason="not training")
def test_ort_gradient_optimizers_binary(self):
self.wtest_ort_gradient_optimizers_binary(False)

def wtest_ort_gradient_optimizers_binary(self, use_weight=False):
from onnxcustom.utils.orttraining_helper import add_loss_output
from onnxcustom.training.optimizers import OrtGradientOptimizer
X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
y = numpy.arange(X.shape[0]).astype(
numpy.float32).reshape((-1, 1)) > 10
X = X.astype(numpy.float32)
y = y.astype(numpy.int64)
y[0, 0] = 0
y[-1, 0] = 1
w = (numpy.random.rand(X.shape[0]) + 1).astype(numpy.float32)
X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
reg = SGDClassifier(loss='log')
reg.fit(X_train, y_train.ravel())
onx = to_onnx(reg, X_train, target_opset=opset,
black_op={'LinearClassifier'},
options={'zipmap': False})
onx_loss = add_loss_output(
onx, 'log', output_index=1,
weight_name='weight' if use_weight else None)
inits = ['intercept', 'coef']
inputs = onx_loss.graph.input
self.assertEqual(len(inputs), 3 if use_weight else 2)
dt = inputs[1].type.tensor_type.elem_type
self.assertEqual(TensorProto.INT64, dt) # pylint: disable=E1101
train_session = OrtGradientOptimizer(
onx_loss, inits, learning_rate=1e9)
self.assertRaise(lambda: train_session.get_state(), AttributeError)
if use_weight:
train_session.fit(
X_train, y_train.reshape((-1, 1)),
w_train.reshape((-1, 1)), use_numpy=False)
else:
train_session.fit(
X_train, y_train.reshape((-1, 1)), use_numpy=False)
state_tensors = train_session.get_state()
self.assertEqual(len(state_tensors), 2)
r = repr(train_session)
self.assertIn("OrtGradientOptimizer(model_onnx=", r)
self.assertIn("learning_rate='invscaling'", r)
losses = train_session.train_losses_
self.assertGreater(len(losses), 1)
if any(map(numpy.isnan, losses)):
raise AssertionError(losses)

@unittest.skipIf(TrainingSession is None, reason="not training")
def test_ort_gradient_optimizers_fw_sgd_binary(self):
self.wtest_ort_gradient_optimizers_fw_sgd_binary(False)

def wtest_ort_gradient_optimizers_fw_sgd_binary(self, use_weight):
from onnxcustom.training.optimizers_partial import (
OrtGradientForwardBackwardOptimizer)
from onnxcustom.training.sgd_learning_rate import (
LearningRateSGD)
from onnxcustom.training.sgd_learning_loss import NegLogLearningLoss
X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
y = numpy.arange(X.shape[0]).astype(
numpy.float32).reshape((-1, 1)) > 10
X = X.astype(numpy.float32)
y = y.astype(numpy.int64)
y[0, 0] = 0
y[-1, 0] = 1
w = (numpy.random.rand(y.shape[0]) + 1).astype(numpy.float32)
X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
reg = SGDClassifier(loss='log')
if use_weight:
reg.fit(X_train, y_train.ravel(),
sample_weight=w_train.astype(numpy.float64))
else:
reg.fit(X_train, y_train.ravel())
onx = to_onnx(reg, X_train, target_opset=opset,
black_op={'LinearRegressor'},
options={'zipmap': False,
'raw_scores': True})
onx = select_model_inputs_outputs(onx, outputs=['score'])
self.assertIn("output: name='score'",
onnx_simple_text_plot(onx))
inits = ['coef', 'intercept']

train_session = OrtGradientForwardBackwardOptimizer(
onx, inits, weight_name='weight' if use_weight else None,
learning_rate=LearningRateSGD(1e10),
learning_loss=NegLogLearningLoss(),
warm_start=False, max_iter=100, batch_size=10,
enable_logging=False)
self.assertIsInstance(train_session.learning_loss, NegLogLearningLoss)
self.assertEqual(train_session.learning_loss.eps, 1e-5)
y_train = y_train.reshape((-1, 1))
if use_weight:
train_session.fit(X_train, y_train, w_train.reshape((-1, 1)))
else:
train_session.fit(X_train, y_train)
losses = train_session.train_losses_
self.assertGreater(len(losses), 1)
if any(map(numpy.isnan, losses)):
raise AssertionError(losses)

@unittest.skipIf(TrainingSession is None, reason="not training")
def test_ort_gradient_optimizers_fw_sgd_reg(self):
self.wtest_ort_gradient_optimizers_fw_sgd_reg(False)

@unittest.skipIf(TrainingSession is None, reason="not training")
def test_ort_gradient_optimizers_fw_sgd_reg_weight(self):
self.wtest_ort_gradient_optimizers_fw_sgd_reg(True)

def wtest_ort_gradient_optimizers_fw_sgd_reg(self, use_weight):
from onnxcustom.training.optimizers_partial import (
OrtGradientForwardBackwardOptimizer)
from onnxcustom.training.sgd_learning_rate import (
LearningRateSGD)
from onnxcustom.training.sgd_learning_loss import SquareLearningLoss
X = numpy.arange(60).astype(numpy.float32).reshape((-1, 3))
y = numpy.arange(X.shape[0]).astype(numpy.float32).reshape((-1, 1))
y[0, 0] += 1
y[-1, 0] += 1
w = (numpy.random.rand(y.shape[0]) + 1).astype(numpy.float32)
X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
reg = SGDRegressor()
if use_weight:
reg.fit(X_train, y_train.ravel(),
sample_weight=w_train.astype(numpy.float64))
else:
reg.fit(X_train, y_train.ravel())
onx = to_onnx(reg, X_train, target_opset=opset,
black_op={'LinearRegressor'})
inits = ['coef', 'intercept']

train_session = OrtGradientForwardBackwardOptimizer(
onx, inits, weight_name='weight' if use_weight else None,
learning_rate=LearningRateSGD(1e10),
learning_loss=SquareLearningLoss(),
warm_start=False, max_iter=100, batch_size=10,
enable_logging=False)
self.assertIsInstance(train_session.learning_loss, SquareLearningLoss)
y_train = y_train.reshape((-1, 1))
if use_weight:
self.assertRaise(
lambda: train_session.fit(
X_train, y_train, w_train.reshape((-1, 1))),
ConvergenceError)
else:
self.assertRaise(
lambda: train_session.fit(X_train, y_train),
ConvergenceError)
losses = train_session.train_losses_
self.assertLess(len(losses), 2)


if __name__ == "__main__":
unittest.main()
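
For context, a minimal sketch of the behaviour the new tests assert, using only the API exercised above. The random data and the oversized learning rate are illustrative, not part of this PR: an excessive learning rate makes OrtGradientOptimizer.fit raise ConvergenceError, and the losses recorded before the divergence remain readable through train_losses_.

import numpy
from sklearn.linear_model import SGDRegressor
from mlprodict.onnx_conv import to_onnx
from onnxcustom import __max_supported_opset__ as opset
from onnxcustom.utils.orttraining_helper import add_loss_output
from onnxcustom.training.optimizers import OrtGradientOptimizer
from onnxcustom.training.excs import ConvergenceError

# illustrative data: any small float32 regression set works here
X = numpy.random.rand(40, 3).astype(numpy.float32)
y = X.sum(axis=1, keepdims=True).astype(numpy.float32)
reg = SGDRegressor().fit(X, y.ravel())
onx = to_onnx(reg, X, target_opset=opset, black_op={'LinearRegressor'})
onx_loss = add_loss_output(onx, 'squared_error')
# learning_rate=1e9 is deliberately too large: training should diverge
trainer = OrtGradientOptimizer(
    onx_loss, ['intercept', 'coef'], learning_rate=1e9)
try:
    trainer.fit(X, y, use_numpy=False)
except ConvergenceError as e:
    print("diverged:", e)
# with this change the losses are appended as they are computed,
# so they remain available even after the exception
print(trainer.train_losses_)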
15 changes: 6 additions & 9 deletions onnxcustom/training/optimizers.py
@@ -134,8 +134,8 @@ def fit(self, X, y, sample_weight=None, X_val=None, y_val=None,
else:
loop = range(self.max_iter)

train_losses = []
val_losses = []
self.train_losses_ = []
self.validation_losses_ = []
lr = self.learning_rate.value
for it in loop:
lr_alive = numpy.array([lr / self.batch_size], dtype=numpy.float32)
@@ -149,13 +149,11 @@ def fit(self, X, y, sample_weight=None, X_val=None, y_val=None,
"loss=%1.3g lr=%1.3g " # pylint: disable=E1307
"lrn=%1.3g" % (
loss, lr, lr_alive[0]))
train_losses.append(loss)
self.train_losses_.append(loss)
if (data_loader_val is not None and
(it + 1) % self.validation_every == 0):
val_losses.append(self._evaluation(data_loader_val, bind))
self.train_losses_ = train_losses
self.validation_losses_ = (
None if data_loader_val is None else val_losses)
self.validation_losses_.append(
self._evaluation(data_loader_val, bind))
self.trained_coef_ = self.train_session_.get_state()
return self

@@ -188,12 +186,12 @@ def _iteration(self, data_loader, ort_lr, bind, use_numpy, sample_weight):
actual_losses = []

bind.bind_output('loss', self.device)
idx = 3 if sample_weight else 2

if use_numpy:
# onnxruntime does not copy the data, so the numpy
# array must remain alive all along the iteration
lr_alive = ort_lr.numpy()
idx = 3 if sample_weight else 2
self._bind_input_ortvalue(
self.input_names_[idx], bind, lr_alive)

Expand Down Expand Up @@ -227,7 +225,6 @@ def _iteration(self, data_loader, ort_lr, bind, use_numpy, sample_weight):
else actual_losses[-5:])]))
actual_losses.append(loss / data.shape[0])
else:
idx = 3 if sample_weight else 2
self._bind_input_ortvalue(self.input_names_[idx], bind, ort_lr)

# Fast iterations
13 changes: 9 additions & 4 deletions onnxcustom/training/optimizers_partial.py
@@ -279,7 +279,8 @@ def fit(self, X, y, sample_weight=None,
"weights_to_train=%r", self.weights_to_train)
logger.info(
"[OrtGradientForwardBackwardOptimizer.fit] "
"device=%r", self.device)
"device=%r|%r",
self.device.device_id(), self.device.device_type())
if logger is not None:
logger.info(
"[OrtGradientForwardBackwardOptimizer.fit] "
@@ -341,7 +342,7 @@ def fit(self, X, y, sample_weight=None,
else:
loop = range(self.max_iter)

train_losses = []
self.train_losses_ = []
val_losses = []
kinds = ['weight', 'grad'] if self.needs_grad else ['weight']
for it in loop:
@@ -353,12 +354,16 @@ def fit(self, X, y, sample_weight=None,
loop.set_description(
"loss=%1.3g lr=%1.3g" % ( # pylint: disable=E1101,E1307
loss, lr)) # pylint: disable=E1101,E1307
train_losses.append(loss)
if logger is not None:
logger.info(
"[OrtGradientForwardBackwardOptimizer.fit] "
"lr value=%r", lr)

self.train_losses_.append(loss)
if (data_loader_val is not None and
(it + 1) % self.validation_every == 0):
val_losses.append(
self._evaluation(data_loader_val, self.get_full_state()))
self.train_losses_ = train_losses
self.validation_losses_ = (
None if data_loader_val is None else val_losses)

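The same incremental bookkeeping now applies to OrtGradientForwardBackwardOptimizer: train_losses_ is filled while fit runs instead of being assigned once at the end. A minimal sketch mirroring the regression test added above, with an illustrative (non-diverging) learning rate:

import numpy
from sklearn.linear_model import SGDRegressor
from mlprodict.onnx_conv import to_onnx
from onnxcustom import __max_supported_opset__ as opset
from onnxcustom.training.optimizers_partial import (
    OrtGradientForwardBackwardOptimizer)
from onnxcustom.training.sgd_learning_rate import LearningRateSGD
from onnxcustom.training.sgd_learning_loss import SquareLearningLoss

X = numpy.random.rand(40, 3).astype(numpy.float32)
y = X.sum(axis=1, keepdims=True).astype(numpy.float32)
reg = SGDRegressor().fit(X, y.ravel())
onx = to_onnx(reg, X, target_opset=opset, black_op={'LinearRegressor'})
trainer = OrtGradientForwardBackwardOptimizer(
    onx, ['coef', 'intercept'], learning_rate=LearningRateSGD(1e-3),
    learning_loss=SquareLearningLoss(), warm_start=False,
    max_iter=20, batch_size=10, enable_logging=False)
trainer.fit(X, y)
# one entry per iteration, appended as soon as it is computed
print(trainer.train_losses_)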
4 changes: 2 additions & 2 deletions onnxcustom/training/sgd_learning_rate.py
@@ -236,8 +236,8 @@ def update_weights(self, n_bind, device, statei, gradienti, batch_size,
self._bind_input_ortvalue("alpha", bind, ort_alpha, device, cache=True)
self._bind_output_ortvalue('Y', bind, statei, cache=True)
self._call_iobinding(self.axpy_sess_._sess, bind)
loss = bind.get_outputs()[0]
return loss
new_weights = bind.get_outputs()[0]
return new_weights


class LearningRateSGDNesterov(LearningRateSGD):
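About the rename in sgd_learning_rate.py: the bound 'Y' output of the axpy session holds the updated weight tensor, not a loss, so new_weights is the accurate name. The snippet below is only a hypothetical numpy stand-in for what an axpy graph (Y = alpha * X + Y) produces; the tensor names and the value of alpha are assumptions, the real graph and scaling live elsewhere in the module.

import numpy

# hypothetical stand-ins for the tensors bound by update_weights (assumption)
statei = numpy.array([0.5, -0.2], dtype=numpy.float32)    # current weights
gradienti = numpy.array([0.1, 0.3], dtype=numpy.float32)  # gradient
alpha = numpy.float32(-0.01)                              # scaling set by the caller
# axpy: Y = alpha * X + Y -> the result is the updated weight tensor,
# which is why the variable is now called new_weights rather than loss
new_weights = alpha * gradienti + statei
print(new_weights)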