diff --git a/.gitignore b/.gitignore
index 53832e37..c6ea96f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ bin/*
 dist/*
 build/*
 data/*
+.eggs/*
 *.jpg
 *.onnx
 *.pt
@@ -26,3 +27,5 @@ examples/model.onnx
 tests/model.onnx
 examples/onnxruntime_profile*.json
 version.txt
+_doc/bench/*.svg
+_doc/examples/*.svg
diff --git a/_doc/bench/bench_orttraining_nn_gpu.py b/_doc/bench/bench_orttraining_nn_gpu.py
new file mode 100644
index 00000000..e5bbc99e
--- /dev/null
+++ b/_doc/bench/bench_orttraining_nn_gpu.py
@@ -0,0 +1,130 @@
+"""
+
+.. _l-orttraining-nn-benchmark:
+
+Benchmark onnxruntime-training on a neural network
+==================================================
+
+You may profile the full example on CPU with :epkg:`py-spy`:
+
+::
+
+    py-spy record -o bench_orttraining_nn_gpu.svg -r 10 --native -- python bench_orttraining_nn_gpu.py
+
+And with `nvprof` on GPU:
+
+::
+
+    nvprof -o bench_orttraining_nn_gpu.nvprof python bench_orttraining_nn_gpu.py --run_skl 0 --device cuda --opset 14
+
+.. contents::
+    :local:
+
+A neural network with scikit-learn
+++++++++++++++++++++++++++++++++++
+
+"""
+import warnings
+from pprint import pprint
+import time
+import numpy
+from pandas import DataFrame
+from onnxruntime import get_device
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPRegressor
+from sklearn.metrics import mean_squared_error
+from mlprodict.onnx_conv import to_onnx
+from onnxcustom.training import add_loss_output, get_train_initializer
+from onnxcustom.training.optimizers import OrtGradientOptimizer
+
+
+def benchmark(N=1000, n_features=20, hidden_layer_sizes="25,25", max_iter=1000,
+              learning_rate_init=1e-4, batch_size=100, run_skl=True,
+              device='cpu', opset=14):
+    """
+    Compares :epkg:`onnxruntime-training` to :epkg:`scikit-learn` for
+    training. The training algorithm is SGD.
+
+    :param N: number of observations to train on
+    :param n_features: number of features
+    :param hidden_layer_sizes: hidden layer sizes, comma separated values
+    :param max_iter: number of iterations
+    :param learning_rate_init: initial learning rate
+    :param batch_size: batch size
+    :param run_skl: train scikit-learn under the same conditions (True) or
+        run a single iteration with *scikit-learn*
+    :param device: `'cpu'` or `'cuda'`
+    :param opset: opset to choose for the conversion
+    """
+    N = int(N)
+    n_features = int(n_features)
+    max_iter = int(max_iter)
+    learning_rate_init = float(learning_rate_init)
+    batch_size = int(batch_size)
+    run_skl = run_skl in (1, True, '1', 'True')
+
+    print("N=%d" % N)
+    print("n_features=%d" % n_features)
+    print("hidden_layer_sizes=%s" % hidden_layer_sizes)
+    print("max_iter=%d" % max_iter)
+    print("learning_rate_init=%f" % learning_rate_init)
+    print("batch_size=%d" % batch_size)
+    print("run_skl=%r" % run_skl)
+    print("opset=%r" % opset)
+    print("device=%r" % device)
+    print('------------------')
+
+    hidden_layer_sizes = tuple(map(int, hidden_layer_sizes.split(",")))
+    X, y = make_regression(N, n_features=n_features, bias=2)
+    X = X.astype(numpy.float32)
+    y = y.astype(numpy.float32)
+    X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+    nn = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
+                      max_iter=max_iter if run_skl else 1,
+                      solver='sgd', learning_rate_init=learning_rate_init,
+                      n_iter_no_change=N, batch_size=batch_size)
+
+    begin = time.perf_counter()
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        nn.fit(X_train, y_train)
+    dur_skl = time.perf_counter() - begin
+
+    print("time_skl=%r, mean_squared_error=%r" % (
+        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))
+
+    # conversion to ONNX
+    onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=opset)
+
+    # add loss
+    onx_train = add_loss_output(onx)
+
+    # list of weights
+    inits = get_train_initializer(onx)
+    weights = {k: v for k, v in inits.items() if k != "shape_tensor"}
+
+    # training
+    print("device=%r get_device()=%r" % (device, get_device()))
+
+    #######################################
+    # The training session.
+
+    train_session = OrtGradientOptimizer(
+        onx_train, list(weights), device=device, verbose=0,
+        eta0=learning_rate_init,
+        warm_start=False, max_iter=max_iter, batch_size=batch_size)
+
+    begin = time.perf_counter()
+    train_session.fit(X, y)
+    dur_ort = time.perf_counter() - begin
+    print("time_skl=%r, mean_squared_error=%r" % (
+        dur_skl, mean_squared_error(y_train, nn.predict(X_train))))
+    print("time_ort=%r, last_trained_error=%r" % (
+        dur_ort, train_session.train_losses_[-1]))
+
+
+if __name__ == "__main__":
+    import fire
+    fire.Fire(benchmark)
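The benchmark can also be driven programmatically, without the :epkg:`fire` command line. A minimal smoke run, importing the function above; the small parameter values are illustrative only::

    from bench_orttraining_nn_gpu import benchmark

    # quick CPU check with a tiny workload before launching the full benchmark
    benchmark(N=100, n_features=5, hidden_layer_sizes="10",
              max_iter=10, batch_size=10, run_skl=True, device='cpu')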
""" -import numpy as np +import numpy from onnxruntime import InferenceSession from sklearn.datasets import load_iris from sklearn.preprocessing import OrdinalEncoder as SklOrdinalEncoder @@ -40,8 +40,8 @@ data = load_iris() X, y = data.data, data.target -X = X.astype(np.int64)[:, :2] -y = (y == 2).astype(np.int64) +X = X.astype(numpy.int64)[:, :2] +y = (y == 2).astype(numpy.int64) woe = WOEEncoder(cols=[0]).fit(X, y) print(woe.transform(X[:5])) @@ -75,15 +75,15 @@ def ordenc_to_sklearn(op_mapping): mapping = column_map['mapping'] res = [] for i in range(mapping.shape[0]): - if np.isnan(mapping.index[i]): + if numpy.isnan(mapping.index[i]): continue ind = mapping.iloc[i] while len(res) <= ind: res.append(0) res[ind] = mapping.index[i] - cats[col] = np.array(res, dtype=np.int64) + cats[col] = numpy.array(res, dtype=numpy.int64) - skl_ord = SklOrdinalEncoder(categories=cats, dtype=np.int64) + skl_ord = SklOrdinalEncoder(categories=cats, dtype=numpy.int64) skl_ord.categories_ = cats return skl_ord @@ -202,7 +202,7 @@ def woe_encoder_converter(scope, operator, container): sub = OnnxSubEstimator(op.ordinal_encoder, X, op_version=opv) - cast = OnnxCast(sub, op_version=opv, to=np.float32) + cast = OnnxCast(sub, op_version=opv, to=numpy.float32) skl_ord = woeenc_to_sklearn(op.mapping) cat = OnnxSubEstimator(skl_ord, cast, op_version=opv, output_names=operator.outputs[:1], diff --git a/_doc/examples/plot_orttraining_linear_regression.py b/_doc/examples/plot_orttraining_linear_regression.py index 09ef3b6e..549a6632 100644 --- a/_doc/examples/plot_orttraining_linear_regression.py +++ b/_doc/examples/plot_orttraining_linear_regression.py @@ -18,7 +18,7 @@ """ from pprint import pprint -import numpy as np +import numpy from pandas import DataFrame from onnx import helper, numpy_helper, TensorProto from onnxruntime import ( @@ -31,8 +31,8 @@ from tqdm import tqdm X, y = make_regression(n_features=2, bias=2) -X = X.astype(np.float32) -y = y.astype(np.float32) +X = X.astype(numpy.float32) +y = y.astype(numpy.float32) X_train, X_test, y_train, y_test = train_test_split(X, y) lr = LinearRegression() @@ -78,8 +78,8 @@ def onnx_linear_regression(coefs, intercept): return model_def -onx = onnx_linear_regression(lr.coef_.astype(np.float32), - lr.intercept_.astype(np.float32)) +onx = onnx_linear_regression(lr.coef_.astype(numpy.float32), + lr.intercept_.astype(numpy.float32)) ######################################## # Let's visualize it. 
@@ -157,8 +157,9 @@ def onnx_linear_regression_training(coefs, intercept):
 
 onx_train = onnx_linear_regression_training(
-    np.random.randn(*lr.coef_.shape).astype(np.float32),
-    np.random.randn(*lr.intercept_.reshape((-1, )).shape).astype(np.float32))
+    numpy.random.randn(*lr.coef_.shape).astype(numpy.float32),
+    numpy.random.randn(
+        *lr.intercept_.reshape((-1, )).shape).astype(numpy.float32))
 
 plot_onnx(onx_train)
@@ -204,7 +205,7 @@ def __iter__(self):
         N = 0
         b = len(self) - self.batch_size
         while N < len(self):
-            i = np.random.randint(0, b)
+            i = numpy.random.randint(0, b)
             N += self.batch_size
             yield (self.X[i:i + self.batch_size],
                    self.y[i:i + self.batch_size])
@@ -304,7 +305,7 @@ def create_training_session(
 
 inputs = {'X': X_train[:1],
           'label': y_train[:1].reshape((-1, 1)),
-          'Learning_Rate': np.array([0.001], dtype=np.float32)}
+          'Learning_Rate': numpy.array([0.001], dtype=numpy.float32)}
 
 train_session.run(None, inputs)
 state_tensors = train_session.get_state()
@@ -315,7 +316,7 @@ def create_training_session(
 
 inputs = {'X': X_train[:1],
           'label': y_train[:1].reshape((-1, 1)),
-          'Learning_Rate': np.array([0.001], dtype=np.float32)}
+          'Learning_Rate': numpy.array([0.001], dtype=numpy.float32)}
 res = train_session.run(None, inputs)
 state_tensors = train_session.get_state()
 pprint(state_tensors)
@@ -379,7 +380,7 @@ def __init__(self, model_onnx, weights_to_train, loss_output_name='loss',
     def _init_learning_rate(self):
         self.eta0_ = self.eta0
         if self.learning_rate == "optimal":
-            typw = np.sqrt(1.0 / np.sqrt(self.alpha))
+            typw = numpy.sqrt(1.0 / numpy.sqrt(self.alpha))
             self.eta0_ = typw / max(1.0, (1 + typw) * 2)
             self.optimal_init_ = 1.0 / (self.eta0_ * self.alpha)
         else:
@@ -390,7 +391,7 @@ def _update_learning_rate(self, t, eta):
         if self.learning_rate == "optimal":
             eta = 1.0 / (self.alpha * (self.optimal_init_ + t))
         elif self.learning_rate == "invscaling":
-            eta = self.eta0_ / np.power(t + 1, self.power_t)
+            eta = self.eta0_ / numpy.power(t + 1, self.power_t)
         return eta
 
     def fit(self, X, y):
@@ -434,7 +435,7 @@ def _iteration(self, data_loader, learning_rate):
         :return: loss
         """
         actual_losses = []
-        lr = np.array([learning_rate], dtype=np.float32)
+        lr = numpy.array([learning_rate], dtype=numpy.float32)
         for batch_idx, (data, target) in enumerate(data_loader):
             if len(target.shape) == 1:
                 target = target.reshape((-1, 1))
@@ -444,7 +445,7 @@ def _iteration(self, data_loader, learning_rate):
                       self.input_names_[2]: lr}
             res = self.train_session_.run(None, inputs)
             actual_losses.append(res[self.loss_index_])
-        return np.array(actual_losses).mean()
+        return numpy.array(actual_losses).mean()
 
 ###########################################
 # Let's now train the model in a very similar way
@@ -456,7 +457,7 @@ def _iteration(self, data_loader, learning_rate):
 trainer.fit(X, y)
 print("training losses:", trainer.train_losses_)
 
-df = DataFrame({"iteration": np.arange(len(trainer.train_losses_)),
+df = DataFrame({"iteration": numpy.arange(len(trainer.train_losses_)),
                 "loss": trainer.train_losses_})
 df.set_index('iteration').plot(title="Training loss", logy=True)
diff --git a/_doc/examples/plot_orttraining_linear_regression_gpu.py b/_doc/examples/plot_orttraining_linear_regression_gpu.py
index 307b38f0..9814a62a 100644
--- a/_doc/examples/plot_orttraining_linear_regression_gpu.py
+++ b/_doc/examples/plot_orttraining_linear_regression_gpu.py
@@ -21,7 +21,7 @@ with random coefficients.
""" from pprint import pprint -import numpy as np +import numpy from pandas import DataFrame from onnx import helper, numpy_helper, TensorProto from onnxruntime import ( @@ -33,8 +33,8 @@ from tqdm import tqdm X, y = make_regression(n_features=2, bias=2) -X = X.astype(np.float32) -y = y.astype(np.float32) +X = X.astype(numpy.float32) +y = y.astype(numpy.float32) X_train, X_test, y_train, y_test = train_test_split(X, y) @@ -87,8 +87,8 @@ def onnx_linear_regression_training(coefs, intercept): onx_train = onnx_linear_regression_training( - np.random.randn(2).astype(np.float32), - np.random.randn(1).astype(np.float32)) + numpy.random.randn(2).astype(numpy.float32), + numpy.random.randn(1).astype(numpy.float32)) plot_onnx(onx_train) @@ -181,7 +181,7 @@ def create_training_session( ortx = OrtValue.ortvalue_from_numpy(X_train[:1], device, 0) orty = OrtValue.ortvalue_from_numpy(y_train[:1].reshape((-1, 1)), device, 0) ortlr = OrtValue.ortvalue_from_numpy( - np.array([0.01], dtype=np.float32), device, 0) + numpy.array([0.01], dtype=numpy.float32), device, 0) inputs = {'X': ortx, 'label': orty, "Learning_Rate": ortlr} outputs = train_session.run(None, inputs) @@ -221,8 +221,8 @@ def __init__(self, X, y, batch_size=20, device='cpu', device_idx=0): if X.shape[0] != y.shape[0]: raise ValueError( "Shape mismatch X.shape=%r, y.shape=%r." % (X.shape, y.shape)) - self.X = np.ascontiguousarray(X) - self.y = np.ascontiguousarray(y) + self.X = numpy.ascontiguousarray(X) + self.y = numpy.ascontiguousarray(y) self.batch_size = batch_size self.device = device self.device_idx = device_idx @@ -239,7 +239,7 @@ def __iter__(self): N = 0 b = len(self) - self.batch_size while N < len(self): - i = np.random.randint(0, b) + i = numpy.random.randint(0, b) N += self.batch_size yield ( OrtValue.ortvalue_from_numpy( @@ -318,7 +318,7 @@ def __init__(self, model_onnx, weights_to_train, loss_output_name='loss', def _init_learning_rate(self): self.eta0_ = self.eta0 if self.learning_rate == "optimal": - typw = np.sqrt(1.0 / np.sqrt(self.alpha)) + typw = numpy.sqrt(1.0 / numpy.sqrt(self.alpha)) self.eta0_ = typw / max(1.0, (1 + typw) * 2) self.optimal_init_ = 1.0 / (self.eta0_ * self.alpha) else: @@ -329,7 +329,7 @@ def _update_learning_rate(self, t, eta): if self.learning_rate == "optimal": eta = 1.0 / (self.alpha * (self.optimal_init_ + t)) elif self.learning_rate == "invscaling": - eta = self.eta0_ / np.power(t + 1, self.power_t) + eta = self.eta0_ / numpy.power(t + 1, self.power_t) return eta def fit(self, X, y): @@ -361,7 +361,8 @@ def fit(self, X, y): train_losses = [] for it in loop: bind_lr = OrtValue.ortvalue_from_numpy( - np.array([lr], dtype=np.float32), self.device, self.device_idx) + numpy.array([lr], dtype=numpy.float32), + self.device, self.device_idx) loss = self._iteration(data_loader, bind_lr, bind) lr = self._update_learning_rate(it, lr) if self.verbose > 1: @@ -379,7 +380,7 @@ def _iteration(self, data_loader, learning_rate, bind): name=self.input_names_[0], device_type=self.device, device_id=self.device_idx, - element_type=np.float32, + element_type=numpy.float32, shape=data.shape(), buffer_ptr=data.data_ptr()) @@ -387,14 +388,14 @@ def _iteration(self, data_loader, learning_rate, bind): name=self.input_names_[1], device_type=self.device, device_id=self.device_idx, - element_type=np.float32, + element_type=numpy.float32, shape=target.shape(), buffer_ptr=target.data_ptr()) bind.bind_input( name=self.input_names_[2], device_type=learning_rate.device_name(), device_id=0, - element_type=np.float32, 
+                element_type=numpy.float32, shape=learning_rate.shape(),
                 buffer_ptr=learning_rate.data_ptr())
 
             bind.bind_output('loss')
@@ -402,7 +403,7 @@
             self.train_session_.run_with_iobinding(bind)
             outputs = bind.copy_outputs_to_cpu()
             actual_losses.append(outputs[self.loss_index_])
-        return np.array(actual_losses).mean()
+        return numpy.array(actual_losses).mean()
 
 ###########################################
 # Let's now train the model in a very similar way
@@ -414,7 +415,7 @@ def _iteration(self, data_loader, learning_rate, bind):
 trainer.fit(X, y)
 print("training losses:", trainer.train_losses_)
 
-df = DataFrame({"iteration": np.arange(len(trainer.train_losses_)),
+df = DataFrame({"iteration": numpy.arange(len(trainer.train_losses_)),
                 "loss": trainer.train_losses_})
 df.set_index('iteration').plot(title="Training loss", logy=True)
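The GPU example trains through `run_with_iobinding` so tensors stay on the device between batches. For reference, the same binding pattern works for plain inference; a minimal sketch assuming a float32 model `model.onnx` whose input is named `'X'` and output `'variable'` (both names are illustrative)::

    import numpy
    from onnxruntime import InferenceSession, OrtValue

    sess = InferenceSession("model.onnx")
    x = OrtValue.ortvalue_from_numpy(
        numpy.random.randn(5, 2).astype(numpy.float32), 'cpu', 0)

    # bind input and output buffers, then run without extra copies
    bind = sess.io_binding()
    bind.bind_input(name='X', device_type='cpu', device_id=0,
                    element_type=numpy.float32, shape=x.shape(),
                    buffer_ptr=x.data_ptr())
    bind.bind_output('variable')
    sess.run_with_iobinding(bind)
    print(bind.copy_outputs_to_cpu()[0])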
diff --git a/_doc/examples/plot_orttraining_nn_gpu.py b/_doc/examples/plot_orttraining_nn_gpu.py
new file mode 100644
index 00000000..2bfc5c7b
--- /dev/null
+++ b/_doc/examples/plot_orttraining_nn_gpu.py
@@ -0,0 +1,155 @@
+"""
+
+.. _l-orttraining-nn-gpu:
+
+Train a scikit-learn neural network with onnxruntime-training on GPU
+====================================================================
+
+This example leverages example :ref:`l-orttraining-linreg-gpu` to
+train a neural network from :epkg:`scikit-learn` on GPU.
+
+.. contents::
+    :local:
+
+A neural network with scikit-learn
+++++++++++++++++++++++++++++++++++
+
+"""
+import warnings
+from pprint import pprint
+import numpy
+from pandas import DataFrame
+from onnxruntime import get_device, InferenceSession
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPRegressor
+from sklearn.metrics import mean_squared_error
+from mlprodict.plotting.plotting_onnx import plot_onnx
+from mlprodict.onnx_conv import to_onnx
+from mlprodict.tools import measure_time
+from onnxcustom.training import add_loss_output, get_train_initializer
+from onnxcustom.training.optimizers import OrtGradientOptimizer
+
+
+X, y = make_regression(1000, n_features=10, bias=2)
+X = X.astype(numpy.float32)
+y = y.astype(numpy.float32)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+nn = MLPRegressor(hidden_layer_sizes=(10, 10), max_iter=200,
+                  solver='sgd', learning_rate_init=1e-4,
+                  n_iter_no_change=1000, batch_size=10)
+
+with warnings.catch_warnings():
+    warnings.simplefilter('ignore')
+    nn.fit(X_train, y_train)
+
+#################################
+# Score:
+
+print("mean_squared_error=%r" % mean_squared_error(y_test, nn.predict(X_test)))
+
+
+#######################################
+# Conversion to ONNX
+# ++++++++++++++++++
+
+onx = to_onnx(nn, X_train[:1].astype(numpy.float32), target_opset=15)
+plot_onnx(onx)
+
+#######################################
+# Training graph
+# ++++++++++++++
+#
+# The loss function is the square function. We use function
+# :func:`add_loss_output`.
+# It does something similar to what is implemented in example
+# :ref:`l-orttraining-linreg-cpu`.
+
+onx_train = add_loss_output(onx)
+plot_onnx(onx_train)
+
+#####################################
+# Let's check inference is working.
+
+sess = InferenceSession(onx_train.SerializeToString())
+res = sess.run(None, {'X': X_test, 'label': y_test.reshape((-1, 1))})
+print("onnx loss=%r" % (res[0][0, 0] / X_test.shape[0]))
+
+#####################################
+# Let's retrieve the constants, the weights to optimize.
+# We remove initializers which cannot be optimized.
+
+inits = get_train_initializer(onx)
+weights = {k: v for k, v in inits.items() if k != "shape_tensor"}
+pprint(list((k, v[0].shape) for k, v in weights.items()))
+
+
+######################################
+# Training
+# ++++++++
+#
+# The training session. If a GPU is available, it chooses CUDA,
+# otherwise it falls back to CPU.
+
+device = "cuda" if get_device() == 'GPU' else 'cpu'
+
+print("device=%r get_device()=%r" % (device, get_device()))
+
+#######################################
+# The training session.
+
+train_session = OrtGradientOptimizer(
+    onx_train, list(weights), device=device, verbose=1, eta0=1e-4,
+    warm_start=False, max_iter=200, batch_size=10)
+
+train_session.fit(X, y)
+state_tensors = train_session.get_state()
+
+print(train_session.train_losses_)
+
+df = DataFrame({'losses': train_session.train_losses_})
+df.plot(title="Train loss against iterations", logy=True)
+
+
+################################################
+# Benchmark
+# +++++++++
+#
+# The last part compares the training speed of both frameworks.
+
+nn = MLPRegressor(hidden_layer_sizes=(10, 10), max_iter=200,
+                  solver='sgd', learning_rate_init=1e-4,
+                  n_iter_no_change=1000, batch_size=10)
+
+
+def skl_train():
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        nn.fit(X_train, y_train)
+
+
+obs = []
+res = measure_time("skl_train()", context=dict(skl_train=skl_train),
+                   repeat=1, number=1)
+res['framework'] = ['skl']
+pprint(res)
+obs.append(res)
+
+train_session = OrtGradientOptimizer(
+    onx_train, list(weights), device=device, verbose=0, eta0=1e-4,
+    warm_start=False, max_iter=200, batch_size=10)
+
+
+def ort_train():
+    train_session.fit(X, y)
+
+
+res = measure_time("ort_train()", context=dict(ort_train=ort_train),
+                   repeat=1, number=1)
+res['framework'] = ['ort']
+pprint(res)
+obs.append(res)
+
+df = DataFrame(obs)
+print(df)
diff --git a/_doc/examples/plot_woe_transformer.py b/_doc/examples/plot_woe_transformer.py
index 4236b5df..a7cc5516 100644
--- a/_doc/examples/plot_woe_transformer.py
+++ b/_doc/examples/plot_woe_transformer.py
@@ -24,7 +24,7 @@ to weight 55 and the second one to 107.
 """
 import os
-import numpy as np
+import numpy
 import pandas as pd
 from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
 from onnxruntime import InferenceSession
@@ -34,7 +34,7 @@
 # automatically registers the converter for WOETransformer
 import skl2onnx.sklapi.register  # noqa
 
-X = np.arange(10).astype(np.float32).reshape((-1, 1))
+X = numpy.arange(10).astype(numpy.float32).reshape((-1, 1))
 
 intervals = [
     [(1., 3., False, False),
diff --git a/_doc/sphinxdoc/source/api.rst b/_doc/sphinxdoc/source/api.rst
index 910a887c..092ac3e2 100644
--- a/_doc/sphinxdoc/source/api.rst
+++ b/_doc/sphinxdoc/source/api.rst
@@ -6,6 +6,18 @@ API
 .. contents::
     :local:
 
+Data
+++++
+
+.. autoclass:: onnxcustom.training.data_loader.OrtDataLoader
+
+Training
+++++++++
+
+.. autoclass:: onnxcustom.training.optimizers.OrtGradientOptimizer
+
+.. autofunction:: onnxcustom.training.orttraining.add_loss_output
+
 Utils
 +++++
diff --git a/_doc/sphinxdoc/source/tutorial_6_training.rst b/_doc/sphinxdoc/source/tutorial_6_training.rst
index 6a6ae3ef..b262d961 100644
--- a/_doc/sphinxdoc/source/tutorial_6_training.rst
+++ b/_doc/sphinxdoc/source/tutorial_6_training.rst
@@ -2,7 +2,6 @@
 Training
 ========
 
-
 .. toctree::
     :maxdepth: 1
diff --git a/_doc/sphinxdoc/source/tutorial_7_benchmark.rst b/_doc/sphinxdoc/source/tutorial_7_benchmark.rst
index 8e15f92a..190e4e53 100644
--- a/_doc/sphinxdoc/source/tutorial_7_benchmark.rst
+++ b/_doc/sphinxdoc/source/tutorial_7_benchmark.rst
@@ -2,7 +2,6 @@
 Benchmarks
 ==========
 
-
 .. toctree::
     :maxdepth: 1
diff --git a/_unittests/ut_training/test_data_loader.py b/_unittests/ut_training/test_data_loader.py
new file mode 100644
index 00000000..b3748182
--- /dev/null
+++ b/_unittests/ut_training/test_data_loader.py
@@ -0,0 +1,32 @@
+"""
+@brief      test log(time=3s)
+"""
+
+import unittest
+from pyquickhelper.pycode import ExtTestCase
+from sklearn.datasets import make_regression
+from onnxruntime import OrtValue
+from onnxcustom.training.data_loader import OrtDataLoader
+
+
+class TestDataLoader(ExtTestCase):
+
+    def test_ort_data_loader(self):
+        X, y = make_regression(  # pylint: disable=W0632
+            100, n_features=10, bias=2)
+        data = OrtDataLoader(X, y, batch_size=5)
+        n = 0
+        for it in data:
+            x, y = it
+            self.assertIsInstance(x, OrtValue)
+            self.assertIsInstance(y, OrtValue)
+            self.assertEqual(x.shape()[0], 5)
+            self.assertEqual(x.shape()[1], 10)
+            self.assertEqual(y.shape()[0], 5)
+            n += 1
+        self.assertEqual(n, 20)
+        self.assertStartsWith("OrtDataLoader(...", repr(data))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/_unittests/ut_training/test_optimizers.py b/_unittests/ut_training/test_optimizers.py
new file mode 100644
index 00000000..601de5be
--- /dev/null
+++ b/_unittests/ut_training/test_optimizers.py
@@ -0,0 +1,51 @@
+"""
+@brief      test log(time=3s)
+"""
+
+import unittest
+from pyquickhelper.pycode import ExtTestCase
+import numpy
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from mlprodict.onnx_conv import to_onnx
+from onnxcustom import __max_supported_opset__ as opset
+try:
+    from onnxruntime import TrainingSession
+except ImportError:
+    # onnxruntime is not built with training features
+    TrainingSession = None
+
+
+class TestOptimizers(ExtTestCase):
+
+    @unittest.skipIf(TrainingSession is None, reason="not training")
+    def test_ort_gradient_optimizers(self):
+        from onnxcustom.training.orttraining import add_loss_output
+        from onnxcustom.training.optimizers import OrtGradientOptimizer
+        X, y = make_regression(  # pylint: disable=W0632
+            100, n_features=10, bias=2)
+        X = X.astype(numpy.float32)
+        y = y.astype(numpy.float32)
+        X_train, _, y_train, __ = train_test_split(X, y)
+        reg = LinearRegression()
+        reg.fit(X_train, y_train)
+        onx = to_onnx(reg, X_train, target_opset=opset,
+                      black_op={'LinearRegressor'})
+        onx_loss = add_loss_output(onx)
+        inits = ['intercept', 'coef']
+        train_session = OrtGradientOptimizer(onx_loss, inits)
+        self.assertRaise(lambda: train_session.get_state(), AttributeError)
+        train_session.fit(X, y)
+        state_tensors = train_session.get_state()
+        self.assertEqual(len(state_tensors), 2)
+        r = repr(train_session)
+        self.assertIn("OrtGradientOptimizer(model_onnx=", r)
+        self.assertIn("learning_rate='invscaling'", r)
+        losses = train_session.train_losses_
+        self.assertGreater(len(losses), 1)
+        self.assertFalse(any(map(numpy.isnan, losses)))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/_unittests/ut_training/test_orttraining.py b/_unittests/ut_training/test_orttraining.py
new file mode 100644
index 00000000..1e58e4b5
--- /dev/null
+++ b/_unittests/ut_training/test_orttraining.py
@@ -0,0 +1,60 @@
+"""
+@brief      test log(time=3s)
+"""
+
+import unittest
+from pyquickhelper.pycode import ExtTestCase
+import numpy
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error
+from mlprodict.onnx_conv import to_onnx
+from mlprodict.onnxrt import OnnxInference
+from onnxcustom import __max_supported_opset__ as opset
+try:
+    from onnxruntime import TrainingSession
+except ImportError:
+    # onnxruntime is not built with training features
+    TrainingSession = None
+
+
+class TestOrtTraining(ExtTestCase):
+
+    @unittest.skipIf(TrainingSession is None, reason="not training")
+    def test_add_loss_output(self):
+        from onnxcustom.training.orttraining import add_loss_output
+        X, y = make_regression(  # pylint: disable=W0632
+            100, n_features=10, bias=2)
+        X = X.astype(numpy.float32)
+        y = y.astype(numpy.float32)
+        X_train, X_test, y_train, y_test = train_test_split(X, y)
+        reg = LinearRegression()
+        reg.fit(X_train, y_train)
+        onx = to_onnx(reg, X_train, target_opset=opset,
+                      black_op={'LinearRegressor'})
+        onx_loss = add_loss_output(onx)
+        oinf = OnnxInference(onx_loss)
+        output = oinf.run({'X': X_test, 'label': y_test.reshape((-1, 1))})
+        loss = output['loss']
+        skl_loss = mean_squared_error(reg.predict(X_test), y_test)
+        self.assertLess(numpy.abs(skl_loss - loss[0, 0]), 1e-5)
+
+    @unittest.skipIf(TrainingSession is None, reason="not training")
+    def test_get_train_initializer(self):
+        from onnxcustom.training.orttraining import get_train_initializer
+        X, y = make_regression(  # pylint: disable=W0632
+            100, n_features=10, bias=2)
+        X = X.astype(numpy.float32)
+        y = y.astype(numpy.float32)
+        X_train, _, y_train, __ = train_test_split(X, y)
+        reg = LinearRegression()
+        reg.fit(X_train, y_train)
+        onx = to_onnx(reg, X_train, target_opset=opset,
+                      black_op={'LinearRegressor'})
+        inits = get_train_initializer(onx)
+        self.assertEqual({'shape_tensor', 'intercept', 'coef'}, set(inits))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/onnxcustom/__init__.py b/onnxcustom/__init__.py
index 6e50fb96..167ed16e 100644
--- a/onnxcustom/__init__.py
+++ b/onnxcustom/__init__.py
@@ -1,10 +1,12 @@
 # coding: utf-8
 """
-Extends ONNX specification and runtime.
+@file
+@brief Experimentation with ONNX, examples.
 """
 
 __version__ = "0.1.107"
 __author__ = "Xavier Dupré, ..."
+__max_supported_opset__ = 15  # Converters are tested up to this version.
 
 
 def check(verbose=1):
diff --git a/onnxcustom/__main__.py b/onnxcustom/__main__.py
index 11ea9b6a..38edbf54 100644
--- a/onnxcustom/__main__.py
+++ b/onnxcustom/__main__.py
@@ -1,5 +1,6 @@
 """
-Implements command line ``python -m onnxcustom``.
+@file
+@brief Implements command line ``python -m onnxcustom``.
 """
 import fire
 from onnxcustom import check
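The unit tests above rely on the new `__max_supported_opset__` constant to pin the conversion opset. A minimal sketch of the intended pattern, with `LinearRegression` and the synthetic data as illustrative choices::

    import numpy
    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from mlprodict.onnx_conv import to_onnx
    from onnxcustom import __max_supported_opset__ as opset

    X, y = make_regression(100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    reg = LinearRegression().fit(X, y)
    # convert with the highest opset the package claims to support
    onx = to_onnx(reg, X, target_opset=opset)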
+""" + +from .orttraining import add_loss_output, get_train_initializer # noqa diff --git a/onnxcustom/training/data_loader.py b/onnxcustom/training/data_loader.py new file mode 100644 index 00000000..e743783d --- /dev/null +++ b/onnxcustom/training/data_loader.py @@ -0,0 +1,67 @@ +""" +@file +@brief Manipulate data for training. +""" +import numpy +from onnxruntime import OrtValue + + +class OrtDataLoader: + """ + Draws consecutive random observations from a dataset + by batch. It iterates over the datasets by drawing + *batch_size* consecutive observations. + + :param X: features + :param y: labels + :param batch_size: batch size (consecutive observations) + :param device: `'cpu'` or `'cuda'` + :param device_idx: device index + + See example :ref:`l-orttraining-nn-gpu`. + """ + + def __init__(self, X, y, batch_size=20, device='cpu', device_idx=0): + if len(y.shape) == 1: + y = y.reshape((-1, 1)) + if X.shape[0] != y.shape[0]: + raise ValueError( + "Shape mismatch X.shape=%r, y.shape=%r." % (X.shape, y.shape)) + self.X = numpy.ascontiguousarray(X) + self.y = numpy.ascontiguousarray(y) + self.batch_size = batch_size + self.device = device + self.device_idx = device_idx + + def __repr__(self): + "usual" + return "%s(..., ..., batch_size=%r, device=%r, device_idx=%r)" % ( + self.__class__.__name__, self.batch_size, self.device, + self.device_idx) + + def __len__(self): + "Returns the number of observations." + return self.X.shape[0] + + def __iter__(self): + """ + Iterates over the datasets by drawing + *batch_size* consecutive observations. + """ + N = 0 + b = len(self) - self.batch_size + while N < len(self): + i = numpy.random.randint(0, b) + N += self.batch_size + yield ( + OrtValue.ortvalue_from_numpy( + self.X[i:i + self.batch_size], + self.device, self.device_idx), + OrtValue.ortvalue_from_numpy( + self.y[i:i + self.batch_size], + self.device, self.device_idx)) + + @property + def data(self): + "Returns a tuple of the datasets." + return self.X, self.y diff --git a/onnxcustom/training/optimizers.py b/onnxcustom/training/optimizers.py new file mode 100644 index 00000000..7970fccb --- /dev/null +++ b/onnxcustom/training/optimizers.py @@ -0,0 +1,271 @@ +""" +@file +@brief Helper for :epkg:`onnxruntime-training`. +""" +import inspect +import numpy +from onnxruntime import ( # pylint: disable=E0611 + OrtValue, TrainingParameters, + SessionOptions, TrainingSession) +from .data_loader import OrtDataLoader + + +class BaseEstimator: + """ + Base class for optimizers. + Implements common methods such `__repr__`. + """ + + @classmethod + def _get_param_names(cls): + init = getattr(cls.__init__, "deprecated_original", cls.__init__) + init_signature = inspect.signature(init) + parameters = [ + p for p in init_signature.parameters.values() + if p.name != "self" and p.kind != p.VAR_KEYWORD] + return [(p.name, p.default) for p in parameters] + + def __repr__(self): + param = self._get_param_names() + ps = [] + for k, v in param: + if k not in self.__dict__: + continue + ov = getattr(self, k) + if v is not inspect._empty or ov != v: + ro = repr(ov) + if len(ro) > 50 or "\n" in ro: + ro = ro[:10].replace("\n", " ") + "..." + ps.append("%s=%r" % (k, ro)) + else: + ps.append("%s=%r" % (k, ov)) + return "%s(%s)" % (self.__class__.__name__, ", ".join(ps)) + + +class OrtGradientOptimizer(BaseEstimator): + """ + Implements a simple :epkg:`Stochastic Gradient Descent` + with :epkg:`onnxruntime-training`. 
diff --git a/onnxcustom/training/optimizers.py b/onnxcustom/training/optimizers.py
new file mode 100644
index 00000000..7970fccb
--- /dev/null
+++ b/onnxcustom/training/optimizers.py
@@ -0,0 +1,271 @@
+"""
+@file
+@brief Helper for :epkg:`onnxruntime-training`.
+"""
+import inspect
+import numpy
+from onnxruntime import (  # pylint: disable=E0611
+    OrtValue, TrainingParameters,
+    SessionOptions, TrainingSession)
+from .data_loader import OrtDataLoader
+
+
+class BaseEstimator:
+    """
+    Base class for optimizers.
+    Implements common methods such as `__repr__`.
+    """
+
+    @classmethod
+    def _get_param_names(cls):
+        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
+        init_signature = inspect.signature(init)
+        parameters = [
+            p for p in init_signature.parameters.values()
+            if p.name != "self" and p.kind != p.VAR_KEYWORD]
+        return [(p.name, p.default) for p in parameters]
+
+    def __repr__(self):
+        param = self._get_param_names()
+        ps = []
+        for k, v in param:
+            if k not in self.__dict__:
+                continue
+            ov = getattr(self, k)
+            if v is not inspect._empty or ov != v:
+                ro = repr(ov)
+                if len(ro) > 50 or "\n" in ro:
+                    ro = ro[:10].replace("\n", " ") + "..."
+                ps.append("%s=%r" % (k, ro))
+            else:
+                ps.append("%s=%r" % (k, ov))
+        return "%s(%s)" % (self.__class__.__name__, ", ".join(ps))
+
+
+class OrtGradientOptimizer(BaseEstimator):
+    """
+    Implements a simple :epkg:`Stochastic Gradient Descent`
+    with :epkg:`onnxruntime-training`.
+
+    :param model_onnx: ONNX graph used to train
+    :param weights_to_train: names of initializers to be optimized
+    :param loss_output_name: name of the loss output
+    :param max_iter: number of training iterations
+    :param training_optimizer_name: optimizing algorithm
+    :param batch_size: batch size (see class *DataLoader*)
+    :param eta0: initial learning rate for the `'constant'`, `'invscaling'`
+        or `'adaptive'` schedules
+    :param alpha: constant that multiplies the regularization term,
+        the higher the value, the stronger the regularization.
+        Also used to compute the learning rate when *learning_rate*
+        is set to `'optimal'`.
+    :param power_t: exponent for inverse scaling learning rate
+    :param learning_rate: learning rate schedule:
+        * `'constant'`: `eta = eta0`
+        * `'optimal'`: `eta = 1.0 / (alpha * (t + t0))` where *t0* is chosen
+          by a heuristic proposed by Leon Bottou
+        * `'invscaling'`: `eta = eta0 / pow(t, power_t)`
+    :param device: `'cpu'` or `'cuda'`
+    :param device_idx: device index
+    :param warm_start: when set to True, reuses the solution of the previous
+        call to fit as initialization, otherwise just erases the previous
+        solution
+    :param verbose: use :epkg:`tqdm` to display the training progress
+
+    Calling method `fit` creates the attribute `train_session_`
+    which holds an instance of `onnxruntime.TrainingSession`.
+
+    See example :ref:`l-orttraining-nn-gpu`.
+    """
+
+    def __init__(self, model_onnx, weights_to_train, loss_output_name='loss',
+                 max_iter=100, training_optimizer_name='SGDOptimizer',
+                 batch_size=10, eta0=0.01, alpha=0.0001, power_t=0.25,
+                 learning_rate='invscaling', device='cpu', device_idx=0,
+                 warm_start=False, verbose=0):
+        # See https://scikit-learn.org/stable/modules/generated/
+        # sklearn.linear_model.SGDRegressor.html
+        self.model_onnx = model_onnx
+        self.batch_size = batch_size
+        self.weights_to_train = weights_to_train
+        self.loss_output_name = loss_output_name
+        self.training_optimizer_name = training_optimizer_name
+        self.verbose = verbose
+        self.max_iter = max_iter
+        self.eta0 = eta0
+        self.alpha = alpha
+        self.power_t = power_t
+        self.learning_rate = learning_rate.lower()
+        self.device = device
+        self.device_idx = device_idx
+        self.warm_start = warm_start
+
+    def _init_learning_rate(self):
+        self.eta0_ = self.eta0
+        if self.learning_rate == "optimal":
+            typw = numpy.sqrt(1.0 / numpy.sqrt(self.alpha))
+            self.eta0_ = typw / max(1.0, (1 + typw) * 2)
+            self.optimal_init_ = 1.0 / (self.eta0_ * self.alpha)
+        else:
+            self.eta0_ = self.eta0
+        return self.eta0_
+
+    def _update_learning_rate(self, t, eta):
+        if self.learning_rate == "optimal":
+            eta = 1.0 / (self.alpha * (self.optimal_init_ + t))
+        elif self.learning_rate == "invscaling":
+            eta = self.eta0_ / numpy.power(t + 1, self.power_t)
+        return eta
+
+    def fit(self, X, y):
+        """
+        Trains the model.
+
+        :param X: features
+        :param y: expected output
+        :return: self
+        """
+        self.train_session_ = self._create_training_session(
+            self.model_onnx, self.weights_to_train,
+            loss_output_name=self.loss_output_name,
+            training_optimizer_name=self.training_optimizer_name,
+            device=self.device)
+
+        if not self.warm_start:
+            state = self.get_state()
+            new_state = {}
+            for k, v in state.items():
+                if len(v.shape) > 0:
+                    new_state[k] = numpy.random.randn(*v.shape).astype(v.dtype)
+                else:
+                    f = numpy.random.randn(1)
+                    f = f.astype(v.dtype)
+                    new_state[k] = f
+            self.set_state(new_state)
+
+        data_loader = OrtDataLoader(
+            X, y, batch_size=self.batch_size, device=self.device)
+        lr = self._init_learning_rate()
+        self.input_names_ = [i.name for i in self.train_session_.get_inputs()]
+        self.output_names_ = [
+            o.name for o in self.train_session_.get_outputs()]
+        self.loss_index_ = self.output_names_.index(self.loss_output_name)
+
+        bind = self.train_session_.io_binding()
+
+        if self.verbose > 0:  # pragma: no cover
+            from tqdm import tqdm  # pylint: disable=C0415
+            loop = tqdm(range(self.max_iter))
+        else:
+            loop = range(self.max_iter)
+
+        train_losses = []
+        for it in loop:
+            bind_lr = OrtValue.ortvalue_from_numpy(
+                numpy.array([lr / self.batch_size], dtype=numpy.float32),
+                self.device, self.device_idx)
+            loss = self._iteration(data_loader, bind_lr, bind)
+            lr = self._update_learning_rate(it, lr)
+            if self.verbose > 1:  # pragma: no cover
+                loop.set_description(
+                    "loss=%1.3g lr=%1.3g" % (  # pylint: disable=E1101,E1307
+                        loss, lr))  # pylint: disable=E1101,E1307
+            train_losses.append(loss)
+        self.train_losses_ = train_losses
+        self.trained_coef_ = self.train_session_.get_state()
+        return self
+
+    def _iteration(self, data_loader, learning_rate, bind):
+        actual_losses = []
+        for data, target in data_loader:
+
+            bind.bind_input(
+                name=self.input_names_[0],
+                device_type=self.device,
+                device_id=self.device_idx,
+                element_type=numpy.float32,
+                shape=data.shape(),
+                buffer_ptr=data.data_ptr())
+
+            bind.bind_input(
+                name=self.input_names_[1],
+                device_type=self.device,
+                device_id=self.device_idx,
+                element_type=numpy.float32,
+                shape=target.shape(),
+                buffer_ptr=target.data_ptr())
+
+            bind.bind_input(
+                name=self.input_names_[2],
+                device_type=learning_rate.device_name(), device_id=0,
+                element_type=numpy.float32, shape=learning_rate.shape(),
+                buffer_ptr=learning_rate.data_ptr())
+
+            bind.bind_output('loss')
+
+            self.train_session_.run_with_iobinding(bind)
+            outputs = bind.copy_outputs_to_cpu()
+            actual_losses.append(outputs[0] / data.shape()[0])
+        return numpy.array(actual_losses).mean()
+
+    def _create_training_session(
+            self, training_onnx, weights_to_train,
+            loss_output_name='loss',
+            training_optimizer_name='SGDOptimizer',
+            device='cpu'):
+        ort_parameters = TrainingParameters()
+        ort_parameters.loss_output_name = loss_output_name
+        ort_parameters.use_mixed_precision = False
+        # ort_parameters.world_rank = -1
+        # ort_parameters.world_size = 1
+        # ort_parameters.gradient_accumulation_steps = 1
+        # ort_parameters.allreduce_post_accumulation = False
+        # ort_parameters.deepspeed_zero_stage = 0
+        # ort_parameters.enable_grad_norm_clip = False
+        # ort_parameters.set_gradients_as_graph_outputs = False
+        # ort_parameters.use_memory_efficient_gradient = False
+        # ort_parameters.enable_adasum = False
+
+        output_types = {}
+        for output in training_onnx.graph.output:
+            output_types[output.name] = output.type.tensor_type
+
+        ort_parameters.weights_to_train = set(weights_to_train)
+        ort_parameters.training_optimizer_name = training_optimizer_name
+        # ort_parameters.lr_params_feed_name = lr_params_feed_name
+
+        ort_parameters.optimizer_attributes_map = {
+            name: {} for name in weights_to_train}
+        ort_parameters.optimizer_int_attributes_map = {
+            name: {} for name in weights_to_train}
+
+        session_options = SessionOptions()
+        session_options.use_deterministic_compute = True
+
+        if device == 'cpu':
+            provider = ['CPUExecutionProvider']
+        elif device.startswith("cuda"):
+            provider = ['CUDAExecutionProvider']
+        else:
+            raise ValueError("Unexpected device %r." % device)
+
+        session = TrainingSession(
+            training_onnx.SerializeToString(), ort_parameters, session_options,
+            providers=provider)
+
+        return session
+
+    def get_state(self):
+        """
+        Returns the trained weights.
+        """
+        if not hasattr(self, 'train_session_'):
+            raise AttributeError("Method fit must be called before.")
+        return self.train_session_.get_state()
+
+    def set_state(self, state):
+        """
+        Changes the trained weights.
+        """
+        if not hasattr(self, 'train_session_'):
+            raise AttributeError("Method fit must be called before.")
+        return self.train_session_.load_state(state)
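To make the schedules concrete, a small worked example of the `'invscaling'` update implemented by `_update_learning_rate`, using the class defaults `eta0=0.01` and `power_t=0.25`::

    # invscaling: eta = eta0 / (t + 1) ** power_t
    eta0, power_t = 0.01, 0.25
    for t in range(3):
        print(t, eta0 / (t + 1) ** power_t)
    # t=0 -> 0.01, t=1 -> ~0.00841, t=2 -> ~0.00760 (decreases monotonically)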
diff --git a/onnxcustom/training/orttraining.py b/onnxcustom/training/orttraining.py
new file mode 100644
index 00000000..b4a1b4e4
--- /dev/null
+++ b/onnxcustom/training/orttraining.py
@@ -0,0 +1,115 @@
+"""
+@file
+@brief Helper for :epkg:`onnxruntime-training`.
+"""
+from onnx.helper import (
+    make_node, make_graph, make_model, make_tensor_value_info,
+    set_model_props)
+from onnx.numpy_helper import to_array
+
+
+def unique_name(existing_names, name, add=True):
+    """
+    Returns a name different from any name in *existing_names*.
+
+    :param existing_names: set of names
+    :param name: current name
+    :param add: adds the name to the list of existing names
+    :return: unique name
+    """
+    if name not in existing_names:
+        existing_names.add(name)
+        return name
+    name0 = name
+    i = 2
+    while name in existing_names:
+        name = "%s_%d" % (name0, i)
+        i += 1
+    existing_names.add(name)
+    return name
+
+
+def add_loss_output(onx, score_name='squared_error',
+                    loss_name='loss', label_name='label'):
+    """
+    Modifies an ONNX graph to add operators to score and allow training.
+
+    :param onx: onx graph
+    :param score_name: name of the score
+    :param loss_name: name of the output loss
+    :param label_name: name of the label input
+    :return: modified graph
+
+    Possible values for *score_name*:
+
+    * `'squared_error'`: :math:`\\sum_i{(f(x_i)-y_i)^2}`
+
+    See example :ref:`l-orttraining-nn-gpu`.
+    """
+    outputs = onx.graph.output
+    if len(outputs) != 1:
+        raise ValueError(
+            "Unable to guess the output to compare to the "
+            "expected labels among %r." % ([o.name for o in outputs], ))
+
+    existing_names = []
+    for node in onx.graph.node:
+        existing_names.extend(node.output)
+        existing_names.extend(node.input)
+    existing_names = set(existing_names)
+
+    output_name = onx.graph.output[0].name
+    elem = onx.graph.output[0].type.tensor_type.elem_type
+    shape = []
+    for d in onx.graph.output[0].type.tensor_type.shape.dim:
+        shape.append(d.dim_value if d.dim_value > 0 else None)
+
+    if score_name == 'squared_error':
+        diff_name = unique_name(existing_names, "loss_diff")
+        diff2_name = unique_name(existing_names, "loss_diff")
+        nodes = [make_node('Sub', [output_name, label_name], [diff_name]),
+                 make_node('Mul', [diff_name, diff_name], [diff2_name]),
+                 make_node('ReduceSum', [diff2_name], [loss_name])]
+        inputs = [make_tensor_value_info(label_name, elem, shape)]
+        outputs = [make_tensor_value_info(loss_name, elem, [1, 1])]
+    else:
+        raise NotImplementedError(
+            "Unexpected %r value for score_name." % score_name)
+
+    graph = make_graph(
+        list(onx.graph.node) + nodes,
+        onx.graph.name,
+        list(onx.graph.input) + inputs,
+        outputs + list(onx.graph.output),
+        onx.graph.initializer)
+    onnx_model = make_model(graph)
+    onnx_model.ir_version = onx.ir_version
+    onnx_model.producer_name = onx.producer_name
+    onnx_model.producer_version = onx.producer_version
+    onnx_model.domain = onx.domain
+    onnx_model.model_version = onx.model_version
+    onnx_model.doc_string = onx.doc_string
+    if len(onx.metadata_props) > 0:
+        values = {p.key: p.value for p in onx.metadata_props}
+        set_model_props(onnx_model, values)
+
+    # fix opset import
+    del onnx_model.opset_import[:]  # pylint: disable=E1101
+    for oimp in onx.opset_import:
+        op_set = onnx_model.opset_import.add()  # pylint: disable=E1101
+        op_set.domain = oimp.domain
+        op_set.version = oimp.version
+    return onnx_model
+
+
+def get_train_initializer(onx):
+    """
+    Returns the list of initializers to train.
+
+    :param onx: onx graph
+    :return: dictionary `{name: (value, tensor)}`
+    """
+    res = {}
+    for init in onx.graph.initializer:
+        res[init.name] = (to_array(init), init)
+    return res
diff --git a/onnxcustom/utils/__init__.py b/onnxcustom/utils/__init__.py
index 2b0d4f6e..5f9674bd 100644
--- a/onnxcustom/utils/__init__.py
+++ b/onnxcustom/utils/__init__.py
@@ -1,5 +1,6 @@
 """
-Shortcuts to *utils*.
+@file
+@brief Shortcuts to *utils*.
 """
 
 from .benchmark import measure_time  # noqa
diff --git a/onnxcustom/utils/benchmark.py b/onnxcustom/utils/benchmark.py
index bba22541..c75c86bb 100644
--- a/onnxcustom/utils/benchmark.py
+++ b/onnxcustom/utils/benchmark.py
@@ -1,5 +1,6 @@
 """
-Tools to help benchmarking.
+@file
+@brief Tools to help benchmarking.
 """
 from timeit import Timer
 import numpy
diff --git a/onnxcustom/utils/imagenet_classes.py b/onnxcustom/utils/imagenet_classes.py
index a17e7c22..fef56838 100644
--- a/onnxcustom/utils/imagenet_classes.py
+++ b/onnxcustom/utils/imagenet_classes.py
@@ -1,5 +1,6 @@
 """
-Informations related to the :epkg:`ImageNet` competition.
+@file
+@brief Information related to the :epkg:`ImageNet` competition.
 """
 
 class_names = {
diff --git a/requirements-dev.txt b/requirements-dev.txt
index bc724811..9513f45b 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -25,7 +25,7 @@ pyquickhelper>=1.10
 pytest
 pytest-cov
 scikit-learn>=1.0
-skl2onnx>=1.9.3
+skl2onnx>=1.10.0
 sphinx
 sphinxcontrib-blockdiag
 sphinxcontrib.imagesvg
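Putting the new pieces together, a minimal end-to-end sketch mirroring the unit tests above; the model and data are illustrative only::

    import numpy
    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from mlprodict.onnx_conv import to_onnx
    from onnxcustom import __max_supported_opset__ as opset
    from onnxcustom.training import add_loss_output, get_train_initializer
    from onnxcustom.training.optimizers import OrtGradientOptimizer

    X, y = make_regression(100, n_features=10, bias=2)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    reg = LinearRegression().fit(X, y)

    # convert, append the squared-error loss, pick the trainable weights
    onx = to_onnx(reg, X, target_opset=opset, black_op={'LinearRegressor'})
    onx_loss = add_loss_output(onx)
    weights = [k for k in get_train_initializer(onx) if k != 'shape_tensor']

    # run a few SGD iterations with onnxruntime-training
    trainer = OrtGradientOptimizer(onx_loss, weights, max_iter=20)
    trainer.fit(X, y)
    print(trainer.train_losses_[-1])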