diff --git a/_doc/sphinxdoc/source/_exts/generate_automated_pages.py b/_doc/sphinxdoc/source/_exts/generate_automated_pages.py index ec7178864..7834438c2 100644 --- a/_doc/sphinxdoc/source/_exts/generate_automated_pages.py +++ b/_doc/sphinxdoc/source/_exts/generate_automated_pages.py @@ -18,8 +18,7 @@ from pyquickhelper.loghelper.run_cmd import get_interpreter_path from mlprodict.onnxrt.validate.validate_helper import sklearn_operators from mlprodict.onnxrt.doc.doc_write_helper import ( - split_columns_subsets, build_key_split, filter_rows, _make_opset -) + split_columns_subsets, build_key_split, filter_rows, _make_opset) from mlprodict.onnxrt.validate.validate_summary import _clean_values_optim from mlprodict.onnx_conv import register_converters, register_rewritten_operators register_converters() @@ -46,41 +45,6 @@ def write_page_onnxrt_ops(app): print("[mlprodict-sphinx] done page '{}'.".format(whe)) -def run_benchmark(runtime, srcdir, logger, skip, white_list=None): - filenames = [] - skls = sklearn_operators(extended=True) - skls = [_['name'] for _ in skls] - if white_list: - skls = [_ for _ in skls if _ in white_list] - skls.sort() - pbar = tqdm(skls) - for op in pbar: - if skip is not None and op in skip: - continue - pbar.set_description("[%s]" % (op + " " * (25 - len(op)))) - - out_raw = os.path.join(srcdir, "bench_raw_%s_%s.csv" % (runtime, op)) - out_sum = os.path.join(srcdir, "bench_sum_%s_%s.csv" % (runtime, op)) - cmd = ('{0} -m mlprodict validate_runtime --verbose=0 --out_raw={1} --out_summary={2} ' - '--benchmark=1 --dump_folder={3} --runtime={4} --models={5}'.format( - get_interpreter_path(), out_raw, out_sum, srcdir, runtime, op)) - logger.info("[mlprodict] cmd '{}'.".format(cmd)) - out, err = run_cmd(cmd, wait=True, fLOG=None) - if not os.path.exists(out_sum): - logger.warning("[mlprodict] unable to find '{}'.".format(out_sum)) - print("[mlprodict-sphinx] cmd '{}'".format(cmd)) - print("[mlprodict-sphinx] unable to find '{}'".format(out_sum)) - msg = "Unable to find '{}'\n--CMD--\n{}\n--OUT--\n{}\n--ERR--\n{}".format( - out_sum, cmd, out, err) - print(msg) - rows = [{'name': op, 'scenario': 'CRASH', - 'ERROR-msg': msg.replace("\n", " -- ")}] - df = DataFrame(rows) - df.to_csv(out_sum, index=False) - filenames.append((out_raw, out_sum)) - return filenames - - def write_page_onnxrt_benches(app, runtime, skip=None, white_list=None): from mlprodict.onnxrt.validate.validate import enumerate_validated_operator_opsets @@ -102,47 +66,19 @@ def write_page_onnxrt_benches(app, runtime, skip=None, white_list=None): logger.info("[mlprodict] create page '{}'.".format(whe)) print("[mlprodict-sphinx] create page runtime '{}' - '{}'.".format(runtime, whe)) - filenames = run_benchmark(runtime, srcdir, logger, skip, - white_list=white_list) - dfs_raw = [read_csv(name[0]) - for name in filenames if os.path.exists(name[0])] - dfs_sum = [read_csv(name[1]) - for name in filenames if os.path.exists(name[1])] - df_raw = concat(dfs_raw, sort=False) - piv = concat(dfs_sum, sort=False) - - opset_cols = [(int(oc.replace("opset", "")), oc) - for oc in piv.columns if 'opset' in oc] - opset_cols.sort(reverse=True) - opset_cols = [oc[1] for oc in opset_cols] - new_cols = opset_cols[:1] - bench_cols = ["RT/SKL-N=1", "N=10", "N=100", - "N=1000", "N=10000"] - new_cols.extend(["ERROR-msg", "name", "problem", "scenario", 'optim']) - new_cols.extend(bench_cols) - new_cols.extend(opset_cols[1:]) - for c in bench_cols: - new_cols.append(c + '-min') - new_cols.append(c + '-max') - for c in piv.columns: - if 
c.startswith("skl_") or c.startswith("onx_"): - new_cols.append(c) - new_cols = [_ for _ in new_cols if _ in piv.columns] - piv = piv[new_cols] - - out_sum = os.path.join(srcdir, "bench_sum_%s.xlsx" % runtime) - piv.to_excel(out_sum, index=False) - logger.info("[mlprodict] wrote '{}'.".format(out_sum)) - print("[mlprodict-sphinx] wrote '{}'".format(out_sum)) + out_sum = os.path.join(srcdir, "bench_sum_%s.xlsx" % runtime) + piv = pandas.read_excel(out_sum) + logger.info("[mlprodict] read '{}'.".format(out_sum)) + print("[mlprodict-sphinx] read '{}'".format(out_sum)) out_raw = os.path.join(srcdir, "bench_raw_%s.xlsx" % runtime) - df_raw.to_excel(out_raw, index=False) + df_raw = pandas.read_excel(out_raw) logger.info("[mlprodict] wrote '{}'.".format(out_raw)) print("[mlprodict-sphinx] wrote '{}'".format(out_raw)) logger.info("[mlprodict] shape '{}'.".format(piv.shape)) print("[mlprodict-sphinx] shape '{}'".format(piv.shape)) - + def make_link(row): link = ":ref:`{name} `" name = row['name'] diff --git a/_unittests/ut_cli/test_cli_validate_bench_doc.py b/_unittests/ut_cli/test_cli_validate_bench_doc.py new file mode 100644 index 000000000..4a3937dc7 --- /dev/null +++ b/_unittests/ut_cli/test_cli_validate_bench_doc.py @@ -0,0 +1,38 @@ +""" +@brief test tree node (time=42s) +""" +import os +import unittest +from pyquickhelper.loghelper import BufferedPrint +from pyquickhelper.pycode import ( + ExtTestCase, get_temp_folder, ignore_warnings) +from mlprodict.__main__ import main + + +class TestCliValidateBenchDoc(ExtTestCase): + + @ignore_warnings(UserWarning) + def test_cli_validate_bench_doc_help(self): + st = BufferedPrint() + main(args=["benchmark_doc", "--help"], fLOG=st.fprint) + res = str(st) + self.assertIn("verbose", res) + + @ignore_warnings(UserWarning) + def test_cli_validate_bench_doc(self): + temp = get_temp_folder(__file__, "temp_bench_doc") + out1 = os.path.join(temp, "raw.xlsx") + out2 = os.path.join(temp, "sum.csv") + st = BufferedPrint() + main(args=["benchmark_doc", "-o", out1, "-ou", out2, "-w", + "LinearRegression", '-d', temp, + '-r', 'python_compiled'], + fLOG=st.fprint) + res = str(st) + self.assertIn('Linear', res) + self.assertExists(out1) + self.assertExists(out2) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bin/run_bench_documentation.sh b/bin/run_bench_documentation.sh index dc4920ccc..e03024373 100644 --- a/bin/run_bench_documentation.sh +++ b/bin/run_bench_documentation.sh @@ -1,8 +1,7 @@ echo --PYTHON-- mkdir bench_python_compiled -python -m mlprodict validate_runtime -se 1 --verbose=1 --out_raw=bench_python_compiled.csv --out_summary=bench_sum_python_compiled.xlsx --benchmark=1 --dump_folder=./bench_python_compiled --runtime=python_compiled +python -m mlprodict benchmark_doc --verbose=1 --out_raw=bench_python_compiled.xlsx --out_summary=bench_sum_python_compiled.xlsx --dump_dir=./bench_python_compiled --runtime=python_compiled echo --ONNXRUNTIME-- mkdir bench_onnxruntime1 -python -m mlprodict validate_runtime -se 1 --verbose=1 --out_raw=bench_onnxruntime1.csv --out_summary=bench_sum_onnxruntime1.xlsx --benchmark=1 --dump_folder=./bench_onnxruntime1 --runtime=onnxruntime1 - +python -m mlprodict benchmark_doc --verbose=1 --out_raw=bench_onnxruntime1.xlsx --out_summary=bench_sum_onnxruntime1.xlsx --dump_dir=./bench_onnxruntime1 --runtime=onnxruntime1 \ No newline at end of file diff --git a/mlprodict/__main__.py b/mlprodict/__main__.py index aa3a11a9f..b1d11a1cc 100644 --- a/mlprodict/__main__.py +++ 
b/mlprodict/__main__.py @@ -15,7 +15,8 @@ def main(args, fLOG=print): @param fLOG logging function """ try: - from .cli.validate import validate_runtime + from .cli.validate import ( + validate_runtime, latency, benchmark_doc) from .cli.convert_validate import convert_validate from .cli.optimize import onnx_optim, onnx_stats from .cli.asv_bench import asv_bench @@ -23,9 +24,9 @@ def main(args, fLOG=print): from .cli.replay import benchmark_replay from .cli.einsum import einsum_test from .cli.onnx_code import onnx_code, dynamic_doc, plot_onnx - from .cli.validate import latency except ImportError: # pragma: no cover - from mlprodict.cli.validate import validate_runtime + from mlprodict.cli.validate import ( + validate_runtime, latency, benchmark_doc) from mlprodict.cli.convert_validate import convert_validate from mlprodict.cli.optimize import onnx_optim, onnx_stats from mlprodict.cli.asv_bench import asv_bench @@ -33,7 +34,6 @@ def main(args, fLOG=print): from mlprodict.cli.replay import benchmark_replay from mlprodict.cli.einsum import einsum_test from mlprodict.cli.onnx_code import onnx_code, dynamic_doc, plot_onnx - from mlprodict.cli.validate import latency fcts = dict(validate_runtime=validate_runtime, convert_validate=convert_validate, @@ -46,7 +46,8 @@ def main(args, fLOG=print): onnx_code=onnx_code, latency=latency, dynamic_doc=dynamic_doc, - plot_onnx=plot_onnx) + plot_onnx=plot_onnx, + benchmark_doc=benchmark_doc) try: from pyquickhelper.cli import cli_main_helper except ImportError: # pragma: no cover diff --git a/mlprodict/cli/validate.py b/mlprodict/cli/validate.py index a54cd7d9c..a2dac97f9 100644 --- a/mlprodict/cli/validate.py +++ b/mlprodict/cli/validate.py @@ -1,490 +1,616 @@ -""" -@file -@brief Command line about validation of prediction runtime. -""" -import os -from io import StringIO -from logging import getLogger -import warnings -import json -from multiprocessing import Pool -from pandas import DataFrame -from sklearn.exceptions import ConvergenceWarning - - -def validate_runtime(verbose=1, opset_min=-1, opset_max="", - check_runtime=True, runtime='python', debug=False, - models=None, out_raw="model_onnx_raw.xlsx", - out_summary="model_onnx_summary.xlsx", - dump_folder=None, dump_all=False, benchmark=False, - catch_warnings=True, assume_finite=True, - versions=False, skip_models=None, - extended_list=True, separate_process=False, - time_kwargs=None, n_features=None, fLOG=print, - out_graph=None, force_return=False, - dtype=None, skip_long_test=False, - number=1, repeat=1, time_kwargs_fact='lin', - time_limit=4, n_jobs=0): - """ - Walks through most of :epkg:`scikit-learn` operators - or model or predictor or transformer, tries to convert - them into :epkg:`ONNX` and computes the predictions - with a specific runtime. 
- - :param verbose: integer from 0 (None) to 2 (full verbose) - :param opset_min: tries every conversion from this minimum opset, - -1 to get the current opset - :param opset_max: tries every conversion up to maximum opset, - -1 to get the current opset - :param check_runtime: to check the runtime - and not only the conversion - :param runtime: runtime to check, python, - onnxruntime1 to check :epkg:`onnxruntime`, - onnxruntime2 to check every *ONNX* node independently - with onnxruntime, many runtime can be checked at the same time - if the value is a comma separated list - :param models: comma separated list of models to test or empty - string to test them all - :param skip_models: models to skip - :param debug: stops whenever an exception is raised, - only if *separate_process* is False - :param out_raw: output raw results into this file (excel format) - :param out_summary: output an aggregated view into this file (excel format) - :param dump_folder: folder where to dump information (pickle) - in case of mismatch - :param dump_all: dumps all models, not only the failing ones - :param benchmark: run benchmark - :param catch_warnings: catch warnings - :param assume_finite: See `config_context - `_, - If True, validation for finiteness will be skipped, saving time, but leading - to potential crashes. If False, validation for finiteness will be performed, - avoiding error. - :param versions: add columns with versions of used packages, - :epkg:`numpy`, :epkg:`scikit-learn`, :epkg:`onnx`, :epkg:`onnxruntime`, - :epkg:`sklearn-onnx` - :param extended_list: extends the list of :epkg:`scikit-learn` converters - with converters implemented in this module - :param separate_process: run every model in a separate process, - this option must be used to run all model in one row - even if one of them is crashing - :param time_kwargs: a dictionary which defines the number of rows and - the parameter *number* and *repeat* when benchmarking a model, - the value must follow :epkg:`json` format - :param n_features: change the default number of features for - a specific problem, it can also be a comma separated list - :param force_return: forces the function to return the results, - used when the results are produces through a separate process - :param out_graph: image name, to output a graph which summarizes - a benchmark in case it was run - :param dtype: '32' or '64' or None for both, - limits the test to one specific number types - :param skip_long_test: skips tests for high values of N if - they seem too long - :param number: to multiply number values in *time_kwargs* - :param repeat: to multiply repeat values in *time_kwargs* - :param time_kwargs_fact: to multiply number and repeat in - *time_kwargs* depending on the model - (see :func:`_multiply_time_kwargs `) - :param time_limit: to stop benchmarking after this limit of time - :param n_jobs: force the number of jobs to have this value, - by default, it is equal to the number of CPU - :param fLOG: logging function - - .. cmdref:: - :title: Validates a runtime against scikit-learn - :cmd: -m mlprodict validate_runtime --help - :lid: l-cmd-validate_runtime - - The command walks through all scikit-learn operators, - tries to convert them, checks the predictions, - and produces a report. 
- - Example:: - - python -m mlprodict validate_runtime --models LogisticRegression,LinearRegression - - Following example benchmarks models - :epkg:`sklearn:ensemble:RandomForestRegressor`, - :epkg:`sklearn:tree:DecisionTreeRegressor`, it compares - :epkg:`onnxruntime` against :epkg:`scikit-learn` for opset 10. - - :: - - python -m mlprodict validate_runtime -v 1 -o 10 -op 10 -c 1 -r onnxruntime1 - -m RandomForestRegressor,DecisionTreeRegressor -out bench_onnxruntime.xlsx -b 1 - - Parameter ``--time_kwargs`` may be used to reduce or increase - bencharmak precisions. The following value tells the function - to run a benchmarks with datasets of 1 or 10 number, to repeat - a given number of time *number* predictions in one row. - The total time is divided by :math:`number \\times repeat``. - Parameter ``--time_kwargs_fact`` may be used to increase these - number for some specific models. ``'lin'`` multiplies - by 10 number when the model is linear. - - :: - - -t "{\\"1\\":{\\"number\\":10,\\"repeat\\":10},\\"10\\":{\\"number\\":5,\\"repeat\\":5}}" - - The following example dumps every model in the list: - - :: - - python -m mlprodict validate_runtime --out_raw raw.csv --out_summary sum.csv - --models LinearRegression,LogisticRegression,DecisionTreeRegressor,DecisionTreeClassifier - -r python,onnxruntime1 -o 10 -op 10 -v 1 -b 1 -dum 1 - -du model_dump -n 20,100,500 --out_graph benchmark.png --dtype 32 - - The command line generates a graph produced by function - :func:`plot_validate_benchmark - `. - """ - if separate_process: - return _validate_runtime_separate_process( - verbose=verbose, opset_min=opset_min, opset_max=opset_max, - check_runtime=check_runtime, runtime=runtime, debug=debug, - models=models, out_raw=out_raw, - out_summary=out_summary, dump_all=dump_all, - dump_folder=dump_folder, benchmark=benchmark, - catch_warnings=catch_warnings, assume_finite=assume_finite, - versions=versions, skip_models=skip_models, - extended_list=extended_list, time_kwargs=time_kwargs, - n_features=n_features, fLOG=fLOG, force_return=True, - out_graph=None, dtype=dtype, skip_long_test=skip_long_test, - time_kwargs_fact=time_kwargs_fact, time_limit=time_limit, - n_jobs=n_jobs) - - from ..onnxrt.validate import enumerate_validated_operator_opsets # pylint: disable=E0402 - - if not isinstance(models, list): - models = (None if models in (None, "") - else models.strip().split(',')) - if not isinstance(skip_models, list): - skip_models = ({} if skip_models in (None, "") - else skip_models.strip().split(',')) - if verbose <= 1: - logger = getLogger('skl2onnx') - logger.disabled = True - if not dump_folder: - dump_folder = None - if dump_folder and not os.path.exists(dump_folder): - os.mkdir(dump_folder) # pragma: no cover - if dump_folder and not os.path.exists(dump_folder): - raise FileNotFoundError( # pragma: no cover - "Cannot find dump_folder '{0}'.".format( - dump_folder)) - - # handling parameters - if opset_max == "": - opset_max = None # pragma: no cover - if isinstance(opset_min, str): - opset_min = int(opset_min) # pragma: no cover - if isinstance(opset_max, str): - opset_max = int(opset_max) - if isinstance(verbose, str): - verbose = int(verbose) # pragma: no cover - if isinstance(extended_list, str): - extended_list = extended_list in ( - '1', 'True', 'true') # pragma: no cover - if time_kwargs in (None, ''): - time_kwargs = None - if isinstance(time_kwargs, str): - time_kwargs = json.loads(time_kwargs) - # json only allows string as keys - time_kwargs = {int(k): v for k, v in time_kwargs.items()} 
- if isinstance(n_jobs, str): - n_jobs = int(n_jobs) - if n_jobs == 0: - n_jobs = None - if time_kwargs is not None and not isinstance(time_kwargs, dict): - raise ValueError( # pragma: no cover - "time_kwargs must be a dictionary not {}\n{}".format( - type(time_kwargs), time_kwargs)) - if not isinstance(n_features, list): - if n_features in (None, ""): - n_features = None - elif ',' in n_features: - n_features = list(map(int, n_features.split(','))) - else: - n_features = int(n_features) - if not isinstance(runtime, list) and ',' in runtime: - runtime = runtime.split(',') - - def fct_filter_exp(m, s): - cl = m.__name__ - if cl in skip_models: - return False - pair = "%s[%s]" % (cl, s) - if pair in skip_models: - return False - return True - - if dtype in ('', None): - fct_filter = fct_filter_exp - elif dtype == '32': - def fct_filter_exp2(m, p): - return fct_filter_exp(m, p) and '64' not in p - fct_filter = fct_filter_exp2 - elif dtype == '64': # pragma: no cover - def fct_filter_exp3(m, p): - return fct_filter_exp(m, p) and '64' in p - fct_filter = fct_filter_exp3 - else: - raise ValueError( # pragma: no cover - "dtype must be empty, 32, 64 not '{}'.".format(dtype)) - - # time_kwargs - - if benchmark: - if time_kwargs is None: - from ..onnxrt.validate.validate_helper import default_time_kwargs # pylint: disable=E0402 - time_kwargs = default_time_kwargs() - for _, v in time_kwargs.items(): - v['number'] *= number - v['repeat'] *= repeat - if verbose > 0: - fLOG("time_kwargs=%r" % time_kwargs) - - # body - - def build_rows(models_): - rows = list(enumerate_validated_operator_opsets( - verbose, models=models_, fLOG=fLOG, runtime=runtime, debug=debug, - dump_folder=dump_folder, opset_min=opset_min, opset_max=opset_max, - benchmark=benchmark, assume_finite=assume_finite, versions=versions, - extended_list=extended_list, time_kwargs=time_kwargs, dump_all=dump_all, - n_features=n_features, filter_exp=fct_filter, - skip_long_test=skip_long_test, time_limit=time_limit, - time_kwargs_fact=time_kwargs_fact, n_jobs=n_jobs)) - return rows - - def catch_build_rows(models_): - if catch_warnings: - with warnings.catch_warnings(): - warnings.simplefilter("ignore", - (UserWarning, ConvergenceWarning, - RuntimeWarning, FutureWarning)) - rows = build_rows(models_) - else: - rows = build_rows(models_) # pragma: no cover - return rows - - rows = catch_build_rows(models) - res = _finalize(rows, out_raw, out_summary, - verbose, models, out_graph, fLOG) - return res if (force_return or verbose >= 2) else None - - -def _finalize(rows, out_raw, out_summary, verbose, models, out_graph, fLOG): - from ..onnxrt.validate import summary_report # pylint: disable=E0402 - from ..tools.cleaning import clean_error_msg # pylint: disable=E0402 - - # Drops data which cannot be serialized. - for row in rows: - keys = [] - for k in row: - if 'lambda' in k: - keys.append(k) - for k in keys: - del row[k] - - df = DataFrame(rows) - - if out_raw: - if verbose > 0: - fLOG("Saving raw_data into '{}'.".format(out_raw)) - if os.path.splitext(out_raw)[-1] == ".xlsx": - df.to_excel(out_raw, index=False) - else: - clean_error_msg(df).to_csv(out_raw, index=False) - - if df.shape[0] == 0: - raise RuntimeError("No result produced by the benchmark.") - piv = summary_report(df) - if 'optim' not in piv: - raise RuntimeError( # pragma: no cover - "Unable to produce a summary. 
Missing column in \n{}".format( - piv.columns)) - - if out_summary: - if verbose > 0: - fLOG("Saving summary into '{}'.".format(out_summary)) - if os.path.splitext(out_summary)[-1] == ".xlsx": - piv.to_excel(out_summary, index=False) - else: - clean_error_msg(piv).to_csv(out_summary, index=False) - - if verbose > 1 and models is not None: - fLOG(piv.T) - if out_graph is not None: - if verbose > 0: - fLOG("Saving graph into '{}'.".format(out_graph)) - from ..plotting.plotting import plot_validate_benchmark - fig = plot_validate_benchmark(piv)[0] - fig.savefig(out_graph) - - return rows - - -def _validate_runtime_dict(kwargs): - return validate_runtime(**kwargs) - - -def _validate_runtime_separate_process(**kwargs): - models = kwargs['models'] - if models in (None, ""): - from ..onnxrt.validate.validate_helper import sklearn_operators # pragma: no cover - models = [_['name'] - for _ in sklearn_operators(extended=True)] # pragma: no cover - elif not isinstance(models, list): - models = models.strip().split(',') - - skip_models = kwargs['skip_models'] - skip_models = {} if skip_models in ( - None, "") else skip_models.strip().split(',') - - verbose = kwargs['verbose'] - fLOG = kwargs['fLOG'] - all_rows = [] - skls = [m for m in models if m not in skip_models] - skls.sort() - - if verbose > 0: - from tqdm import tqdm - pbar = tqdm(skls) - else: - pbar = skls # pragma: no cover - - for op in pbar: - if not isinstance(pbar, list): - pbar.set_description("[%s]" % (op + " " * (25 - len(op)))) - - if kwargs['out_raw']: - out_raw = os.path.splitext(kwargs['out_raw']) - out_raw = "".join([out_raw[0], "_", op, out_raw[1]]) - else: - out_raw = None # pragma: no cover - - if kwargs['out_summary']: - out_summary = os.path.splitext(kwargs['out_summary']) - out_summary = "".join([out_summary[0], "_", op, out_summary[1]]) - else: - out_summary = None # pragma: no cover - - new_kwargs = kwargs.copy() - if 'fLOG' in new_kwargs: - del new_kwargs['fLOG'] - new_kwargs['out_raw'] = out_raw - new_kwargs['out_summary'] = out_summary - new_kwargs['models'] = op - new_kwargs['verbose'] = 0 # tqdm fails - new_kwargs['out_graph'] = None - - with Pool(1) as p: - try: - result = p.apply_async(_validate_runtime_dict, [new_kwargs]) - lrows = result.get(timeout=150) # timeout fixed to 150s - all_rows.extend(lrows) - except Exception as e: # pylint: disable=W0703 - all_rows.append({ # pragma: no cover - 'name': op, 'scenario': 'CRASH', - 'ERROR-msg': str(e).replace("\n", " -- ") - }) - - return _finalize(all_rows, kwargs['out_raw'], kwargs['out_summary'], - verbose, models, kwargs.get('out_graph', None), fLOG) - - -def latency(model, law='normal', size=1, number=10, repeat=10, max_time=0, - runtime="onnxruntime", device='cpu', fmt=None, - profiling=None, profile_output='profiling.csv'): - """ - Measures the latency of a model (python API). 
- - :param model: ONNX graph - :param law: random law used to generate fake inputs - :param size: batch size, it replaces the first dimension - of every input if it is left unknown - :param number: number of calls to measure - :param repeat: number of times to repeat the experiment - :param max_time: if it is > 0, it runs as many time during - that period of time - :param runtime: available runtime - :param device: device, `cpu`, `cuda:0` or a list of providers - `CPUExecutionProvider, CUDAExecutionProvider - :param fmt: None or `csv`, it then - returns a string formatted like a csv file - :param profiling: if True, profile the execution of every - node, if can be sorted by name or type, - the value for this parameter should e in `(None, 'name', 'type')` - :param profile_output: output name for the profiling - if profiling is specified - - .. cmdref:: - :title: Measures model latency - :cmd: -m mlprodict latency --help - :lid: l-cmd-latency - - The command generates random inputs and call many times the - model on these inputs. It returns the processing time for one - iteration. - - Example:: - - python -m mlprodict latency --model "model.onnx" - """ - from ..onnxrt.validate.validate_latency import latency as _latency # pylint: disable=E0402 - - if not os.path.exists(model): - raise FileNotFoundError( # pragma: no cover - "Unable to find model %r." % model) - if profiling not in (None, '', 'name', 'type'): - raise ValueError( # pragma: no cover - "Unexpected value for profiling: %r." % profiling) - size = int(size) - number = int(number) - repeat = int(repeat) - if max_time in (None, 0, ""): - max_time = None - else: - max_time = float(max_time) - if max_time <= 0: - max_time = None - - if law != "normal": - raise ValueError( - "Only law='normal' is supported, not %r." % law) - - if profiling in ('name', 'type') and profile_output in (None, ''): - raise ValueError( # pragma: no cover - 'profiling is enabled but profile_output is wrong (%r).' - '' % profile_output) - - res = _latency( - model, law=law, size=size, number=number, repeat=repeat, - max_time=max_time, runtime=runtime, device=device, - profiling=profiling) - - if profiling not in (None, ''): - res, gr = res - ext = os.path.splitext(profile_output)[-1] - gr = gr.reset_index(drop=False) - if ext == '.csv': - gr.to_csv(profile_output, index=False) - elif ext == '.xlsx': - gr.to_excel(profile_output, index=False) - else: - raise ValueError( # pragma: no cover - "Unexpected extension for profile_output=%r." - "" % profile_output) - - if fmt == 'csv': - st = StringIO() - df = DataFrame([res]) - df.to_csv(st, index=False) - return st.getvalue() - if fmt in (None, ''): - return res - raise ValueError( # pragma: no cover - "Unexpected value for fmt: %r." % fmt) +""" +@file +@brief Command line about validation of prediction runtime. +""" +import os +from io import StringIO +from logging import getLogger +import warnings +import json +from multiprocessing import Pool +from pandas import DataFrame, read_csv, concat +from sklearn.exceptions import ConvergenceWarning + + +def benchmark_doc(runtime, black_list=None, white_list=None, + out_raw='bench_raw.xlsx', out_summary="bench_summary.xlsx", + dump_dir='dump', fLOG=print, verbose=0): + """ + Runs the benchmark published into the documentation + (see :ref:`l-onnx-bench-onnxruntime1` and + :ref:`l-onnx-bench-python_compiled`). 
+ + :param runtime: runtime (python, python_compiled, + onnxruntime1, onnxruntime2) + :param black_list: models to skip, None for none + (comma separated list) + :param white_list: models to benchmark, None for all + (comma separated list) + :param out_raw: all results are saved in that file + :param out_summary: all results are summarized in that file + :param dump_dir: folder where to dump intermediate results + :param fLOG: logging function + :param verbose: verbosity + :return: list of created files + """ + def _save(df, name): + ext = os.path.splitext(name)[-1] + if ext == '.xlsx': + df.to_excel(name, index=False) + elif ext == '.csv': + df.to_csv(name, index=False) + else: + raise ValueError("Unexpected extension in %r." % name) + if verbose > 1: + fLOG("[mlprodict] wrote '{}'".format(name)) + + from pyquickhelper.loghelper import run_cmd + from pyquickhelper.loghelper.run_cmd import get_interpreter_path + from tqdm import tqdm + from ..onnxrt.validate.validate_helper import sklearn_operators + from ..onnx_conv import register_converters, register_rewritten_operators + register_converters() + try: + register_rewritten_operators() + except KeyError: + warnings.warn("converter for HistGradientBoosting* does not exist. " + "Upgrade sklearn-onnx") + + if black_list is None: + black_list = [] + else: + black_list = black_list.split(',') + if white_list is None: + white_list = [] + else: + white_list = white_list.split(',') + + filenames = [] + skls = sklearn_operators(extended=True) + skls = [_['name'] for _ in skls] + if white_list: + skls = [_ for _ in skls if _ in white_list] + skls.sort() + if verbose > 0: + pbar = tqdm(skls) + else: + pbar = skls + for op in pbar: + if black_list is not None and op in black_list: + continue + if verbose > 0: + pbar.set_description("[%s]" % (op + " " * (25 - len(op)))) + + loop_out_raw = os.path.join( + dump_dir, "bench_raw_%s_%s.csv" % (runtime, op)) + loop_out_sum = os.path.join( + dump_dir, "bench_sum_%s_%s.csv" % (runtime, op)) + cmd = ('{0} -m mlprodict validate_runtime --verbose=0 --out_raw={1} --out_summary={2} ' + '--benchmark=1 --dump_folder={3} --runtime={4} --models={5}'.format( + get_interpreter_path(), loop_out_raw, loop_out_sum, dump_dir, runtime, op)) + if verbose > 1: + fLOG("[mlprodict] cmd '{}'.".format(cmd)) + out, err = run_cmd(cmd, wait=True, fLOG=None) + if not os.path.exists(loop_out_sum): + if verbose > 2: + fLOG("[mlprodict] unable to find '{}'.".format(loop_out_sum)) + if verbose > 1: + fLOG("[mlprodict] cmd '{}'".format(cmd)) + fLOG("[mlprodict] unable to find '{}'".format(loop_out_sum)) + msg = "Unable to find '{}'\n--CMD--\n{}\n--OUT--\n{}\n--ERR--\n{}".format( + loop_out_sum, cmd, out, err) + fLOG(msg) + rows = [{'name': op, 'scenario': 'CRASH', + 'ERROR-msg': msg.replace("\n", " -- ")}] + df = DataFrame(rows) + df.to_csv(loop_out_sum, index=False) + filenames.append((loop_out_raw, loop_out_sum)) + + # concatenate summaries + dfs_raw = [read_csv(name[0]) + for name in filenames if os.path.exists(name[0])] + dfs_sum = [read_csv(name[1]) + for name in filenames if os.path.exists(name[1])] + df_raw = concat(dfs_raw, sort=False) + piv = concat(dfs_sum, sort=False) + + opset_cols = [(int(oc.replace("opset", "")), oc) + for oc in piv.columns if 'opset' in oc] + opset_cols.sort(reverse=True) + opset_cols = [oc[1] for oc in opset_cols] + new_cols = opset_cols[:1] + bench_cols = ["RT/SKL-N=1", "N=10", "N=100", + "N=1000", "N=10000"] + new_cols.extend(["ERROR-msg", "name", "problem", "scenario", 'optim']) + 
new_cols.extend(bench_cols) + new_cols.extend(opset_cols[1:]) + for c in bench_cols: + new_cols.append(c + '-min') + new_cols.append(c + '-max') + for c in piv.columns: + if c.startswith("skl_") or c.startswith("onx_"): + new_cols.append(c) + new_cols = [_ for _ in new_cols if _ in piv.columns] + piv = piv[new_cols] + + _save(piv, out_summary) + _save(df_raw, out_raw) + return filenames + + + def validate_runtime(verbose=1, opset_min=-1, opset_max="", + check_runtime=True, runtime='python', debug=False, + models=None, out_raw="model_onnx_raw.xlsx", + out_summary="model_onnx_summary.xlsx", + dump_folder=None, dump_all=False, benchmark=False, + catch_warnings=True, assume_finite=True, + versions=False, skip_models=None, + extended_list=True, separate_process=False, + time_kwargs=None, n_features=None, fLOG=print, + out_graph=None, force_return=False, + dtype=None, skip_long_test=False, + number=1, repeat=1, time_kwargs_fact='lin', + time_limit=4, n_jobs=0): + """ + Walks through most of :epkg:`scikit-learn` operators, + models, predictors and transformers, tries to convert + them into :epkg:`ONNX` and computes the predictions + with a specific runtime. + + :param verbose: integer from 0 (None) to 2 (full verbose) + :param opset_min: tries every conversion from this minimum opset, + -1 to get the current opset + :param opset_max: tries every conversion up to maximum opset, + -1 to get the current opset + :param check_runtime: to check the runtime + and not only the conversion + :param runtime: runtime to check, python, + onnxruntime1 to check :epkg:`onnxruntime`, + onnxruntime2 to check every *ONNX* node independently + with onnxruntime, many runtimes can be checked at the same time + if the value is a comma separated list + :param models: comma separated list of models to test or empty + string to test them all + :param skip_models: models to skip + :param debug: stops whenever an exception is raised, + only if *separate_process* is False + :param out_raw: output raw results into this file (excel format) + :param out_summary: output an aggregated view into this file (excel format) + :param dump_folder: folder where to dump information (pickle) + in case of mismatch + :param dump_all: dumps all models, not only the failing ones + :param benchmark: run benchmark + :param catch_warnings: catch warnings + :param assume_finite: See `config_context + `_, + If True, validation for finiteness will be skipped, saving time, but leading + to potential crashes. If False, validation for finiteness will be performed, + avoiding errors. 
+ :param versions: add columns with versions of used packages, + :epkg:`numpy`, :epkg:`scikit-learn`, :epkg:`onnx`, :epkg:`onnxruntime`, + :epkg:`sklearn-onnx` + :param extended_list: extends the list of :epkg:`scikit-learn` converters + with converters implemented in this module + :param separate_process: run every model in a separate process, + this option must be used to run all models in a row + even if one of them is crashing + :param time_kwargs: a dictionary which defines the number of rows and + the parameters *number* and *repeat* when benchmarking a model, + the value must follow :epkg:`json` format + :param n_features: change the default number of features for + a specific problem, it can also be a comma separated list + :param force_return: forces the function to return the results, + used when the results are produced through a separate process + :param out_graph: image name, to output a graph which summarizes + a benchmark in case it was run + :param dtype: '32' or '64' or None for both, + limits the test to one specific number type + :param skip_long_test: skips tests for high values of N if + they seem too long + :param number: to multiply number values in *time_kwargs* + :param repeat: to multiply repeat values in *time_kwargs* + :param time_kwargs_fact: to multiply number and repeat in + *time_kwargs* depending on the model + (see :func:`_multiply_time_kwargs `) + :param time_limit: to stop benchmarking after this limit of time + :param n_jobs: force the number of jobs to have this value, + by default, it is equal to the number of CPU + :param fLOG: logging function + + .. cmdref:: + :title: Validates a runtime against scikit-learn + :cmd: -m mlprodict validate_runtime --help + :lid: l-cmd-validate_runtime + + The command walks through all scikit-learn operators, + tries to convert them, checks the predictions, + and produces a report. + + Example:: + + python -m mlprodict validate_runtime --models LogisticRegression,LinearRegression + + The following example benchmarks the models + :epkg:`sklearn:ensemble:RandomForestRegressor` and + :epkg:`sklearn:tree:DecisionTreeRegressor`; it compares + :epkg:`onnxruntime` against :epkg:`scikit-learn` for opset 10. + + :: + + python -m mlprodict validate_runtime -v 1 -o 10 -op 10 -c 1 -r onnxruntime1 + -m RandomForestRegressor,DecisionTreeRegressor -out bench_onnxruntime.xlsx -b 1 + + Parameter ``--time_kwargs`` may be used to reduce or increase + the benchmark precision. The following value tells the function + to run benchmarks with datasets of 1 or 10 rows, calling + the prediction *number* times in a row and repeating that *repeat* times. + The total time is divided by :math:`number \\times repeat`. + Parameter ``--time_kwargs_fact`` may be used to increase these + numbers for some specific models. ``'lin'`` multiplies + *number* by 10 when the model is linear. + + :: + + -t "{\\"1\\":{\\"number\\":10,\\"repeat\\":10},\\"10\\":{\\"number\\":5,\\"repeat\\":5}}" + + The following example dumps every model in the list: + + :: + + python -m mlprodict validate_runtime --out_raw raw.csv --out_summary sum.csv + --models LinearRegression,LogisticRegression,DecisionTreeRegressor,DecisionTreeClassifier + -r python,onnxruntime1 -o 10 -op 10 -v 1 -b 1 -dum 1 + -du model_dump -n 20,100,500 --out_graph benchmark.png --dtype 32 + + The command line generates a graph produced by function + :func:`plot_validate_benchmark + `. 
+ """ + if separate_process: + return _validate_runtime_separate_process( + verbose=verbose, opset_min=opset_min, opset_max=opset_max, + check_runtime=check_runtime, runtime=runtime, debug=debug, + models=models, out_raw=out_raw, + out_summary=out_summary, dump_all=dump_all, + dump_folder=dump_folder, benchmark=benchmark, + catch_warnings=catch_warnings, assume_finite=assume_finite, + versions=versions, skip_models=skip_models, + extended_list=extended_list, time_kwargs=time_kwargs, + n_features=n_features, fLOG=fLOG, force_return=True, + out_graph=None, dtype=dtype, skip_long_test=skip_long_test, + time_kwargs_fact=time_kwargs_fact, time_limit=time_limit, + n_jobs=n_jobs) + + from ..onnxrt.validate import enumerate_validated_operator_opsets # pylint: disable=E0402 + + if not isinstance(models, list): + models = (None if models in (None, "") + else models.strip().split(',')) + if not isinstance(skip_models, list): + skip_models = ({} if skip_models in (None, "") + else skip_models.strip().split(',')) + if verbose <= 1: + logger = getLogger('skl2onnx') + logger.disabled = True + if not dump_folder: + dump_folder = None + if dump_folder and not os.path.exists(dump_folder): + os.mkdir(dump_folder) # pragma: no cover + if dump_folder and not os.path.exists(dump_folder): + raise FileNotFoundError( # pragma: no cover + "Cannot find dump_folder '{0}'.".format( + dump_folder)) + + # handling parameters + if opset_max == "": + opset_max = None # pragma: no cover + if isinstance(opset_min, str): + opset_min = int(opset_min) # pragma: no cover + if isinstance(opset_max, str): + opset_max = int(opset_max) + if isinstance(verbose, str): + verbose = int(verbose) # pragma: no cover + if isinstance(extended_list, str): + extended_list = extended_list in ( + '1', 'True', 'true') # pragma: no cover + if time_kwargs in (None, ''): + time_kwargs = None + if isinstance(time_kwargs, str): + time_kwargs = json.loads(time_kwargs) + # json only allows string as keys + time_kwargs = {int(k): v for k, v in time_kwargs.items()} + if isinstance(n_jobs, str): + n_jobs = int(n_jobs) + if n_jobs == 0: + n_jobs = None + if time_kwargs is not None and not isinstance(time_kwargs, dict): + raise ValueError( # pragma: no cover + "time_kwargs must be a dictionary not {}\n{}".format( + type(time_kwargs), time_kwargs)) + if not isinstance(n_features, list): + if n_features in (None, ""): + n_features = None + elif ',' in n_features: + n_features = list(map(int, n_features.split(','))) + else: + n_features = int(n_features) + if not isinstance(runtime, list) and ',' in runtime: + runtime = runtime.split(',') + + def fct_filter_exp(m, s): + cl = m.__name__ + if cl in skip_models: + return False + pair = "%s[%s]" % (cl, s) + if pair in skip_models: + return False + return True + + if dtype in ('', None): + fct_filter = fct_filter_exp + elif dtype == '32': + def fct_filter_exp2(m, p): + return fct_filter_exp(m, p) and '64' not in p + fct_filter = fct_filter_exp2 + elif dtype == '64': # pragma: no cover + def fct_filter_exp3(m, p): + return fct_filter_exp(m, p) and '64' in p + fct_filter = fct_filter_exp3 + else: + raise ValueError( # pragma: no cover + "dtype must be empty, 32, 64 not '{}'.".format(dtype)) + + # time_kwargs + + if benchmark: + if time_kwargs is None: + from ..onnxrt.validate.validate_helper import default_time_kwargs # pylint: disable=E0402 + time_kwargs = default_time_kwargs() + for _, v in time_kwargs.items(): + v['number'] *= number + v['repeat'] *= repeat + if verbose > 0: + fLOG("time_kwargs=%r" % time_kwargs) 
+ + # body + + def build_rows(models_): + rows = list(enumerate_validated_operator_opsets( + verbose, models=models_, fLOG=fLOG, runtime=runtime, debug=debug, + dump_folder=dump_folder, opset_min=opset_min, opset_max=opset_max, + benchmark=benchmark, assume_finite=assume_finite, versions=versions, + extended_list=extended_list, time_kwargs=time_kwargs, dump_all=dump_all, + n_features=n_features, filter_exp=fct_filter, + skip_long_test=skip_long_test, time_limit=time_limit, + time_kwargs_fact=time_kwargs_fact, n_jobs=n_jobs)) + return rows + + def catch_build_rows(models_): + if catch_warnings: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", + (UserWarning, ConvergenceWarning, + RuntimeWarning, FutureWarning)) + rows = build_rows(models_) + else: + rows = build_rows(models_) # pragma: no cover + return rows + + rows = catch_build_rows(models) + res = _finalize(rows, out_raw, out_summary, + verbose, models, out_graph, fLOG) + return res if (force_return or verbose >= 2) else None + + +def _finalize(rows, out_raw, out_summary, verbose, models, out_graph, fLOG): + from ..onnxrt.validate import summary_report # pylint: disable=E0402 + from ..tools.cleaning import clean_error_msg # pylint: disable=E0402 + + # Drops data which cannot be serialized. + for row in rows: + keys = [] + for k in row: + if 'lambda' in k: + keys.append(k) + for k in keys: + del row[k] + + df = DataFrame(rows) + + if out_raw: + if verbose > 0: + fLOG("Saving raw_data into '{}'.".format(out_raw)) + if os.path.splitext(out_raw)[-1] == ".xlsx": + df.to_excel(out_raw, index=False) + else: + clean_error_msg(df).to_csv(out_raw, index=False) + + if df.shape[0] == 0: + raise RuntimeError("No result produced by the benchmark.") + piv = summary_report(df) + if 'optim' not in piv: + raise RuntimeError( # pragma: no cover + "Unable to produce a summary. 
Missing column in \n{}".format( + piv.columns)) + + if out_summary: + if verbose > 0: + fLOG("Saving summary into '{}'.".format(out_summary)) + if os.path.splitext(out_summary)[-1] == ".xlsx": + piv.to_excel(out_summary, index=False) + else: + clean_error_msg(piv).to_csv(out_summary, index=False) + + if verbose > 1 and models is not None: + fLOG(piv.T) + if out_graph is not None: + if verbose > 0: + fLOG("Saving graph into '{}'.".format(out_graph)) + from ..plotting.plotting import plot_validate_benchmark + fig = plot_validate_benchmark(piv)[0] + fig.savefig(out_graph) + + return rows + + +def _validate_runtime_dict(kwargs): + return validate_runtime(**kwargs) + + +def _validate_runtime_separate_process(**kwargs): + models = kwargs['models'] + if models in (None, ""): + from ..onnxrt.validate.validate_helper import sklearn_operators # pragma: no cover + models = [_['name'] + for _ in sklearn_operators(extended=True)] # pragma: no cover + elif not isinstance(models, list): + models = models.strip().split(',') + + skip_models = kwargs['skip_models'] + skip_models = {} if skip_models in ( + None, "") else skip_models.strip().split(',') + + verbose = kwargs['verbose'] + fLOG = kwargs['fLOG'] + all_rows = [] + skls = [m for m in models if m not in skip_models] + skls.sort() + + if verbose > 0: + from tqdm import tqdm + pbar = tqdm(skls) + else: + pbar = skls # pragma: no cover + + for op in pbar: + if not isinstance(pbar, list): + pbar.set_description("[%s]" % (op + " " * (25 - len(op)))) + + if kwargs['out_raw']: + out_raw = os.path.splitext(kwargs['out_raw']) + out_raw = "".join([out_raw[0], "_", op, out_raw[1]]) + else: + out_raw = None # pragma: no cover + + if kwargs['out_summary']: + out_summary = os.path.splitext(kwargs['out_summary']) + out_summary = "".join([out_summary[0], "_", op, out_summary[1]]) + else: + out_summary = None # pragma: no cover + + new_kwargs = kwargs.copy() + if 'fLOG' in new_kwargs: + del new_kwargs['fLOG'] + new_kwargs['out_raw'] = out_raw + new_kwargs['out_summary'] = out_summary + new_kwargs['models'] = op + new_kwargs['verbose'] = 0 # tqdm fails + new_kwargs['out_graph'] = None + + with Pool(1) as p: + try: + result = p.apply_async(_validate_runtime_dict, [new_kwargs]) + lrows = result.get(timeout=150) # timeout fixed to 150s + all_rows.extend(lrows) + except Exception as e: # pylint: disable=W0703 + all_rows.append({ # pragma: no cover + 'name': op, 'scenario': 'CRASH', + 'ERROR-msg': str(e).replace("\n", " -- ") + }) + + return _finalize(all_rows, kwargs['out_raw'], kwargs['out_summary'], + verbose, models, kwargs.get('out_graph', None), fLOG) + + +def latency(model, law='normal', size=1, number=10, repeat=10, max_time=0, + runtime="onnxruntime", device='cpu', fmt=None, + profiling=None, profile_output='profiling.csv'): + """ + Measures the latency of a model (python API). 
+ + :param model: ONNX graph + :param law: random law used to generate fake inputs + :param size: batch size, it replaces the first dimension + of every input if it is left unknown + :param number: number of calls to measure + :param repeat: number of times to repeat the experiment + :param max_time: if it is > 0, it runs as many times as possible during + that period of time + :param runtime: available runtime + :param device: device, `cpu`, `cuda:0` or a list of providers + `CPUExecutionProvider, CUDAExecutionProvider` + :param fmt: None or `csv`, it then + returns a string formatted like a csv file + :param profiling: if True, profiles the execution of every + node, the result can be sorted by name or type, + the value for this parameter should be in `(None, 'name', 'type')` + :param profile_output: output name for the profiling + if profiling is specified + + .. cmdref:: + :title: Measures model latency + :cmd: -m mlprodict latency --help + :lid: l-cmd-latency + + The command generates random inputs and calls the + model many times on these inputs. It returns the processing time for one + iteration. + + Example:: + + python -m mlprodict latency --model "model.onnx" + """ + from ..onnxrt.validate.validate_latency import latency as _latency # pylint: disable=E0402 + + if not os.path.exists(model): + raise FileNotFoundError( # pragma: no cover + "Unable to find model %r." % model) + if profiling not in (None, '', 'name', 'type'): + raise ValueError( # pragma: no cover + "Unexpected value for profiling: %r." % profiling) + size = int(size) + number = int(number) + repeat = int(repeat) + if max_time in (None, 0, ""): + max_time = None + else: + max_time = float(max_time) + if max_time <= 0: + max_time = None + + if law != "normal": + raise ValueError( + "Only law='normal' is supported, not %r." % law) + + if profiling in ('name', 'type') and profile_output in (None, ''): + raise ValueError( # pragma: no cover + 'profiling is enabled but profile_output is wrong (%r).' + '' % profile_output) + + res = _latency( + model, law=law, size=size, number=number, repeat=repeat, + max_time=max_time, runtime=runtime, device=device, + profiling=profiling) + + if profiling not in (None, ''): + res, gr = res + ext = os.path.splitext(profile_output)[-1] + gr = gr.reset_index(drop=False) + if ext == '.csv': + gr.to_csv(profile_output, index=False) + elif ext == '.xlsx': + gr.to_excel(profile_output, index=False) + else: + raise ValueError( # pragma: no cover + "Unexpected extension for profile_output=%r." + "" % profile_output) + + if fmt == 'csv': + st = StringIO() + df = DataFrame([res]) + df.to_csv(st, index=False) + return st.getvalue() + if fmt in (None, ''): + return res + raise ValueError( # pragma: no cover + "Unexpected value for fmt: %r." % fmt)
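
Usage note (editorial addition, not part of the patch): the sketch below shows how the new ``benchmark_doc`` entry point introduced by this diff can be called directly from Python. It relies only on the signature visible above (``benchmark_doc(runtime, black_list=None, white_list=None, out_raw=..., out_summary=..., dump_dir=..., fLOG=print, verbose=0)``); the folder and file names are placeholders chosen for the example::

    import os
    from mlprodict.cli.validate import benchmark_doc

    dump = "dump_bench_doc"          # placeholder working folder
    os.makedirs(dump, exist_ok=True)

    # Benchmarks a single model with the python_compiled runtime.
    # The function spawns one `validate_runtime` subprocess per model and
    # returns the list of (raw, summary) CSV files written into dump_dir.
    files = benchmark_doc(
        "python_compiled",
        white_list="LinearRegression",   # comma separated list of models
        out_raw=os.path.join(dump, "raw.xlsx"),
        out_summary=os.path.join(dump, "sum.csv"),
        dump_dir=dump,
        verbose=1)
    print(files)

The same run through the command line follows the pattern used in ``bin/run_bench_documentation.sh``, for instance ``python -m mlprodict benchmark_doc --runtime=python_compiled --out_raw=raw.xlsx --out_summary=sum.csv --dump_dir=dump --verbose=1``, which is also what ``test_cli_validate_bench_doc`` exercises with short options.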