diff --git a/_unittests/ut_torch_models/test_validate_whole_models1.py b/_unittests/ut_torch_models/test_validate_whole_models1.py
index 6a8d48ff..06c6fd88 100644
--- a/_unittests/ut_torch_models/test_validate_whole_models1.py
+++ b/_unittests/ut_torch_models/test_validate_whole_models1.py
@@ -122,6 +122,7 @@ def test_g_validate_model_onnx_dynamo_os_ort(self):
     @hide_stdout()
     @ignore_warnings(FutureWarning)
     @requires_experimental()
+    @requires_transformers("4.52")  # empty_cache None does not work
     def test_i_validate_model_custom(self):
         mid = "arnir0/Tiny-LLM"
         summary, data = validate_model(
@@ -150,6 +151,7 @@ def test_i_validate_model_custom(self):
     @requires_torch("2.7")
     @hide_stdout()
     @ignore_warnings(FutureWarning)
+    @requires_transformers("4.52")  # empty_cache None does not work
     @requires_experimental()
     def test_j_validate_model_custom_torch(self):
         mid = "arnir0/Tiny-LLM"
diff --git a/onnx_diagnostic/helpers/helper.py b/onnx_diagnostic/helpers/helper.py
index fa0e4168..670826af 100644
--- a/onnx_diagnostic/helpers/helper.py
+++ b/onnx_diagnostic/helpers/helper.py
@@ -1016,6 +1016,8 @@ def max_diff(
     You may use :func:`string_diff` to display the discrepancies in one string.
     """
+    if verbose >= 10:
+        print(f"[max_diff] {type(expected)} ? {type(got)}")
     if expected is None and got is None:
         return dict(abs=0, rel=0, sum=0, n=0, dnan=0)
@@ -1061,8 +1063,8 @@ def max_diff(
     if expected.__class__.__name__ == "CausalLMOutputWithPast":
         if verbose >= 6:
             print(
-                f"[max_diff] CausalLMOutputWithPast: {string_type(expected)} "
-                f"? {string_type(got)}"
+                f"[max_diff] CausalLMOutputWithPast: {string_type(expected, with_shape=True)} "
+                f"? {string_type(got, with_shape=True)}"
             )
         if got.__class__.__name__ == "CausalLMOutputWithPast":
             return max_diff(
diff --git a/onnx_diagnostic/torch_models/validate.py b/onnx_diagnostic/torch_models/validate.py
index abf816f9..5eb1eda5 100644
--- a/onnx_diagnostic/torch_models/validate.py
+++ b/onnx_diagnostic/torch_models/validate.py
@@ -1,11 +1,11 @@
-import gc
 import datetime
+import gc
 import inspect
 import os
 import pprint
 import sys
-from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
 import time
+from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
 import numpy as np
 import onnx
 import torch
@@ -273,8 +273,8 @@ def _quiet_or_not_quiet(
         summary[f"time_{suffix}_latency_std"] = a.std()
         summary[f"time_{suffix}_latency_min"] = a.min()
         summary[f"time_{suffix}_latency_max"] = a.max()
-        summary[f"time_{suffix}_latency_098"] = a[-i2]
-        summary[f"time_{suffix}_latency_095"] = a[-i5]
+        summary[f"time_{suffix}_latency_098"] = a[-max(i2, 1)]
+        summary[f"time_{suffix}_latency_095"] = a[-max(i5, 1)]
         summary[f"time_{suffix}_latency_005"] = a[i5]
         summary[f"time_{suffix}_latency_002"] = a[i2]
         summary[f"time_{suffix}_n"] = len(a)
@@ -323,128 +323,33 @@ def make_patch_kwargs(
     return patch_kwargs


-def validate_model(
-    model_id: str,
-    task: Optional[str] = None,
-    do_run: bool = False,
-    exporter: Optional[str] = None,
-    do_same: bool = False,
-    verbose: int = 0,
-    dtype: Optional[Union[str, torch.dtype]] = None,
-    device: Optional[Union[str, torch.device]] = None,
-    same_as_pretrained: bool = False,
-    use_pretrained: bool = False,
-    optimization: Optional[str] = None,
-    quiet: bool = False,
-    patch: Union[bool, str, Dict[str, bool]] = False,
-    rewrite: bool = False,
-    stop_if_static: int = 1,
-    dump_folder: Optional[str] = None,
-    drop_inputs: Optional[List[str]] = None,
-    ortfusiontype: Optional[str] = None,
-    input_options: Optional[Dict[str, Any]] = None,
-    model_options: Optional[Dict[str, Any]] = None,
-    exporter_options: Optional[Dict[str, Any]] = None,
-    subfolder: Optional[str] = None,
-    opset: Optional[int] = None,
-    runtime: str = "onnxruntime",
-    repeat: int = 1,
-    warmup: int = 0,
-    inputs2: int = 1,
-    output_names: Optional[List[str]] = None,
-    ort_logs: bool = False,
-    quiet_input_sets: Optional[Set[str]] = None,
-) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
-    """
-    Validates a model.
-    The function can also be called through the command line
-    :ref:`l-cmd-validate`.
-
-    :param model_id: model id to validate
-    :param task: task used to generate the necessary inputs,
-        can be left empty to use the default task for this model
-        if it can be determined
-    :param do_run: checks the model works with the defined inputs
-    :param exporter: exporter the model using this exporter,
-        available list: ``export-strict``, ``export-nostrict``, ...
-        see below
-    :param do_same: checks the discrepancies of the exported model
-    :param verbose: verbosity level
-    :param dtype: uses this dtype to check the model
-    :param device: do the verification on this device
-    :param same_as_pretrained: use a model equivalent to the trained,
-        this is not always possible
-    :param use_pretrained: use the trained model, not the untrained one
-    :param optimization: optimization to apply to the exported model,
-        depend on the the exporter
-    :param quiet: if quiet, catches exception if any issue
-    :param patch: applies patches (``patch_transformers=True, path_diffusers=True``)
-        if True before exporting
-        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`,
-        a string can be used to specify only one of them
-    :param rewrite: applies known rewriting (``patch_transformers=True``) before exporting,
-        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
-    :param stop_if_static: stops if a dynamic dimension becomes static,
-        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
-    :param dump_folder: dumps everything in a subfolder of this one
-    :param drop_inputs: drops this list of inputs (given their names)
-    :param ortfusiontype: runs ort fusion, the parameters defines the fusion type,
-        it accepts multiple values separated by ``|``,
-        see :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`
-    :param input_options: additional options to define the dummy inputs
-        used to export
-    :param model_options: additional options when creating the model such as
-        ``num_hidden_layers`` or ``attn_implementation``
-    :param exporter_options: additional options when exporting the model such as
-        ``report=True`` or ``verify=True``
-    :param subfolder: version or subfolders to uses when retrieving a model id
-    :param opset: onnx opset to use for the conversion
-    :param runtime: onnx runtime to use to check about discrepancies,
-        possible values ``onnxruntime``, ``torch``, ``orteval``,
-        ``orteval10``, ``ref`` only if `do_run` is true
-    :param repeat: number of time to measure the model
-    :param warmup: warmup the model first
-    :param inputs2: checks that other sets of inputs are running as well,
-        this ensures that the model does support dynamism, the value is used
-        as an increment to the first set of values (added to dimensions),
-        or an empty cache for example
-    :param output_names: output names the onnx exporter should use
-    :param ort_logs: increases onnxruntime verbosity when creating the session
-    :param quiet_input_sets: avoid raising an exception if the inputs belongs to that set
-        even if quiet is False
-    :return: two dictionaries, one with some metrics,
-        another one with whatever the function produces
-
-    The following environment variables can be used to print out some
-    information:
-
-    * ``PRINT_CONFIG``: prints the model configuration
-
-    The following exporters are available:
-
-    * ``export-nostrict``: run :func:`torch.export.export` (..., strict=False)
-    * ``onnx-dynamo``: run :func:`torch.onnx.export` (...),
-      models can be optimized with ``optimization`` in ``("ir", "os_ort")``
-    * ``modelbuilder``: use :epkg:`ModelBuilder` to builds the onnx model
-    * ``custom``: custom exporter (see :epkg:`experimental-experiment`),
-      models can be optimized with ``optimization`` in
-      ``("default", "default+onnxruntime", "default+os_ort", "default+onnxruntime+os_ort")``
-
-    The default runtime, :epkg:`onnxruntime` is used to validate a model and check the
-    exported model returns the same outputs as the original one, otherwise,
-    :class:`onnx_diagnostic.reference.TorchOnnxEvaluator`
-    if ``runtime == 'torch'`` or
-    :class:`onnx_diagnostic.reference.OnnxruntimeEvaluator`
-    if ``runtime == 'orteval'`` or
-    :class:`onnx_diagnostic.reference.ExtendedReferenceEvaluator`
-    if ``runtime == 'ref'``,
-    ``orteval10`` increases the verbosity.
-
-    .. versionchanged:: 0.7.13
-        *inputs2* not only means a second set of inputs but many
-        such as ``input_empty_cache``
-        which refers to a set of inputs using an empty cache.
-    """
+def _prepare_validation(
+    model_id,
+    subfolder,
+    same_as_pretrained,
+    use_pretrained,
+    patch,
+    rewrite,
+    do_run,
+    dtype,
+    device,
+    optimization,
+    quiet,
+    drop_inputs,
+    ortfusiontype,
+    stop_if_static,
+    exporter,
+    runtime,
+    inputs2,
+    input_options,
+    model_options,
+    exporter_options,
+    opset,
+    task,
+    verbose,
+    output_names,
+    dump_folder,
+):
     main_validation_begin = time.perf_counter()
     model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
         model_id,
@@ -530,6 +435,32 @@ def validate_model(
     summary["model_id"] = model_id
     summary["model_subfolder"] = subfolder or ""
+    return (
+        summary,
+        model_id,
+        subfolder,
+        same_as_pretrained,
+        use_pretrained,
+        dump_folder,
+        folder_name,
+        patch_kwargs,
+    )
+
+
+def _get_untrained_model_with_inputs(
+    summary,
+    model_id,
+    verbose,
+    task,
+    use_pretrained,
+    same_as_pretrained,
+    input_options,
+    model_options,
+    subfolder,
+    inputs2,
+    quiet,
+    dump_folder,
+):
     iop = input_options or {}
     mop = model_options or {}
     data = _quiet_or_not_quiet(
@@ -554,8 +485,6 @@ def validate_model(
         ),
     )
-    second_input_keys = [k for k in data if k.startswith("inputs") and k != "inputs"]
-
     if dump_folder:
         with open(os.path.join(dump_folder, "model_config.txt"), "w") as f:
             f.write(f"model_id: {model_id}\n------\n")
@@ -572,25 +501,45 @@ def validate_model(
             f.write(f"model_id: {model_id}\n------\n")
             f.write(pprint.pformat(dump_info))
-    if exporter == "modelbuilder":
-        # Models used with ModelBuilder do not like batch size > 1.
-        # Let's change that.
- for k in ["inputs", "inputs2"]: - if k not in data: - continue - if verbose: - print(f"[validate_model] set batch=1 for data[{k!r}]") - print(f"[validate_model] batch=1 === {string_type(data[k], with_shape=True)}") - cpl = CoupleInputsDynamicShapes( - tuple(), data[k], dynamic_shapes=data["dynamic_shapes"] - ) - with register_additional_serialization_functions(patch_transformers=True): # type: ignore[arg-type] - data[k] = cpl.change_dynamic_dimensions( - desired_values=dict(batch=1), only_desired=True - ) - if verbose: - print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}") + return data, iop, mop + +def _update_data_for_modelbuilder(data, verbose): + # Models used with ModelBuilder do not like batch size > 1. + # Let's change that. + for k in ["inputs", "inputs2"]: + if k not in data: + continue + if verbose: + print(f"[validate_model] set batch=1 for data[{k!r}]") + print(f"[validate_model] batch=1 === {string_type(data[k], with_shape=True)}") + cpl = CoupleInputsDynamicShapes( + tuple(), data[k], dynamic_shapes=data["dynamic_shapes"] + ) + with register_additional_serialization_functions(patch_transformers=True): # type: ignore[arg-type] + data[k] = cpl.change_dynamic_dimensions( + desired_values=dict(batch=1), only_desired=True + ) + if verbose: + print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}") + + +def _update_inputs_outputs( + data, + summary, + exporter, + iop, + mop, + dump_folder, + opset, + device, + dtype, + rewrite, + drop_inputs, + verbose, + second_input_keys, + model_id, +): # modelbuilder needs different treatments sometimes, so # we mark it for later usage. # for example, it has different past_kv ordering than @@ -677,7 +626,7 @@ def validate_model( for k in ["task", "size", "n_weights"]: summary[f"model_{k.replace('_','')}"] = data[k] summary["second_input_keys"] = ",".join(second_input_keys) - summary["model_inputs_options"] = str(input_options or "") + summary["model_inputs_options"] = str(iop or "") summary["model_inputs"] = string_type(data["inputs"], with_shape=True) summary["model_shapes"] = string_type(data["dynamic_shapes"]) summary["model_class"] = data["model"].__class__.__name__ @@ -694,6 +643,8 @@ def validate_model( ).replace(" ", "") summary["model_id"] = model_id + +def _verbose_validate(data, second_input_keys, verbose): if verbose: print("[validate_model] --") print(f"[validate_model] task={data['task']}") @@ -706,28 +657,21 @@ def validate_model( print(f"[validate_model] second_input_keys={second_input_keys}") print("[validate_model] --") - if do_run: - validation_begin = time.perf_counter() - - _validate_do_run_model( - data, summary, "inputs", "run", "run_expected", verbose, repeat, warmup, quiet - ) - if second_input_keys: - for k in second_input_keys: - _validate_do_run_model( - data, - summary, - k, - f"run2{k[6:]}", - f"run_expected2{k[6:]}", - verbose, - 1, - 0, - quiet, - ) - - summary["time_total_validation_torch"] = time.perf_counter() - validation_begin +def _call_exporter( + data, + summary, + exporter, + patch_kwargs, + stop_if_static, + verbose, + dump_folder, + quiet, + optimization, + do_run, + output_names, + exporter_options, +): if exporter: expop = exporter_options or {} if verbose: @@ -786,6 +730,8 @@ def validate_model( summary.update(summary_export) summary["time_total_exporter"] = time.perf_counter() - exporter_begin + +def _dump_onnx_model(data, summary, dump_folder, verbose, exporter, folder_name): dump_stats = None if dump_folder: if "exported_program" in data: @@ 
@@ -850,26 +796,392 @@ def validate_model(
     ):
         if verbose:
             print("[validate_model] -- done (final)")
-        if dump_stats:
-            with open(dump_stats, "w") as f:
-                for k, v in sorted(summary.items()):
-                    f.write(f":{k}:{v};\n")
+        return False, dump_stats
+    return True, dump_stats
+
+
+def validate_model(
+    model_id: str,
+    task: Optional[str] = None,
+    do_run: bool = False,
+    exporter: Optional[str] = None,
+    do_same: bool = False,
+    verbose: int = 0,
+    dtype: Optional[Union[str, torch.dtype]] = None,
+    device: Optional[Union[str, torch.device]] = None,
+    same_as_pretrained: bool = False,
+    use_pretrained: bool = False,
+    optimization: Optional[str] = None,
+    quiet: bool = False,
+    patch: Union[bool, str, Dict[str, bool]] = False,
+    rewrite: bool = False,
+    stop_if_static: int = 1,
+    dump_folder: Optional[str] = None,
+    drop_inputs: Optional[List[str]] = None,
+    ortfusiontype: Optional[str] = None,
+    input_options: Optional[Dict[str, Any]] = None,
+    model_options: Optional[Dict[str, Any]] = None,
+    exporter_options: Optional[Dict[str, Any]] = None,
+    subfolder: Optional[str] = None,
+    opset: Optional[int] = None,
+    runtime: str = "onnxruntime",
+    repeat: int = 1,
+    warmup: int = 0,
+    inputs2: int = 1,
+    output_names: Optional[List[str]] = None,
+    ort_logs: bool = False,
+    quiet_input_sets: Optional[Set[str]] = None,
+) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
+    """
+    Validates a model.
+    The function can also be called through the command line
+    :ref:`l-cmd-validate`.
+
+    :param model_id: model id to validate
+    :param task: task used to generate the necessary inputs,
+        can be left empty to use the default task for this model
+        if it can be determined
+    :param do_run: checks the model works with the defined inputs
+    :param exporter: exports the model using this exporter,
+        available list: ``export-strict``, ``export-nostrict``, ...
+        see below
+    :param do_same: checks the discrepancies of the exported model
+    :param verbose: verbosity level
+    :param dtype: uses this dtype to check the model
+    :param device: does the verification on this device
+    :param same_as_pretrained: use a model equivalent to the trained one,
+        this is not always possible
+    :param use_pretrained: use the trained model, not the untrained one
+    :param optimization: optimization to apply to the exported model,
+        depends on the exporter
+    :param quiet: if quiet, catches exceptions if any issue arises
+    :param patch: applies patches (``patch_transformers=True, patch_diffusers=True``)
+        if True before exporting,
+        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`,
+        a string can be used to specify only one of them
+    :param rewrite: applies known rewriting (``patch_transformers=True``) before exporting,
+        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
+    :param stop_if_static: stops if a dynamic dimension becomes static,
+        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
+    :param dump_folder: dumps everything in a subfolder of this one
+    :param drop_inputs: drops this list of inputs (given their names)
+    :param ortfusiontype: runs ort fusion, the parameter defines the fusion type,
+        it accepts multiple values separated by ``|``,
+        see :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`
+    :param input_options: additional options to define the dummy inputs
+        used to export
+    :param model_options: additional options when creating the model such as
+        ``num_hidden_layers`` or ``attn_implementation``
+    :param exporter_options: additional options when exporting the model such as
+        ``report=True`` or ``verify=True``
+    :param subfolder: version or subfolder to use when retrieving a model id
+    :param opset: onnx opset to use for the conversion
+    :param runtime: onnx runtime to use to check for discrepancies,
+        possible values ``onnxruntime``, ``torch``, ``orteval``,
+        ``orteval10``, ``ref``; used only if `do_run` is true
+    :param repeat: number of times to measure the model
+    :param warmup: warms up the model first
+    :param inputs2: checks that other sets of inputs run as well;
+        this ensures that the model does support dynamism, the value is used
+        as an increment to the first set of values (added to dimensions),
+        or an empty cache for example
+    :param output_names: output names the onnx exporter should use
+    :param ort_logs: increases onnxruntime verbosity when creating the session
+    :param quiet_input_sets: avoids raising an exception if the inputs belong to that set
+        even if quiet is False
+    :return: two dictionaries, one with some metrics,
+        another one with whatever the function produces
+
+    The following environment variables can be used to print out some
+    information:
+
+    * ``PRINT_CONFIG``: prints the model configuration
+
+    The following exporters are available:
+
+    * ``export-nostrict``: run :func:`torch.export.export` (..., strict=False)
+    * ``onnx-dynamo``: run :func:`torch.onnx.export` (...),
+      models can be optimized with ``optimization`` in ``("ir", "os_ort")``
+    * ``modelbuilder``: use :epkg:`ModelBuilder` to build the onnx model
+    * ``custom``: custom exporter (see :epkg:`experimental-experiment`),
+      models can be optimized with ``optimization`` in
+      ``("default", "default+onnxruntime", "default+os_ort", "default+onnxruntime+os_ort")``
+
+    The default runtime, :epkg:`onnxruntime`, is used to validate a model and check that
+    the exported model returns the same outputs as the original one; otherwise,
+    :class:`onnx_diagnostic.reference.TorchOnnxEvaluator`
+    if ``runtime == 'torch'`` or
+    :class:`onnx_diagnostic.reference.OnnxruntimeEvaluator`
+    if ``runtime == 'orteval'`` or
+    :class:`onnx_diagnostic.reference.ExtendedReferenceEvaluator`
+    if ``runtime == 'ref'``,
+    ``orteval10`` increases the verbosity.
+
+    .. versionchanged:: 0.7.13
+        *inputs2* no longer means only a second set of inputs but possibly many,
+        such as ``input_empty_cache``,
+        which refers to a set of inputs using an empty cache.
+    """
+    main_validation_begin = time.perf_counter()
+    cont, summary, data, dump_stats, second_input_keys = _validate_model_step1(
+        model_id=model_id,
+        do_same=do_same,
+        do_run=do_run,
+        patch=patch,
+        rewrite=rewrite,
+        dtype=dtype,
+        device=device,
+        optimization=optimization,
+        quiet=quiet,
+        drop_inputs=drop_inputs,
+        ortfusiontype=ortfusiontype,
+        stop_if_static=stop_if_static,
+        exporter=exporter,
+        verbose=verbose,
+        task=task,
+        runtime=runtime,
+        inputs2=inputs2,
+        input_options=input_options,
+        model_options=model_options,
+        exporter_options=exporter_options,
+        opset=opset,
+        output_names=output_names,
+        repeat=repeat,
+        warmup=warmup,
+        dump_folder=dump_folder,
+        subfolder=subfolder,
+        use_pretrained=use_pretrained,
+        same_as_pretrained=same_as_pretrained,
+    )
+    if dump_folder:
+        with open(dump_stats, "w") as f:
+            for k, v in sorted(summary.items()):
+                f.write(f":{k}:{v};\n")
+    if not cont:
         return summary, data
+    data, summary = _clean_data_remove_model_and_proto(data, summary)
+    _validate_model_step2(
+        summary=summary,
+        data=data,
+        do_run=do_run,
+        quiet=quiet,
+        verbose=verbose,
+        runtime=runtime,
+        repeat=repeat,
+        warmup=warmup,
+        second_input_keys=second_input_keys,
+        ort_logs=ort_logs,
+        quiet_input_sets=quiet_input_sets,
+        ortfusiontype=ortfusiontype,
+        model_id=model_id,
+    )
+
+    summary["time_total"] = time.perf_counter() - main_validation_begin
+
+    if verbose:
+        print("[validate_model] -- done (final)")
+    if dump_stats:
+        with open(dump_stats, "w") as f:
+            for k, v in sorted(summary.items()):
+                f.write(f":{k}:{v};\n")
+    return summary, data
+
+
+def _clean_data_remove_model_and_proto(data, summary):
+    assert isinstance(data, dict) and isinstance(summary, dict)
+    data = _clean_data_remove_model_and_proto_(data)
+    summary = _clean_data_remove_model_and_proto_(summary)
+    gc.collect()
+    return data, summary
+
+
+def _clean_data_remove_model_and_proto_(obj):
+    if type(obj) is dict:
+        # do not use isinstance, otherwise CausalLMOutputWithPast becomes a dictionary
+        return {k: _clean_data_remove_model_and_proto_(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_clean_data_remove_model_and_proto_(v) for v in obj]
+    if isinstance(obj, tuple):
+        return tuple(_clean_data_remove_model_and_proto_(v) for v in obj)
+    if isinstance(obj, set):
+        return {_clean_data_remove_model_and_proto_(v) for v in obj}
+    if isinstance(obj, (torch.nn.Module, onnx.ModelProto)):
+        return None
+    return obj
+
+
+def _validate_model_step1(
+    model_id,
+    do_same,
+    do_run,
+    patch,
+    rewrite,
+    dtype,
+    device,
+    optimization,
+    quiet,
+    drop_inputs,
+    ortfusiontype,
+    stop_if_static,
+    exporter,
+    verbose,
+    task,
+    runtime,
+    inputs2,
+    input_options,
+    model_options,
+    exporter_options,
+    opset,
+    output_names,
+    repeat,
+    warmup,
+    dump_folder,
+    subfolder,
+    use_pretrained,
+    same_as_pretrained,
+):
+    assert not do_same or do_run, (
+        f"Discrepancies cannot be measured if the model is not run, "
+        f"do_run={do_run}, do_same={do_same}"
+    )
+    (
+        summary,
+        model_id,
+        subfolder,
+        same_as_pretrained,
+        use_pretrained,
+        dump_folder,
+        folder_name,
+        patch_kwargs,
+    ) = _prepare_validation(
+        model_id=model_id,
+        subfolder=subfolder,
+        same_as_pretrained=same_as_pretrained,
+        use_pretrained=use_pretrained,
+        patch=patch,
+        rewrite=rewrite,
+        do_run=do_run,
+        dtype=dtype,
+        device=device,
+        optimization=optimization,
+        quiet=quiet,
+        drop_inputs=drop_inputs,
+        ortfusiontype=ortfusiontype,
+        stop_if_static=stop_if_static,
+        exporter=exporter,
+        runtime=runtime,
+        inputs2=inputs2,
+        input_options=input_options,
+        model_options=model_options,
+        exporter_options=exporter_options,
+        opset=opset,
+        task=task,
+        verbose=verbose,
+        output_names=output_names,
+        dump_folder=dump_folder,
+    )
+
+    data, iop, mop = _get_untrained_model_with_inputs(
+        summary=summary,
+        model_id=model_id,
+        verbose=verbose,
+        task=task,
+        use_pretrained=use_pretrained,
+        same_as_pretrained=same_as_pretrained,
+        input_options=input_options,
+        model_options=model_options,
+        subfolder=subfolder,
+        inputs2=inputs2,
+        quiet=quiet,
+        dump_folder=dump_folder,
+    )
+
+    second_input_keys = [k for k in data if k.startswith("inputs") and k != "inputs"]
+    if exporter == "modelbuilder":
+        _update_data_for_modelbuilder(data, verbose)
+
+    _update_inputs_outputs(
+        data=data,
+        summary=summary,
+        exporter=exporter,
+        iop=iop,
+        mop=mop,
+        dump_folder=dump_folder,
+        opset=opset,
+        device=device,
+        dtype=dtype,
+        rewrite=rewrite,
+        drop_inputs=drop_inputs,
+        verbose=verbose,
+        second_input_keys=second_input_keys,
+        model_id=model_id,
+    )
+
+    _verbose_validate(data, second_input_keys, verbose)

     if do_run:
-        # Let's move the model to CPU to make sure it frees GPU memory.
-        if verbose:
-            # It does not really work for the time being and the model
-            # gets loaded twice, one by torch, one by onnxruntime
-            print("[validation_model] -- delete the model")
-        for key in ["model", "onnx_program", "config"]:
-            if key in data:
-                del data[key]
-        if device is not None and "cuda" in str(device).lower():
-            torch.cuda.empty_cache()
-        gc.collect()
-        print("[validation_model] -- done")
+        validation_begin = time.perf_counter()
+
+        _validate_do_run_model(
+            data, summary, "inputs", "run", "run_expected", verbose, repeat, warmup, quiet
+        )
+        if second_input_keys:
+            for k in second_input_keys:
+                _validate_do_run_model(
+                    data,
+                    summary,
+                    k,
+                    f"run2{k[6:]}",
+                    f"run_expected2{k[6:]}",
+                    verbose,
+                    1,
+                    0,
+                    quiet,
+                )
+
+        summary["time_total_validation_torch"] = time.perf_counter() - validation_begin
+    _call_exporter(
+        data=data,
+        summary=summary,
+        exporter=exporter,
+        patch_kwargs=patch_kwargs,
+        stop_if_static=stop_if_static,
+        verbose=verbose,
+        dump_folder=dump_folder,
+        quiet=quiet,
+        optimization=optimization,
+        do_run=do_run,
+        output_names=output_names,
+        exporter_options=exporter_options,
+    )
+
+    cont, dump_stats = _dump_onnx_model(
+        data=data,
+        summary=summary,
+        dump_folder=dump_folder,
+        verbose=verbose,
+        exporter=exporter,
+        folder_name=folder_name,
+    )
+    return cont, summary, data, dump_stats, second_input_keys
+
+
+def _validate_model_step2(
+    summary,
+    data,
+    do_run,
+    quiet,
+    verbose,
+    runtime,
+    repeat,
+    warmup,
+    second_input_keys,
+    ort_logs,
+    quiet_input_sets,
+    ortfusiontype,
+    model_id,
+):
+    if do_run:
         validation_begin = time.perf_counter()
         summary_valid, data = validate_onnx_model(
             data=data,
@@ -948,16 +1260,6 @@ def validate_model(
     summary.update(summary_valid)
     _compute_final_statistics(summary)
-    summary["time_total"] = time.perf_counter() - main_validation_begin
-
-    if verbose:
-        print("[validate_model] -- done (final)")
-    if dump_stats:
-        # Dumps again the statistics.
-        with open(dump_stats, "w") as f:
-            for k, v in sorted(summary.items()):
-                f.write(f":{k}:{v};\n")
-
     return summary, data


 def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
@@ -1041,7 +1343,7 @@ def _validate_do_run_model(
     summary[expected_tag] = string_type(expected, with_shape=True)
     if verbose:
-        print(f"[validate_model] done ([{tag}])")
+        print(f"[validate_model] done ([{tag}]) - {string_type(expected, with_shape=True)}")
     data[expected_tag] = expected
     assert hash_inputs == string_type(data[key], with_shape=True), (
         f"The model execution did modified the inputs:\n"
@@ -1051,7 +1353,6 @@ def _validate_do_run_model(


 def _validate_do_run_exported_program(data, summary, verbose, quiet):
-
     # We run a second time the model to check the patch did not
     # introduce any discrepancies
     if verbose:
@@ -1076,7 +1377,13 @@ def _validate_do_run_exported_program(data, summary, verbose, quiet):
     if "ERR_run_patched" in summary:
         return summary, data
-    disc = max_diff(data["run_expected"], expected)
+    verbose_diff = int(os.environ.get("MAXDIFF", "0"))
+    if verbose_diff >= 10:
+        print("[_validate_do_run_exported_program] with inputs_export")
+    disc = max_diff(data["run_expected"], expected, verbose=verbose_diff)
+    assert not verbose_diff or (
+        not np.isnan(disc["abs"]) and not np.isinf(disc["abs"])
+    ), f"unexpected nan or inf in the discrepancies, disc={disc}"
     for k, v in disc.items():
         summary[f"disc_patched_{k}"] = str(v)
     if verbose:
@@ -1321,7 +1628,14 @@ def call_torch_export_export(
     if "ERR_export_export" in summary:
         return summary, data
-    disc = max_diff(data["run_expected"], expected)
+    verbose_diff = int(os.environ.get("MAXDIFF", "0"))
+    if verbose_diff >= 10:
+        print("[call_torch_export_export] with inputs_export")
+    disc = max_diff(data["run_expected"], expected, verbose=verbose_diff)
+    assert not verbose_diff or (
+        not np.isnan(disc["abs"]) and not np.isinf(disc["abs"])
+    ), f"unexpected nan or inf in the discrepancies, disc={disc}"
+
     for k, v in disc.items():
         summary[f"disc_exported_{k}"] = str(v)
     if verbose:
@@ -1541,7 +1855,16 @@ def _mk(key, flavour=flavour):
         print(f"[validate_onnx_model] got={string_type(got, with_shape=True)}")
     # compute discrepancies
-    disc = max_diff(data[k_expected], got, flatten=True)
+    verbose_diff = int(os.environ.get("MAXDIFF", "0"))
+    if verbose_diff >= 10:
+        print(
+            f"[validate_onnx_model] k_input={k_input!r}, "
+            f"k_expected={k_expected!r}, suffix={suffix!r}"
+        )
+    disc = max_diff(data[k_expected], got, flatten=True, verbose=verbose_diff)
+    assert not verbose_diff or (
+        not np.isnan(disc["abs"]) and not np.isinf(disc["abs"])
+    ), f"unexpected nan or inf in the discrepancies, disc={disc}"
     if verbose:
         print(f"[validate_onnx_model] discrepancies={string_diff(disc)}")
     for k, v in disc.items():
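
Note on the `max(i2, 1)` guard in `_quiet_or_not_quiet`: `i2` and `i5` are quantile
indices whose computation is not part of this diff; the sketch below assumes they are
derived as a small fraction of the number of sorted latency samples. With few samples
the index can be 0, and since `a[-0]` is `a[0]`, the reported `latency_098` silently
became the minimum instead of the upper quantile. A minimal sketch:

    import numpy as np

    a = np.sort(np.array([0.5, 0.7, 0.9]))  # three sorted latency samples
    i2 = int(len(a) * 0.02)                 # assumed definition; == 0 here
    print(a[-i2])                           # a[-0] == a[0] -> 0.5, the minimum
    print(a[-max(i2, 1)])                   # a[-1] -> 0.9, the intended upper tail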
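Note on the `MAXDIFF` environment variable introduced above: when it is set to 10 or
more, `max_diff` prints the types (and shapes) it compares, and its callers assert
that the measured absolute discrepancy is neither nan nor inf. A minimal sketch of
how it could be used; the `validate_model` arguments are illustrative, borrowed from
the unit tests at the top of this diff:

    import os

    os.environ["MAXDIFF"] = "10"  # read by the helpers at call time
    from onnx_diagnostic.torch_models.validate import validate_model

    summary, data = validate_model(
        "arnir0/Tiny-LLM",  # model id used in the unit tests
        do_run=True,        # run the model to collect expected outputs
        exporter="custom",  # any exporter listed in the docstring works here
    )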