diff --git a/_unittests/ut_torch_models/test_validate_whole_models1.py b/_unittests/ut_torch_models/test_validate_whole_models1.py
index 6a8d48ff..06c6fd88 100644
--- a/_unittests/ut_torch_models/test_validate_whole_models1.py
+++ b/_unittests/ut_torch_models/test_validate_whole_models1.py
@@ -122,6 +122,7 @@ def test_g_validate_model_onnx_dynamo_os_ort(self):
     @hide_stdout()
     @ignore_warnings(FutureWarning)
     @requires_experimental()
+    @requires_transformers("4.52")  # empty_cache None does not work
     def test_i_validate_model_custom(self):
         mid = "arnir0/Tiny-LLM"
         summary, data = validate_model(
@@ -150,6 +151,7 @@ def test_i_validate_model_custom(self):
     @requires_torch("2.7")
     @hide_stdout()
     @ignore_warnings(FutureWarning)
+    @requires_transformers("4.52")  # empty_cache None does not work
     @requires_experimental()
     def test_j_validate_model_custom_torch(self):
         mid = "arnir0/Tiny-LLM"
diff --git a/onnx_diagnostic/helpers/helper.py b/onnx_diagnostic/helpers/helper.py
index fa0e4168..670826af 100644
--- a/onnx_diagnostic/helpers/helper.py
+++ b/onnx_diagnostic/helpers/helper.py
@@ -1016,6 +1016,8 @@ def max_diff(
     You may use :func:`string_diff` to display the discrepancies in one string.
     """
+    if verbose >= 10:
+        print(f"[max_diff] {type(expected)} ? {type(got)}")
     if expected is None and got is None:
         return dict(abs=0, rel=0, sum=0, n=0, dnan=0)
@@ -1061,8 +1063,8 @@ def max_diff(
     if expected.__class__.__name__ == "CausalLMOutputWithPast":
         if verbose >= 6:
             print(
-                f"[max_diff] CausalLMOutputWithPast: {string_type(expected)} "
-                f"? {string_type(got)}"
+                f"[max_diff] CausalLMOutputWithPast: {string_type(expected, with_shape=True)} "
+                f"? {string_type(got, with_shape=True)}"
             )
         if got.__class__.__name__ == "CausalLMOutputWithPast":
             return max_diff(
diff --git a/onnx_diagnostic/torch_models/validate.py b/onnx_diagnostic/torch_models/validate.py
index abf816f9..5eb1eda5 100644
--- a/onnx_diagnostic/torch_models/validate.py
+++ b/onnx_diagnostic/torch_models/validate.py
@@ -1,11 +1,11 @@
-import gc
 import datetime
+import gc
 import inspect
 import os
 import pprint
 import sys
-from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
 import time
+from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
 import numpy as np
 import onnx
 import torch
@@ -273,8 +273,8 @@ def _quiet_or_not_quiet(
         summary[f"time_{suffix}_latency_std"] = a.std()
         summary[f"time_{suffix}_latency_min"] = a.min()
         summary[f"time_{suffix}_latency_max"] = a.max()
-        summary[f"time_{suffix}_latency_098"] = a[-i2]
-        summary[f"time_{suffix}_latency_095"] = a[-i5]
+        summary[f"time_{suffix}_latency_098"] = a[-max(i2, 1)]
+        summary[f"time_{suffix}_latency_095"] = a[-max(i5, 1)]
         summary[f"time_{suffix}_latency_005"] = a[i5]
         summary[f"time_{suffix}_latency_002"] = a[i2]
         summary[f"time_{suffix}_n"] = len(a)
@@ -323,128 +323,33 @@ def make_patch_kwargs(
     return patch_kwargs


-def validate_model(
-    model_id: str,
-    task: Optional[str] = None,
-    do_run: bool = False,
-    exporter: Optional[str] = None,
-    do_same: bool = False,
-    verbose: int = 0,
-    dtype: Optional[Union[str, torch.dtype]] = None,
-    device: Optional[Union[str, torch.device]] = None,
-    same_as_pretrained: bool = False,
-    use_pretrained: bool = False,
-    optimization: Optional[str] = None,
-    quiet: bool = False,
-    patch: Union[bool, str, Dict[str, bool]] = False,
-    rewrite: bool = False,
-    stop_if_static: int = 1,
-    dump_folder: Optional[str] = None,
-    drop_inputs: Optional[List[str]] = None,
-    ortfusiontype: Optional[str] = None,
-    input_options: Optional[Dict[str, Any]] = None,
-    model_options: Optional[Dict[str, Any]] = None,
-    exporter_options: Optional[Dict[str, Any]] = None,
-    subfolder: Optional[str] = None,
-    opset: Optional[int] = None,
-    runtime: str = "onnxruntime",
-    repeat: int = 1,
-    warmup: int = 0,
-    inputs2: int = 1,
-    output_names: Optional[List[str]] = None,
-    ort_logs: bool = False,
-    quiet_input_sets: Optional[Set[str]] = None,
-) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
-    """
-    Validates a model.
-    The function can also be called through the command line
-    :ref:`l-cmd-validate`.
-
-    :param model_id: model id to validate
-    :param task: task used to generate the necessary inputs,
-        can be left empty to use the default task for this model
-        if it can be determined
-    :param do_run: checks the model works with the defined inputs
-    :param exporter: exporter the model using this exporter,
-        available list: ``export-strict``, ``export-nostrict``, ...
-        see below
-    :param do_same: checks the discrepancies of the exported model
-    :param verbose: verbosity level
-    :param dtype: uses this dtype to check the model
-    :param device: do the verification on this device
-    :param same_as_pretrained: use a model equivalent to the trained,
-        this is not always possible
-    :param use_pretrained: use the trained model, not the untrained one
-    :param optimization: optimization to apply to the exported model,
-        depend on the the exporter
-    :param quiet: if quiet, catches exception if any issue
-    :param patch: applies patches (``patch_transformers=True, path_diffusers=True``)
-        if True before exporting
-        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`,
-        a string can be used to specify only one of them
-    :param rewrite: applies known rewriting (``patch_transformers=True``) before exporting,
-        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
-    :param stop_if_static: stops if a dynamic dimension becomes static,
-        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
-    :param dump_folder: dumps everything in a subfolder of this one
-    :param drop_inputs: drops this list of inputs (given their names)
-    :param ortfusiontype: runs ort fusion, the parameters defines the fusion type,
-        it accepts multiple values separated by ``|``,
-        see :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`
-    :param input_options: additional options to define the dummy inputs
-        used to export
-    :param model_options: additional options when creating the model such as
-        ``num_hidden_layers`` or ``attn_implementation``
-    :param exporter_options: additional options when exporting the model such as
-        ``report=True`` or ``verify=True``
-    :param subfolder: version or subfolders to uses when retrieving a model id
-    :param opset: onnx opset to use for the conversion
-    :param runtime: onnx runtime to use to check about discrepancies,
-        possible values ``onnxruntime``, ``torch``, ``orteval``,
-        ``orteval10``, ``ref`` only if `do_run` is true
-    :param repeat: number of time to measure the model
-    :param warmup: warmup the model first
-    :param inputs2: checks that other sets of inputs are running as well,
-        this ensures that the model does support dynamism, the value is used
-        as an increment to the first set of values (added to dimensions),
-        or an empty cache for example
-    :param output_names: output names the onnx exporter should use
-    :param ort_logs: increases onnxruntime verbosity when creating the session
-    :param quiet_input_sets: avoid raising an exception if the inputs belongs to that set
-        even if quiet is False
-    :return: two dictionaries, one with some metrics,
-        another one with whatever the function produces
-
-    The following environment variables can be used to print out some
-    information:
-
-    * ``PRINT_CONFIG``: prints the model configuration
-
-    The following exporters are available:
-
-    * ``export-nostrict``: run :func:`torch.export.export` (..., strict=False)
-    * ``onnx-dynamo``: run :func:`torch.onnx.export` (...),
-      models can be optimized with ``optimization`` in ``("ir", "os_ort")``
-    * ``modelbuilder``: use :epkg:`ModelBuilder` to builds the onnx model
-    * ``custom``: custom exporter (see :epkg:`experimental-experiment`),
-      models can be optimized with ``optimization`` in
-      ``("default", "default+onnxruntime", "default+os_ort", "default+onnxruntime+os_ort")``
-
-    The default runtime, :epkg:`onnxruntime` is used to validate a model and check the
-    exported model returns the same outputs as the original one, otherwise,
-    :class:`onnx_diagnostic.reference.TorchOnnxEvaluator`
-    if ``runtime == 'torch'`` or
-    :class:`onnx_diagnostic.reference.OnnxruntimeEvaluator`
-    if ``runtime == 'orteval'`` or
-    :class:`onnx_diagnostic.reference.ExtendedReferenceEvaluator`
-    if ``runtime == 'ref'``,
-    ``orteval10`` increases the verbosity.
-
-    .. versionchanged:: 0.7.13
-        *inputs2* not only means a second set of inputs but many
-        such as ``input_empty_cache``
-        which refers to a set of inputs using an empty cache.
-    """
+def _prepare_validation(
+    model_id,
+    subfolder,
+    same_as_pretrained,
+    use_pretrained,
+    patch,
+    rewrite,
+    do_run,
+    dtype,
+    device,
+    optimization,
+    quiet,
+    drop_inputs,
+    ortfusiontype,
+    stop_if_static,
+    exporter,
+    runtime,
+    inputs2,
+    input_options,
+    model_options,
+    exporter_options,
+    opset,
+    task,
+    verbose,
+    output_names,
+    dump_folder,
+):
     main_validation_begin = time.perf_counter()
     model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
         model_id,
@@ -530,6 +435,32 @@ def validate_model(
     summary["model_id"] = model_id
     summary["model_subfolder"] = subfolder or ""
+    return (
+        summary,
+        model_id,
+        subfolder,
+        same_as_pretrained,
+        use_pretrained,
+        dump_folder,
+        folder_name,
+        patch_kwargs,
+    )
+
+
+def _get_untrained_model_with_inputs(
+    summary,
+    model_id,
+    verbose,
+    task,
+    use_pretrained,
+    same_as_pretrained,
+    input_options,
+    model_options,
+    subfolder,
+    inputs2,
+    quiet,
+    dump_folder,
+):
     iop = input_options or {}
     mop = model_options or {}
     data = _quiet_or_not_quiet(
@@ -554,8 +485,6 @@ def validate_model(
         ),
     )
-    second_input_keys = [k for k in data if k.startswith("inputs") and k != "inputs"]
-
     if dump_folder:
         with open(os.path.join(dump_folder, "model_config.txt"), "w") as f:
             f.write(f"model_id: {model_id}\n------\n")
@@ -572,25 +501,45 @@ def validate_model(
             f.write(f"model_id: {model_id}\n------\n")
             f.write(pprint.pformat(dump_info))
-    if exporter == "modelbuilder":
-        # Models used with ModelBuilder do not like batch size > 1.
-        # Let's change that.
- for k in ["inputs", "inputs2"]: - if k not in data: - continue - if verbose: - print(f"[validate_model] set batch=1 for data[{k!r}]") - print(f"[validate_model] batch=1 === {string_type(data[k], with_shape=True)}") - cpl = CoupleInputsDynamicShapes( - tuple(), data[k], dynamic_shapes=data["dynamic_shapes"] - ) - with register_additional_serialization_functions(patch_transformers=True): # type: ignore[arg-type] - data[k] = cpl.change_dynamic_dimensions( - desired_values=dict(batch=1), only_desired=True - ) - if verbose: - print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}") + return data, iop, mop + +def _update_data_for_modelbuilder(data, verbose): + # Models used with ModelBuilder do not like batch size > 1. + # Let's change that. + for k in ["inputs", "inputs2"]: + if k not in data: + continue + if verbose: + print(f"[validate_model] set batch=1 for data[{k!r}]") + print(f"[validate_model] batch=1 === {string_type(data[k], with_shape=True)}") + cpl = CoupleInputsDynamicShapes( + tuple(), data[k], dynamic_shapes=data["dynamic_shapes"] + ) + with register_additional_serialization_functions(patch_transformers=True): # type: ignore[arg-type] + data[k] = cpl.change_dynamic_dimensions( + desired_values=dict(batch=1), only_desired=True + ) + if verbose: + print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}") + + +def _update_inputs_outputs( + data, + summary, + exporter, + iop, + mop, + dump_folder, + opset, + device, + dtype, + rewrite, + drop_inputs, + verbose, + second_input_keys, + model_id, +): # modelbuilder needs different treatments sometimes, so # we mark it for later usage. # for example, it has different past_kv ordering than @@ -677,7 +626,7 @@ def validate_model( for k in ["task", "size", "n_weights"]: summary[f"model_{k.replace('_','')}"] = data[k] summary["second_input_keys"] = ",".join(second_input_keys) - summary["model_inputs_options"] = str(input_options or "") + summary["model_inputs_options"] = str(iop or "") summary["model_inputs"] = string_type(data["inputs"], with_shape=True) summary["model_shapes"] = string_type(data["dynamic_shapes"]) summary["model_class"] = data["model"].__class__.__name__ @@ -694,6 +643,8 @@ def validate_model( ).replace(" ", "") summary["model_id"] = model_id + +def _verbose_validate(data, second_input_keys, verbose): if verbose: print("[validate_model] --") print(f"[validate_model] task={data['task']}") @@ -706,28 +657,21 @@ def validate_model( print(f"[validate_model] second_input_keys={second_input_keys}") print("[validate_model] --") - if do_run: - validation_begin = time.perf_counter() - - _validate_do_run_model( - data, summary, "inputs", "run", "run_expected", verbose, repeat, warmup, quiet - ) - if second_input_keys: - for k in second_input_keys: - _validate_do_run_model( - data, - summary, - k, - f"run2{k[6:]}", - f"run_expected2{k[6:]}", - verbose, - 1, - 0, - quiet, - ) - - summary["time_total_validation_torch"] = time.perf_counter() - validation_begin +def _call_exporter( + data, + summary, + exporter, + patch_kwargs, + stop_if_static, + verbose, + dump_folder, + quiet, + optimization, + do_run, + output_names, + exporter_options, +): if exporter: expop = exporter_options or {} if verbose: @@ -786,6 +730,8 @@ def validate_model( summary.update(summary_export) summary["time_total_exporter"] = time.perf_counter() - exporter_begin + +def _dump_onnx_model(data, summary, dump_folder, verbose, exporter, folder_name): dump_stats = None if dump_folder: if "exported_program" in data: @@ 
@@ -850,26 +796,392 @@ def validate_model(
     ):
         if verbose:
             print("[validate_model] -- done (final)")
-        if dump_stats:
-            with open(dump_stats, "w") as f:
-                for k, v in sorted(summary.items()):
-                    f.write(f":{k}:{v};\n")
+        return False, dump_stats
+    return True, dump_stats
+
+
+def validate_model(
+    model_id: str,
+    task: Optional[str] = None,
+    do_run: bool = False,
+    exporter: Optional[str] = None,
+    do_same: bool = False,
+    verbose: int = 0,
+    dtype: Optional[Union[str, torch.dtype]] = None,
+    device: Optional[Union[str, torch.device]] = None,
+    same_as_pretrained: bool = False,
+    use_pretrained: bool = False,
+    optimization: Optional[str] = None,
+    quiet: bool = False,
+    patch: Union[bool, str, Dict[str, bool]] = False,
+    rewrite: bool = False,
+    stop_if_static: int = 1,
+    dump_folder: Optional[str] = None,
+    drop_inputs: Optional[List[str]] = None,
+    ortfusiontype: Optional[str] = None,
+    input_options: Optional[Dict[str, Any]] = None,
+    model_options: Optional[Dict[str, Any]] = None,
+    exporter_options: Optional[Dict[str, Any]] = None,
+    subfolder: Optional[str] = None,
+    opset: Optional[int] = None,
+    runtime: str = "onnxruntime",
+    repeat: int = 1,
+    warmup: int = 0,
+    inputs2: int = 1,
+    output_names: Optional[List[str]] = None,
+    ort_logs: bool = False,
+    quiet_input_sets: Optional[Set[str]] = None,
+) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
+    """
+    Validates a model.
+    The function can also be called through the command line
+    :ref:`l-cmd-validate`.
+
+    :param model_id: model id to validate
+    :param task: task used to generate the necessary inputs,
+        can be left empty to use the default task for this model
+        if it can be determined
+    :param do_run: checks the model works with the defined inputs
+    :param exporter: exports the model using this exporter,
+        available list: ``export-strict``, ``export-nostrict``, ...
+        see below
+    :param do_same: checks the discrepancies of the exported model
+    :param verbose: verbosity level
+    :param dtype: uses this dtype to check the model
+    :param device: does the verification on this device
+    :param same_as_pretrained: use a model equivalent to the trained one,
+        this is not always possible
+    :param use_pretrained: use the trained model, not the untrained one
+    :param optimization: optimization to apply to the exported model,
+        depends on the exporter
+    :param quiet: if quiet, catches exceptions if any issue arises
+    :param patch: applies patches (``patch_transformers=True, patch_diffusers=True``)
+        if True before exporting,
+        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`,
+        a string can be used to specify only one of them
+    :param rewrite: applies known rewriting (``patch_transformers=True``) before exporting,
+        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
+    :param stop_if_static: stops if a dynamic dimension becomes static,
+        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
+    :param dump_folder: dumps everything in a subfolder of this one
+    :param drop_inputs: drops this list of inputs (given their names)
+    :param ortfusiontype: runs ort fusion, the parameter defines the fusion type,
+        it accepts multiple values separated by ``|``,
+        see :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`
+    :param input_options: additional options to define the dummy inputs
+        used to export
+    :param model_options: additional options when creating the model such as
+        ``num_hidden_layers`` or ``attn_implementation``
+    :param exporter_options: additional options when exporting the model such as
+        ``report=True`` or ``verify=True``
+    :param subfolder: version or subfolder to use when retrieving a model id
+    :param opset: onnx opset to use for the conversion
+    :param runtime: onnx runtime to use to check for discrepancies,
+        possible values ``onnxruntime``, ``torch``, ``orteval``,
+        ``orteval10``, ``ref``; used only if `do_run` is true
+    :param repeat: number of times to measure the model
+    :param warmup: warms up the model first
+    :param inputs2: checks that other sets of inputs run as well;
+        this ensures that the model does support dynamism, the value is used
+        as an increment to the first set of values (added to dimensions),
+        or an empty cache for example
+    :param output_names: output names the onnx exporter should use
+    :param ort_logs: increases onnxruntime verbosity when creating the session
+    :param quiet_input_sets: avoids raising an exception if the inputs belong to that set
+        even if quiet is False
+    :return: two dictionaries, one with some metrics,
+        another one with whatever the function produces
+
+    The following environment variables can be used to print out some
+    information:
+
+    * ``PRINT_CONFIG``: prints the model configuration
+
+    The following exporters are available:
+
+    * ``export-nostrict``: run :func:`torch.export.export` (..., strict=False)
+    * ``onnx-dynamo``: run :func:`torch.onnx.export` (...),
+      models can be optimized with ``optimization`` in ``("ir", "os_ort")``
+    * ``modelbuilder``: use :epkg:`ModelBuilder` to build the onnx model
+    * ``custom``: custom exporter (see :epkg:`experimental-experiment`),
+      models can be optimized with ``optimization`` in
+      ``("default", "default+onnxruntime", "default+os_ort", "default+onnxruntime+os_ort")``
+
+    The default runtime, :epkg:`onnxruntime`, is used to validate a model and check that
+    the exported model returns the same outputs as the original one; otherwise,
+    :class:`onnx_diagnostic.reference.TorchOnnxEvaluator`
+    if ``runtime == 'torch'`` or
+    :class:`onnx_diagnostic.reference.OnnxruntimeEvaluator`
+    if ``runtime == 'orteval'`` or
+    :class:`onnx_diagnostic.reference.ExtendedReferenceEvaluator`
+    if ``runtime == 'ref'``,
+    ``orteval10`` increases the verbosity.
+
+    .. versionchanged:: 0.7.13
+        *inputs2* no longer means only a second set of inputs but possibly many,
+        such as ``input_empty_cache``,
+        which refers to a set of inputs using an empty cache.
+    """
+    main_validation_begin = time.perf_counter()
+    cont, summary, data, dump_stats, second_input_keys = _validate_model_step1(
+        model_id=model_id,
+        do_same=do_same,
+        do_run=do_run,
+        patch=patch,
+        rewrite=rewrite,
+        dtype=dtype,
+        device=device,
+        optimization=optimization,
+        quiet=quiet,
+        drop_inputs=drop_inputs,
+        ortfusiontype=ortfusiontype,
+        stop_if_static=stop_if_static,
+        exporter=exporter,
+        verbose=verbose,
+        task=task,
+        runtime=runtime,
+        inputs2=inputs2,
+        input_options=input_options,
+        model_options=model_options,
+        exporter_options=exporter_options,
+        opset=opset,
+        output_names=output_names,
+        repeat=repeat,
+        warmup=warmup,
+        dump_folder=dump_folder,
+        subfolder=subfolder,
+        use_pretrained=use_pretrained,
+        same_as_pretrained=same_as_pretrained,
+    )
+    if dump_folder:
+        with open(dump_stats, "w") as f:
+            for k, v in sorted(summary.items()):
+                f.write(f":{k}:{v};\n")
+    if not cont:
         return summary, data
+    data, summary = _clean_data_remove_model_and_proto(data, summary)
+    _validate_model_step2(
+        summary=summary,
+        data=data,
+        do_run=do_run,
+        quiet=quiet,
+        verbose=verbose,
+        runtime=runtime,
+        repeat=repeat,
+        warmup=warmup,
+        second_input_keys=second_input_keys,
+        ort_logs=ort_logs,
+        quiet_input_sets=quiet_input_sets,
+        ortfusiontype=ortfusiontype,
+        model_id=model_id,
+    )
+
+    summary["time_total"] = time.perf_counter() - main_validation_begin
+
+    if verbose:
+        print("[validate_model] -- done (final)")
+    if dump_stats:
+        with open(dump_stats, "w") as f:
+            for k, v in sorted(summary.items()):
+                f.write(f":{k}:{v};\n")
+    return summary, data
+
+
+def _clean_data_remove_model_and_proto(data, summary):
+    assert isinstance(data, dict) and isinstance(summary, dict)
+    data = _clean_data_remove_model_and_proto_(data)
+    summary = _clean_data_remove_model_and_proto_(summary)
+    gc.collect()
+    return data, summary
+
+
+def _clean_data_remove_model_and_proto_(obj):
+    if type(obj) is dict:
+        # do not use isinstance, otherwise CausalLMOutputWithPast becomes a dictionary
+        return {k: _clean_data_remove_model_and_proto_(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_clean_data_remove_model_and_proto_(v) for v in obj]
+    if isinstance(obj, tuple):
+        return tuple(_clean_data_remove_model_and_proto_(v) for v in obj)
+    if isinstance(obj, set):
+        return {_clean_data_remove_model_and_proto_(v) for v in obj}
+    if isinstance(obj, (torch.nn.Module, onnx.ModelProto)):
+        return None
+    return obj
+
+
+def _validate_model_step1(
+    model_id,
+    do_same,
+    do_run,
+    patch,
+    rewrite,
+    dtype,
+    device,
+    optimization,
+    quiet,
+    drop_inputs,
+    ortfusiontype,
+    stop_if_static,
+    exporter,
+    verbose,
+    task,
+    runtime,
+    inputs2,
+    input_options,
+    model_options,
+    exporter_options,
+    opset,
+    output_names,
+    repeat,
+    warmup,
+    dump_folder,
+    subfolder,
+    use_pretrained,
+    same_as_pretrained,
+):
+    assert not do_same or do_run, (
+        f"Discrepancies cannot be measured if the model is not run, "
+        f"do_run={do_run}, do_same={do_same}"
+    )
+    (
+        summary,
+        model_id,
+        subfolder,
+        same_as_pretrained,
+        use_pretrained,
+        dump_folder,
+        folder_name,
+        patch_kwargs,
+    ) = _prepare_validation(
+        model_id=model_id,
+        subfolder=subfolder,
+        same_as_pretrained=same_as_pretrained,
+        use_pretrained=use_pretrained,
+        patch=patch,
+        rewrite=rewrite,
+        do_run=do_run,
+        dtype=dtype,
+        device=device,
+        optimization=optimization,
+        quiet=quiet,
+        drop_inputs=drop_inputs,
+        ortfusiontype=ortfusiontype,
+        stop_if_static=stop_if_static,
+        exporter=exporter,
+        runtime=runtime,
+        inputs2=inputs2,
+        input_options=input_options,
+        model_options=model_options,
+        exporter_options=exporter_options,
+        opset=opset,
+        task=task,
+        verbose=verbose,
+        output_names=output_names,
+        dump_folder=dump_folder,
+    )
+
+    data, iop, mop = _get_untrained_model_with_inputs(
+        summary=summary,
+        model_id=model_id,
+        verbose=verbose,
+        task=task,
+        use_pretrained=use_pretrained,
+        same_as_pretrained=same_as_pretrained,
+        input_options=input_options,
+        model_options=model_options,
+        subfolder=subfolder,
+        inputs2=inputs2,
+        quiet=quiet,
+        dump_folder=dump_folder,
+    )
+
+    second_input_keys = [k for k in data if k.startswith("inputs") and k != "inputs"]
+    if exporter == "modelbuilder":
+        _update_data_for_modelbuilder(data, verbose)
+
+    _update_inputs_outputs(
+        data=data,
+        summary=summary,
+        exporter=exporter,
+        iop=iop,
+        mop=mop,
+        dump_folder=dump_folder,
+        opset=opset,
+        device=device,
+        dtype=dtype,
+        rewrite=rewrite,
+        drop_inputs=drop_inputs,
+        verbose=verbose,
+        second_input_keys=second_input_keys,
+        model_id=model_id,
+    )
+
+    _verbose_validate(data, second_input_keys, verbose)

     if do_run:
-        # Let's move the model to CPU to make sure it frees GPU memory.
-        if verbose:
-            # It does not really work for the time being and the model
-            # gets loaded twice, one by torch, one by onnxruntime
-            print("[validation_model] -- delete the model")
-        for key in ["model", "onnx_program", "config"]:
-            if key in data:
-                del data[key]
-        if device is not None and "cuda" in str(device).lower():
-            torch.cuda.empty_cache()
-        gc.collect()
-        print("[validation_model] -- done")
+        validation_begin = time.perf_counter()
+
+        _validate_do_run_model(
+            data, summary, "inputs", "run", "run_expected", verbose, repeat, warmup, quiet
+        )
+        if second_input_keys:
+            for k in second_input_keys:
+                _validate_do_run_model(
+                    data,
+                    summary,
+                    k,
+                    f"run2{k[6:]}",
+                    f"run_expected2{k[6:]}",
+                    verbose,
+                    1,
+                    0,
+                    quiet,
+                )
+
+        summary["time_total_validation_torch"] = time.perf_counter() - validation_begin
+    _call_exporter(
+        data=data,
+        summary=summary,
+        exporter=exporter,
+        patch_kwargs=patch_kwargs,
+        stop_if_static=stop_if_static,
+        verbose=verbose,
+        dump_folder=dump_folder,
+        quiet=quiet,
+        optimization=optimization,
+        do_run=do_run,
+        output_names=output_names,
+        exporter_options=exporter_options,
+    )
+
+    cont, dump_stats = _dump_onnx_model(
+        data=data,
+        summary=summary,
+        dump_folder=dump_folder,
+        verbose=verbose,
+        exporter=exporter,
+        folder_name=folder_name,
+    )
+    return cont, summary, data, dump_stats, second_input_keys
+
+
+def _validate_model_step2(
+    summary,
+    data,
+    do_run,
+    quiet,
+    verbose,
+    runtime,
+    repeat,
+    warmup,
+    second_input_keys,
+    ort_logs,
+    quiet_input_sets,
+    ortfusiontype,
+    model_id,
+):
+    if do_run:
         validation_begin = time.perf_counter()
         summary_valid, data = validate_onnx_model(
             data=data,
@@ -948,16 +1260,6 @@ def validate_model(
     summary.update(summary_valid)
     _compute_final_statistics(summary)
-    summary["time_total"] = time.perf_counter() - main_validation_begin
-
-    if verbose:
-        print("[validate_model] -- done (final)")
-    if dump_stats:
-        # Dumps again the statistics.
-        with open(dump_stats, "w") as f:
-            for k, v in sorted(summary.items()):
-                f.write(f":{k}:{v};\n")
-
     return summary, data


 def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
@@ -1041,7 +1343,7 @@ def _validate_do_run_model(
     summary[expected_tag] = string_type(expected, with_shape=True)
     if verbose:
-        print(f"[validate_model] done ([{tag}])")
+        print(f"[validate_model] done ([{tag}]) - {string_type(expected, with_shape=True)}")
     data[expected_tag] = expected
     assert hash_inputs == string_type(data[key], with_shape=True), (
         f"The model execution did modified the inputs:\n"
@@ -1051,7 +1353,6 @@ def _validate_do_run_model(


 def _validate_do_run_exported_program(data, summary, verbose, quiet):
-
     # We run a second time the model to check the patch did not
     # introduce any discrepancies
     if verbose:
@@ -1076,7 +1377,13 @@ def _validate_do_run_exported_program(data, summary, verbose, quiet):
     if "ERR_run_patched" in summary:
         return summary, data
-    disc = max_diff(data["run_expected"], expected)
+    verbose_diff = int(os.environ.get("MAXDIFF", "0"))
+    if verbose_diff >= 10:
+        print("[_validate_do_run_exported_program] with inputs_export")
+    disc = max_diff(data["run_expected"], expected, verbose=verbose_diff)
+    assert not verbose_diff or (
+        not np.isnan(disc["abs"]) and not np.isinf(disc["abs"])
+    ), f"unexpected nan or inf in the discrepancies, disc={disc}"
     for k, v in disc.items():
         summary[f"disc_patched_{k}"] = str(v)
     if verbose:
@@ -1321,7 +1628,14 @@ def call_torch_export_export(
     if "ERR_export_export" in summary:
         return summary, data
-    disc = max_diff(data["run_expected"], expected)
+    verbose_diff = int(os.environ.get("MAXDIFF", "0"))
+    if verbose_diff >= 10:
+        print("[call_torch_export_export] with inputs_export")
+    disc = max_diff(data["run_expected"], expected, verbose=verbose_diff)
+    assert not verbose_diff or (
+        not np.isnan(disc["abs"]) and not np.isinf(disc["abs"])
+    ), f"unexpected nan or inf in the discrepancies, disc={disc}"
+
     for k, v in disc.items():
         summary[f"disc_exported_{k}"] = str(v)
     if verbose:
@@ -1541,7 +1855,16 @@ def _mk(key, flavour=flavour):
         print(f"[validate_onnx_model] got={string_type(got, with_shape=True)}")
     # compute discrepancies
-    disc = max_diff(data[k_expected], got, flatten=True)
+    verbose_diff = int(os.environ.get("MAXDIFF", "0"))
+    if verbose_diff >= 10:
+        print(
+            f"[validate_onnx_model] k_input={k_input!r}, "
+            f"k_expected={k_expected!r}, suffix={suffix!r}"
+        )
+    disc = max_diff(data[k_expected], got, flatten=True, verbose=verbose_diff)
+    assert not verbose_diff or (
+        not np.isnan(disc["abs"]) and not np.isinf(disc["abs"])
+    ), f"unexpected nan or inf in the discrepancies, disc={disc}"
     if verbose:
         print(f"[validate_onnx_model] discrepancies={string_diff(disc)}")
     for k, v in disc.items():
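
Note on the `max(i2, 1)` guard in `_quiet_or_not_quiet`: `i2` and `i5` are quantile
indices whose computation is not part of this diff; the sketch below assumes they are
derived as a small fraction of the number of sorted latency samples. With few samples
the index can be 0, and since `a[-0]` is `a[0]`, the reported `latency_098` silently
became the minimum instead of the upper quantile. A minimal sketch:

    import numpy as np

    a = np.sort(np.array([0.5, 0.7, 0.9]))  # three sorted latency samples
    i2 = int(len(a) * 0.02)                 # assumed definition; == 0 here
    print(a[-i2])                           # a[-0] == a[0] -> 0.5, the minimum
    print(a[-max(i2, 1)])                   # a[-1] -> 0.9, the intended upper tail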
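Note on the `MAXDIFF` environment variable introduced above: when it is set to 10 or
more, `max_diff` prints the types (and shapes) it compares, and its callers assert
that the measured absolute discrepancy is neither nan nor inf. A minimal sketch of
how it could be used; the `validate_model` arguments are illustrative, borrowed from
the unit tests at the top of this diff:

    import os

    os.environ["MAXDIFF"] = "10"  # read by the helpers at call time
    from onnx_diagnostic.torch_models.validate import validate_model

    summary, data = validate_model(
        "arnir0/Tiny-LLM",  # model id used in the unit tests
        do_run=True,        # run the model to collect expected outputs
        exporter="custom",  # any exporter listed in the docstring works here
    )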