diff --git a/_unittests/ut_torch_models/test_hghub_model.py b/_unittests/ut_torch_models/test_hghub_model.py index 1b051a50..39657e8b 100644 --- a/_unittests/ut_torch_models/test_hghub_model.py +++ b/_unittests/ut_torch_models/test_hghub_model.py @@ -17,7 +17,7 @@ class TestHuggingFaceHubModel(ExtTestCase): @hide_stdout() def test_get_untrained_model_with_inputs_tiny_llm(self): mid = "arnir0/Tiny-LLM" - data = get_untrained_model_with_inputs(mid, verbose=1) + data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=0) self.assertEqual( set(data), { diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py index 6170d34c..46a765fe 100644 --- a/onnx_diagnostic/_command_lines_parser.py +++ b/onnx_diagnostic/_command_lines_parser.py @@ -349,6 +349,15 @@ def get_parser_validate() -> ArgumentParser: python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\ --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\ --dtype float16 --device cuda --export modelbuilder + + position_ids is usually not needed, they can be removed by adding: + + --drop position_ids + + The behaviour may be modified compare the original configuration, + the following argument can be rope_scaling to dynamic: + + --mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\"" """ ), formatter_class=RawTextHelpFormatter, @@ -403,10 +412,12 @@ def get_parser_validate() -> ArgumentParser: ) parser.add_argument( "--inputs2", - default=True, - action=BooleanOptionalAction, + default=1, + type=int, help="Validates the model on a second set of inputs\n" - "to check the exported model supports dynamism.", + "to check the exported model supports dynamism. The values is used " + "as an increment to the first set of inputs. A high value may trick " + "a different behavior in the model and missed by the exporter.", ) parser.add_argument( "--runtime", @@ -422,7 +433,8 @@ def get_parser_validate() -> ArgumentParser: parser.add_argument( "--drop", help="Drops the following inputs names, it should be a list\n" - "with comma separated values.", + "with comma separated values, example:\n" + "--drop position_ids", ) parser.add_argument( "--opset", diff --git a/onnx_diagnostic/tasks/automatic_speech_recognition.py b/onnx_diagnostic/tasks/automatic_speech_recognition.py index f1b4ae6b..b6da7e7a 100644 --- a/onnx_diagnostic/tasks/automatic_speech_recognition.py +++ b/onnx_diagnostic/tasks/automatic_speech_recognition.py @@ -33,7 +33,7 @@ def get_inputs( head_dim: int, batch_size: int = 2, sequence_length: int = 30, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -132,6 +132,9 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, @@ -144,7 +147,8 @@ def get_inputs( decoder_layers=decoder_layers, head_dim=head_dim, batch_size=batch_size + 1, - sequence_length=sequence_length + 1, + sequence_length=sequence_length + add_second_input, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/feature_extraction.py b/onnx_diagnostic/tasks/feature_extraction.py index 4bac2aed..1d49147c 100644 --- a/onnx_diagnostic/tasks/feature_extraction.py +++ b/onnx_diagnostic/tasks/feature_extraction.py @@ -22,7 +22,7 @@ def get_inputs( batch_size: int, sequence_length: int, dummy_max_token_id: int, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -52,12 +52,16 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, batch_size=batch_size + 1, - sequence_length=sequence_length + 1, + sequence_length=sequence_length + add_second_input, dummy_max_token_id=dummy_max_token_id, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/fill_mask.py b/onnx_diagnostic/tasks/fill_mask.py index 63a05811..167993d2 100644 --- a/onnx_diagnostic/tasks/fill_mask.py +++ b/onnx_diagnostic/tasks/fill_mask.py @@ -22,7 +22,7 @@ def get_inputs( batch_size: int, sequence_length: int, dummy_max_token_id: int, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -54,12 +54,16 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, batch_size=batch_size + 1, - sequence_length=sequence_length + 1, + sequence_length=sequence_length + add_second_input, dummy_max_token_id=dummy_max_token_id, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/image_classification.py b/onnx_diagnostic/tasks/image_classification.py index cc14e4a3..3a993399 100644 --- a/onnx_diagnostic/tasks/image_classification.py +++ b/onnx_diagnostic/tasks/image_classification.py @@ -34,7 +34,7 @@ def get_inputs( input_channels: int, batch_size: int = 2, dynamic_rope: bool = False, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -75,14 +75,18 @@ def get_inputs( shapes["interpolate_pos_encoding"] = None # type: ignore[assignment] res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, - input_width=input_width + 1, - input_height=input_height + 1, + input_width=input_width + add_second_input, + input_height=input_height + add_second_input, input_channels=input_channels, batch_size=batch_size + 1, dynamic_rope=dynamic_rope, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/image_text_to_text.py b/onnx_diagnostic/tasks/image_text_to_text.py index 4400b772..e7b17a17 100644 --- a/onnx_diagnostic/tasks/image_text_to_text.py +++ b/onnx_diagnostic/tasks/image_text_to_text.py @@ -32,7 +32,7 @@ def get_inputs( sequence_length2: int = 3, n_images: int = 2, dynamic_rope: bool = False, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -105,6 +105,9 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, @@ -116,10 +119,11 @@ def get_inputs( height=height, num_channels=num_channels, batch_size=batch_size + 1, - sequence_length=sequence_length + 1, + sequence_length=sequence_length + add_second_input, sequence_length2=sequence_length2 + 1, n_images=n_images + 1, dynamic_rope=dynamic_rope, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/mixture_of_expert.py b/onnx_diagnostic/tasks/mixture_of_expert.py index be6b7828..1376ade2 100644 --- a/onnx_diagnostic/tasks/mixture_of_expert.py +++ b/onnx_diagnostic/tasks/mixture_of_expert.py @@ -41,7 +41,7 @@ def get_inputs( sequence_length2: int = 3, n_images: int = 2, dynamic_rope: bool = False, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ diff --git a/onnx_diagnostic/tasks/object_detection.py b/onnx_diagnostic/tasks/object_detection.py index d8ce8073..e85e6355 100644 --- a/onnx_diagnostic/tasks/object_detection.py +++ b/onnx_diagnostic/tasks/object_detection.py @@ -27,7 +27,7 @@ def get_inputs( input_channels: int, batch_size: int = 2, dynamic_rope: bool = False, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -65,14 +65,18 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, - input_width=input_width + 1, - input_height=input_height + 1, + input_width=input_width + add_second_input, + input_height=input_height + add_second_input, input_channels=input_channels, batch_size=batch_size + 1, dynamic_rope=dynamic_rope, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/sentence_similarity.py b/onnx_diagnostic/tasks/sentence_similarity.py index 4e304c47..5c7b7b04 100644 --- a/onnx_diagnostic/tasks/sentence_similarity.py +++ b/onnx_diagnostic/tasks/sentence_similarity.py @@ -22,7 +22,7 @@ def get_inputs( batch_size: int, sequence_length: int, dummy_max_token_id: int, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -54,12 +54,16 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, batch_size=batch_size + 1, - sequence_length=sequence_length + 1, + sequence_length=sequence_length + add_second_input, dummy_max_token_id=dummy_max_token_id, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/summarization.py b/onnx_diagnostic/tasks/summarization.py index 3b2231a1..4384f29d 100644 --- a/onnx_diagnostic/tasks/summarization.py +++ b/onnx_diagnostic/tasks/summarization.py @@ -29,7 +29,7 @@ def get_inputs( batch_size: int = 2, sequence_length: int = 30, sequence_length2: int = 3, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -144,6 +144,9 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, @@ -154,8 +157,9 @@ def get_inputs( head_dim_encoder=head_dim_encoder, head_dim_decoder=head_dim_decoder, batch_size=batch_size + 1, - sequence_length=sequence_length + 1, + sequence_length=sequence_length + add_second_input, sequence_length2=sequence_length2 + 1, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/text2text_generation.py b/onnx_diagnostic/tasks/text2text_generation.py index 6dd0e3b6..989782f5 100644 --- a/onnx_diagnostic/tasks/text2text_generation.py +++ b/onnx_diagnostic/tasks/text2text_generation.py @@ -30,7 +30,7 @@ def get_inputs( batch_size: int = 2, sequence_length: int = 30, sequence_length2: int = 3, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -149,6 +149,9 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, @@ -160,8 +163,9 @@ def get_inputs( head_dim_decoder=head_dim_decoder, encoder_dim=encoder_dim, batch_size=batch_size + 1, - sequence_length=sequence_length + 1, + sequence_length=sequence_length + add_second_input, sequence_length2=sequence_length2 + 1, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/text_classification.py b/onnx_diagnostic/tasks/text_classification.py index e3a1d727..14866f7c 100644 --- a/onnx_diagnostic/tasks/text_classification.py +++ b/onnx_diagnostic/tasks/text_classification.py @@ -22,7 +22,7 @@ def get_inputs( batch_size: int, sequence_length: int, dummy_max_token_id: int, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -54,12 +54,16 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, batch_size=batch_size + 1, - sequence_length=sequence_length + 1, + sequence_length=sequence_length + add_second_input, dummy_max_token_id=dummy_max_token_id, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/text_generation.py b/onnx_diagnostic/tasks/text_generation.py index 873fa4fc..599062bc 100644 --- a/onnx_diagnostic/tasks/text_generation.py +++ b/onnx_diagnostic/tasks/text_generation.py @@ -72,7 +72,7 @@ def get_inputs( num_key_value_heads: Optional[int] = None, head_dim: Optional[int] = None, cls_cache: Optional[Union[type, str]] = None, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -260,13 +260,15 @@ def get_inputs( config=config, dummy_max_token_id=dummy_max_token_id, num_hidden_layers=num_hidden_layers, - batch_size=batch_size + 1, + batch_size=(batch_size + 1) if add_second_input > 0 else 1, sequence_length=sequence_length + 1, - sequence_length2=sequence_length2 + 1, + sequence_length2=sequence_length2 + + (add_second_input if add_second_input > 0 else -add_second_input), dynamic_rope=dynamic_rope, num_key_value_heads=num_key_value_heads, head_dim=head_dim, cls_cache=cls_cache, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/text_to_image.py b/onnx_diagnostic/tasks/text_to_image.py index 983d9bec..fd49fe5d 100644 --- a/onnx_diagnostic/tasks/text_to_image.py +++ b/onnx_diagnostic/tasks/text_to_image.py @@ -25,7 +25,7 @@ def get_inputs( in_channels: int, sample_size: int, cross_attention_dim: int, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -58,15 +58,19 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, batch_size=batch_size + 1, sequence_length=sequence_length, - cache_length=cache_length + 1, + cache_length=cache_length + add_second_input, in_channels=in_channels, sample_size=sample_size, cross_attention_dim=cross_attention_dim, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/tasks/zero_shot_image_classification.py b/onnx_diagnostic/tasks/zero_shot_image_classification.py index 83163552..61fee29e 100644 --- a/onnx_diagnostic/tasks/zero_shot_image_classification.py +++ b/onnx_diagnostic/tasks/zero_shot_image_classification.py @@ -34,7 +34,7 @@ def get_inputs( input_height: int = 224, input_channels: int = 3, batch_size_image=3, - add_second_input: bool = False, + add_second_input: int = 1, **kwargs, # unused ): """ @@ -87,16 +87,20 @@ def get_inputs( ) res = dict(inputs=inputs, dynamic_shapes=shapes) if add_second_input: + assert ( + add_second_input > 0 + ), f"Not implemented for add_second_input={add_second_input}." res["inputs2"] = get_inputs( model=model, config=config, dummy_max_token_id=dummy_max_token_id, batch_size=batch_size + 1, - sequence_length=sequence_length + 1, + sequence_length=sequence_length + add_second_input, input_width=input_width, input_height=input_height, input_channels=input_channels, batch_size_image=batch_size_image + 1, + add_second_input=0, **kwargs, )["inputs"] return res diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py index 1961e049..74531560 100644 --- a/onnx_diagnostic/torch_models/hghub/model_inputs.py +++ b/onnx_diagnostic/torch_models/hghub/model_inputs.py @@ -26,7 +26,7 @@ def get_untrained_model_with_inputs( use_pretrained: bool = False, same_as_pretrained: bool = False, use_preinstalled: bool = True, - add_second_input: bool = False, + add_second_input: int = 1, subfolder: Optional[str] = None, use_only_preinstalled: bool = False, ) -> Dict[str, Any]: diff --git a/onnx_diagnostic/torch_models/validate.py b/onnx_diagnostic/torch_models/validate.py index 179b1f05..989a8101 100644 --- a/onnx_diagnostic/torch_models/validate.py +++ b/onnx_diagnostic/torch_models/validate.py @@ -156,6 +156,12 @@ def version_summary() -> Dict[str, Union[int, float, str]]: "version_torch": torch.__version__, "version_numpy": numpy.__version__, } + try: + import scipy + + summary["version_scipy"] = getattr(scipy, "__version__", "?") + except ImportError: + pass try: import transformers @@ -180,6 +186,12 @@ def version_summary() -> Dict[str, Union[int, float, str]]: summary["version_onnxruntime"] = getattr(onnxruntime, "__version__", "?") except ImportError: pass + try: + import onnx_ir + + summary["version_onnx_ir"] = getattr(onnx_ir, "__version__", "?") + except ImportError: + pass import onnx_diagnostic summary["version_onnx_diagnostic"] = onnx_diagnostic.__version__ @@ -275,7 +287,7 @@ def validate_model( runtime: str = "onnxruntime", repeat: int = 1, warmup: int = 0, - inputs2: bool = True, + inputs2: int = 1, ) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]: """ Validates a model. @@ -324,7 +336,8 @@ def validate_model( :param repeat: number of time to measure the model :param warmup: warmup the model first :param inputs2: checks that the second set of inputs is reunning as well, - this ensures that the model does support dynamism + this ensures that the model does support dynamism, the value is used + as an increment to the first set of values (added to dimensions) :return: two dictionaries, one with some metrics, another one with whatever the function produces @@ -1053,7 +1066,7 @@ def validate_onnx_model( runtime: str = "onnxruntime", repeat: int = 1, warmup: int = 0, - inputs2: bool = True, + inputs2: int = 1, ) -> Tuple[Dict[str, Any], Dict[str, Any]]: """ Verifies that an onnx model produces the same @@ -1069,8 +1082,9 @@ def validate_onnx_model( :param runtime: onnx runtime to use, onnxruntime or torch :param repeat: run that number of times the model :param warmup: warmup the model - :param inputs: to validate the model on the second input set - to make sure the exported model supports dynamism + :param inputs2: to validate the model on the second input set + to make sure the exported model supports dynamism, the value is + used as an increment added to the first set of inputs (added to dimensions) :return: two dictionaries, one with some metrics, another one with whatever the function produces """