From a1a65c33a19296a95c81c7758eaf3c434677a163 Mon Sep 17 00:00:00 2001 From: Ganesan Ramalingam Date: Thu, 26 Dec 2024 11:17:26 -0800 Subject: [PATCH 1/6] Allow abstraction over initializers --- onnxscript/backend/onnx_export.py | 68 +++++++++++++++++++++++++++---- tools/onnx2script.py | 13 ++++-- 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/onnxscript/backend/onnx_export.py b/onnxscript/backend/onnx_export.py index c8a6a9a640..040dd0177c 100644 --- a/onnxscript/backend/onnx_export.py +++ b/onnxscript/backend/onnx_export.py @@ -251,7 +251,7 @@ class Exporter: """Class used for recursive traversal of Proto structures.""" def __init__( - self, rename: bool, use_operators: bool = False, inline_const: bool = False + self, rename: bool, use_operators: bool = False, inline_const: bool = False, skip_initializers: bool = False ) -> None: self.use_operators = use_operators if rename: @@ -266,6 +266,8 @@ def __init__( # _name_remappings: used to undo the SSA-renaming in ONNX control-flow ops. # We map the multiple SSA-variants back to the same Python variable name. self._name_remappings: list[dict[str, str]] = [] + self.skip_initializers = skip_initializers + self.skipped_initializers: list[onnx.TensorProto] = [] def _handle_attrname_conflict(self, renamer): """Add ref-attr-name-conflict handling logic to renaming function.""" @@ -338,6 +340,9 @@ def _translate_graph_body(self, graph, opsets, indent=0): code = [] if hasattr(graph, "initializer"): for init in graph.initializer: + if self.skip_initializers: + self.skipped_initializers.append(init) + continue node = make_node( "Constant", [], @@ -684,15 +689,63 @@ def _translate_graph(self, model: onnx.ModelProto, function_name: Optional[str]) def add(line: str) -> None: result.append(line) - add("@script()") - add(f"def {function_name}{_translate_signature(graph.input, graph.output)}") + if self.skip_initializers: + indent_level = 2 + indent = _SINGLE_INDENT + else: + indent_level = 1 + indent = "" + add(f"{indent}@script()") + add(f"{indent}def {function_name}{_translate_signature(graph.input, graph.output)}") + indent = indent + _SINGLE_INDENT doc = graph.doc_string if doc: - add(f' """{doc}"""') - add(self._translate_graph_body(graph, opsets, indent=1)) + add(f'{indent}"""{doc}"""') + add(self._translate_graph_body(graph, opsets, indent=indent_level)) return_values = ", ".join(self._translate_onnx_var(x) for x in graph.output) - add(f" return {return_values}") - return "\n".join(result) + add(f"{indent}return {return_values}") + script = "\n".join(result) + if self.skipped_initializers: + return self._substitute_initializers(script, function_name) + return script + + def _substitute_initializers(self, script: str, script_function_name: str) -> str: + init_names = [self._translate_onnx_var(x.name) for x in self.skipped_initializers] + # Formal parameters representing initializers (single level indentation) + initializers_as_params = "\n".join( + f"{_SINGLE_INDENT}{x}," for x in init_names + ) + def generate_rand(x: TensorProto) -> str: + name = self._translate_onnx_var(x.name) + shape = ",".join(str(d) for d in x.dims) + if x.data_type != TensorProto.FLOAT: + raise NotImplementedError( + f"Unable to generate random initializer for data type {x.data_type}." 
+ ) + return f"{_SINGLE_INDENT}{name} = numpy.random.rand({shape}).astype(numpy.float32)" + random_initializer_values = "\n".join( + generate_rand(x) for x in self.skipped_initializers + ) + # Actual parameter values for initializers (double level indentation) + indented_initializers_as_params = "\n".join( + f"{_SINGLE_INDENT}{_SINGLE_INDENT}{x}," for x in init_names + ) + return """ +def make_model( +{initializers_as_params}): +): + {script} + + model = {script_function_name}.to_model_proto() + return model + +def make_model_with_random_weights(): +{random_initializer_values} + model = make_model( +{indented_initializers_as_params} + ) + return model +""" def _import_onnx_types( self, proto: onnx.ModelProto | onnx.GraphProto | onnx.FunctionProto @@ -781,6 +834,7 @@ def export2python( rename: bool = False, use_operators: bool = False, inline_const: bool = False, + skip_initializers: bool = False, ): """Exports an ONNX model to the *python* syntax. diff --git a/tools/onnx2script.py b/tools/onnx2script.py index 02b220799a..668bb8dded 100644 --- a/tools/onnx2script.py +++ b/tools/onnx2script.py @@ -28,11 +28,11 @@ def convert2script( - input_file_name: str, output_file_name: Optional[str], verbose: bool + input_file_name: str, output_file_name: Optional[str], verbose: bool, initializers: bool ) -> None: model = onnx.load(input_file_name, load_external_data=False) python_code = onnxscript.proto2python( - model, use_operators=not verbose, inline_const=not verbose + model, use_operators=not verbose, inline_const=not verbose, skip_initializers=not initializers ) # If output file name is not provided, use the input file name with .py extension @@ -55,6 +55,13 @@ def convert2script( help="Verbose mode, suppresses use of overloaded operators and inline constants", default=False, ) + parser.add_argument( + "-i", + "--initializers", + action="store_true", + help="Include initializers in the generated script", + default=False + ) args = parser.parse_args() - convert2script(args.input, args.output, args.verbose) + convert2script(args.input, args.output, args.verbose, args.initializers) From bccee83ce518e7ec1e32d98e7faba3d100626956 Mon Sep 17 00:00:00 2001 From: Ganesan Ramalingam Date: Thu, 26 Dec 2024 14:38:25 -0800 Subject: [PATCH 2/6] Extend onnx2script to abstract initializers --- onnxscript/backend/onnx_export.py | 18 +++++++++--------- tools/onnx2external.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 9 deletions(-) create mode 100644 tools/onnx2external.py diff --git a/onnxscript/backend/onnx_export.py b/onnxscript/backend/onnx_export.py index 040dd0177c..3c18143312 100644 --- a/onnxscript/backend/onnx_export.py +++ b/onnxscript/backend/onnx_export.py @@ -730,21 +730,21 @@ def generate_rand(x: TensorProto) -> str: indented_initializers_as_params = "\n".join( f"{_SINGLE_INDENT}{_SINGLE_INDENT}{x}," for x in init_names ) - return """ + return f""" def make_model( -{initializers_as_params}): +{initializers_as_params} ): - {script} +{script} - model = {script_function_name}.to_model_proto() - return model +{_SINGLE_INDENT}model = {script_function_name}.to_model_proto() +{_SINGLE_INDENT}return model def make_model_with_random_weights(): {random_initializer_values} - model = make_model( +{_SINGLE_INDENT}model = make_model( {indented_initializers_as_params} - ) - return model +{_SINGLE_INDENT}) +{_SINGLE_INDENT}return model """ def _import_onnx_types( @@ -869,5 +869,5 @@ def export2python( if not isinstance(model_onnx, (ModelProto, FunctionProto)): raise TypeError(f"The 
function expects a ModelProto not {type(model_onnx)!r}.") - exporter = Exporter(rename, use_operators, inline_const) + exporter = Exporter(rename, use_operators, inline_const, skip_initializers) return exporter.export(model_onnx, function_name) diff --git a/tools/onnx2external.py b/tools/onnx2external.py new file mode 100644 index 0000000000..c1954338c7 --- /dev/null +++ b/tools/onnx2external.py @@ -0,0 +1,31 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +import argparse +import os + +import onnx +import onnx.external_data_helper + + +def convert2external(input_file_name: str) -> None: + dir_name = os.path.dirname(input_file_name) + base_name, suffix = os.path.splitext(os.path.basename(input_file_name)) + model = onnx.load(input_file_name) + os.makedirs(os.path.join(dir_name, base_name), exist_ok=True) + onnx.external_data_helper.convert_model_to_external_data( + model, location="external_data.onnx", size_threshold=128 + ) + onnx.save(model, os.path.join(dir_name, base_name, "model.onnx")) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Convert ONNX model file to external data format" + ) + parser.add_argument("input", help="ONNX model file to convert") + args = parser.parse_args() + + convert2external(args.input) From 0f70bb4751c408c1cbe3ff29aa77ab0bcadad842 Mon Sep 17 00:00:00 2001 From: Ganesan Ramalingam Date: Fri, 27 Dec 2024 13:42:07 -0800 Subject: [PATCH 3/6] Minor cleanup --- onnxscript/backend/onnx_export.py | 60 ++++++++++++++++++------------- tools/onnx2script.py | 7 ++-- 2 files changed, 41 insertions(+), 26 deletions(-) diff --git a/onnxscript/backend/onnx_export.py b/onnxscript/backend/onnx_export.py index 3c18143312..a671a92493 100644 --- a/onnxscript/backend/onnx_export.py +++ b/onnxscript/backend/onnx_export.py @@ -247,11 +247,11 @@ def _cond_is_used_in_loop_body(graph: GraphProto) -> bool: return False -class Exporter: +class _Exporter: """Class used for recursive traversal of Proto structures.""" def __init__( - self, rename: bool, use_operators: bool = False, inline_const: bool = False, skip_initializers: bool = False + self, *, rename: bool, use_operators: bool, inline_const: bool, skip_initializers: bool ) -> None: self.use_operators = use_operators if rename: @@ -267,7 +267,7 @@ def __init__( # We map the multiple SSA-variants back to the same Python variable name. self._name_remappings: list[dict[str, str]] = [] self.skip_initializers = skip_initializers - self.skipped_initializers: list[onnx.TensorProto] = [] + self.skipped_initializers: dict[str, onnx.TensorProto] = {} def _handle_attrname_conflict(self, renamer): """Add ref-attr-name-conflict handling logic to renaming function.""" @@ -341,7 +341,12 @@ def _translate_graph_body(self, graph, opsets, indent=0): if hasattr(graph, "initializer"): for init in graph.initializer: if self.skip_initializers: - self.skipped_initializers.append(init) + init_py_name = self._translate_onnx_var(init.name) + if init_py_name in self.skipped_initializers: + raise RuntimeError( + f"Initializer {init.name!r} is already present in skipped_initializers." 
+ ) + self.skipped_initializers[init_py_name] = init continue node = make_node( "Constant", @@ -710,41 +715,39 @@ def add(line: str) -> None: return script def _substitute_initializers(self, script: str, script_function_name: str) -> str: - init_names = [self._translate_onnx_var(x.name) for x in self.skipped_initializers] + init_names = self.skipped_initializers.keys() # Formal parameters representing initializers (single level indentation) - initializers_as_params = "\n".join( - f"{_SINGLE_INDENT}{x}," for x in init_names - ) - def generate_rand(x: TensorProto) -> str: - name = self._translate_onnx_var(x.name) - shape = ",".join(str(d) for d in x.dims) - if x.data_type != TensorProto.FLOAT: + __ = _SINGLE_INDENT + initializers_as_params = "\n".join(f"{__}{x}," for x in init_names) + + def generate_rand(name: str, value: TensorProto) -> str: + shape = ",".join(str(d) for d in value.dims) + if value.data_type != TensorProto.FLOAT: raise NotImplementedError( - f"Unable to generate random initializer for data type {x.data_type}." + f"Unable to generate random initializer for data type {value.data_type}." ) - return f"{_SINGLE_INDENT}{name} = numpy.random.rand({shape}).astype(numpy.float32)" + return f"{__}{name} = numpy.random.rand({shape}).astype(numpy.float32)" + random_initializer_values = "\n".join( - generate_rand(x) for x in self.skipped_initializers + generate_rand(key, value) for key, value in self.skipped_initializers.items() ) # Actual parameter values for initializers (double level indentation) - indented_initializers_as_params = "\n".join( - f"{_SINGLE_INDENT}{_SINGLE_INDENT}{x}," for x in init_names - ) + indented_initializers_as_params = "\n".join(f"{__}{__}{x}," for x in init_names) return f""" def make_model( {initializers_as_params} ): {script} -{_SINGLE_INDENT}model = {script_function_name}.to_model_proto() -{_SINGLE_INDENT}return model +{__}model = {script_function_name}.to_model_proto() +{__}return model def make_model_with_random_weights(): {random_initializer_values} -{_SINGLE_INDENT}model = make_model( +{__}model = make_model( {indented_initializers_as_params} -{_SINGLE_INDENT}) -{_SINGLE_INDENT}return model +{__}) +{__}return model """ def _import_onnx_types( @@ -831,6 +834,7 @@ def visit_graph(graph: onnx.GraphProto) -> None: def export2python( model_onnx, function_name: Optional[str] = None, + *, rename: bool = False, use_operators: bool = False, inline_const: bool = False, @@ -844,6 +848,9 @@ def export2python( function_name: main function name use_operators: use Python operators. inline_const: replace ONNX constants inline if compact + skip_initializers: generated script will not include initializers. + Instead, a function that generates the model, given initializer values, is generated, + along with one that generates random values for the initializers. 
Returns: python code @@ -869,5 +876,10 @@ def export2python( if not isinstance(model_onnx, (ModelProto, FunctionProto)): raise TypeError(f"The function expects a ModelProto not {type(model_onnx)!r}.") - exporter = Exporter(rename, use_operators, inline_const, skip_initializers) + exporter = _Exporter( + rename=rename, + use_operators=use_operators, + inline_const=inline_const, + skip_initializers=skip_initializers, + ) return exporter.export(model_onnx, function_name) diff --git a/tools/onnx2script.py b/tools/onnx2script.py index 668bb8dded..7b57bf91d6 100644 --- a/tools/onnx2script.py +++ b/tools/onnx2script.py @@ -32,7 +32,10 @@ def convert2script( ) -> None: model = onnx.load(input_file_name, load_external_data=False) python_code = onnxscript.proto2python( - model, use_operators=not verbose, inline_const=not verbose, skip_initializers=not initializers + model, + use_operators=not verbose, + inline_const=not verbose, + skip_initializers=not initializers, ) # If output file name is not provided, use the input file name with .py extension @@ -60,7 +63,7 @@ def convert2script( "--initializers", action="store_true", help="Include initializers in the generated script", - default=False + default=False, ) args = parser.parse_args() From c77b7937513eee1a924e695e26783301632fa272 Mon Sep 17 00:00:00 2001 From: "G. Ramalingam" Date: Mon, 30 Dec 2024 10:29:54 -0800 Subject: [PATCH 4/6] Update tools/onnx2external.py Co-authored-by: Justin Chu --- tools/onnx2external.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/onnx2external.py b/tools/onnx2external.py index c1954338c7..603cc168f9 100644 --- a/tools/onnx2external.py +++ b/tools/onnx2external.py @@ -1,7 +1,5 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. +# Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -# -------------------------------------------------------------------------- import argparse import os From 1fc056fcb081204a0b3cce20359c5060abdec948 Mon Sep 17 00:00:00 2001 From: "G. Ramalingam" Date: Mon, 30 Dec 2024 10:30:02 -0800 Subject: [PATCH 5/6] Update tools/onnx2external.py Co-authored-by: Justin Chu --- tools/onnx2external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/onnx2external.py b/tools/onnx2external.py index 603cc168f9..1685458251 100644 --- a/tools/onnx2external.py +++ b/tools/onnx2external.py @@ -10,7 +10,7 @@ def convert2external(input_file_name: str) -> None: dir_name = os.path.dirname(input_file_name) - base_name, suffix = os.path.splitext(os.path.basename(input_file_name)) + base_name, _suffix = os.path.splitext(os.path.basename(input_file_name)) model = onnx.load(input_file_name) os.makedirs(os.path.join(dir_name, base_name), exist_ok=True) onnx.external_data_helper.convert_model_to_external_data( From 3682a07d6db0a3c6b4c52c4804950037dee6c92f Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 30 Dec 2024 12:25:03 -0800 Subject: [PATCH 6/6] Update onnxscript/backend/onnx_export.py --- onnxscript/backend/onnx_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxscript/backend/onnx_export.py b/onnxscript/backend/onnx_export.py index a671a92493..b3f695d700 100644 --- a/onnxscript/backend/onnx_export.py +++ b/onnxscript/backend/onnx_export.py @@ -747,7 +747,7 @@ def make_model_with_random_weights(): {__}model = make_model( {indented_initializers_as_params} {__}) -{__}return model +{__}return model """ def _import_onnx_types(
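
Illustrative usage note (a sketch, not part of the patches above): when initializers are skipped -- the default for tools/onnx2script.py unless -i/--initializers is passed -- the generated script nests the @script() function inside make_model(), whose parameters are the abstracted initializers, and additionally emits make_model_with_random_weights(). The file name (example_model.onnx), the generated module name (example_model.py), and the initializer parameter name (layer1_weight) below are assumptions for illustration only; the actual parameter names are derived from the converted model's initializer names.

    # Convert a model; per convert2script, the output file defaults to the
    # input file name with a .py extension:
    #     python tools/onnx2script.py example_model.onnx
    import numpy

    import example_model  # hypothetical module generated from example_model.onnx

    # Supply one concrete value per abstracted initializer to obtain a ModelProto.
    layer1_weight = numpy.zeros((16, 16), dtype=numpy.float32)
    model_proto = example_model.make_model(layer1_weight)

    # Or let the generated helper fill in random float32 values; note that
    # generate_rand only supports TensorProto.FLOAT initializers and raises
    # NotImplementedError for other data types.
    random_model_proto = example_model.make_model_with_random_weights()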