From 5e6d7291b4d1bd952dbddc1e1cc34e61929f3f95 Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Thu, 16 May 2024 22:35:02 +0000
Subject: [PATCH 01/10] [IR][fix] Save value info for initializers

---
 onnxscript/ir/serde.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/onnxscript/ir/serde.py b/onnxscript/ir/serde.py
index 3e0b51a2ca..134cb20745 100644
--- a/onnxscript/ir/serde.py
+++ b/onnxscript/ir/serde.py
@@ -1093,17 +1093,20 @@ def serialize_graph_into(
         graph_proto.doc_string = from_.doc_string
     for input_ in from_.inputs:
         serialize_value_into(graph_proto.input.add(), input_)
+    input_names = {input_.name for input_ in from_.inputs}
     # TODO(justinchuby): Support sparse_initializer
-    for initializer in from_.initializers.values():
-        if initializer.const_value is None:
+    for value in from_.initializers.values():
+        if _should_create_value_info_for_value(value) and value.name not in input_names:
+            # Serialize information about all initializers into value_info,
+            # except for those that are also graph inputs
+            serialize_value_into(graph_proto.value_info.add(), value)
+        if value.const_value is None:
             # Skip initializers without constant values
-            logger.warning(
-                "Initializer '%s' does not have a constant value set.", initializer.name
-            )
+            logger.warning("Initializer '%s' does not have a constant value set.", value.name)
             continue
         # Make sure the tensor's name is the same as the value's name
-        initializer.const_value.name = initializer.name
-        serialize_tensor_into(graph_proto.initializer.add(), from_=initializer.const_value)
+        value.const_value.name = value.name
+        serialize_tensor_into(graph_proto.initializer.add(), from_=value.const_value)
     for node in from_:
         serialize_node_into(graph_proto.node.add(), from_=node)
         for node_output in node.outputs:

From 3533a654d3c4b53f046d839f7246fb074a873352 Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Fri, 17 May 2024 00:46:59 +0000
Subject: [PATCH 02/10] Update deser

---
 onnxscript/ir/serde.py | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/onnxscript/ir/serde.py b/onnxscript/ir/serde.py
index 134cb20745..910d08e913 100644
--- a/onnxscript/ir/serde.py
+++ b/onnxscript/ir/serde.py
@@ -527,38 +527,55 @@ def _deserialize_graph(
     for info, value in zip(proto.input, inputs):
         deserialize_value_info_proto(info, value)
 
+    # Build the value info dictionary to allow for quick lookup for this graph scope
+    value_info = {info.name: info for info in proto.value_info}
+
     # Initialize the values dictionary for this graph scope with the inputs and initializers
     values: dict[str, _core.Value] = {v.name: v for v in inputs}  # type: ignore[misc]
+
+    # Enter the graph scope by pushing the values for this scope to the stack
     scoped_values.append(values)
+
     initializer_values = []
-    for tensor in initializer_tensors:
-        if tensor.name in values:
+    for i, tensor in enumerate(initializer_tensors):
+        initializer_name = tensor.name
+        if not initializer_name:
+            logger.warning(
+                "Initializer tensor must have a name but the %s-th initializer does not. Skipping this initializer.",
+                i,
+            )
+            continue
+        if initializer_name in values:
             # The initializer is for an input
-            initializer_value = values[tensor.name]
+            initializer_value = values[initializer_name]
             initializer_value.const_value = tensor
         else:
             # The initializer is for some other value. Create this value first
             initializer_value = _core.Value(
                 None,
                 index=None,
-                name=tensor.name,
-                # TODO(justinchuby): Fix type hinting for shape and dtype
-                shape=tensor.shape,  # type: ignore
-                type=_core.TensorType(tensor.dtype),
+                name=initializer_name,
+                # Do not include shape or type as we need to respect the ONNX file
+                # if the shape or type is not provided as ValueInfoProto
+                # The shape/type information will be filled in in the subsequent ValueInfoProto
+                # deserialization step
                 const_value=tensor,
             )
-            values[tensor.name] = initializer_value  # type: ignore[index]
+            if initializer_name in value_info:
+                # This is where we fill in the shape and type information for the initializer
+                deserialize_value_info_proto(value_info[initializer_name], initializer_value)
+            values[initializer_name] = initializer_value  # type: ignore[index]
         initializer_values.append(initializer_value)
 
-    # Add ValueInfos for this graph scope
-    value_info = {info.name: info for info in proto.value_info}
-
     # Deserialize nodes with all known values
     nodes = [_deserialize_node(node, scoped_values, value_info) for node in proto.node]
 
     # Fill in values for graph outputs
     outputs = [deserialize_value_info_proto(info, values[info.name]) for info in proto.output]
+
+    # Exit the graph scope by popping the values for this scope from the stack
     scoped_values.pop()
+
     return _core.Graph(
         inputs,
         outputs,

From ee38b6d4dbc05a2959e8bc48f8e6d9cbd80da095 Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Thu, 16 May 2024 17:52:16 -0700
Subject: [PATCH 03/10] Update onnxscript/ir/serde.py

---
 onnxscript/ir/serde.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/onnxscript/ir/serde.py b/onnxscript/ir/serde.py
index 910d08e913..9ba8979142 100644
--- a/onnxscript/ir/serde.py
+++ b/onnxscript/ir/serde.py
@@ -1122,6 +1122,7 @@ def serialize_graph_into(
             logger.warning("Initializer '%s' does not have a constant value set.", value.name)
             continue
         # Make sure the tensor's name is the same as the value's name
+        # TODO(#1554): Handle tensor alias better
         value.const_value.name = value.name
         serialize_tensor_into(graph_proto.initializer.add(), from_=value.const_value)
     for node in from_:

From 5156668cf56b3115aff64e98343118e85f1cfef0 Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchu@microsoft.com>
Date: Fri, 24 May 2024 18:27:29 +0000
Subject: [PATCH 04/10] snap

---
 .../instance_to_group_normalization.py        | 35 ++++++++++---------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/onnxscript/rewriter/onnxruntime/instance_to_group_normalization.py b/onnxscript/rewriter/onnxruntime/instance_to_group_normalization.py
index 559033a7cb..7923b50c5a 100644
--- a/onnxscript/rewriter/onnxruntime/instance_to_group_normalization.py
+++ b/onnxscript/rewriter/onnxruntime/instance_to_group_normalization.py
@@ -5,6 +5,7 @@
 import numpy as np
 import onnx
 
+from onnxscript import ir
 from onnxscript.rewriter import _ir_utils, pattern
 
 torch_module_op = pattern.torch_module_op
@@ -12,15 +13,15 @@
 logger = logging.getLogger(__name__)
 
 
-def check_if_simulated_instance_norm_is_used(
+def _simulated_instance_norm(
     context,
-    input_x,
-    adjusted_input_shape,
-    original_input_shape,
-    weight_for_norm,
-    bias_for_norm,
-    weight_full,
-    bias_full,
+    input_x: ir.Value,
+    adjusted_input_shape: ir.Value,
+    original_input_shape: ir.Value,
+    weight_for_norm: ir.Value,
+    bias_for_norm: ir.Value,
+    weight_full: ir.Value,
+    bias_full: ir.Value,
     **_,
 ) -> bool:
     """Check if the simulated instance normalization is used.
@@ -38,16 +39,16 @@ def check_if_simulated_instance_norm_is_used(
     6. original_input_shape is the same as input_x shape.
 
     Returns:
-        bool: True if the simulated instance normalization is used, False otherwise.
+        True if the simulated instance normalization is used, False otherwise.
     """
-    weight_for_norm_prop = _ir_utils.propagate_const_value(weight_for_norm)
-    weight_for_norm_const_value = weight_for_norm_prop.const_value
+    _ir_utils.propagate_const_value(weight_for_norm)
+    weight_for_norm_const_value = weight_for_norm.const_value
     if weight_for_norm_const_value is None:
         return False
     weight_for_norm = weight_for_norm_const_value.numpy()
 
-    bias_for_norm_prop = _ir_utils.propagate_const_value(bias_for_norm)
-    bias_for_norm_const_value = bias_for_norm_prop.const_value
+    _ir_utils.propagate_const_value(bias_for_norm)
+    bias_for_norm_const_value = bias_for_norm.const_value
     if bias_for_norm_const_value is None:
         return False
     bias_for_norm = bias_for_norm_const_value.numpy()
@@ -57,7 +58,7 @@ def check_if_simulated_instance_norm_is_used(
     if not np.all(bias_for_norm == 0):
         return False
 
-    input_rank_minus_one = len(input_x.shape) - 1
+    input_rank_minus_one = input_x.shape.rank() - 1
     weight_full_rank = len(weight_full.shape)
     bias_full_rank = len(bias_full.shape)
     if weight_full_rank != input_rank_minus_one or bias_full_rank != input_rank_minus_one:
@@ -74,7 +75,7 @@ def check_if_simulated_instance_norm_is_used(
     if not all(dim == 1 for dim in bias_full_shape[1:]):
         return False
 
-    adjusted_input_shape = _ir_utils.propagate_const_value(adjusted_input_shape)
+    _ir_utils.propagate_const_value(adjusted_input_shape)
     adjusted_input_shape_const_value = adjusted_input_shape.const_value
 
     g = weight_for_norm.shape[0]
@@ -85,7 +86,7 @@ def check_if_simulated_instance_norm_is_used(
         return False
 
     # NOTE: Restrict the rule to only support constant shape
-    original_input_shape = _ir_utils.propagate_const_value(original_input_shape)
+    _ir_utils.propagate_const_value(original_input_shape)
     original_input_shape_const_value = original_input_shape.const_value
     if (
         original_input_shape_const_value is None
@@ -149,7 +150,7 @@ def group_normalization(op, input_x, weight_for_norm, weight_full, bias_full, ep
 instance_norm_to_group_norm_rule = pattern.RewriteRule(
     instance_simulates_group_normalization_pattern,
     group_normalization,
-    check_if_simulated_instance_norm_is_used,
+    _simulated_instance_norm,
 )
 
 # NOTE: instance_norm_to_group_norm_rule is subset of instance_norm_to_group_norm_with_silu_rule,

From a55451726289944cb5b3b000f4557f5ba0fd559a Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Fri, 28 Mar 2025 10:27:51 -0700
Subject: [PATCH 05/10] revert

---
 .../instance_to_group_normalization.py        | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/onnxscript/rewriter/ort_fusions/instance_to_group_normalization.py b/onnxscript/rewriter/ort_fusions/instance_to_group_normalization.py
index 3b6f31b443..fa0f67c5e8 100644
--- a/onnxscript/rewriter/ort_fusions/instance_to_group_normalization.py
+++ b/onnxscript/rewriter/ort_fusions/instance_to_group_normalization.py
@@ -14,15 +14,15 @@
 logger = logging.getLogger(__name__)
 
 
-def _simulated_instance_norm(
+def check_if_simulated_instance_norm_is_used(
     context,
-    input_x: ir.Value,
-    adjusted_input_shape: ir.Value,
-    original_input_shape: ir.Value,
-    weight_for_norm: ir.Value,
-    bias_for_norm: ir.Value,
-    weight_full: ir.Value,
-    bias_full: ir.Value,
+    input_x,
+    adjusted_input_shape,
+    original_input_shape,
+    weight_for_norm,
+    bias_for_norm,
+    weight_full,
+    bias_full,
     **_,
 ) -> bool:
     """Check if the simulated instance normalization is used.
@@ -40,7 +40,7 @@ def _simulated_instance_norm(
     6. original_input_shape is the same as input_x shape.
 
     Returns:
-        True if the simulated instance normalization is used, False otherwise.
+        bool: True if the simulated instance normalization is used, False otherwise.
     """
     weight_for_norm_const_value = weight_for_norm.const_value
     if weight_for_norm_const_value is None:
@@ -57,7 +57,7 @@ def _simulated_instance_norm(
     if not np.all(bias_for_norm == 0):
         return False
 
-    input_rank_minus_one = input_x.shape.rank() - 1
+    input_rank_minus_one = len(input_x.shape) - 1
     weight_full_rank = len(weight_full.shape)
     bias_full_rank = len(bias_full.shape)
     if weight_full_rank != input_rank_minus_one or bias_full_rank != input_rank_minus_one:
@@ -147,7 +147,7 @@ def group_normalization(op, input_x, weight_for_norm, weight_full, bias_full, ep
 instance_norm_to_group_norm_rule = pattern.RewriteRule(
     instance_simulates_group_normalization_pattern,
     group_normalization,
-    _simulated_instance_norm,
+    check_if_simulated_instance_norm_is_used,
 )
 
 # NOTE: instance_norm_to_group_norm_rule is subset of instance_norm_to_group_norm_with_silu_rule,

From c934e526056cfff5279b0d9071312b97c980588a Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Fri, 18 Apr 2025 20:42:47 -0700
Subject: [PATCH 06/10] Update onnxscript/ir/serde.py

---
 onnxscript/ir/serde.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/onnxscript/ir/serde.py b/onnxscript/ir/serde.py
index b917134313..53bc038e94 100644
--- a/onnxscript/ir/serde.py
+++ b/onnxscript/ir/serde.py
@@ -1314,7 +1314,6 @@ def serialize_graph_into(
             logger.warning("Initializer '%s' does not have a constant value set.", value.name)
             continue
         # Make sure the tensor's name is the same as the value's name
-        # TODO(#1554): Handle tensor alias better
         value.const_value.name = value.name
         serialize_tensor_into(graph_proto.initializer.add(), from_=value.const_value)
     for node in from_:

From a4c35fb5edd0b2b56b13ed0a7acdffbff5c406cb Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Fri, 18 Apr 2025 20:47:36 -0700
Subject: [PATCH 07/10] reorder

---
 onnxscript/ir/serde.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/onnxscript/ir/serde.py b/onnxscript/ir/serde.py
index 53bc038e94..14e2a932fa 100644
--- a/onnxscript/ir/serde.py
+++ b/onnxscript/ir/serde.py
@@ -656,17 +656,17 @@ def _deserialize_graph(
                 # Do not include shape or type as we need to respect the ONNX file
                 # if the shape or type is not provided as ValueInfoProto
                 # The shape/type information will be filled in in the subsequent ValueInfoProto
-                # deserialization step
+                # deserialization step (deserialize_value_info_proto)
                 const_value=tensor,
             )
             if initializer_name in value_info:
                 # This is where we fill in the shape and type information for the initializer
                 deserialize_value_info_proto(value_info[initializer_name], initializer_value)
-            values[initializer_name] = initializer_value  # type: ignore[index]
             if initializer_value.name in quantization_annotations:
                 _deserialize_quantization_annotation(
                     quantization_annotations[initializer_value.name], initializer_value
                 )
+            values[initializer_name] = initializer_value
         initializer_values.append(initializer_value)
 
     # Deserialize nodes with all known values

From 75ea900be58848d012d0ef3a17dc8576b6ecb801 Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Fri, 18 Apr 2025 20:59:43 -0700
Subject: [PATCH 08/10] update

---
 onnxscript/ir/serde.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/onnxscript/ir/serde.py b/onnxscript/ir/serde.py
index 14e2a932fa..def60ec5bd 100644
--- a/onnxscript/ir/serde.py
+++ b/onnxscript/ir/serde.py
@@ -625,9 +625,6 @@ def _deserialize_graph(
         if value.name in quantization_annotations:
             _deserialize_quantization_annotation(quantization_annotations[value.name], value)
 
-    # Build the value info dictionary to allow for quick lookup for this graph scope
-    value_info = {info.name: info for info in proto.value_info}
-
     # Initialize the values dictionary for this graph scope with the inputs and initializers
     values: dict[str, _core.Value] = {v.name: v for v in inputs}  # type: ignore[misc]
 
@@ -653,15 +650,12 @@ def _deserialize_graph(
                 None,
                 index=None,
                 name=initializer_name,
-                # Do not include shape or type as we need to respect the ONNX file
-                # if the shape or type is not provided as ValueInfoProto
-                # The shape/type information will be filled in in the subsequent ValueInfoProto
-                # deserialization step (deserialize_value_info_proto)
+                # Include shape and type even if the shape or type is not provided as ValueInfoProto.
+                # Users expect initialized values to have shape and type information.
+                type=_core.TensorType(tensor.dtype),
+                shape=tensor.shape,  # type: ignore[arg-type]
                 const_value=tensor,
             )
-            if initializer_name in value_info:
-                # This is where we fill in the shape and type information for the initializer
-                deserialize_value_info_proto(value_info[initializer_name], initializer_value)
             if initializer_value.name in quantization_annotations:
                 _deserialize_quantization_annotation(
                     quantization_annotations[initializer_value.name], initializer_value
@@ -669,6 +663,9 @@ def _deserialize_graph(
             values[initializer_name] = initializer_value
         initializer_values.append(initializer_value)
 
+    # Build the value info dictionary to allow for quick lookup for this graph scope
+    value_info = {info.name: info for info in proto.value_info}
+
     # Deserialize nodes with all known values
     nodes = [
         _deserialize_node(node, scoped_values, value_info, quantization_annotations)

From fcc460f212d8c6d1ffdac15a055a82c9d69ad353 Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Fri, 18 Apr 2025 21:01:58 -0700
Subject: [PATCH 09/10] Update models to include value info for initializers

---
 .../dynamo/Speech2Text2ForCausalLM_dynamo.onnx                | 4 ++--
 .../mobilenetv2_100/dynamo/mobilenetv2_100_dynamo.onnx        | 4 ++--
 testdata/e2e_models/resnet18/dynamo/resnet18_dynamo.onnx      | 4 ++--
 testdata/e2e_models/torchscript_model/torchscript_model.onnx  | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/testdata/e2e_models/Speech2Text2ForCausalLM/dynamo/Speech2Text2ForCausalLM_dynamo.onnx b/testdata/e2e_models/Speech2Text2ForCausalLM/dynamo/Speech2Text2ForCausalLM_dynamo.onnx
index e0d380b46b..77cfc7709c 100644
--- a/testdata/e2e_models/Speech2Text2ForCausalLM/dynamo/Speech2Text2ForCausalLM_dynamo.onnx
+++ b/testdata/e2e_models/Speech2Text2ForCausalLM/dynamo/Speech2Text2ForCausalLM_dynamo.onnx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06d78f841f26ec59cea1d15dd2c2a086cb907d6644ef8dac15e6d366935413e8
-size 43087292
+oid sha256:6dcf6976f8e324c497b0b74b2b9733c4b454f2c259488f5544bbc1aaaf57714c
+size 43091738
diff --git a/testdata/e2e_models/mobilenetv2_100/dynamo/mobilenetv2_100_dynamo.onnx b/testdata/e2e_models/mobilenetv2_100/dynamo/mobilenetv2_100_dynamo.onnx
index 2eede96c91..69a9c4c073 100644
--- a/testdata/e2e_models/mobilenetv2_100/dynamo/mobilenetv2_100_dynamo.onnx
+++ b/testdata/e2e_models/mobilenetv2_100/dynamo/mobilenetv2_100_dynamo.onnx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a336102b11d8439daa2c1a164a851f34414529a5610a046943fd869b1b44336f
-size 14665355
+oid sha256:ba424976b53bf2f141bfd001b48c0cc1c5c798b49def51f39a72f17e1f74e3a2
+size 14673089
diff --git a/testdata/e2e_models/resnet18/dynamo/resnet18_dynamo.onnx b/testdata/e2e_models/resnet18/dynamo/resnet18_dynamo.onnx
index 61122be18a..a5433b830e 100644
--- a/testdata/e2e_models/resnet18/dynamo/resnet18_dynamo.onnx
+++ b/testdata/e2e_models/resnet18/dynamo/resnet18_dynamo.onnx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31fbebb580ff85ed8eefa7fb95d4e2cbda41fe267afeaae2d4f4177264d1f4e7
-size 46918368
+oid sha256:12d24be13a03ea8ddebcc5ea229390d49fb0da08ad1df896b03703c664e2def1
+size 46921843
diff --git a/testdata/e2e_models/torchscript_model/torchscript_model.onnx b/testdata/e2e_models/torchscript_model/torchscript_model.onnx
index 7d450d2b8b..dd9bd08100 100644
--- a/testdata/e2e_models/torchscript_model/torchscript_model.onnx
+++ b/testdata/e2e_models/torchscript_model/torchscript_model.onnx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efd167b736106103235f42b480027c28c798dd46117526ca49067a2bdbc7b327
-size 311182
+oid sha256:6519a87ecf89132a9d39c59c47a442ae5833faf14811575d0b2323e8d13e30a8
+size 313873

From 1d4c1bcc375582d75d16e0b85b301baffc4873b7 Mon Sep 17 00:00:00 2001
From: Justin Chu <justinchuby@users.noreply.github.com>
Date: Fri, 18 Apr 2025 21:13:42 -0700
Subject: [PATCH 10/10] _serialize_metadata_props_into

---
 onnxscript/ir/serde.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/onnxscript/ir/serde.py b/onnxscript/ir/serde.py
index def60ec5bd..bf39c1ea31 100644
--- a/onnxscript/ir/serde.py
+++ b/onnxscript/ir/serde.py
@@ -1218,24 +1218,24 @@ def _serialize_opset_imports_into(
         opset_ids.add(domain=domain, version=version)
 
 
-def _serialize_metadata_props_into(
+def _serialize_string_string_maps(
     string_string_entries: proto_containers.RepeatedCompositeFieldContainer[
         onnx.StringStringEntryProto
     ],
     from_: Mapping[str, str],
 ) -> None:
-    """Serialize metadata properties into a repeated field of string-string entries.
+    """Serialize a <str, str> mapping into a repeated field of string-string entries.
 
     Args:
         string_string_entries: The repeated field to serialize into.
-        from_: The mapping of metadata properties to serialize.
+        from_: The <str, str> mapping to serialize.
     """
     # Sort names for deterministic serialization
     for key in sorted(from_):
         string_string_entries.add(key=key, value=from_[key])
 
 
-_serialize_string_string_maps = _serialize_metadata_props_into
+_serialize_metadata_props_into = _serialize_string_string_maps
 
 
 def _maybe_add_quantization_annotation(