Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion onnxscript/optimizer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,17 +111,46 @@ def optimize(
return model


# Re-export the constant-folding size limits so that callers of `optimize_ir`
# can use the same defaults without importing the private
# `_constant_folding` module themselves.
_DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = (
    _constant_folding._DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT
)

_DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = (
    _constant_folding._DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT
)


def optimize_ir(
    model: ir.Model,
    num_iterations: int = 2,
    *,
    onnx_shape_inference: bool = True,
    stop_if_no_change: bool = True,
    input_size_limit: int = _DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT,
    output_size_limit: int = _DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT,
) -> None:
    """Optimizes a model in place.

    Args:
        model: The model to be optimized.
        num_iterations: Number of times the optimization loop is repeated.
        onnx_shape_inference: Applies node-level shape-inference as part of optimization.
        stop_if_no_change: Not supported currently (has no effect). Meant to stop the
            outer optimization loop if no change is detected in one iteration.
        input_size_limit: Will not apply constant folding to ops with any input of size
            greater than this. Does not apply to special ops like Shape() and Size().
        output_size_limit: Will not rewrite any foldable-op into a Constant op if the size
            of the output tensor is greater than this.
    """
    del stop_if_no_change  # Looks like rewriter doesn't support this yet.
    # Inline model-local functions first so the passes below see the full graph.
    _inliner.inline(model)
    for _ in range(num_iterations):
        # Fold constants once per iteration, honoring the caller's size limits.
        _constant_folding.fold_constants(
            model,
            onnx_shape_inference=onnx_shape_inference,
            input_size_limit=input_size_limit,
            output_size_limit=output_size_limit,
        )
        rewriter.rewrite(model, pattern_rewrite_rules=_DEFAULT_REWRITE_RULES)
        remove_unused_nodes(model)

Expand Down
35 changes: 28 additions & 7 deletions onnxscript/optimizer/_constant_folding.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ def is_constant_op(node: ir.Node) -> bool:
)


# Constant folding skips ops whose inputs exceed this element count
# (Shape/Size-like ops excepted; see the caller-facing docs in optimizer).
_DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 1024

# Folded results larger than this are computed but not stored as Constant ops;
# the legacy single limit now applies to outputs only.
_DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = constant_folding._DEFAULT_CONSTANT_FOLD_SIZE_LIMIT

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -550,11 +552,16 @@ class ConstantFolder:

def __init__(
    self,
    *,
    external_data_folder: str,
    shape_inference: bool,
    input_size_limit: int,
    output_size_limit: int,
) -> None:
    """Initializes the constant folder.

    Args:
        external_data_folder: Folder used to resolve externally-stored tensor data.
        shape_inference: Whether to run incremental node-level shape inference
            while folding.
        input_size_limit: Skip folding ops with any input larger than this
            (element count).
        output_size_limit: Do not store folded results larger than this as
            Constant ops.
    """
    self._external_data_folder = external_data_folder
    self._shape_inference = shape_inference
    self._input_size_limit = input_size_limit
    self._output_size_limit = output_size_limit
    self._init()

def _init(self) -> None:
Expand Down Expand Up @@ -632,7 +639,7 @@ def new_constant(self, irvalue: ir.Value, value):

irvalue.const_value = _convenience.tensor(value)

if value.nbytes > _DEFAULT_CONSTANT_FOLD_SIZE_LIMIT:
if value.nbytes > self._output_size_limit:
logger.info(
"Skip storing constant folded nvalue %s due to large size %s.",
irvalue.name,
Expand Down Expand Up @@ -667,7 +674,7 @@ def process_node(self, node: ir.Node):
# TODO(rama): consider merging type/other info from both values

# Do incremental shape inference
if self._do_shape_inference and not is_control_flow_op(node):
if self._shape_inference and not is_control_flow_op(node):
self._do_inference(node)

if node.domain not in self.opset_imports:
Expand Down Expand Up @@ -696,6 +703,16 @@ def process_node(self, node: ir.Node):
if any(x is None for x in input_values):
return None

if any(input.size > self._input_size_limit for input in input_values): # type: ignore[union-attr]
if logger.isEnabledFor(logging.DEBUG):
input_sizes = [input.size for input in input_values] # type: ignore[union-attr]
logger.debug(
"Skipping constant folding for op %s due to large input size: %s",
node.op_type,
input_sizes,
)
return None

# Filter out bfloat16 cases?
def convert(av):
if av.type == ir.AttributeType.TENSOR:
Expand Down Expand Up @@ -770,14 +787,18 @@ def fold_constants(
external_data_folder: str = "",
*,
onnx_shape_inference: bool = False,
input_size_limit: int = _DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT,
output_size_limit: int = _DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT,
) -> bool:
"""
Applies constant folding optimization to the model.
Returns true iff the model was modified.
"""
folder = ConstantFolder(
external_data_folder,
onnx_shape_inference,
external_data_folder=external_data_folder,
shape_inference=onnx_shape_inference,
input_size_limit=input_size_limit,
output_size_limit=output_size_limit,
)
folder.visit_model(model)
for op in folder.counts:
Expand Down
Loading