From 28901cc0e2586df72d9b7b3817790debd2fb1da9 Mon Sep 17 00:00:00 2001
From: Zonglin Peng
Date: Tue, 16 Sep 2025 14:37:32 -0700
Subject: [PATCH 1/2] limit facto tensor size to random shapes under 4000 numel

Differential Revision: D82483921
---
 backends/cadence/utils/facto_util.py | 62 +++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 2 deletions(-)

diff --git a/backends/cadence/utils/facto_util.py b/backends/cadence/utils/facto_util.py
index 5b204e99fcb..2ab5f731210 100644
--- a/backends/cadence/utils/facto_util.py
+++ b/backends/cadence/utils/facto_util.py
@@ -23,8 +23,66 @@
 
 
 def apply_tensor_contraints(op_name: str, index: int) -> list[object]:
-    # Constraint to limit tensor size product to < 4000
-    max_size_constraint = cp.Size.Le(lambda deps, r, d: max(1, int((3999) ** (1 / r))))
+    # Constraint to limit tensor size product to < 4000 with fully randomized shapes
+    import random
+
+    # Global cache to store generated shapes per tensor to ensure consistency
+    _shape_cache: dict[str, list[int]] = {}
+
+    def generate_random_shape_with_product_limit(
+        rank: int, max_product: int = 3999, seed_base: int = 42
+    ) -> list[int]:
+        """Generate a random shape with given rank ensuring product < max_product"""
+        random.seed(seed_base + rank)
+
+        # Start with all dimensions as 1
+        shape = [1] * rank
+        remaining_product = max_product - 1  # Leave room since we start with product=1
+
+        # Randomly distribute the remaining capacity across dimensions
+        for i in range(rank):
+            if remaining_product <= 1:
+                break
+
+            # Calculate the maximum size this dimension can have without exceeding the limit
+            current_product = 1
+            for j in range(rank):
+                if j != i:
+                    current_product *= shape[j]
+
+            max_size_for_dim = min(
+                remaining_product // current_product, 50
+            )  # Cap at 50
+            if max_size_for_dim > shape[i]:
+                # Randomly choose a size between current and max
+                new_size = random.randint(shape[i], max_size_for_dim)
+                shape[i] = new_size
+                remaining_product = max_product // (current_product * new_size)
+                remaining_product = max(1, remaining_product)
+
+        # Shuffle the dimensions so larger sizes are not biased toward the front
+        random.shuffle(shape)
+        return shape
+
+    def random_size_constraint(deps: object, r: int, d: int) -> int:
+        """Generate random sizes ensuring total product < 4000"""
+        # Create a unique key for this tensor configuration
+        cache_key = f"{r}_{d}"
+
+        if cache_key not in _shape_cache:
+            # Generate a new random shape for this rank
+            shape = generate_random_shape_with_product_limit(
+                r, max_product=3999, seed_base=42 + r * 10
+            )
+            _shape_cache[cache_key] = shape
+
+        # Return the size for dimension d, ensuring we don't go out of bounds
+        cached_shape = _shape_cache[cache_key]
+        return cached_shape[d] if d < len(cached_shape) else 1
+
+    max_size_constraint = cp.Size.Le(
+        lambda deps, r, d: random_size_constraint(deps, r, d)
+    )
 
     tensor_constraints = (
         [
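A quick way to sanity-check PATCH 1 is to replay its shape-generation idea in isolation. The sketch below is illustrative and not part of the patch: random_shape_under_numel is a condensed rewrite of generate_random_shape_with_product_limit (it uses a local random.Random where the patch reseeds the global RNG), and it asserts the intended numel bound:

import math
import random


def random_shape_under_numel(rank: int, max_numel: int = 3999, seed: int = 42) -> list[int]:
    """Condensed stand-in for generate_random_shape_with_product_limit."""
    rng = random.Random(seed + rank)  # local RNG; the patch reseeds the global one
    shape = [1] * rank
    for i in range(rank):
        others = math.prod(shape) // shape[i]  # product of the other dimensions
        cap = min(max_numel // others, 50)  # same per-dimension cap of 50
        if cap > shape[i]:
            shape[i] = rng.randint(shape[i], cap)
    rng.shuffle(shape)  # avoid biasing large sizes toward leading dims
    return shape


for rank in range(1, 6):
    shape = random_shape_under_numel(rank)
    assert math.prod(shape) <= 3999, shape
    print(rank, shape, math.prod(shape))

Because each dimension is bounded by max_numel divided by the product of the other dimensions, the final product can never exceed 3999, which is the invariant the constraint relies on.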
From ddc7ace5287e343839f49f14144c5c09d78874f9 Mon Sep 17 00:00:00 2001
From: Zonglin Peng
Date: Tue, 16 Sep 2025 14:37:32 -0700
Subject: [PATCH 2/2] limit facto to 4000 bytes rather than numel

Differential Revision: D82483935
---
 backends/cadence/utils/facto_util.py | 60 ++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/backends/cadence/utils/facto_util.py b/backends/cadence/utils/facto_util.py
index 2ab5f731210..173f543a46e 100644
--- a/backends/cadence/utils/facto_util.py
+++ b/backends/cadence/utils/facto_util.py
@@ -22,26 +22,50 @@
 MAX_CASES = 50
 
 
+# Global cache to store generated shapes per tensor to ensure consistency
+_shape_cache: dict[str, list[int]] = {}
+
+
 def apply_tensor_contraints(op_name: str, index: int) -> list[object]:
-    # Constraint to limit tensor size product to < 4000 with fully randomized shapes
+    # Constraint to limit tensor size to < 4000 bytes with fully randomized shapes
     import random
 
-    # Global cache to store generated shapes per tensor to ensure consistency
-    _shape_cache: dict[str, list[int]] = {}
+    def get_dtype_bytes(dtype: torch.dtype) -> int:
+        """Get the number of bytes per element for a given dtype"""
+        dtype_bytes = {
+            torch.int8: 1,
+            torch.uint8: 1,
+            torch.int16: 2,
+            torch.uint16: 2,
+            torch.int32: 4,
+            torch.float32: 4,
+            torch.int64: 8,
+            torch.float64: 8,
+            torch.bool: 1,
+            torch.float: 4,  # alias for float32
+            torch.int: 4,  # alias for int32
+            torch.long: 8,  # alias for int64
+        }
+        return dtype_bytes.get(dtype, 4)  # Default to 4 bytes if dtype not found
 
-    def generate_random_shape_with_product_limit(
-        rank: int, max_product: int = 3999, seed_base: int = 42
+    def generate_random_shape_with_byte_limit(
+        rank: int, dtype: torch.dtype, max_bytes: int = 3999, seed_base: int = 42
     ) -> list[int]:
-        """Generate a random shape with given rank ensuring product < max_product"""
+        """Generate a random shape with given rank ensuring total byte size < max_bytes"""
         random.seed(seed_base + rank)
 
+        bytes_per_element = get_dtype_bytes(dtype)
+        max_elements = max_bytes // bytes_per_element
+
         # Start with all dimensions as 1
         shape = [1] * rank
-        remaining_product = max_product - 1  # Leave room since we start with product=1
+        remaining_elements = (
+            max_elements - 1
+        )  # Leave room since we start with product=1
 
         # Randomly distribute the remaining capacity across dimensions
         for i in range(rank):
-            if remaining_product <= 1:
+            if remaining_elements <= 1:
                 break
 
             # Calculate the maximum size this dimension can have without exceeding the limit
@@ -51,28 +75,32 @@ def generate_random_shape_with_product_limit(
                 current_product *= shape[j]
 
             max_size_for_dim = min(
-                remaining_product // current_product, 50
+                remaining_elements // current_product, 50
             )  # Cap at 50
             if max_size_for_dim > shape[i]:
                 # Randomly choose a size between current and max
                 new_size = random.randint(shape[i], max_size_for_dim)
                 shape[i] = new_size
-                remaining_product = max_product // (current_product * new_size)
-                remaining_product = max(1, remaining_product)
+                remaining_elements = max_elements // (current_product * new_size)
+                remaining_elements = max(1, remaining_elements)
 
         # Shuffle the dimensions so larger sizes are not biased toward the front
         random.shuffle(shape)
         return shape
 
     def random_size_constraint(deps: object, r: int, d: int) -> int:
-        """Generate random sizes ensuring total product < 4000"""
+        """Generate random sizes ensuring total byte size < 4000 bytes"""
+        # Conservative approach: assume 4 bytes per element (float32/int32) so
+        # that any dtype of at most 4 bytes stays within the 4000-byte budget
+        worst_case_dtype = torch.float32  # 4 bytes per element
+
        # Create a unique key for this tensor configuration
-        cache_key = f"{r}_{d}"
+        cache_key = f"{r}_{d}_conservative"
 
         if cache_key not in _shape_cache:
-            # Generate a new random shape for this rank
-            shape = generate_random_shape_with_product_limit(
-                r, max_product=3999, seed_base=42 + r * 10
+            # Generate a new random shape for this rank using worst-case byte estimation
+            shape = generate_random_shape_with_byte_limit(
+                r, worst_case_dtype, max_bytes=3999, seed_base=42 + r * 10
             )
             _shape_cache[cache_key] = shape
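PATCH 2 swaps the numel budget for a byte budget derived from the dtype. The standalone sketch below is illustrative and not part of the patch: it cross-checks the hand-written dtype table against PyTorch's element_size() (assuming a PyTorch recent enough to define torch.uint16) and walks through the worst-case budget arithmetic:

import torch

MAX_BYTES = 3999
WORST_CASE_BYTES = 4  # the patch assumes float32/int32 as the worst case

# Same entries as get_dtype_bytes, verified against element_size()
dtype_bytes = {
    torch.int8: 1, torch.uint8: 1, torch.int16: 2, torch.uint16: 2,
    torch.int32: 4, torch.float32: 4, torch.int64: 8, torch.float64: 8,
    torch.bool: 1,
}
for dtype, nbytes in dtype_bytes.items():
    assert torch.empty(0, dtype=dtype).element_size() == nbytes, dtype

# Element budget under the conservative 4-byte assumption
max_elements = MAX_BYTES // WORST_CASE_BYTES  # 999 elements
shape = [9, 10, 11]  # example shape: 990 elements
numel = 1
for size in shape:
    numel *= size
assert numel <= max_elements
print(shape, "->", numel * WORST_CASE_BYTES, "bytes worst case")  # 3960

Note that the 4-byte assumption is only conservative for dtypes up to 4 bytes wide: an int64 or float64 tensor of 999 elements would occupy 7992 bytes, so the budget strictly holds for the 1-, 2-, and 4-byte dtypes in the table.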